From 4869c23f5f9a0514f06d1b36387b4769204269e4 Mon Sep 17 00:00:00 2001 From: hubicka Date: Sun, 17 Apr 2011 14:22:20 +0000 Subject: [PATCH] * lto-symtab.c (lto_cgraph_replace_node): When call statement is present, also set gimple_call_set_cannot_inline. * ipa-inline.c: Update toplevel comment. (MAX_TIME): Remove. (cgraph_clone_inlined_nodes): Fix linebreaks. (cgraph_check_inline_limits): Restructure to ... (caller_growth_limits): ... this one; be more tolerant of growth in nested inline chains; add explanatory comment; fix stack accounting thinko introduced by previous patch. (cgraph_default_inline_p): Remove. (report_inline_failed_reason): New function. (can_inline_edge_p): New function. (can_early_inline_edge_p): New function. (leaf_node_p): Move upwards in file. (want_early_inline_function_p): New function. (want_inline_small_function_p): New function. (want_inline_self_recursive_call_p): New function. (cgraph_edge_badness): Rename to ... (edge_badness): ... this one; fix linebreaks. (update_edge_key): Update call of edge_badness; add detailed dump about queue updates. (update_caller_keys): Use can_inline_edge_p and want_inline_small_function_p. (cgraph_decide_recursive_inlining): Rename to... (recursive_inlining): Use can_inline_edge_p and want_inline_self_recursive_call_p; simplify and remove no longer valid FIXME. (cgraph_set_inline_failed): Remove. (add_new_edges_to_heap): Use can_inline_edge_p and want_inline_small_function_p. (cgraph_decide_inlining_of_small_functions): Rename to ... (inline_small_functions): ... this one; cleanup; use can/want predicates; clean up debug output; work edges till fibheap is exhausted and do not stop once unit growth is reached; remove later loop processing remaining edges. (cgraph_flatten): Rename to ... (flatten_function): ... this one; use can_inline_edge_p and can_early_inline_edge_p predicates. (cgraph_decide_inlining): Rename to ... (ipa_inline): ... this one; remove unreachable nodes before inlining functions called once; simplify the pass. (cgraph_perform_always_inlining): Rename to ... (inline_always_inline_functions): ... this one; use DECL_DISREGARD_INLINE_LIMITS; use can_inline_edge_p predicate. (cgraph_decide_inlining_incrementally): Rename to ... (early_inline_small_functions): ... this one; simplify using new predicates; cleanup; make dumps prettier. (cgraph_early_inlining): Rename to ... (early_inliner): never inline regular functions into always-inlines; fix updating of call stmt summaries. (pass_early_inline): Update for new names. (inline_transform): Fix formatting. (gate_cgraph_decide_inlining): Rename to ... (pass_ipa_inline): ... this one. * ipa-inline.h (inline_summary): Remove disregard_inline_limits. * ipa-inline-analysis.c (dump_inline_summary): Update. (compute_inline_parameters): Do not compute disregard_inline_limits; look for mismatched arguments. (estimate_growth): Fix handling of non-trivial self recursion. (inline_read_summary): Do not read info->disregard_inline_limits. (inline_write_summary): Do not write info->disregard_inline_limits. * tree-inline.c (inline_forbidden_into_p, tree_can_inline_p): Remove and move all checks into can_inline_edge_p predicate; re-enable code comparing optimization levels. (expand_call_inline): Do not test inline_forbidden_into_p. * Makefile.in (ipa-inline.o): Update arguments. * gcc.dg/winline-5.c: Update testcase.
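The heart of the refactoring listed above is the split of each inlining decision into a "can" predicate (correctness: may this edge be inlined at all?) and a "want" predicate (profitability: do the heuristics favor it?). A minimal self-contained C sketch of that control flow; the toy struct and predicates below are illustrative stand-ins, not GCC's cgraph data structures:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for GCC's cgraph_edge and inline summaries.  */
struct toy_edge { bool body_available; bool args_match; int growth; bool hot; };

/* "Can" side: correctness constraints only, in the spirit of can_inline_edge_p.  */
static bool toy_can_inline (const struct toy_edge *e)
{
  return e->body_available && e->args_match;
}

/* "Want" side: profitability only, in the spirit of want_inline_small_function_p.  */
static bool toy_want_inline (const struct toy_edge *e)
{
  return e->growth <= 0 || e->hot;
}

int main (void)
{
  struct toy_edge e = { true, true, 12, false };
  printf ("inline? %s\n",
          toy_can_inline (&e) && toy_want_inline (&e) ? "yes" : "no");
  return 0;
}

Keeping the two sides separate means every rejection has a single recorded cause in inline_failed, which is what the new report_inline_failed_reason relies on for its dumps.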
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@172609 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 71 +++ gcc/Makefile.in | 4 +- gcc/cif-code.def | 16 + gcc/ipa-inline-analysis.c | 64 +- gcc/ipa-inline.c | 1223 +++++++++++++++++++++----------------- gcc/ipa-inline.h | 2 - gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/winline-5.c | 8 - gcc/tree-inline.c | 99 --- gcc/tree-inline.h | 1 - 10 files changed, 825 insertions(+), 667 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 444ddb1..23199ca 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,74 @@ +2011-04-17 Jan Hubicka + + * lto-symtab.c (lto_cgraph_replace_node): When call statement is + present, also set gimple_call_set_cannot_inline. + * ipa-inline.c: Update toplevel comment. + (MAX_TIME): Remove. + (cgraph_clone_inlined_nodes): Fix linebreaks. + (cgraph_check_inline_limits): Restructure to ... + (caller_growth_limits): ... this one; be more tolerant + of growth in nested inline chains; add explanatory comment; + fix stack accounting thinko introduced by previous patch. + (cgraph_default_inline_p): Remove. + (report_inline_failed_reason): New function. + (can_inline_edge_p): New function. + (can_early_inline_edge_p): New function. + (leaf_node_p): Move upwards in file. + (want_early_inline_function_p): New function. + (want_inline_small_function_p): New function. + (want_inline_self_recursive_call_p): New function. + (cgraph_edge_badness): Rename to ... + (edge_badness): ... this one; fix linebreaks. + (update_edge_key): Update call of edge_badness; add + detailed dump about queue updates. + (update_caller_keys): Use can_inline_edge_p and + want_inline_small_function_p. + (cgraph_decide_recursive_inlining): Rename to... + (recursive_inlining): Use can_inline_edge_p and + want_inline_self_recursive_call_p; simplify and + remove no longer valid FIXME. + (cgraph_set_inline_failed): Remove. + (add_new_edges_to_heap): Use can_inline_edge_p and + want_inline_small_function_p. + (cgraph_decide_inlining_of_small_functions): Rename to ... + (inline_small_functions): ... this one; cleanup; use + can/want predicates; clean up debug output; work edges + till fibheap is exhausted and do not stop once unit + growth is reached; remove later loop processing remaining + edges. + (cgraph_flatten): Rename to ... + (flatten_function): ... this one; use can_inline_edge_p + and can_early_inline_edge_p predicates. + (cgraph_decide_inlining): Rename to ... + (ipa_inline): ... this one; remove unreachable nodes before + inlining functions called once; simplify the pass. + (cgraph_perform_always_inlining): Rename to ... + (inline_always_inline_functions): ... this one; use + DECL_DISREGARD_INLINE_LIMITS; use can_inline_edge_p + predicate. + (cgraph_decide_inlining_incrementally): Rename to ... + (early_inline_small_functions): ... this one; simplify + using new predicates; cleanup; make dumps prettier. + (cgraph_early_inlining): Rename to ... + (early_inliner): never inline regular functions into always-inlines; + fix updating of call stmt summaries. + (pass_early_inline): Update for new names. + (inline_transform): Fix formatting. + (gate_cgraph_decide_inlining): Rename to ... + (pass_ipa_inline): ... this one. + * ipa-inline.h (inline_summary): Remove disregard_inline_limits. + * ipa-inline-analysis.c (dump_inline_summary): Update. + (compute_inline_parameters): Do not compute disregard_inline_limits; + look for mismatched arguments. + (estimate_growth): Fix handling of non-trivial self recursion.
+ (inline_read_summary): Do not read info->disregard_inline_limits. + (inline_write_summary): Do not write info->disregard_inline_limits. + * tree-inline.c (inline_forbidden_into_p, tree_can_inline_p): Remove and + move all checks into can_inline_edge_p predicate; re-enable code comparing + optimization levels. + (expand_call_inline): Do not test inline_forbidden_into_p. + * Makefile.in (ipa-inline.o): Update arguments. + 2011-04-17 Revital Eres * ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f88d61d..a1b7859 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3028,8 +3028,8 @@ matrix-reorg.o : matrix-reorg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ipa-inline.o : ipa-inline.c gt-ipa-inline.h $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) langhooks.h $(TREE_INLINE_H) $(FLAGS_H) $(CGRAPH_H) intl.h \ $(DIAGNOSTIC_H) $(FIBHEAP_H) $(PARAMS_H) $(TIMEVAR_H) $(TREE_PASS_H) \ - $(HASHTAB_H) $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(RTL_H) $(IPA_PROP_H) \ - $(EXCEPT_H) gimple-pretty-print.h ipa-inline.h + $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(RTL_H) $(IPA_PROP_H) \ + $(EXCEPT_H) gimple-pretty-print.h ipa-inline.h $(TARGET_H) ipa-inline-analysis.o : ipa-inline-analysis.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) langhooks.h $(TREE_INLINE_H) $(FLAGS_H) $(CGRAPH_H) intl.h \ $(DIAGNOSTIC_H) $(PARAMS_H) $(TIMEVAR_H) $(TREE_PASS_H) \ diff --git a/gcc/cif-code.def b/gcc/cif-code.def index 1f3258f..d10efdf 100644 --- a/gcc/cif-code.def +++ b/gcc/cif-code.def @@ -79,6 +79,8 @@ DEFCIFCODE(OPTIMIZING_FOR_SIZE, /* Inlining failed because of mismatched options or arguments. */ DEFCIFCODE(TARGET_OPTION_MISMATCH, N_("target specific option mismatch")) +DEFCIFCODE(TARGET_OPTIMIZATION_MISMATCH, + N_("optimization level attribute mismatch")) DEFCIFCODE(MISMATCHED_ARGUMENTS, N_("mismatched arguments")) /* Call was originally indirect. */ @@ -89,4 +91,18 @@ DEFCIFCODE(ORIGINALLY_INDIRECT_CALL, DEFCIFCODE(INDIRECT_UNKNOWN_CALL, N_("indirect function call with a yet undetermined callee")) +/* We can't inline different EH personalities together. */ +DEFCIFCODE(EH_PERSONALITY, + N_("exception handling personality mismatch")) + +/* Don't inline if the callee can throw non-call exceptions but the + caller cannot. */ +DEFCIFCODE(NON_CALL_EXCEPTIONS, + N_("non-call exception handling mismatch")) + +/* We can't inline because of mismatched optimization levels.
*/ +DEFCIFCODE(OPTIMIZATION_MISMATCH, + N_("optimization mode mismatch")) + DEFCIFCODE(OVERWRITABLE, N_("function body can be overwriten at linktime")) diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index c225778..30fbcbc 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -131,7 +131,7 @@ dump_inline_summary (FILE *f, struct cgraph_node *node) struct inline_summary *s = inline_summary (node); fprintf (f, "Inline summary for %s/%i", cgraph_node_name (node), node->uid); - if (s->disregard_inline_limits) + if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) fprintf (f, " always_inline"); if (s->inlinable) fprintf (f, " inlinable"); @@ -142,7 +142,7 @@ dump_inline_summary (FILE *f, struct cgraph_node *node) fprintf (f, " global time: %i\n", s->time); fprintf (f, " self size: %i, benefit: %i\n", s->self_size, s->size_inlining_benefit); - fprintf (f, " global size: %i", s->size); + fprintf (f, " global size: %i\n", s->size); fprintf (f, " self stack: %i\n", (int)s->estimated_self_stack_size); fprintf (f, " global stack: %i\n\n", @@ -303,6 +303,17 @@ estimate_function_body_sizes (struct cgraph_node *node) struct cgraph_edge *edge = cgraph_edge (node, stmt); edge->call_stmt_size = this_size; edge->call_stmt_time = this_time; + + /* Do not inline calls where we cannot trivially work around mismatches + in argument or return types. */ + if (edge->callee + && !gimple_check_call_matching_types (stmt, edge->callee->decl)) + { + edge->call_stmt_cannot_inline_p = true; + gimple_call_set_cannot_inline (stmt, true); + } + else + gcc_assert (!gimple_call_cannot_inline_p (stmt)); } this_time *= freq; @@ -364,8 +375,6 @@ compute_inline_parameters (struct cgraph_node *node) /* Can this function be inlined at all? */ info->inlinable = tree_inlinable_function_p (node->decl); - if (!info->inlinable) - info->disregard_inline_limits = 0; /* Inlinable functions always can change signature. */ if (info->inlinable) @@ -388,8 +397,6 @@ compute_inline_parameters (struct cgraph_node *node) info->estimated_growth = INT_MIN; info->stack_frame_offset = 0; info->estimated_stack_size = info->estimated_self_stack_size; - info->disregard_inline_limits - = DECL_DISREGARD_INLINE_LIMITS (node->decl); } @@ -483,25 +490,34 @@ estimate_growth (struct cgraph_node *node) for (e = node->callers; e; e = e->next_caller) { - if (e->caller == node) + gcc_checking_assert (e->inline_failed); + + if (e->caller == node + || (e->caller->global.inlined_to + && e->caller->global.inlined_to == node)) self_recursive = true; - if (e->inline_failed) - growth += estimate_edge_growth (e); + growth += estimate_edge_growth (e); + } + + + /* For self recursive functions the growth estimation really should be + infinity. We don't want to return very large values because the growth + plays various roles in badness computation fractions. Be sure not to + return zero or negative growths. */ + if (self_recursive) + growth = growth < info->size ? info->size : growth; + else + { + if (cgraph_will_be_removed_from_program_if_no_direct_calls (node) + && !DECL_EXTERNAL (node->decl)) + growth -= info->size; + /* COMDAT functions are very often not shared across multiple units since they + come from various template instantiations. Take this into account. */ + else if (DECL_COMDAT (node->decl) + && cgraph_can_remove_if_no_direct_calls_p (node)) + growth -= (info->size + * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY)) + 50) / 100; } - - /* ??? Wrong for non-trivially self recursive functions or cases where - we decide to not inline for different reasons, but it is not big deal - as in that case we will keep the body around, but we will also avoid - some inlining. */ - if (cgraph_will_be_removed_from_program_if_no_direct_calls (node) - && !DECL_EXTERNAL (node->decl) && !self_recursive) - growth -= info->size; - /* COMDAT functions are very often not shared across multiple units since they - come from various template instantiations. Take this into account. */ - else if (DECL_COMDAT (node->decl) && !self_recursive - && cgraph_can_remove_if_no_direct_calls_p (node)) - growth -= (info->size - * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY)) + 50) / 100; info->estimated_growth = growth; return growth;
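A quick numeric check of the COMDAT discount used by the new estimate_growth code above; the sizes and the sharing probability are invented values, with a plain constant standing in for PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY):

#include <stdio.h>

int main (void)
{
  int size = 250;               /* invented info->size */
  int sharing_probability = 20; /* stand-in for PARAM_COMDAT_SHARING_PROBABILITY */
  /* Same round-to-nearest-percent formula as estimate_growth uses.  */
  int discount = (size * (100 - sharing_probability) + 50) / 100;
  printf ("COMDAT discount: %d out of %d\n", discount, size); /* prints 200 */
  return 0;
}

The "+ 50" before the division makes the integer division round to the nearest percent instead of truncating.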
@@ -621,7 +637,6 @@ inline_read_summary (void) bp = lto_input_bitpack (ib); info->inlinable = bp_unpack_value (&bp, 1); info->versionable = bp_unpack_value (&bp, 1); - info->disregard_inline_limits = bp_unpack_value (&bp, 1); } lto_destroy_simple_input_block (file_data, @@ -688,7 +703,6 @@ inline_write_summary (cgraph_node_set set, bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, info->inlinable, 1); bp_pack_value (&bp, info->versionable, 1); - bp_pack_value (&bp, info->disregard_inline_limits, 1); lto_output_bitpack (&bp); } } diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index c605eae..ecad8fa 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -21,73 +21,86 @@ along with GCC; see the file COPYING3. If not see /* Inlining decision heuristics - We separate inlining decisions from the inliner itself and store it - inside callgraph as so called inline plan. Refer to cgraph.c - documentation about particular representation of inline plans in the - callgraph. + The implementation of the inliner is organized as follows: - There are three major parts of this file: + Transformation of callgraph to represent inlining decisions. - cgraph_mark_inline_edge implementation + The inline decisions are stored in the callgraph in an "inline plan" and + are all applied later. - This function allows to mark given call inline and performs necessary - modifications of cgraph (production of the clones and updating overall - statistics) + To mark a given call inline, use the cgraph_mark_inline function. + The function marks the edge inlinable and, if necessary, produces + a virtual clone in the callgraph representing the new copy of the callee's + function body. + + The inline plan is applied to a given function body by inline_transform. inlining heuristics limits - These functions allow to check that particular inlining is allowed - by the limits specified by user (allowed function growth, overall unit - growth and so on). + can_inline_edge_p allows checking that a particular inlining is allowed + by the limits specified by the user (allowed function growth, overall unit + growth and so on). + + Functions are inlined when it is obvious the result is profitable (such + as functions called once or when inlining reduces code size). + In addition to that we perform inlining of small functions and recursive + inlining. inlining heuristics - This is implementation of IPA pass aiming to get as much of benefit - from inlining obeying the limits checked above. + The inliner itself is split into two passes: + + pass_early_inlining - The implementation of particular heuristics is separated from - the rest of code to make it easier to replace it with more complicated - implementation in the future.
The rest of inlining code acts as a - library aimed to modify the callgraph and verify that the parameters - on code size growth fits. + Simple local inlining pass inlining callees into current function. + This pass makes no use of whole unit analysis and thus it can do only + very simple decisions based on local properties. - To mark given call inline, use cgraph_mark_inline function, the - verification is performed by cgraph_default_inline_p and - cgraph_check_inline_limits. + The strength of the pass is that it is run in topological order + (reverse postorder) on the callgraph. Functions are converted into SSA + form just before this pass and optimized subsequently. As a result, the + callees of the function seen by the early inliner was already optimized + and results of early inlining adds a lot of optimization oppurtunities + for the local optimization. - The heuristics implements simple knapsack style algorithm ordering - all functions by their "profitability" (estimated by code size growth) - and inlining them in priority order. + The pass handle the obvious inlining decisions within the copmilation + unit - inlining auto inline functions, inlining for size and + flattening. - cgraph_decide_inlining implements heuristics taking whole callgraph - into account, while cgraph_decide_inlining_incrementally considers - only one function at a time and is used by early inliner. + main strength of the pass is the ability to eliminate abstraction + penalty in C++ code (via combination of inlining and early + optimization) and thus improve quality of analysis done by real IPA + optimizers. - The inliner itself is split into two passes: + Because of lack of whole unit knowledge, the pass can not really make + good code size/performance tradeoffs. It however does very simple + speculative inlining allowing code size to grow by + EARLY_INLINING_INSNS when calee is leaf function. In this case the + optimizations perfomed later are very likely to eliminate the cost. - pass_early_inlining + pass_ipa_inline - Simple local inlining pass inlining callees into current function. This - pass makes no global whole compilation unit analysis and this when allowed - to do inlining expanding code size it might result in unbounded growth of - whole unit. + This is the real inliner able to handle inlining with whole program + knowledge. It performs following steps: - The pass is run during conversion into SSA form. Only functions already - converted into SSA form are inlined, so the conversion must happen in - topological order on the callgraph (that is maintained by pass manager). - The functions after inlining are early optimized so the early inliner sees - unoptimized function itself, but all considered callees are already - optimized allowing it to unfold abstraction penalty on C++ effectively and - cheaply. + 1) inlining of small functions. This is implemented by greedy + algorithm ordering all inlinable cgraph edges by their badness and + inlining them in this order as long as inline limits allows doing so. - pass_ipa_inline + This heuristics is not very good on inlining recursive calls. Recursive + calls can be inlined with results similar to loop unrolling. To do so, + special purpose recursive inliner is executed on function when + recursive edge is met as viable candidate. - This is the main pass implementing simple greedy algorithm to do inlining - of small functions that results in overall growth of compilation unit and - inlining of functions called once. 
#include "config.h" #include "system.h" @@ -105,18 +118,15 @@ along with GCC; see the file COPYING3. If not see #include "fibheap.h" #include "intl.h" #include "tree-pass.h" -#include "hashtab.h" #include "coverage.h" #include "ggc.h" -#include "tree-flow.h" #include "rtl.h" +#include "tree-flow.h" #include "ipa-prop.h" #include "except.h" +#include "target.h" #include "ipa-inline.h" -#define MAX_TIME 1000000000 - - /* Statistics we collect about inlining algorithm. */ static int ncalls_inlined; static int nfunctions_inlined; @@ -163,11 +173,12 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, /* We may eliminate the need for out-of-line copy to be output. In that case just go ahead and re-use it. */ if (!e->callee->callers->next_caller - /* Recursive inlining never wants the master clone to be overwritten. */ + /* Recursive inlining never wants the master clone to + be overwritten. */ && update_original - /* FIXME: When address is taken of DECL_EXTERNAL function we still can remove its - offline copy, but we would need to keep unanalyzed node in the callgraph so - references can point to it. */ + /* FIXME: When address is taken of DECL_EXTERNAL function we still + can remove its offline copy, but we would need to keep unanalyzed + node in the callgraph so references can point to it. */ && !e->callee->address_taken && cgraph_can_remove_if_no_direct_calls_p (e->callee) /* Inlining might enable more devirtualizing, so we want to remove those only after all devirtualizable virtual calls are processed. Lacking may edges in callgraph we just preserve them post inlining. */ && (!DECL_VIRTUAL_P (e->callee->decl) - || (!DECL_COMDAT (e->callee->decl) && !DECL_EXTERNAL (e->callee->decl))) + || (!DECL_COMDAT (e->callee->decl) + && !DECL_EXTERNAL (e->callee->decl))) /* Don't reuse if more than one function shares a comdat group. If the other function(s) are needed, we need to emit even this function out of line. */ @@ -214,7 +226,8 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, + caller_info->estimated_self_stack_size; peak = callee_info->stack_frame_offset + callee_info->estimated_self_stack_size; - if (inline_summary (e->callee->global.inlined_to)->estimated_stack_size < peak) + if (inline_summary (e->callee->global.inlined_to)->estimated_stack_size + < peak) inline_summary (e->callee->global.inlined_to)->estimated_stack_size = peak; cgraph_propagate_frequency (e->callee); @@ -272,33 +285,52 @@ cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original, return false; } -/* Return false when inlining edge E is not good idea - as it would cause too large growth of the callers function body - or stack frame size. *REASON if non-NULL is updated if the - inlining is not a good idea.
*/ +/* Return false when inlining edge E would lead to violating + limits on function unit growth or stack usage growth. + + The relative function body growth limit is present generally + to avoid problems with non-linear behaviour of the compiler. + To allow inlining huge functions into a tiny wrapper, the limit + is always based on the bigger of the two functions considered. + + For stack growth limits we always base the growth on the stack usage + of the callers. We want to prevent applications from segfaulting + on stack overflow when functions with huge stack frames get + inlined. */ static bool -cgraph_check_inline_limits (struct cgraph_edge *e, - cgraph_inline_failed_t *reason) +caller_growth_limits (struct cgraph_edge *e) { struct cgraph_node *to = e->caller; struct cgraph_node *what = e->callee; int newsize; - int limit; - HOST_WIDE_INT stack_size_limit, inlined_stack; - struct inline_summary *info, *what_info; - - if (to->global.inlined_to) - to = to->global.inlined_to; + int limit = 0; + HOST_WIDE_INT stack_size_limit = 0, inlined_stack; + struct inline_summary *info, *what_info, *outer_info = inline_summary (to); + + /* Look for the function e->caller is inlined into. While doing + so, work out the largest function body on the way. As + described above, we want to base our function growth + limits on that; not on the self size of the + outer function, and not on the self size of the inline code + we immediately inline into. This is the most relaxed + interpretation of the rule "do not grow large functions + too much in order to prevent the compiler from exploding". */ + do + { + info = inline_summary (to); + if (limit < info->self_size) + limit = info->self_size; + if (stack_size_limit < info->estimated_self_stack_size) + stack_size_limit = info->estimated_self_stack_size; + if (to->global.inlined_to) + to = to->callers->caller; + } + while (to->global.inlined_to); - info = inline_summary (to); what_info = inline_summary (what); - /* When inlining large function body called once into small function, - take the inlined function as base for limiting the growth. */ - if (info->self_size > what_info->self_size) - limit = info->self_size; - else + if (limit < what_info->self_size) limit = what_info->self_size; limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100; @@ -310,79 +342,421 @@ cgraph_check_inline_limits (struct cgraph_edge *e, && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) && newsize > limit) { - if (reason) - *reason = CIF_LARGE_FUNCTION_GROWTH_LIMIT; + e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT; return false; } - stack_size_limit = info->estimated_self_stack_size; + /* FIXME: Stack size limit often prevents inlining in Fortran programs + due to large I/O data structures used by the Fortran frontend. + We ought to ignore this limit when we know that the edge is executed + on every invocation of the caller (i.e. its call statement dominates + the exit block). We do not track this information, yet. */ + stack_size_limit += (stack_size_limit + * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100); - stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100; - - inlined_stack = (info->stack_frame_offset - + info->estimated_self_stack_size + inlined_stack = (outer_info->stack_frame_offset + + outer_info->estimated_self_stack_size + what_info->estimated_stack_size); - if (inlined_stack > stack_size_limit + /* Compare the new stack consumption with the stack consumption at + the place the stack is used. */ + if (inlined_stack > stack_size_limit + /* If function already has large stack usage from sibling + inline call, we can inline, too. + This bit overoptimistically assumes that we are good at stack + packing. */ + && inlined_stack > info->estimated_stack_size && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME)) { - if (reason) - *reason = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT; + e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT; return false; } return true; }
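A stand-alone illustration of the body-growth limit computed by caller_growth_limits above: take the largest self size along the chain of functions the caller is already inlined into, then relax it by a percentage. The sizes are invented and the constant stands in for PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH):

#include <stdio.h>

int main (void)
{
  /* Invented self sizes along the inline chain, outermost last.  */
  int chain[] = { 40, 900, 60 };
  int large_function_growth = 100;  /* stand-in for PARAM_LARGE_FUNCTION_GROWTH */
  int limit = 0;
  for (unsigned i = 0; i < sizeof chain / sizeof chain[0]; i++)
    if (limit < chain[i])
      limit = chain[i];                       /* largest body on the way: 900 */
  limit += limit * large_function_growth / 100;
  printf ("caller growth limit: %d\n", limit); /* prints 1800 */
  return 0;
}

Basing the limit on the largest body in the chain, rather than only on the immediate caller, is what makes the new code "more tolerant of growth in nested inline chains", as the ChangeLog puts it.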
-/* Return true when function N is small enough to be inlined. */ +/* Dump info about why inlining has failed. */ + +static void +report_inline_failed_reason (struct cgraph_edge *e) +{ + if (dump_file) + { + fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n", + cgraph_node_name (e->caller), e->caller->uid, + cgraph_node_name (e->callee), e->callee->uid, + cgraph_inline_failed_string (e->inline_failed)); + } +} + +/* Decide if we can inline the edge and possibly update + the inline_failed reason. + We check whether inlining is possible at all and whether + caller growth limits allow doing so. + + If REPORT is true, output the reason to the dump file. */ static bool -cgraph_default_inline_p (struct cgraph_node *n, cgraph_inline_failed_t *reason) +can_inline_edge_p (struct cgraph_edge *e, bool report) { - tree decl = n->decl; - struct inline_summary *info = inline_summary (n); + bool inlinable = true; + tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl); + tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->callee->decl); - if (info->disregard_inline_limits) - return true; + gcc_assert (e->inline_failed); - if (!flag_inline_small_functions && !DECL_DECLARED_INLINE_P (decl)) + if (!e->callee->analyzed) + { + e->inline_failed = CIF_BODY_NOT_AVAILABLE; + inlinable = false; + } + else if (!inline_summary (e->callee)->inlinable) + { + e->inline_failed = CIF_FUNCTION_NOT_INLINABLE; + inlinable = false; + } + else if (cgraph_function_body_availability (e->callee) <= AVAIL_OVERWRITABLE) { - if (reason) - *reason = CIF_FUNCTION_NOT_INLINE_CANDIDATE; + e->inline_failed = CIF_OVERWRITABLE; return false; } - if (!n->analyzed) + else if (e->call_stmt_cannot_inline_p) + { + e->inline_failed = CIF_MISMATCHED_ARGUMENTS; + inlinable = false; + } + /* Don't inline if the functions have different EH personalities. */ + else if (DECL_FUNCTION_PERSONALITY (e->caller->decl) + && DECL_FUNCTION_PERSONALITY (e->callee->decl) + && (DECL_FUNCTION_PERSONALITY (e->caller->decl) + != DECL_FUNCTION_PERSONALITY (e->callee->decl))) + { + e->inline_failed = CIF_EH_PERSONALITY; + inlinable = false; + } + /* Don't inline if the callee can throw non-call exceptions but the + caller cannot. + FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing. + Move the flag into cgraph node or mirror it in the inline summary. */ + else if (DECL_STRUCT_FUNCTION (e->callee->decl) + && DECL_STRUCT_FUNCTION (e->callee->decl)->can_throw_non_call_exceptions + && !(DECL_STRUCT_FUNCTION (e->caller->decl) + && DECL_STRUCT_FUNCTION (e->caller->decl)->can_throw_non_call_exceptions)) + { + e->inline_failed = CIF_NON_CALL_EXCEPTIONS; + inlinable = false; + } + /* Check compatibility of target optimization options. */ + else if (!targetm.target_option.can_inline_p (e->caller->decl, + e->callee->decl)) + { + e->inline_failed = CIF_TARGET_OPTION_MISMATCH; + inlinable = false; + } + /* Check if caller growth allows the inlining.
*/ + else if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl) + && !caller_growth_limits (e)) + inlinable = false; + /* Don't inline a function with a higher optimization level than the + caller. FIXME: this is really just the tip of the iceberg of handling + the optimization attribute. */ + else if (caller_tree != callee_tree) { - if (reason) - *reason = CIF_BODY_NOT_AVAILABLE; + struct cl_optimization *caller_opt + = TREE_OPTIMIZATION ((caller_tree) + ? caller_tree + : optimization_default_node); + + struct cl_optimization *callee_opt + = TREE_OPTIMIZATION ((callee_tree) + ? callee_tree + : optimization_default_node); + + if ((caller_opt->x_optimize > callee_opt->x_optimize) + || (caller_opt->x_optimize_size != callee_opt->x_optimize_size)) + { + e->inline_failed = CIF_TARGET_OPTIMIZATION_MISMATCH; + inlinable = false; + } + } + + /* Be sure that the cannot_inline_p flag is up to date. */ + gcc_checking_assert (!e->call_stmt + || (gimple_call_cannot_inline_p (e->call_stmt) + == e->call_stmt_cannot_inline_p) + /* In -flto-partition=none mode we really keep things out of + sync because call_stmt_cannot_inline_p is set at cgraph + merging when function bodies are not there yet. */ + || (in_lto_p && !gimple_call_cannot_inline_p (e->call_stmt))); + if (!inlinable && report) + report_inline_failed_reason (e); + return inlinable; +} + + +/* Return true if the edge E is inlinable during early inlining. */ + +static bool +can_early_inline_edge_p (struct cgraph_edge *e) +{ + /* The early inliner might get called at WPA stage when an IPA pass adds a + new function. In this case we cannot really do any early inlining + because function bodies are missing. */ + if (!gimple_has_body_p (e->callee->decl)) + { + e->inline_failed = CIF_BODY_NOT_AVAILABLE; return false; } - if (cgraph_function_body_availability (n) <= AVAIL_OVERWRITABLE) + /* In the early inliner some of the callees may not be in SSA form yet + (i.e. the callgraph is cyclic and the callee has not been processed + by the early inliner yet). We don't have CIF code for this + case; later we will re-do the decision in the real inliner. */ + if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl)) + || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl))) { - if (reason) - *reason = CIF_OVERWRITABLE; + if (dump_file) + fprintf (dump_file, " edge not inlinable: not in SSA form\n"); return false; } + if (!can_inline_edge_p (e, true)) + return false; + return true; +} + + +/* Return true when N is a leaf function. Accept cheap builtins + in leaf functions. */ + +static bool +leaf_node_p (struct cgraph_node *n) +{ + struct cgraph_edge *e; + for (e = n->callees; e; e = e->next_callee) + if (!is_inexpensive_builtin (e->callee->decl)) + return false; + return true; +} + +/* Return true if we are interested in inlining the small function early. */
*/ - if (DECL_DECLARED_INLINE_P (decl)) +static bool +want_early_inline_function_p (struct cgraph_edge *e) +{ + bool want_inline = true; + + if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)) + ; + else if (!DECL_DECLARED_INLINE_P (e->callee->decl) + && !flag_inline_small_functions) + { + e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE; + report_inline_failed_reason (e); + want_inline = false; + } + else { - if (info->size >= MAX_INLINE_INSNS_SINGLE) + int growth = estimate_edge_growth (e); + if (growth <= 0) + ; + else if (!cgraph_maybe_hot_edge_p (e) + && growth > 0) + { + if (dump_file) + fprintf (dump_file, " will not early inline: %s/%i->%s/%i, " + "call is cold and code would grow by %i\n", + cgraph_node_name (e->caller), e->caller->uid, + cgraph_node_name (e->callee), e->callee->uid, + growth); + want_inline = false; + } + else if (!leaf_node_p (e->callee) + && growth > 0) { - if (reason) - *reason = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; - return false; + if (dump_file) + fprintf (dump_file, " will not early inline: %s/%i->%s/%i, " + "callee is not leaf and code would grow by %i\n", + cgraph_node_name (e->caller), e->caller->uid, + cgraph_node_name (e->callee), e->callee->uid, + growth); + want_inline = false; } + else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) + { + if (dump_file) + fprintf (dump_file, " will not early inline: %s/%i->%s/%i, " + "growth %i exceeds --param early-inlining-insns\n", + cgraph_node_name (e->caller), e->caller->uid, + cgraph_node_name (e->callee), e->callee->uid, + growth); + want_inline = false; + } + } + return want_inline; +} + +/* Return true if we are interested in inlining small function. + When REPORT is true, report reason to dump file. */ + +static bool +want_inline_small_function_p (struct cgraph_edge *e, bool report) +{ + bool want_inline = true; + + if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)) + ; + else if (!DECL_DECLARED_INLINE_P (e->callee->decl) + && !flag_inline_small_functions) + { + e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE; + want_inline = false; } else { - if (info->size >= MAX_INLINE_INSNS_AUTO) + int growth = estimate_edge_growth (e); + + if (growth <= 0) + ; + else if (DECL_DECLARED_INLINE_P (e->callee->decl) + && growth >= MAX_INLINE_INSNS_SINGLE) + { + e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; + want_inline = false; + } + else if (!DECL_DECLARED_INLINE_P (e->callee->decl) + && !flag_inline_functions) + { + e->inline_failed = CIF_NOT_DECLARED_INLINED; + want_inline = false; + } + else if (!DECL_DECLARED_INLINE_P (e->callee->decl) + && growth >= MAX_INLINE_INSNS_AUTO) + { + e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; + want_inline = false; + } + else if (!cgraph_maybe_hot_edge_p (e) + && estimate_growth (e->callee) > 0) { - if (reason) - *reason = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; - return false; + e->inline_failed = CIF_UNLIKELY_CALL; + want_inline = false; } } + if (!want_inline && report) + report_inline_failed_reason (e); + return want_inline; +} - return true; +/* EDGE is self recursive edge. + We hand two cases - when function A is inlining into itself + or when function A is being inlined into another inliner copy of function + A within function B. + + In first case OUTER_NODE points to the toplevel copy of A, while + in the second case OUTER_NODE points to the outermost copy of A in B. + + In both cases we want to be extra selective since + inlining the call will just introduce new recursive calls to appear. 
*/ static bool +want_inline_self_recursive_call_p (struct cgraph_edge *edge, + struct cgraph_node *outer_node, + bool peeling, + int depth) +{ + char const *reason = NULL; + bool want_inline = true; + int caller_freq = CGRAPH_FREQ_BASE; + int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO); + + if (DECL_DECLARED_INLINE_P (edge->callee->decl)) + max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH); + + if (!cgraph_maybe_hot_edge_p (edge)) + { + reason = "recursive call is cold"; + want_inline = false; + } + else if (max_count && !outer_node->count) + { + reason = "not executed in profile"; + want_inline = false; + } + else if (depth > max_depth) + { + reason = "--param max-inline-recursive-depth exceeded."; + want_inline = false; + } + + if (outer_node->global.inlined_to) + caller_freq = outer_node->callers->frequency; + + if (!want_inline) + ; + /* Inlining of a self recursive function into a copy of itself within another + function is a transformation similar to loop peeling. + + Peeling is profitable if we can inline enough copies to make the probability + of an actual call to the self recursive function very small. Be sure that + the probability of recursion is small. + + We ensure that the frequency of recursing is at most 1 - (1/max_depth). + This way the expected number of recursions is at most max_depth. */ + else if (peeling) + { + int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1) + / max_depth); + int i; + for (i = 1; i < depth; i++) + max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE; + if (max_count + && (edge->count * CGRAPH_FREQ_BASE / outer_node->count + >= max_prob)) + { + reason = "profile of recursive call is too large"; + want_inline = false; + } + if (!max_count + && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq + >= max_prob)) + { + reason = "frequency of recursive call is too large"; + want_inline = false; + } + } + /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the + recursion depth is large. We reduce function call overhead and increase the + chance that things fit in the hardware return predictor. + + Recursive inlining might however increase the cost of stack frame setup, + actually slowing down functions whose recursion tree is wide rather than + deep. + + Deciding reliably when to do recursive inlining without profile feedback + is tricky. For now we disable recursive inlining when the probability of self + recursion is low. + + Recursive inlining of a self recursive call within a loop also results in large loop + depths that generally optimize badly. We may want to throttle down inlining + in those cases. In particular this seems to happen in one of the libstdc++ rb tree + methods. */ + else + { + if (max_count + && (edge->count * 100 / outer_node->count + <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) + { + reason = "profile of recursive call is too small"; + want_inline = false; + } + else if (!max_count + && (edge->frequency * 100 / caller_freq + <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) + { + reason = "frequency of recursive call is too small"; + want_inline = false; + } + } + if (!want_inline && dump_file) + fprintf (dump_file, " not inlining recursively: %s\n", reason); + return want_inline; }
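The peeling cap derived in the comment above can be checked numerically. A small self-contained program computing max_prob for a few recursion depths; CGRAPH_FREQ_BASE is 1000 in GCC, while max_depth here is an invented value rather than the real --param:

#include <stdio.h>

#define CGRAPH_FREQ_BASE 1000

int main (void)
{
  int max_depth = 8;  /* invented; GCC reads this from a --param */
  /* Cap on the recursion frequency so that the expected recursion
     depth stays at most max_depth, as derived in the comment above.  */
  int max_prob = CGRAPH_FREQ_BASE
                 - ((CGRAPH_FREQ_BASE + max_depth - 1) / max_depth);
  printf ("depth 1: max_prob = %d\n", max_prob);            /* 875 */
  for (int depth = 2; depth <= 3; depth++)
    {
      max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
      printf ("depth %d: max_prob = %d\n", depth, max_prob); /* 765, 585 */
    }
  return 0;
}

Squaring the cap per level of already-performed peeling makes the allowed recursion frequency shrink quickly, so deep peeling is only permitted when the recursive edge is genuinely cold.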
*/ static int -cgraph_edge_badness (struct cgraph_edge *edge, bool dump) +edge_badness (struct cgraph_edge *edge, bool dump) { gcov_type badness; int growth; struct inline_summary *callee_info = inline_summary (edge->callee); - if (callee_info->disregard_inline_limits) + if (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)) return INT_MIN; growth = estimate_edge_growth (edge); @@ -488,7 +862,8 @@ cgraph_edge_badness (struct cgraph_edge *edge, bool dump) fprintf (dump_file, " %i: guessed profile. frequency %i, overall growth %i," " benefit %i%%, divisor %i\n", - (int) badness, edge->frequency, growth_for_all, benefitperc, div); + (int) badness, edge->frequency, growth_for_all, + benefitperc, div); } } /* When function local profile is not available or it does not give @@ -523,10 +898,10 @@ cgraph_edge_badness (struct cgraph_edge *edge, bool dump) } /* Recompute badness of EDGE and update its key in HEAP if needed. */ -static void +static inline void update_edge_key (fibheap_t heap, struct cgraph_edge *edge) { - int badness = cgraph_edge_badness (edge, false); + int badness = edge_badness (edge, false); if (edge->aux) { fibnode_t n = (fibnode_t) edge->aux; @@ -539,11 +914,30 @@ update_edge_key (fibheap_t heap, struct cgraph_edge *edge) if (badness < n->key) { fibheap_replace_key (heap, n, badness); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " decreasing badness %s/%i -> %s/%i, %i to %i\n", + cgraph_node_name (edge->caller), edge->caller->uid, + cgraph_node_name (edge->callee), edge->callee->uid, + (int)n->key, + badness); + } gcc_checking_assert (n->key == badness); } } else - edge->aux = fibheap_insert (heap, badness, edge); + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " enqueuing call %s/%i -> %s/%i, badness %i\n", + cgraph_node_name (edge->caller), edge->caller->uid, + cgraph_node_name (edge->callee), edge->callee->uid, + badness); + } + edge->aux = fibheap_insert (heap, badness, edge); + } } /* Recompute heap nodes for each of caller edge. */ @@ -553,7 +947,6 @@ update_caller_keys (fibheap_t heap, struct cgraph_node *node, bitmap updated_nodes) { struct cgraph_edge *edge; - cgraph_inline_failed_t failed_reason; if (!inline_summary (node)->inlinable || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE @@ -569,23 +962,20 @@ update_caller_keys (fibheap_t heap, struct cgraph_node *node, break; if (!edge) return; - /* Prune out edges we won't inline into anymore. */ - if (!cgraph_default_inline_p (node, &failed_reason)) - { - for (; edge; edge = edge->next_caller) - if (edge->aux) + + for (; edge; edge = edge->next_caller) + if (edge->inline_failed) + { + if (can_inline_edge_p (edge, false) + && want_inline_small_function_p (edge, false)) + update_edge_key (heap, edge); + else if (edge->aux) { + report_inline_failed_reason (edge); fibheap_delete_node (heap, (fibnode_t) edge->aux); edge->aux = NULL; - if (edge->inline_failed) - edge->inline_failed = failed_reason; } - return; - } - - for (; edge; edge = edge->next_caller) - if (edge->inline_failed) - update_edge_key (heap, edge); + } } /* Recompute heap nodes for each uninlined call. @@ -613,12 +1003,7 @@ update_callee_keys (fibheap_t heap, struct cgraph_node *node, && !bitmap_bit_p (updated_nodes, e->callee->uid)) { inline_summary (node)->estimated_growth = INT_MIN; - /* If function becomes uninlinable, we need to remove it from the heap. 
*/ - if (!cgraph_default_inline_p (e->callee, &e->inline_failed)) - update_caller_keys (heap, e->callee, updated_nodes); - else - /* Otherwise update just edge E. */ - update_edge_key (heap, e); + update_edge_key (heap, e); } if (e->next_callee) e = e->next_callee; @@ -702,16 +1087,14 @@ lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where, is NULL. */ static bool -cgraph_decide_recursive_inlining (struct cgraph_edge *edge, - VEC (cgraph_edge_p, heap) **new_edges) +recursive_inlining (struct cgraph_edge *edge, + VEC (cgraph_edge_p, heap) **new_edges) { int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO); - int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO); - int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY); fibheap_t heap; struct cgraph_node *node; struct cgraph_edge *e; - struct cgraph_node *master_clone, *next; + struct cgraph_node *master_clone = NULL, *next; int depth = 0; int n = 0; @@ -719,25 +1102,11 @@ cgraph_decide_recursive_inlining (struct cgraph_edge *edge, if (node->global.inlined_to) node = node->global.inlined_to; - /* It does not make sense to recursively inline always-inline functions - as we are going to sorry() on the remaining calls anyway. */ - if (inline_summary (node)->disregard_inline_limits - && lookup_attribute ("always_inline", DECL_ATTRIBUTES (node->decl))) - return false; - - if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)) - || (!flag_inline_functions && !DECL_DECLARED_INLINE_P (node->decl))) - return false; - if (DECL_DECLARED_INLINE_P (node->decl)) - { - limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE); - max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH); - } + limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE); /* Make sure that function is small enough to be considered for inlining. */ - if (!max_depth - || estimate_size_after_inlining (node, edge) >= limit) + if (estimate_size_after_inlining (node, edge) >= limit) return false; heap = fibheap_new (); lookup_recursive_calls (node, node, heap); @@ -752,14 +1121,6 @@ cgraph_decide_recursive_inlining (struct cgraph_edge *edge, " Performing recursive inlining on %s\n", cgraph_node_name (node)); - /* We need original clone to copy around. */ - master_clone = cgraph_clone_node (node, node->decl, - node->count, CGRAPH_FREQ_BASE, 1, - false, NULL); - for (e = master_clone->callees; e; e = e->next_callee) - if (!e->inline_failed) - cgraph_clone_inlined_nodes (e, true, false); - /* Do the inlining and update list of recursive call during process. 
*/ while (!fibheap_empty (heap)) { @@ -770,35 +1131,17 @@ cgraph_decide_recursive_inlining (struct cgraph_edge *edge, if (estimate_size_after_inlining (node, curr) > limit) break; + if (!can_inline_edge_p (curr, true)) + continue; + depth = 1; for (cnode = curr->caller; cnode->global.inlined_to; cnode = cnode->callers->caller) if (node->decl == curr->callee->decl) depth++; - if (depth > max_depth) - { - if (dump_file) - fprintf (dump_file, - " maximal depth reached\n"); - continue; - } - if (max_count) - { - if (!cgraph_maybe_hot_edge_p (curr)) - { - if (dump_file) - fprintf (dump_file, " Not inlining cold call\n"); - continue; - } - if (curr->count * 100 / node->count < probability) - { - if (dump_file) - fprintf (dump_file, - " Probability of edge is too small\n"); - continue; - } - } + if (!want_inline_self_recursive_call_p (curr, node, false, depth)) + continue; if (dump_file) { @@ -811,18 +1154,34 @@ cgraph_decide_recursive_inlining (struct cgraph_edge *edge, } fprintf (dump_file, "\n"); } + if (!master_clone) + { + /* We need original clone to copy around. */ + master_clone = cgraph_clone_node (node, node->decl, + node->count, CGRAPH_FREQ_BASE, 1, + false, NULL); + for (e = master_clone->callees; e; e = e->next_callee) + if (!e->inline_failed) + cgraph_clone_inlined_nodes (e, true, false); + } + cgraph_redirect_edge_callee (curr, master_clone); cgraph_mark_inline_edge (curr, false, new_edges); lookup_recursive_calls (node, curr->callee, heap); n++; } + if (!fibheap_empty (heap) && dump_file) fprintf (dump_file, " Recursive inlining growth limit met.\n"); - fibheap_delete (heap); + + if (!master_clone) + return false; + if (dump_file) fprintf (dump_file, - "\n Inlined %i times, body grown from size %i to %i, time %i to %i\n", n, + "\n Inlined %i times, " + "body grown from size %i to %i, time %i to %i\n", n, inline_summary (master_clone)->size, inline_summary (node)->size, inline_summary (master_clone)->time, inline_summary (node)->time); @@ -837,27 +1196,7 @@ cgraph_decide_recursive_inlining (struct cgraph_edge *edge, cgraph_remove_node (node); } cgraph_remove_node (master_clone); - /* FIXME: Recursive inlining actually reduces number of calls of the - function. At this place we should probably walk the function and - inline clones and compensate the counts accordingly. This probably - doesn't matter much in practice. */ - return n > 0; -} - -/* Set inline_failed for all callers of given function to REASON. */ - -static void -cgraph_set_inline_failed (struct cgraph_node *node, - cgraph_inline_failed_t reason) -{ - struct cgraph_edge *e; - - if (dump_file) - fprintf (dump_file, "Inlining failed: %s\n", - cgraph_inline_failed_string (reason)); - for (e = node->callers; e; e = e->next_caller) - if (e->inline_failed) - e->inline_failed = reason; + return true; } /* Given whole compilation unit estimate of INSNS, compute how large we can @@ -884,8 +1223,9 @@ add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges) gcc_assert (!edge->aux); if (inline_summary (edge->callee)->inlinable && edge->inline_failed - && cgraph_default_inline_p (edge->callee, &edge->inline_failed)) - edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge, false), edge); + && can_inline_edge_p (edge, true) + && want_inline_small_function_p (edge, true)) + edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge); } } @@ -898,11 +1238,10 @@ add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges) to be passed to cgraph_inlined_into and cgraph_inlined_callees. 
*/ static void -cgraph_decide_inlining_of_small_functions (void) +inline_small_functions (void) { struct cgraph_node *node; struct cgraph_edge *edge; - cgraph_inline_failed_t failed_reason; fibheap_t heap = fibheap_new (); bitmap updated_nodes = BITMAP_ALLOC (NULL); int min_size, max_size; @@ -921,31 +1260,20 @@ { struct inline_summary *info = inline_summary (node); - if (!info->inlinable || !node->callers) - { - struct cgraph_edge *e; - for (e = node->callers; e; e = e->next_caller) - { - gcc_assert (e->inline_failed); - e->inline_failed = CIF_FUNCTION_NOT_INLINABLE; - } - continue; - } if (dump_file) - fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node)); + fprintf (dump_file, "Enqueueing calls of %s/%i.\n", + cgraph_node_name (node), node->uid); info->estimated_growth = INT_MIN; - if (!cgraph_default_inline_p (node, &failed_reason)) - { - cgraph_set_inline_failed (node, failed_reason); - continue; - } for (edge = node->callers; edge; edge = edge->next_caller) - if (edge->inline_failed + if (edge->inline_failed + && can_inline_edge_p (edge, true) + && want_inline_small_function_p (edge, true)) { gcc_assert (!edge->aux); - edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge, false), edge); + update_edge_key (heap, edge); } } @@ -960,7 +1288,6 @@ int badness = fibheap_min_key (heap); int current_badness; int growth; - cgraph_inline_failed_t not_good = CIF_OK; edge = (struct cgraph_edge *) fibheap_extract_min (heap); gcc_assert (edge->aux); edge->aux = NULL; if (!edge->inline_failed) continue; /* When updating the edge costs, we only decrease badness in the keys. When the badness increase, we keep the heap as it is and re-insert key now. */ - current_badness = cgraph_edge_badness (edge, false); + current_badness = edge_badness (edge, false); gcc_assert (current_badness >= badness); if (current_badness != badness) { edge->aux = fibheap_insert (heap, current_badness, edge); continue; } + + if (!can_inline_edge_p (edge, true)) + continue; callee = edge->callee; growth = estimate_edge_growth (edge); if (dump_file) { fprintf (dump_file, "\nConsidering %s with %i size\n", cgraph_node_name (edge->callee), inline_summary (edge->callee)->size); fprintf (dump_file, " to be inlined into %s in %s:%i\n" - " Estimated growth after inlined into all callees is %+i insns.\n" + " Estimated growth after inlined into all is %+i insns.\n" " Estimated badness is %i, frequency %.2f.\n", cgraph_node_name (edge->caller), flag_wpa ? "unknown" : gimple_filename ((const_gimple) edge->call_stmt), - flag_wpa ? -1 : gimple_lineno ((const_gimple) edge->call_stmt), + flag_wpa ? -1 + : gimple_lineno ((const_gimple) edge->call_stmt), estimate_growth (edge->callee), badness, edge->frequency / (double)CGRAPH_FREQ_BASE); if (edge->count) - fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count); + fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", + edge->count); if (dump_flags & TDF_DETAILS) - cgraph_edge_badness (edge, true); - } - - /* When not having profile info ready we don't weight by any way the - position of call in procedure itself. This means if call of - function A from function B seems profitable to inline, the recursive - call of function A in inline copy of A in B will look profitable too - and we end up inlining until reaching maximal function growth. This - is not good idea so prohibit the recursive inlining. - - ???
When the frequencies are taken into account we might not need this - restriction. - - We need to be careful here, in some testcases, e.g. directives.c in - libcpp, we can estimate self recursive function to have negative growth - for inlining completely. - */ - if (!edge->count) - { - where = edge->caller; - while (where->global.inlined_to) - { - if (where->decl == edge->callee->decl) - break; - where = where->callers->caller; - } - if (where->global.inlined_to) - { - edge->inline_failed - = (inline_summary (edge->callee)->disregard_inline_limits - ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED); - if (dump_file) - fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n"); - continue; - } + edge_badness (edge, true); } - if (inline_summary (edge->callee)->disregard_inline_limits) - ; - else if (!cgraph_maybe_hot_edge_p (edge)) - not_good = CIF_UNLIKELY_CALL; - else if (!flag_inline_functions - && !DECL_DECLARED_INLINE_P (edge->callee->decl)) - not_good = CIF_NOT_DECLARED_INLINED; - else if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION(edge->caller->decl))) - not_good = CIF_OPTIMIZING_FOR_SIZE; - if (not_good && growth > 0 && estimate_growth (edge->callee) > 0) - { - edge->inline_failed = not_good; - if (dump_file) - fprintf (dump_file, " inline_failed:%s.\n", - cgraph_inline_failed_string (edge->inline_failed)); - continue; - } - if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed)) + if (overall_size + growth > max_size + && !DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)) { - if (dump_file) - fprintf (dump_file, " inline_failed:%s.\n", - cgraph_inline_failed_string (edge->inline_failed)); + edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT; + report_inline_failed_reason (edge); continue; } - if (!tree_can_inline_p (edge) - || edge->call_stmt_cannot_inline_p) + + if (!want_inline_small_function_p (edge, true)) { if (dump_file) fprintf (dump_file, " inline_failed:%s.\n", cgraph_inline_failed_string (edge->inline_failed)); @@ -1075,9 +1356,9 @@ where = edge->caller; if (where->global.inlined_to) where = where->global.inlined_to; - if (!cgraph_decide_recursive_inlining (edge, - flag_indirect_inlining - ? &new_indirect_edges : NULL)) + if (!recursive_inlining (edge, + flag_indirect_inlining + ? &new_indirect_edges : NULL)) { edge->inline_failed = CIF_RECURSIVE_INLINING; continue; } @@ -1089,14 +1370,33 @@ else { struct cgraph_node *callee; + struct cgraph_node *outer_node = NULL; + int depth = 0; + + /* Consider the case where self recursive function A is inlined into B. + This is a desired optimization in some cases, since it leads to an effect + similar to loop peeling and we might completely optimize out the + recursive call. However we must be extra selective. */ + + where = edge->caller; + while (where->global.inlined_to) { - if (dump_file) - fprintf (dump_file, " Not inlining into %s:%s.\n", - cgraph_node_name (edge->caller), - cgraph_inline_failed_string (edge->inline_failed)); + if (where->decl == edge->callee->decl) + outer_node = where, depth++; + where = where->callers->caller; + } + if (outer_node + && !want_inline_self_recursive_call_p (edge, outer_node, + true, depth)) + { + edge->inline_failed + = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl) + ?
CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED); continue; } + else if (depth && dump_file) + fprintf (dump_file, " Peeling recursion with depth %i\n", depth); + callee = edge->callee; gcc_checking_assert (!callee->global.inlined_to); cgraph_mark_inline_edge (edge, true, &new_indirect_edges); @@ -1148,43 +1448,6 @@ cgraph_decide_inlining_of_small_functions (void) fprintf (dump_file, "New minimal size reached: %i\n", min_size); } } - while (!fibheap_empty (heap)) - { - int badness = fibheap_min_key (heap); - - edge = (struct cgraph_edge *) fibheap_extract_min (heap); - gcc_assert (edge->aux); - edge->aux = NULL; - if (!edge->inline_failed) - continue; -#ifdef ENABLE_CHECKING - gcc_assert (cgraph_edge_badness (edge, false) >= badness); -#endif - if (dump_file) - { - fprintf (dump_file, - "\nSkipping %s with %i size\n", - cgraph_node_name (edge->callee), - inline_summary (edge->callee)->size); - fprintf (dump_file, - " called by %s in %s:%i\n" - " Estimated growth after inlined into all callees is %+i insns.\n" - " Estimated badness is %i, frequency %.2f.\n", - cgraph_node_name (edge->caller), - flag_wpa ? "unknown" - : gimple_filename ((const_gimple) edge->call_stmt), - flag_wpa ? -1 : gimple_lineno ((const_gimple) edge->call_stmt), - estimate_growth (edge->callee), - badness, - edge->frequency / (double)CGRAPH_FREQ_BASE); - if (edge->count) - fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count); - if (dump_flags & TDF_DETAILS) - cgraph_edge_badness (edge, true); - } - if (!inline_summary (edge->callee)->disregard_inline_limits && edge->inline_failed) - edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT; - } if (new_indirect_edges) VEC_free (cgraph_edge_p, heap, new_indirect_edges); @@ -1195,7 +1458,7 @@ cgraph_decide_inlining_of_small_functions (void) /* Flatten NODE from the IPA inliner. */ static void -cgraph_flatten (struct cgraph_node *node) +flatten_function (struct cgraph_node *node) { struct cgraph_edge *e; @@ -1208,22 +1471,6 @@ cgraph_flatten (struct cgraph_node *node) { struct cgraph_node *orig_callee; - if (e->call_stmt_cannot_inline_p) - { - if (dump_file) - fprintf (dump_file, "Not inlining: %s", - cgraph_inline_failed_string (e->inline_failed)); - continue; - } - - if (!e->callee->analyzed) - { - if (dump_file) - fprintf (dump_file, - "Not inlining: Function body not available.\n"); - continue; - } - /* We've hit cycle? It is time to give up. */ if (e->callee->aux) { @@ -1240,10 +1487,18 @@ cgraph_flatten (struct cgraph_node *node) it in order to fully flatten the leaves. */ if (!e->inline_failed) { - cgraph_flatten (e->callee); + flatten_function (e->callee); continue; } + /* Flatten attribute needs to be processed during late inlining. For + extra code quality we however do flattening during early optimization, + too. */ + if (cgraph_state != CGRAPH_STATE_IPA_SSA + ? 
!can_inline_edge_p (e, true) + : !can_early_inline_edge_p (e)) + continue; + if (cgraph_edge_recursive_p (e)) { if (dump_file) @@ -1251,14 +1506,6 @@ cgraph_flatten (struct cgraph_node *node) continue; } - if (!tree_can_inline_p (e)) - { - if (dump_file) - fprintf (dump_file, "Not inlining: %s", - cgraph_inline_failed_string (e->inline_failed)); - continue; - } - if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl)) != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl))) { @@ -1277,7 +1524,7 @@ cgraph_flatten (struct cgraph_node *node) cgraph_mark_inline_edge (e, true, NULL); if (e->callee != orig_callee) orig_callee->aux = (void *) node; - cgraph_flatten (e->callee); + flatten_function (e->callee); if (e->callee != orig_callee) orig_callee->aux = NULL; } @@ -1289,7 +1536,7 @@ cgraph_flatten (struct cgraph_node *node) expenses on updating data structures. */ static unsigned int -cgraph_decide_inlining (void) +ipa_inline (void) { struct cgraph_node *node; int nnodes; @@ -1366,41 +1613,44 @@ cgraph_decide_inlining (void) if (dump_file) fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node)); - cgraph_flatten (node); + flatten_function (node); } } - cgraph_decide_inlining_of_small_functions (); + inline_small_functions (); + cgraph_remove_unreachable_nodes (true, dump_file); + free (order); + /* We already perform some inlining of functions called once during + the inlining of small functions above. After unreachable nodes are + removed, we might still do a quick check that nothing new is found. */ if (flag_inline_functions_called_once) { if (dump_file) fprintf (dump_file, "\nDeciding on functions called once:\n"); /* And finally decide what functions are called once. */ - for (i = nnodes - 1; i >= 0; i--) + for (node = cgraph_nodes; node; node = node->next) { - node = order[i]; - if (node->callers && !node->callers->next_caller && !node->global.inlined_to - && cgraph_will_be_removed_from_program_if_no_direct_calls (node) - && inline_summary (node)->inlinable - && cgraph_function_body_availability (node) >= AVAIL_AVAILABLE && node->callers->inline_failed && node->callers->caller != node && node->callers->caller->global.inlined_to != node - && !node->callers->call_stmt_cannot_inline_p - && tree_can_inline_p (node->callers) - && !DECL_EXTERNAL (node->decl)) + && cgraph_will_be_removed_from_program_if_no_direct_calls (node) + && inline_summary (node)->inlinable + && cgraph_function_body_availability (node) >= AVAIL_AVAILABLE + && !DECL_EXTERNAL (node->decl) + && can_inline_edge_p (node->callers, true)) { - cgraph_inline_failed_t reason; + struct cgraph_node *caller = node->callers->caller; + old_size = overall_size; if (dump_file) { fprintf (dump_file, - "\nConsidering %s size %i.\n", + "\nInlining %s size %i.\n", cgraph_node_name (node), inline_summary (node)->size); fprintf (dump_file, " Called once from %s %i insns.\n", @@ -1408,25 +1658,14 @@ cgraph_decide_inlining (void) inline_summary (node->callers->caller)->size); } - if (cgraph_check_inline_limits (node->callers, &reason)) - { - struct cgraph_node *caller = node->callers->caller; - cgraph_mark_inline_edge (node->callers, true, NULL); - if (dump_file) - fprintf (dump_file, - " Inlined into %s which now has %i size" - " for a net change of %+i size.\n", - cgraph_node_name (caller), - inline_summary (caller)->size, - overall_size - old_size); - } - else - { - if (dump_file) - fprintf (dump_file, - " Not inlining: %s.\n", - cgraph_inline_failed_string (reason)); - } + cgraph_mark_inline_edge (node->callers, true, NULL); + if 
(dump_file) + fprintf (dump_file, + " Inlined into %s which now has %i size" + " for a net change of %+i size.\n", + cgraph_node_name (caller), + inline_summary (caller)->size, + overall_size - old_size); } } } @@ -1441,92 +1680,39 @@ cgraph_decide_inlining (void) "size %i turned to %i size.\n\n", ncalls_inlined, nfunctions_inlined, initial_size, overall_size); - free (order); /* In WPA we use inline summaries for partitioning process. */ if (!flag_wpa) inline_free_summary (); return 0; } -/* Return true when N is leaf function. Accept cheap builtins - in leaf functions. */ - -static bool -leaf_node_p (struct cgraph_node *n) -{ - struct cgraph_edge *e; - for (e = n->callees; e; e = e->next_callee) - if (!is_inexpensive_builtin (e->callee->decl)) - return false; - return true; -} - -/* Return true if the edge E is inlinable during early inlining. */ - -static bool -cgraph_edge_early_inlinable_p (struct cgraph_edge *e, FILE *file) -{ - if (!inline_summary (e->callee)->inlinable) - { - if (file) - fprintf (file, "Not inlining: Function not inlinable.\n"); - return false; - } - if (!e->callee->analyzed) - { - if (file) - fprintf (file, "Not inlining: Function body not available.\n"); - return false; - } - if (!tree_can_inline_p (e) - || e->call_stmt_cannot_inline_p) - { - if (file) - fprintf (file, "Not inlining: %s.\n", - cgraph_inline_failed_string (e->inline_failed)); - return false; - } - if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl)) - || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl))) - { - if (file) - fprintf (file, "Not inlining: not in SSA form.\n"); - return false; - } - return true; -} - /* Inline always-inline function calls in NODE. */ static bool -cgraph_perform_always_inlining (struct cgraph_node *node) +inline_always_inline_functions (struct cgraph_node *node) { struct cgraph_edge *e; bool inlined = false; for (e = node->callees; e; e = e->next_callee) { - if (!inline_summary (e->callee)->disregard_inline_limits) + if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)) continue; - if (dump_file) - fprintf (dump_file, - "Considering always-inline candidate %s.\n", - cgraph_node_name (e->callee)); - if (cgraph_edge_recursive_p (e)) { if (dump_file) - fprintf (dump_file, "Not inlining: recursive call.\n"); + fprintf (dump_file, " Not inlining recursive call to %s.\n", + cgraph_node_name (e->callee)); e->inline_failed = CIF_RECURSIVE_INLINING; continue; } - if (!cgraph_edge_early_inlinable_p (e, dump_file)) + if (!can_early_inline_edge_p (e)) continue; if (dump_file) - fprintf (dump_file, " Inlining %s into %s.\n", + fprintf (dump_file, " Inlining %s into %s (always_inline).\n", cgraph_node_name (e->callee), cgraph_node_name (e->caller)); cgraph_mark_inline_edge (e, true, NULL); @@ -1540,24 +1726,15 @@ cgraph_perform_always_inlining (struct cgraph_node *node) expenses on updating data structures. */ static bool -cgraph_decide_inlining_incrementally (struct cgraph_node *node) +early_inline_small_functions (struct cgraph_node *node) { struct cgraph_edge *e; bool inlined = false; - cgraph_inline_failed_t failed_reason; - - /* Never inline regular functions into always-inline functions - during incremental inlining. 
*/ - if (inline_summary (node)->disregard_inline_limits) - return false; for (e = node->callees; e; e = e->next_callee) { - int allowed_growth = 0; - if (!inline_summary (e->callee)->inlinable - || !e->inline_failed - || inline_summary (e->callee)->disregard_inline_limits) + || !e->inline_failed) continue; /* Do not consider functions not declared inline. */ @@ -1570,47 +1747,25 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node) fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (e->callee)); + if (!can_early_inline_edge_p (e)) + continue; + if (cgraph_edge_recursive_p (e)) { if (dump_file) - fprintf (dump_file, "Not inlining: recursive call.\n"); + fprintf (dump_file, " Not inlining: recursive call.\n"); continue; } - if (!cgraph_edge_early_inlinable_p (e, dump_file)) + if (!want_early_inline_function_p (e)) continue; - if (cgraph_maybe_hot_edge_p (e) && leaf_node_p (e->callee) - && optimize_function_for_speed_p (cfun)) - allowed_growth = PARAM_VALUE (PARAM_EARLY_INLINING_INSNS); - - /* When the function body would grow and inlining the function - won't eliminate the need for offline copy of the function, - don't inline. */ - if (estimate_edge_growth (e) > allowed_growth) - { - if (dump_file) - fprintf (dump_file, - "Not inlining: code size would grow by %i.\n", - estimate_edge_growth (e)); - continue; - } - if (!cgraph_check_inline_limits (e, &e->inline_failed)) - { - if (dump_file) - fprintf (dump_file, "Not inlining: %s.\n", - cgraph_inline_failed_string (e->inline_failed)); - continue; - } - if (cgraph_default_inline_p (e->callee, &failed_reason)) - { - if (dump_file) - fprintf (dump_file, " Inlining %s into %s.\n", - cgraph_node_name (e->callee), - cgraph_node_name (e->caller)); - cgraph_mark_inline_edge (e, true, NULL); - inlined = true; - } + if (dump_file) + fprintf (dump_file, " Inlining %s into %s.\n", + cgraph_node_name (e->callee), + cgraph_node_name (e->caller)); + cgraph_mark_inline_edge (e, true, NULL); + inlined = true; } return inlined; @@ -1626,7 +1781,7 @@ static GTY ((length ("nnodes"))) struct cgraph_node **order; passes to be somewhat more effective and avoids some code duplication in later real inlining pass for testcases with very many function calls. */ static unsigned int -cgraph_early_inlining (void) +early_inliner (void) { struct cgraph_node *node = cgraph_get_node (current_function_decl); struct cgraph_edge *edge; @@ -1643,11 +1798,20 @@ cgraph_early_inlining (void) /* Even when not optimizing or not inlining inline always-inline functions. */ - inlined = cgraph_perform_always_inlining (node); + inlined = inline_always_inline_functions (node); if (!optimize || flag_no_inline - || !flag_early_inlining) + || !flag_early_inlining + /* Never inline regular functions into always-inline functions + during incremental inlining. This sucks as functions calling + always inline functions will get less optimized, but at the + same time inlining of functions calling always inline + functoin into an always inline function might introduce + cycles of edges to be always inlined in the callgraph. + + We might want to be smarter and just avoid this type of inlining. 
*/ + || DECL_DISREGARD_INLINE_LIMITS (node->decl)) ; else if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL) @@ -1657,7 +1821,7 @@ cgraph_early_inlining (void) if (dump_file) fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node)); - cgraph_flatten (node); + flatten_function (node); inlined = true; } else @@ -1665,10 +1829,22 @@ cgraph_early_inlining (void) /* We iterate incremental inlining to get trivial cases of indirect inlining. */ while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - && cgraph_decide_inlining_incrementally (node)) + && early_inline_small_functions (node)) { timevar_push (TV_INTEGRATION); todo |= optimize_inline_calls (current_function_decl); + + /* Technically we ought to recompute inline parameters so the new + iteration of the early inliner works as expected. However, the + values we have are approximately right, and thus we only need to + update edge info that might be cleared out for newly discovered + edges. */ + for (edge = node->callees; edge; edge = edge->next_callee) + { + edge->call_stmt_size + = estimate_num_insns (edge->call_stmt, &eni_size_weights); + edge->call_stmt_time + = estimate_num_insns (edge->call_stmt, &eni_time_weights); + } timevar_pop (TV_INTEGRATION); iterations++; inlined = false; @@ -1681,19 +1857,6 @@ cgraph_early_inlining (void) { timevar_push (TV_INTEGRATION); todo |= optimize_inline_calls (current_function_decl); - - /* Technically we ought to recompute inline parameters so the new iteration of - early inliner works as expected. We however have values approximately right - and thus we only need to update edge info that might be cleared out for - newly discovered edges. */ - for (edge = node->callees; edge; edge = edge->next_callee) - { - edge->call_stmt_size - = estimate_num_insns (edge->call_stmt, &eni_size_weights); - edge->call_stmt_time - = estimate_num_insns (edge->call_stmt, &eni_time_weights); - } - timevar_pop (TV_INTEGRATION); } @@ -1708,7 +1871,7 @@ struct gimple_opt_pass pass_early_inline = GIMPLE_PASS, "einline", /* name */ NULL, /* gate */ - cgraph_early_inlining, /* execute */ + early_inliner, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ @@ -1730,8 +1893,8 @@ inline_transform (struct cgraph_node *node) struct cgraph_edge *e; bool inline_p = false; - /* FIXME: Currently the pass manager is adding inline transform more than once to some - clones. This needs revisiting after WPA cleanups. */ + /* FIXME: Currently the pass manager is adding inline transform more than + once to some clones. This needs revisiting after WPA cleanups. */ if (cfun->after_inlining) return 0; @@ -1762,7 +1925,7 @@ inline_transform (struct cgraph_node *node) happens during early inlining. */ static bool -gate_cgraph_decide_inlining (void) +gate_ipa_inline (void) { /* ??? We'd like to skip this if not optimizing or not inlining as all always-inline functions have been processed by early @@ -1777,8 +1940,8 @@ struct ipa_opt_pass_d pass_ipa_inline = { IPA_PASS, "inline", /* name */ - gate_cgraph_decide_inlining, /* gate */ - cgraph_decide_inlining, /* execute */ + gate_ipa_inline, /* gate */ + ipa_inline, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index 58bcdd9..1fd1f5c 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -41,8 +41,6 @@ struct inline_summary /* False when there something makes versioning impossible. Currently computed and used only by ipa-cp. 
*/ unsigned versionable : 1; - /* True when function should be inlined independently on its size. */ - unsigned disregard_inline_limits : 1; /* Information about function that will result after applying all the inline decisions present in the callgraph. Generally kept up to diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8e25848..2cf1dc6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2011-04-17 Jan Hubicka + + * gcc.dg/winline-5.c: Update testcase. + 2011-04-17 Eric Botcazou * gnat.dg/discr27.ad[sb]: Move dg directive. diff --git a/gcc/testsuite/gcc.dg/winline-5.c b/gcc/testsuite/gcc.dg/winline-5.c index ad1fc41..a42ae94 100644 --- a/gcc/testsuite/gcc.dg/winline-5.c +++ b/gcc/testsuite/gcc.dg/winline-5.c @@ -15,15 +15,7 @@ inline int q(void) /* { dg-warning "inline-unit-growth" } */ big(); big(); } -inline int q1(void) -{ - big(); - big(); - big(); -} int t (void) { - /* We allow one inlining over limit. */ - q1(); return q (); /* { dg-warning "called from here" } */ } diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 79c9538..ea4baac 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3138,29 +3138,6 @@ inline_forbidden_p (tree fndecl) return forbidden_p; } -/* Return true if CALLEE cannot be inlined into CALLER. */ - -static bool -inline_forbidden_into_p (tree caller, tree callee) -{ - /* Don't inline if the functions have different EH personalities. */ - if (DECL_FUNCTION_PERSONALITY (caller) - && DECL_FUNCTION_PERSONALITY (callee) - && (DECL_FUNCTION_PERSONALITY (caller) - != DECL_FUNCTION_PERSONALITY (callee))) - return true; - - /* Don't inline if the callee can throw non-call exceptions but the - caller cannot. */ - if (DECL_STRUCT_FUNCTION (callee) - && DECL_STRUCT_FUNCTION (callee)->can_throw_non_call_exceptions - && !(DECL_STRUCT_FUNCTION (caller) - && DECL_STRUCT_FUNCTION (caller)->can_throw_non_call_exceptions)) - return true; - - return false; -} - /* Returns nonzero if FN is a function that does not have any fundamental inline blocking properties. */ @@ -3750,10 +3727,6 @@ expand_call_inline (basic_block bb, gimple stmt, copy_body_data *id) && gimple_has_body_p (DECL_ABSTRACT_ORIGIN (fn))) fn = DECL_ABSTRACT_ORIGIN (fn); - /* First check that inlining isn't simply forbidden in this case. */ - if (inline_forbidden_into_p (cg_edge->caller->decl, cg_edge->callee->decl)) - goto egress; - /* Don't try to inline functions that are not well-suited to inlining. */ if (!cgraph_inline_p (cg_edge, &reason)) { @@ -5298,75 +5271,3 @@ build_duplicate_type (tree type) return type; } - -/* Return whether it is safe to inline a function because it used different - target specific options or call site actual types mismatch parameter types. - E is the call edge to be checked. */ -bool -tree_can_inline_p (struct cgraph_edge *e) -{ -#if 0 - /* This causes a regression in SPEC in that it prevents a cold function from - inlining a hot function. Perhaps this should only apply to functions - that the user declares hot/cold/optimize explicitly. */ - - /* Don't inline a function with a higher optimization level than the - caller, or with different space constraints (hot/cold functions). */ - tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller); - tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee); - - if (caller_tree != callee_tree) - { - struct cl_optimization *caller_opt - = TREE_OPTIMIZATION ((caller_tree) - ? 
caller_tree - : optimization_default_node); - - struct cl_optimization *callee_opt - = TREE_OPTIMIZATION ((callee_tree) - ? callee_tree - : optimization_default_node); - - if ((caller_opt->optimize > callee_opt->optimize) - || (caller_opt->optimize_size != callee_opt->optimize_size)) - return false; - } -#endif - tree caller, callee; - - caller = e->caller->decl; - callee = e->callee->decl; - - /* First check that inlining isn't simply forbidden in this case. */ - if (inline_forbidden_into_p (caller, callee)) - { - e->inline_failed = CIF_UNSPECIFIED; - if (e->call_stmt) - gimple_call_set_cannot_inline (e->call_stmt, true); - return false; - } - - /* Allow the backend to decide if inlining is ok. */ - if (!targetm.target_option.can_inline_p (caller, callee)) - { - e->inline_failed = CIF_TARGET_OPTION_MISMATCH; - if (e->call_stmt) - gimple_call_set_cannot_inline (e->call_stmt, true); - e->call_stmt_cannot_inline_p = true; - return false; - } - - /* Do not inline calls where we cannot triviall work around mismatches - in argument or return types. */ - if (e->call_stmt - && !gimple_check_call_matching_types (e->call_stmt, callee)) - { - e->inline_failed = CIF_MISMATCHED_ARGUMENTS; - if (e->call_stmt) - gimple_call_set_cannot_inline (e->call_stmt, true); - e->call_stmt_cannot_inline_p = true; - return false; - } - - return true; -} diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h index 66cc334..fb039e3 100644 --- a/gcc/tree-inline.h +++ b/gcc/tree-inline.h @@ -182,7 +182,6 @@ int estimate_num_insns (gimple, eni_weights *); int estimate_num_insns_fn (tree, eni_weights *); int count_insns_seq (gimple_seq, eni_weights *); bool tree_versionable_function_p (tree); -bool tree_can_inline_p (struct cgraph_edge *e); extern tree remap_decl (tree decl, copy_body_data *id); extern tree remap_type (tree type, copy_body_data *id); -- 2.7.4
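
For readers tracing the restructuring above, the following stand-alone sketch models the can/want predicate split that inline_small_functions and early_inline_small_functions now follow. It is not code from the patch: every type, name, and number below is invented for illustration. The point is the shape of the decision the patch introduces: a hard legality predicate whose failure is final for an edge, and a soft profitability predicate that is re-evaluated as unit growth accumulates.

#include <stdbool.h>
#include <stdio.h>

struct call_edge
{
  const char *callee;
  int growth;             /* Estimated size growth if inlined.  */
  bool body_available;    /* Callee body known to the compiler.  */
  bool declared_inline;   /* Callee carries the inline keyword.  */
};

static int overall_size = 100;          /* Running unit size.  */
static const int max_size = 130;        /* Stand-in growth limit.  */

/* Hard legality test: a failure here is final for the edge, so a real
   pass would record the reason once (compare report_inline_failed_reason
   and the CIF_* codes in the patch).  */
static bool
can_inline_p (const struct call_edge *e)
{
  return e->body_available;
}

/* Soft profitability test: the answer may change as overall_size grows,
   so it is re-asked each time the edge is considered.  */
static bool
want_inline_p (const struct call_edge *e)
{
  if (!e->declared_inline && e->growth > 0)
    return false;
  return overall_size + e->growth <= max_size;
}

int
main (void)
{
  struct call_edge queue[] = {
    { "small_helper", 2, true, true },
    { "extern_fn", 1, false, true },
    { "big_fn", 50, true, true },
  };
  unsigned i;

  for (i = 0; i < sizeof (queue) / sizeof (queue[0]); i++)
    {
      struct call_edge *e = &queue[i];
      if (!can_inline_p (e))
	{
	  printf ("%s: cannot be inlined\n", e->callee);
	  continue;
	}
      if (!want_inline_p (e))
	{
	  printf ("%s: legal but not profitable\n", e->callee);
	  continue;
	}
      overall_size += e->growth;
      printf ("inlined %s, unit size now %i\n", e->callee, overall_size);
    }
  return 0;
}

Keeping the two predicates separate is what lets the queue-driven pass in the patch report a precise failure reason exactly once for hard failures, while retesting only profitability as overall unit size changes.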