From a41f2a28f67a3cfe026e6f842c8db4ad2383ceba Mon Sep 17 00:00:00 2001 From: hubicka Date: Fri, 22 Apr 2011 20:04:42 +0000 Subject: [PATCH] * gengtype.c (open_base_files): Add ipa-inline.h include. * ipa-cp.c (ipcp_get_lattice, ipcp_lattice_from_jfunc): Move to ipa-prop.c update all uses. * ipa-prop.c: (ipa_get_lattice, ipa_lattice_from_jfunc): ... here. * ipa-inline-transform.c (inline_call): Use inline_merge_summary to merge summary of inlined function into former caller. * ipa-inline.c (max_benefit): Remove. (edge_badness): Compensate for removal of benefits. (update_caller_keys): Use reset_node_growth_cache/reset_edge_growth_cache. (update_callee_keys): Likewise. (update_all_callee_keys): Likewise. (inline_small_functions): Do not collect max_benefit; do not reset stimated_growth; call free_growth_caches and initialize_growth_caches. * ipa-inline.h (struct condition, type clause_t, struct predicate, struct size_time_entry): New structures. (INLINE_SIZE_SCALE, INLINE_TIME_SCALE, MAX_CLAUSES): New constants. (inline_summary): Remove size_inlining_benefit, time_inlining_benefit and estimated_growth. (edge_growth_cache_entry): New structure. (node_growth_cache, edge_growth_cache): New global vars. (estimate_growth): Turn into inline. (inline_merge_summary, do_estimate_edge_growth, do_estimate_edge_time, initialize_growth_caches, free_growth_caches): Declare. (estimate_edge_growth): Rewrite. (estimate_edge_time): Implement as inline cache lookup. (reset_node_growth_cache, reset_edge_growth_cache): New inline functions. (MAX_TIME): Reduce to allow multiplicatoin by INLINE_SIZE_SCALE. (NUM_CONDITIONS): New constant. (predicate_conditions): New enum. (IS_NOT_CONSTANT): New constant. (edge_removal_hook_holder): New var. (node_growth_cache, edge_growth_cache): New global vars. (true_predicate, single_cond_predicate, false_predicate, not_inlined_predicate, add_condition, add_clause, and_predicates, or_predicates, predicates_equal_p, evaulate_predicate, dump_condition, dump_clause, dump_predicate, account_size_time, evaulate_conditions_for_edge): New functions. (inline_summary_alloc): Move to heap. (inline_node_removal_hook): Clear condition and entry vectors. (inline_edge_removal_hook): New function. (initialize_growth_caches, free_growth_caches): New function. (dump_inline_summary): Update. (edge_execution_predicate): New function. (will_be_nonconstant_predicate): New function. (estimate_function_body_sizes): Compute BB and constantness predicates. (compute_inline_parameters): Do not clear estimated_growth. (estimate_edge_size_and_time): New function. (estimate_calls_size_and_time): New function. (estimate_callee_size_and_time): New function. (remap_predicate): New function. (inline_merge_summary): New function. (do_estimate_edge_time): New function based on... (estimate_edge_time): ... this one. (do_estimate_edge_growth): New function. (do_estimate_growth): New function based on.... (estimate_growth): ... this one. (inline_analyze_function): Analyze after deciding on jump functions. (inline_read_section): New function. (inline_read_summary): Use it. (inline_write_summary): Write all the new data. * ipa-prop.c (ipa_get_param_decl_index): Export. (ipa_lattice_from_jfunc): Move here from ipa-cp.c * ipa-prop.h (ipa_get_param_decl_index, ipa_lattice_from_jfunc): Declare. (ipa_get_lattice): Move hre from ipa-cp.c * Makefile.in (GTFILES): Add ipa-inline.h and ipa-inline-analysis.c * params.def (PARAM_EARLY_INLINING_INSNS): Set to 11. * cgraph.h (cgraph_clone_inlined_nodes, compute_inline_parameters, cgraph_edge_inlinable_p): Remove. * cgraphunit.c: Include ipainline.h (cgraph_process_new_functions): Update call of compute_inline_parameters. * gcc.dg/tree-ssa/pr38699.c: Fix testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@172873 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 72 ++ gcc/Makefile.in | 3 +- gcc/cgraph.h | 5 - gcc/cgraphunit.c | 3 +- gcc/gengtype.c | 3 +- gcc/ipa-cp.c | 95 +-- gcc/ipa-inline-analysis.c | 1193 ++++++++++++++++++++++++++++--- gcc/ipa-inline-transform.c | 18 +- gcc/ipa-inline.c | 78 +- gcc/ipa-inline.h | 160 ++++- gcc/ipa-prop.c | 65 +- gcc/ipa-prop.h | 11 + gcc/ipa-split.c | 2 +- gcc/params.def | 2 +- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/tree-ssa/pr38699.c | 7 +- gcc/tree-sra.c | 2 +- 17 files changed, 1437 insertions(+), 286 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f837af9..71e2d3c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,75 @@ +2011-04-22 Jan Hubicka + + * gengtype.c (open_base_files): Add ipa-inline.h include. + * ipa-cp.c (ipcp_get_lattice, ipcp_lattice_from_jfunc): Move to ipa-prop.c + update all uses. + * ipa-prop.c: (ipa_get_lattice, ipa_lattice_from_jfunc): ... here. + * ipa-inline-transform.c (inline_call): Use inline_merge_summary to merge + summary of inlined function into former caller. + * ipa-inline.c (max_benefit): Remove. + (edge_badness): Compensate for removal of benefits. + (update_caller_keys): Use reset_node_growth_cache/reset_edge_growth_cache. + (update_callee_keys): Likewise. + (update_all_callee_keys): Likewise. + (inline_small_functions): Do not collect max_benefit; do not + reset stimated_growth; call free_growth_caches and initialize_growth_caches. + * ipa-inline.h (struct condition, type clause_t, struct predicate, struct + size_time_entry): New structures. + (INLINE_SIZE_SCALE, INLINE_TIME_SCALE, MAX_CLAUSES): New constants. + (inline_summary): Remove size_inlining_benefit, time_inlining_benefit and + estimated_growth. + (edge_growth_cache_entry): New structure. + (node_growth_cache, edge_growth_cache): New global vars. + (estimate_growth): Turn into inline. + (inline_merge_summary, do_estimate_edge_growth, do_estimate_edge_time, + initialize_growth_caches, free_growth_caches): Declare. + (estimate_edge_growth): Rewrite. + (estimate_edge_time): Implement as inline cache lookup. + (reset_node_growth_cache, reset_edge_growth_cache): New inline functions. + (MAX_TIME): Reduce to allow multiplicatoin by INLINE_SIZE_SCALE. + (NUM_CONDITIONS): New constant. + (predicate_conditions): New enum. + (IS_NOT_CONSTANT): New constant. + (edge_removal_hook_holder): New var. + (node_growth_cache, edge_growth_cache): New global vars. + (true_predicate, single_cond_predicate, false_predicate, not_inlined_predicate, + add_condition, add_clause, and_predicates, or_predicates, predicates_equal_p, + evaulate_predicate, dump_condition, dump_clause, dump_predicate, account_size_time, + evaulate_conditions_for_edge): New functions. + (inline_summary_alloc): Move to heap. + (inline_node_removal_hook): Clear condition and entry vectors. + (inline_edge_removal_hook): New function. + (initialize_growth_caches, free_growth_caches): New function. + (dump_inline_summary): Update. + (edge_execution_predicate): New function. + (will_be_nonconstant_predicate): New function. + (estimate_function_body_sizes): Compute BB and constantness predicates. + (compute_inline_parameters): Do not clear estimated_growth. + (estimate_edge_size_and_time): New function. + (estimate_calls_size_and_time): New function. + (estimate_callee_size_and_time): New function. + (remap_predicate): New function. + (inline_merge_summary): New function. + (do_estimate_edge_time): New function based on... + (estimate_edge_time): ... this one. + (do_estimate_edge_growth): New function. + (do_estimate_growth): New function based on.... + (estimate_growth): ... this one. + (inline_analyze_function): Analyze after deciding on jump functions. + (inline_read_section): New function. + (inline_read_summary): Use it. + (inline_write_summary): Write all the new data. + * ipa-prop.c (ipa_get_param_decl_index): Export. + (ipa_lattice_from_jfunc): Move here from ipa-cp.c + * ipa-prop.h (ipa_get_param_decl_index, ipa_lattice_from_jfunc): Declare. + (ipa_get_lattice): Move hre from ipa-cp.c + * Makefile.in (GTFILES): Add ipa-inline.h and ipa-inline-analysis.c + * params.def (PARAM_EARLY_INLINING_INSNS): Set to 11. + * cgraph.h (cgraph_clone_inlined_nodes, compute_inline_parameters, + cgraph_edge_inlinable_p): Remove. + * cgraphunit.c: Include ipainline.h + (cgraph_process_new_functions): Update call of compute_inline_parameters. + 2011-04-22 Richard Guenther * tree.c (build_int_cst): Properly create canonicalized integer diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 212b538..9b85ad0 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2997,7 +2997,7 @@ cgraphunit.o : cgraphunit.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) debug.h $(DIAGNOSTIC_H) \ $(FIBHEAP_H) output.h $(PARAMS_H) $(RTL_H) $(TIMEVAR_H) $(IPA_PROP_H) \ gt-cgraphunit.h tree-iterator.h $(COVERAGE_H) $(TREE_DUMP_H) \ - tree-pretty-print.h gimple-pretty-print.h + tree-pretty-print.h gimple-pretty-print.h ipa-inline.h cgraphbuild.o : cgraphbuild.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) langhooks.h $(CGRAPH_H) intl.h pointer-set.h $(GIMPLE_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(IPA_UTILS_H) $(EXCEPT_H) \ @@ -3782,6 +3782,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/ipa-prop.h \ $(srcdir)/lto-streamer.h \ $(srcdir)/target-globals.h \ + $(srcdir)/ipa-inline.h \ @all_gtfiles@ # Compute the list of GT header files from the corresponding C sources, diff --git a/gcc/cgraph.h b/gcc/cgraph.h index b57a5e0..54e7594 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -721,11 +721,6 @@ varpool_next_static_initializer (struct varpool_node *node) for ((node) = varpool_first_static_initializer (); (node); \ (node) = varpool_next_static_initializer (node)) -/* In ipa-inline.c */ -void cgraph_clone_inlined_nodes (struct cgraph_edge *, bool, bool); -void compute_inline_parameters (struct cgraph_node *); -cgraph_inline_failed_t cgraph_edge_inlinable_p (struct cgraph_edge *); - /* Create a new static variable of type TYPE. */ tree add_new_static_var (tree type); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 7e7530b..8803cf6 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -138,6 +138,7 @@ along with GCC; see the file COPYING3. If not see #include "output.h" #include "coverage.h" #include "plugin.h" +#include "ipa-inline.h" static void cgraph_expand_all_functions (void); static void cgraph_mark_functions_to_output (void); @@ -252,7 +253,7 @@ cgraph_process_new_functions (void) || !optimize) execute_pass_list (pass_early_local_passes.pass.sub); else - compute_inline_parameters (node); + compute_inline_parameters (node, true); free_dominance_info (CDI_POST_DOMINATORS); free_dominance_info (CDI_DOMINATORS); pop_cfun (); diff --git a/gcc/gengtype.c b/gcc/gengtype.c index 5dd877e..95e2a92 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -1559,7 +1559,8 @@ open_base_files (void) "optabs.h", "libfuncs.h", "debug.h", "ggc.h", "cgraph.h", "tree-flow.h", "reload.h", "cpp-id-data.h", "tree-chrec.h", "cfglayout.h", "except.h", "output.h", "gimple.h", "cfgloop.h", - "target.h", "ipa-prop.h", "lto-streamer.h", "target-globals.h", NULL + "target.h", "ipa-prop.h", "lto-streamer.h", "target-globals.h", + "ipa-inline.h", NULL }; const char *const *ifp; outf_p gtype_desc_c; diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 261d074..fd88fc7 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -278,77 +278,6 @@ ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1, res->constant = lat1->constant; } -/* Return the lattice corresponding to the Ith formal parameter of the function - described by INFO. */ -static inline struct ipcp_lattice * -ipcp_get_lattice (struct ipa_node_params *info, int i) -{ - return &(info->params[i].ipcp_lattice); -} - -/* Given the jump function JFUNC, compute the lattice LAT that describes the - value coming down the callsite. INFO describes the caller node so that - pass-through jump functions can be evaluated. */ -static void -ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, - struct ipa_jump_func *jfunc) -{ - if (jfunc->type == IPA_JF_CONST) - { - lat->type = IPA_CONST_VALUE; - lat->constant = jfunc->value.constant; - } - else if (jfunc->type == IPA_JF_PASS_THROUGH) - { - struct ipcp_lattice *caller_lat; - tree cst; - - caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id); - lat->type = caller_lat->type; - if (caller_lat->type != IPA_CONST_VALUE) - return; - cst = caller_lat->constant; - - if (jfunc->value.pass_through.operation != NOP_EXPR) - { - tree restype; - if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) - == tcc_comparison) - restype = boolean_type_node; - else - restype = TREE_TYPE (cst); - cst = fold_binary (jfunc->value.pass_through.operation, - restype, cst, jfunc->value.pass_through.operand); - } - if (!cst || !is_gimple_ip_invariant (cst)) - lat->type = IPA_BOTTOM; - lat->constant = cst; - } - else if (jfunc->type == IPA_JF_ANCESTOR) - { - struct ipcp_lattice *caller_lat; - tree t; - - caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id); - lat->type = caller_lat->type; - if (caller_lat->type != IPA_CONST_VALUE) - return; - if (TREE_CODE (caller_lat->constant) != ADDR_EXPR) - { - /* This can happen when the constant is a NULL pointer. */ - lat->type = IPA_BOTTOM; - return; - } - t = TREE_OPERAND (caller_lat->constant, 0); - t = build_ref_for_offset (EXPR_LOCATION (t), t, - jfunc->value.ancestor.offset, - jfunc->value.ancestor.type, NULL, false); - lat->constant = build_fold_addr_expr (t); - } - else - lat->type = IPA_BOTTOM; -} - /* True when OLD_LAT and NEW_LAT values are not the same. */ static bool @@ -384,7 +313,7 @@ ipcp_print_all_lattices (FILE * f) count = ipa_get_param_count (info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); fprintf (f, " param [%d]: ", i); if (lat->type == IPA_CONST_VALUE) @@ -582,7 +511,7 @@ ipcp_initialize_node_lattices (struct cgraph_node *node) for (i = 0; i < ipa_get_param_count (info) ; i++) { - ipcp_get_lattice (info, i)->type = type; + ipa_get_lattice (info, i)->type = type; if (type == IPA_BOTTOM) ipa_set_param_cannot_devirtualize (info, i); } @@ -659,7 +588,7 @@ ipcp_change_tops_to_bottom (void) count = ipa_get_param_count (info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); if (lat->type == IPA_TOP) { prop_again = true; @@ -842,8 +771,8 @@ ipcp_propagate_stage (void) for (i = 0; i < count; i++) { jump_func = ipa_get_ith_jump_func (args, i); - ipcp_lattice_from_jfunc (info, &inc_lat, jump_func); - dest_lat = ipcp_get_lattice (callee_info, i); + ipa_lattice_from_jfunc (info, &inc_lat, jump_func); + dest_lat = ipa_get_lattice (callee_info, i); ipa_lattice_meet (&new_lat, &inc_lat, dest_lat); if (ipcp_lattice_changed (&new_lat, dest_lat)) { @@ -1031,7 +960,7 @@ ipcp_need_redirect_p (struct cgraph_edge *cs) count = ipa_get_param_count (orig_callee_info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i); + struct ipcp_lattice *lat = ipa_get_lattice (orig_callee_info, i); struct ipa_jump_func *jump_func; jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); @@ -1067,7 +996,7 @@ ipcp_update_callgraph (void) args_to_skip = BITMAP_ALLOC (NULL); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); /* We can proactively remove obviously unused arguments. */ if (!ipa_is_param_used (info, i)) @@ -1155,7 +1084,7 @@ ipcp_estimate_growth (struct cgraph_node *node) if (node->local.can_change_signature) for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); /* We can proactively remove obviously unused arguments. */ if (!ipa_is_param_used (info, i)) @@ -1237,7 +1166,7 @@ ipcp_process_devirtualization_opportunities (struct cgraph_node *node) if (param_index == -1) continue; - lat = ipcp_get_lattice (info, param_index); + lat = ipa_get_lattice (info, param_index); token = ie->indirect_info->otr_token; anc_offset = ie->indirect_info->anc_offset; otr_type = ie->indirect_info->otr_type; @@ -1309,7 +1238,7 @@ ipcp_const_param_count (struct cgraph_node *node) for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); if ((ipcp_lat_is_insertable (lat) /* Do not count obviously unused arguments. */ && ipa_is_param_used (info, i)) @@ -1436,7 +1365,7 @@ ipcp_insert_stage (void) args_to_skip = NULL; for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); parm_tree = ipa_get_param (info, i); /* We can proactively remove obviously unused arguments. */ @@ -1504,7 +1433,7 @@ ipcp_insert_stage (void) info = IPA_NODE_REF (node); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); if (lat->type == IPA_CONST_VALUE) ipcp_discover_new_direct_edges (node1, i, lat->constant); } diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index 30fbcbc..921f5d3 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -36,9 +36,31 @@ along with GCC; see the file COPYING3. If not see the function created by applying all the inline decisions already present in the callgraph). - We also provide accestor to the inline_summary datastructure and + We provide accestor to the inline_summary datastructure and basic logic updating the parameters when inlining is performed. + The summaries are context sensitive. Context means + 1) partial assignment of known constant values of operands + 2) whether function is inlined into the call or not. + It is easy to add more variants. To represent function size and time + that depends on context (i.e. it is known to be optimized away when + context is known either by inlining or from IP-CP and clonning), + we use predicates. Predicates are logical formulas in + conjunctive-disjunctive form consisting of clauses. Clauses are bitmaps + specifying what conditions must be true. Conditions are simple test + of the form described above. + + In order to make predicate (possibly) true, all of its clauses must + be (possibly) true. To make clause (possibly) true, one of conditions + it mentions must be (possibly) true. There are fixed bounds on + number of clauses and conditions and all the manipulation functions + are conservative in positive direction. I.e. we may lose precision + by thinking that predicate may be true even when it is not. + + estimate_edge_size and estimate_edge_growth can be used to query + function size/time in the given context. inline_merge_summary merges + properties of caller and callee after inlining. + Finally pass_inline_parameters is exported. This is used to drive computation of function parameters used by the early inliner. IPA inlined performs analysis via its analyze_function method. */ @@ -64,18 +86,408 @@ along with GCC; see the file COPYING3. If not see #include "lto-streamer.h" #include "ipa-inline.h" -#define MAX_TIME 1000000000 +/* Estimate runtime of function can easilly run into huge numbers with many + nested loops. Be sure we can compute time * INLINE_SIZE_SCALE in integer. + For anything larger we use gcov_type. */ +#define MAX_TIME 1000000 + +/* Number of bits in integer, but we really want to be stable across different + hosts. */ +#define NUM_CONDITIONS 32 + +enum predicate_conditions +{ + predicate_false_condition = 0, + predicate_not_inlined_condition = 1, + predicate_first_dynamic_condition = 2 +}; + +/* Special condition code we use to represent test that operand is compile time + constant. */ +#define IS_NOT_CONSTANT ERROR_MARK /* Holders of ipa cgraph hooks: */ static struct cgraph_node_hook_list *function_insertion_hook_holder; static struct cgraph_node_hook_list *node_removal_hook_holder; static struct cgraph_2node_hook_list *node_duplication_hook_holder; +static struct cgraph_edge_hook_list *edge_removal_hook_holder; static void inline_node_removal_hook (struct cgraph_node *, void *); static void inline_node_duplication_hook (struct cgraph_node *, struct cgraph_node *, void *); -/* VECtor holding inline summaries. */ -VEC(inline_summary_t,heap) *inline_summary_vec; +/* VECtor holding inline summaries. + In GGC memory because conditions might point to constant trees. */ +VEC(inline_summary_t,gc) *inline_summary_vec; + +/* Cached node/edge growths. */ +VEC(int,heap) *node_growth_cache; +VEC(edge_growth_cache_entry,heap) *edge_growth_cache; + + +/* Return true predicate (tautology). + We represent it by empty list of clauses. */ + +static inline struct predicate +true_predicate (void) +{ + struct predicate p; + p.clause[0]=0; + return p; +} + + +/* Return predicate testing single condition number COND. */ + +static inline struct predicate +single_cond_predicate (int cond) +{ + struct predicate p; + p.clause[0]=1 << cond; + p.clause[1]=0; + return p; +} + + +/* Return false predicate. First clause require false condition. */ + +static inline struct predicate +false_predicate (void) +{ + return single_cond_predicate (predicate_false_condition); +} + + +/* Return predicate that is set true when function is not inlined. */ +static inline struct predicate +not_inlined_predicate (void) +{ + return single_cond_predicate (predicate_not_inlined_condition); +} + + +/* Add condition to condition list CONDS. */ + +static struct predicate +add_condition (struct inline_summary *summary, int operand_num, + enum tree_code code, tree val) +{ + int i; + struct condition *c; + struct condition new_cond; + + for (i = 0; VEC_iterate (condition, summary->conds, i, c); i++) + { + if (c->operand_num == operand_num + && c->code == code + && c->val == val) + return single_cond_predicate (i + predicate_first_dynamic_condition); + } + /* Too many conditions. Give up and return constant true. */ + if (i == NUM_CONDITIONS - predicate_first_dynamic_condition) + return true_predicate (); + + new_cond.operand_num = operand_num; + new_cond.code = code; + new_cond.val = val; + VEC_safe_push (condition, gc, summary->conds, &new_cond); + return single_cond_predicate (i + predicate_first_dynamic_condition); +} + + +/* Add clause CLAUSE into the predicate. */ + +static inline void +add_clause (struct predicate *p, clause_t clause) +{ + int i; + int insert_here = 0; + /* True clause. */ + if (!clause) + return; + + /* Flase clause makes the whole predicate false. Kill the other variants. */ + if (clause & (1 << predicate_false_condition)) + { + p->clause[0] = (1 << predicate_false_condition); + p->clause[1] = 0; + } + for (i = 0; i < MAX_CLAUSES; i++) + { + if (p->clause[i] == clause) + return; + if (!p->clause[i]) + break; + if (p->clause[i] < clause && !insert_here) + insert_here = i; + } + /* We run out of variants. Be conservative in positive direciton. */ + if (i == MAX_CLAUSES) + return; + /* Keep clauses ordered by index, so equivalence testing is easy. */ + p->clause[i + 1] = 0; + for (;i > insert_here; i--) + p->clause[i] = p->clause[i - 1]; + p->clause[insert_here] = clause; +} + + +/* Return P & P2. */ + +static struct predicate +and_predicates (struct predicate *p, struct predicate *p2) +{ + struct predicate out = *p; + int i; + for (i = 0; p2->clause[i]; i++) + add_clause (&out, p2->clause[i]); + return out; +} + + +/* Return P | P2. */ + +static struct predicate +or_predicates (struct predicate *p, struct predicate *p2) +{ + struct predicate out = true_predicate (); + int i,j; + /* If one of conditions is false, return the other. */ + if (p2->clause[0] == 1 << predicate_false_condition) + { + gcc_checking_assert (!p2->clause[1]); + return *p; + } + if (p->clause[0] == 1 << predicate_false_condition) + { + gcc_checking_assert (!p->clause[1]); + return *p2; + } + for (i = 0; p->clause[i]; i++) + for (j = 0; p2->clause[j]; j++) + add_clause (&out, p->clause[i] | p2->clause[j]); + return out; +} + + +/* Return true if predicates are obviously equal. */ + +static inline bool +predicates_equal_p (struct predicate *p, struct predicate *p2) +{ + int i; + for (i = 0; p->clause[i]; i++) + if (p->clause[i] != p2->clause[i]) + return false; + return !p2->clause[i]; +} + + +/* Having partial truth assignment in POSSIBLE_TRUTHS, return false if predicate P + to be false. */ + +static bool +evaulate_predicate (struct predicate *p, clause_t possible_truths) +{ + int i; + + /* True remains true. */ + if (!p->clause[0]) + return true; + + /* See if we can find clause we can disprove. */ + for (i = 0; p->clause[i]; i++) + if (!(p->clause[i] & possible_truths)) + return false; + return true; +} + + +/* Dump conditional COND. */ + +static void +dump_condition (FILE *f, conditions conditions, int cond) +{ + condition *c; + if (cond == predicate_false_condition) + fprintf (f, "false"); + else if (cond == predicate_not_inlined_condition) + fprintf (f, "not inlined"); + else + { + c = VEC_index (condition, conditions, cond - predicate_first_dynamic_condition); + fprintf (f, "op%i", c->operand_num); + if (c->code == IS_NOT_CONSTANT) + { + fprintf (f, " not constant"); + return; + } + fprintf (f, " %s ", op_symbol_code (c->code)); + print_generic_expr (f, c->val, 1); + } +} + + +/* Dump clause CLAUSE. */ + +static void +dump_clause (FILE *f, conditions conds, clause_t clause) +{ + int i; + bool found = false; + fprintf (f, "("); + if (!clause) + fprintf (f, "true"); + for (i = 0; i < NUM_CONDITIONS; i++) + if (clause & (1 << i)) + { + if (found) + fprintf (f, " || "); + found = true; + dump_condition (f, conds, i); + } + fprintf (f, ")"); +} + + +/* Dump predicate PREDICATE. */ + +static void +dump_predicate (FILE *f, conditions conds, struct predicate *pred) +{ + int i; + if (!pred->clause[0]) + dump_clause (f, conds, 0); + else + for (i = 0; pred->clause[i]; i++) + { + if (i) + fprintf (f, " && "); + dump_clause (f, conds, pred->clause[i]); + } + fprintf (f, "\n"); +} + + +/* Record SIZE and TIME under condition PRED into the inline summary. */ + +static void +account_size_time (struct inline_summary *summary, int size, int time, struct predicate *pred) +{ + size_time_entry *e; + bool found = false; + int i; + + if (pred->clause[0] == (1 << predicate_false_condition)) + return; + + /* We need to create initial empty unconitional clause, but otherwie + we don't need to account empty times and sizes. */ + if (!size && !time && summary->conds) + return; + + /* Watch overflow that might result from insane profiles. */ + if (time > MAX_TIME * INLINE_TIME_SCALE) + time = MAX_TIME * INLINE_TIME_SCALE; + gcc_assert (time >= 0); + + for (i = 0; VEC_iterate (size_time_entry, summary->entry, i, e); i++) + if (predicates_equal_p (&e->predicate, pred)) + { + found = true; + break; + } + if (i == 32) + { + i = 0; + found = true; + e = VEC_index (size_time_entry, summary->entry, 0); + gcc_assert (!e->predicate.clause[0]); + } + if (dump_file && (dump_flags & TDF_DETAILS) && (time || size)) + { + fprintf (dump_file, "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate:", + ((double)size) / INLINE_SIZE_SCALE, ((double)time) / INLINE_TIME_SCALE, + found ? "" : "new "); + dump_predicate (dump_file, summary->conds, pred); + } + if (!found) + { + struct size_time_entry new_entry; + new_entry.size = size; + new_entry.time = time; + new_entry.predicate = *pred; + VEC_safe_push (size_time_entry, gc, summary->entry, &new_entry); + } + else + { + e->size += size; + e->time += time; + if (e->time > MAX_TIME * INLINE_TIME_SCALE) + e->time = MAX_TIME * INLINE_TIME_SCALE; + } +} + + +/* Work out what conditions might be true at invocation of E. */ + +static clause_t +evaulate_conditions_for_edge (struct cgraph_edge *e, bool inline_p) +{ + clause_t clause = inline_p ? 0 : 1 << predicate_not_inlined_condition; + struct inline_summary *info = inline_summary (e->callee); + int i; + + if (ipa_node_params_vector && info->conds + /* FIXME: it seems that we forget to get argument count in some cases, + probaby for previously indirect edges or so. */ + && ipa_get_cs_argument_count (IPA_EDGE_REF (e))) + { + struct ipa_node_params *parms_info; + struct ipa_edge_args *args = IPA_EDGE_REF (e); + int i, count = ipa_get_cs_argument_count (args); + struct ipcp_lattice lat; + struct condition *c; + VEC (tree, heap) *known_vals = NULL; + + if (e->caller->global.inlined_to) + parms_info = IPA_NODE_REF (e->caller->global.inlined_to); + else + parms_info = IPA_NODE_REF (e->caller); + + VEC_safe_grow_cleared (tree, heap, known_vals, count); + for (i = 0; i < count; i++) + { + ipa_lattice_from_jfunc (parms_info, &lat, ipa_get_ith_jump_func (args, i)); + if (lat.type == IPA_CONST_VALUE) + VEC_replace (tree, known_vals, i, lat.constant); + } + for (i = 0; VEC_iterate (condition, info->conds, i, c); i++) + { + tree val = VEC_index (tree, known_vals, c->operand_num); + tree res; + + if (!val) + { + clause |= 1 << (i + predicate_first_dynamic_condition); + continue; + } + if (c->code == IS_NOT_CONSTANT) + continue; + res = fold_binary_to_constant (c->code, boolean_type_node, val, c->val); + if (res + && integer_zerop (res)) + continue; + clause |= 1 << (i + predicate_first_dynamic_condition); + } + VEC_free (tree, heap, known_vals); + } + else + for (i = 0; i < (int)VEC_length (condition, info->conds); i++) + clause |= 1 << (i + predicate_first_dynamic_condition); + + return clause; +} + /* Allocate the inline summary vector or resize it to cover all cgraph nodes. */ @@ -91,7 +503,7 @@ inline_summary_alloc (void) if (VEC_length (inline_summary_t, inline_summary_vec) <= (unsigned) cgraph_max_uid) - VEC_safe_grow_cleared (inline_summary_t, heap, + VEC_safe_grow_cleared (inline_summary_t, gc, inline_summary_vec, cgraph_max_uid + 1); } @@ -105,7 +517,11 @@ inline_node_removal_hook (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED) <= (unsigned)node->uid) return; info = inline_summary (node); - info->estimated_growth = INT_MIN; + reset_node_growth_cache (node); + VEC_free (condition, gc, info->conds); + VEC_free (size_time_entry, gc, info->entry); + info->conds = NULL; + info->entry = NULL; memset (info, 0, sizeof (inline_summary_t)); } @@ -120,15 +536,58 @@ inline_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst, info = inline_summary (dst); memcpy (info, inline_summary (src), sizeof (struct inline_summary)); - info->estimated_growth = INT_MIN; + info->conds = VEC_copy (condition, gc, info->conds); + info->entry = VEC_copy (size_time_entry, gc, info->entry); +} + + +/* Keep edge cache consistent across edge removal. */ + +static void +inline_edge_removal_hook (struct cgraph_edge *edge, void *data ATTRIBUTE_UNUSED) +{ + reset_edge_growth_cache (edge); +} + + +/* Initialize growth caches. */ + +void +initialize_growth_caches (void) +{ + if (!edge_removal_hook_holder) + edge_removal_hook_holder = + cgraph_add_edge_removal_hook (&inline_edge_removal_hook, NULL); + if (cgraph_edge_max_uid) + VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache, + cgraph_edge_max_uid); + if (cgraph_max_uid) + VEC_safe_grow_cleared (int, heap, node_growth_cache, cgraph_max_uid); +} + + +/* Free growth caches. */ + +void +free_growth_caches (void) +{ + if (edge_removal_hook_holder) + cgraph_remove_edge_removal_hook (edge_removal_hook_holder); + VEC_free (edge_growth_cache_entry, heap, edge_growth_cache); + edge_growth_cache = 0; + VEC_free (int, heap, node_growth_cache); + node_growth_cache = 0; } + static void -dump_inline_summary (FILE *f, struct cgraph_node *node) +dump_inline_summary (FILE * f, struct cgraph_node *node) { if (node->analyzed) { struct inline_summary *s = inline_summary (node); + size_time_entry *e; + int i; fprintf (f, "Inline summary for %s/%i", cgraph_node_name (node), node->uid); if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) @@ -137,16 +596,26 @@ dump_inline_summary (FILE *f, struct cgraph_node *node) fprintf (f, " inlinable"); if (s->versionable) fprintf (f, " versionable"); - fprintf (f, "\n self time: %i, benefit: %i\n", - s->self_time, s->time_inlining_benefit); + fprintf (f, "\n self time: %i\n", + s->self_time); fprintf (f, " global time: %i\n", s->time); - fprintf (f, " self size: %i, benefit: %i\n", - s->self_size, s->size_inlining_benefit); + fprintf (f, " self size: %i\n", + s->self_size); fprintf (f, " global size: %i\n", s->size); fprintf (f, " self stack: %i\n", - (int)s->estimated_self_stack_size); - fprintf (f, " global stack: %i\n\n", - (int)s->estimated_stack_size); + (int) s->estimated_self_stack_size); + fprintf (f, " global stack: %i\n", + (int) s->estimated_stack_size); + for (i = 0; + VEC_iterate (size_time_entry, s->entry, i, e); + i++) + { + fprintf (f, " size:%f, time:%f, predicate:", + (double) e->size / INLINE_SIZE_SCALE, + (double) e->time / INLINE_TIME_SCALE); + dump_predicate (f, s->conds, &e->predicate); + } + fprintf (f, "\n"); } } @@ -259,31 +728,164 @@ eliminated_by_inlining_prob (gimple stmt) } -/* Compute function body size parameters for NODE. */ +/* Return predicate that must be true when is E executable. */ + +static struct predicate +edge_execution_predicate (struct ipa_node_params *info, + struct inline_summary *summary, + edge e) +{ + struct predicate p = true_predicate (); + gimple last; + tree op; + int index; + enum tree_code code; + + if (e->src == ENTRY_BLOCK_PTR) + return p; + + last = last_stmt (e->src); + /* TODO: handle switch. */ + if (!last + || gimple_code (last) != GIMPLE_COND) + return p; + if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) + return p; + op = gimple_cond_lhs (last); + /* TODO: handle conditionals like + var = op0 < 4; + if (var != 0) + and __bulitin_constant_p. */ + if (TREE_CODE (op) != SSA_NAME + || !SSA_NAME_IS_DEFAULT_DEF (op)) + return p; + index = ipa_get_param_decl_index (info, SSA_NAME_VAR (op)); + if (index == -1) + return p; + code = gimple_cond_code (last); + + if (EDGE_TRUE_VALUE) + code = invert_tree_comparison (code, + HONOR_NANS (TYPE_MODE (TREE_TYPE (op)))); + + return add_condition (summary, + index, + gimple_cond_code (last), + gimple_cond_rhs (last)); +} + +static struct predicate +will_be_nonconstant_predicate (struct ipa_node_params *info, + struct inline_summary *summary, + gimple stmt) +{ + struct predicate p = true_predicate (); + ssa_op_iter iter; + tree use; + struct predicate op_non_const; + + /* What statments might be optimized away + when their arguments are constant + TODO: also trivial builtins, especially builtin_constant_p. */ + if (gimple_code (stmt) != GIMPLE_ASSIGN + && gimple_code (stmt) != GIMPLE_COND + && gimple_code (stmt) != GIMPLE_SWITCH) + return p; + + /* Stores and loads will stay anyway. */ + if (gimple_vuse (stmt)) + return p; + + /* See if we understand all operands before we start + adding conditionals. */ + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) + { + /* TODO: handle nested expressions and constant + array accesses. */ + if (TREE_CODE (use) != SSA_NAME + || !SSA_NAME_IS_DEFAULT_DEF (use) + || ipa_get_param_decl_index (info, SSA_NAME_VAR (use)) < 0) + return p; + } + op_non_const = false_predicate (); + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) + { + p = add_condition (summary, + ipa_get_param_decl_index (info, SSA_NAME_VAR (use)), + IS_NOT_CONSTANT, NULL); + op_non_const = or_predicates (&p, &op_non_const); + } + return op_non_const; +} + + +/* Compute function body size parameters for NODE. + When EARLY is true, we compute only simple summaries without + non-trivial predicates to drive the early inliner. */ static void -estimate_function_body_sizes (struct cgraph_node *node) +estimate_function_body_sizes (struct cgraph_node *node, bool early) { gcov_type time = 0; - gcov_type time_inlining_benefit = 0; /* Estimate static overhead for function prologue/epilogue and alignment. */ int size = 2; /* Benefits are scaled by probability of elimination that is in range <0,2>. */ - int size_inlining_benefit = 2 * 2; basic_block bb; gimple_stmt_iterator bsi; struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); int freq; + struct inline_summary *info = inline_summary (node); + struct predicate bb_predicate; + struct ipa_node_params *parms_info; + + parms_info = ipa_node_params_vector && !early ? IPA_NODE_REF (node) : NULL; + + info->conds = 0; + info->entry = 0; + if (dump_file) - fprintf (dump_file, "Analyzing function body size: %s\n", + fprintf (dump_file, "\nAnalyzing function body size: %s\n", cgraph_node_name (node)); + /* When we run into maximal number of entries, we assign everything to the + constant truth case. Be sure to have it in list. */ + bb_predicate = true_predicate (); + account_size_time (info, 0, 0, &bb_predicate); + + bb_predicate = not_inlined_predicate (); + account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate); + + gcc_assert (my_function && my_function->cfg); FOR_EACH_BB_FN (bb, my_function) { + edge e; + edge_iterator ei; + freq = compute_call_stmt_bb_frequency (node->decl, bb); + + /* TODO: Obviously predicates can be propagated down across CFG. */ + if (parms_info) + { + bb_predicate = false_predicate (); + FOR_EACH_EDGE (e, ei, bb->preds) + { + struct predicate ep; + ep = edge_execution_predicate (parms_info, info, e); + bb_predicate = or_predicates (&ep, &bb_predicate); + } + } + else + bb_predicate = true_predicate (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\n BB %i predicate:", bb->index); + dump_predicate (dump_file, info->conds, &bb_predicate); + } + for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) { gimple stmt = gsi_stmt (bsi); @@ -293,9 +895,10 @@ estimate_function_body_sizes (struct cgraph_node *node) if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, " freq:%6i size:%3i time:%3i ", - freq, this_size, this_time); + fprintf (dump_file, " "); print_gimple_stmt (dump_file, stmt, 0, 0); + fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n", + ((double)freq)/CGRAPH_FREQ_BASE, this_size, this_time); } if (is_gimple_call (stmt)) @@ -304,8 +907,8 @@ estimate_function_body_sizes (struct cgraph_node *node) edge->call_stmt_size = this_size; edge->call_stmt_time = this_time; - /* Do not inline calls where we cannot triviall work around mismatches - in argument or return types. */ + /* Do not inline calls where we cannot triviall work around + mismatches in argument or return types. */ if (edge->callee && !gimple_check_call_matching_types (stmt, edge->callee->decl)) { @@ -316,46 +919,70 @@ estimate_function_body_sizes (struct cgraph_node *node) gcc_assert (!gimple_call_cannot_inline_p (stmt)); } - this_time *= freq; - time += this_time; - size += this_size; + if (this_time || this_size) + { + struct predicate will_be_nonconstant; + struct predicate p; + + this_time *= freq; + time += this_time; + size += this_size; - prob = eliminated_by_inlining_prob (stmt); - if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " 50%% will be eliminated by inlining\n"); - if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " will eliminated by inlining\n"); + prob = eliminated_by_inlining_prob (stmt); + if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\t\t50%% will be eliminated by inlining\n"); + if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\t\twill eliminated by inlining\n"); + + if (parms_info) + { + will_be_nonconstant + = will_be_nonconstant_predicate (parms_info, info, stmt); + p = and_predicates (&bb_predicate, &will_be_nonconstant); + } + else + p = true_predicate (); - size_inlining_benefit += this_size * prob; - time_inlining_benefit += this_time * prob; + /* We account everything but the calls. Calls have their own + size/time info attached to cgraph edges. This is neccesary + in order to make the cost disappear after inlining. */ + if (!is_gimple_call (stmt)) + { + if (prob) + { + struct predicate ip = not_inlined_predicate (); + ip = and_predicates (&ip, &p); + account_size_time (info, this_size * prob, + this_time * prob, &ip); + } + if (prob != 2) + account_size_time (info, this_size * (2 - prob), + this_time * (2 - prob), &p); + } - gcc_assert (time >= 0); - gcc_assert (size >= 0); + gcc_assert (time >= 0); + gcc_assert (size >= 0); + } } } time = (time + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; - time_inlining_benefit = ((time_inlining_benefit + CGRAPH_FREQ_BASE) - / (CGRAPH_FREQ_BASE * 2)); - size_inlining_benefit = (size_inlining_benefit + 1) / 2; - if (time_inlining_benefit > MAX_TIME) - time_inlining_benefit = MAX_TIME; if (time > MAX_TIME) time = MAX_TIME; - if (dump_file) - fprintf (dump_file, "Overall function body time: %i-%i size: %i-%i\n", - (int)time, (int)time_inlining_benefit, - size, size_inlining_benefit); inline_summary (node)->self_time = time; inline_summary (node)->self_size = size; - inline_summary (node)->time_inlining_benefit = time_inlining_benefit; - inline_summary (node)->size_inlining_benefit = size_inlining_benefit; + if (dump_file) + { + fprintf (dump_file, "\n"); + dump_inline_summary (dump_file, node); + } } -/* Compute parameters of functions used by inliner. */ +/* Compute parameters of functions used by inliner. + EARLY is true when we compute parameters for the early inliner */ void -compute_inline_parameters (struct cgraph_node *node) +compute_inline_parameters (struct cgraph_node *node, bool early) { HOST_WIDE_INT self_stack_size; struct cgraph_edge *e; @@ -389,12 +1016,11 @@ compute_inline_parameters (struct cgraph_node *node) break; node->local.can_change_signature = !e; } - estimate_function_body_sizes (node); + estimate_function_body_sizes (node, early); /* Inlining characteristics are maintained by the cgraph_mark_inline. */ info->time = info->self_time; info->size = info->self_size; - info->estimated_growth = INT_MIN; info->stack_frame_offset = 0; info->estimated_stack_size = info->estimated_self_stack_size; } @@ -406,7 +1032,7 @@ compute_inline_parameters (struct cgraph_node *node) static unsigned int compute_inline_parameters_for_current (void) { - compute_inline_parameters (cgraph_get_node (current_function_decl)); + compute_inline_parameters (cgraph_get_node (current_function_decl), true); return 0; } @@ -430,20 +1056,279 @@ struct gimple_opt_pass pass_inline_parameters = }; -/* Estimate the time cost for the caller when inlining EDGE. */ +/* Increase SIZE and TIME for size and time needed to handle edge E. */ + +static void +estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *time) +{ + *size += e->call_stmt_size * INLINE_SIZE_SCALE; + *time += (e->call_stmt_time + * e->frequency * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)); + if (*time > MAX_TIME * INLINE_TIME_SCALE) + *time = MAX_TIME * INLINE_TIME_SCALE; +} + + +/* Increase SIZE and TIME for size and time needed to handle all calls in NODE. */ + +static void +estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time) +{ + struct cgraph_edge *e; + for (e = node->callees; e; e = e->next_callee) + if (e->inline_failed) + estimate_edge_size_and_time (e, size, time); + else + estimate_calls_size_and_time (e->callee, size, time); + /* TODO: look for devirtualizing oppurtunities. */ + for (e = node->indirect_calls; e; e = e->next_callee) + estimate_edge_size_and_time (e, size, time); +} + + +/* Estimate size and time needed to execute callee of EDGE assuming + that parameters known to be constant at caller of EDGE are + propagated. If INLINE_P is true, it is assumed that call will + be inlined. */ -static inline int -estimate_edge_time (struct cgraph_edge *edge) +static void +estimate_callee_size_and_time (struct cgraph_edge *edge, bool inline_p, + int *ret_size, int *ret_time) { - int call_stmt_time; struct inline_summary *info = inline_summary (edge->callee); + clause_t clause = evaulate_conditions_for_edge (edge, inline_p); + size_time_entry *e; + int size = 0, time = 0; + int i; + + if (dump_file + && (dump_flags & TDF_DETAILS)) + { + bool found = false; + fprintf (dump_file, " Estimating callee body: %s/%i\n" + " Known to be false: ", + cgraph_node_name (edge->callee), + edge->callee->uid); + + for (i = predicate_not_inlined_condition; + i < (predicate_first_dynamic_condition + + (int)VEC_length (condition, info->conds)); i++) + if (!(clause & (1 << i))) + { + if (found) + fprintf (dump_file, ", "); + found = true; + dump_condition (dump_file, info->conds, i); + } + } + + for (i = 0; VEC_iterate (size_time_entry, info->entry, i, e); i++) + if (evaulate_predicate (&e->predicate, clause)) + time += e->time, size += e->size; - call_stmt_time = edge->call_stmt_time; - gcc_checking_assert (call_stmt_time); - return (((gcov_type)info->time - - info->time_inlining_benefit - - call_stmt_time) * edge->frequency - + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; + if (time > MAX_TIME * INLINE_TIME_SCALE) + time = MAX_TIME * INLINE_TIME_SCALE; + + estimate_calls_size_and_time (edge->callee, &size, &time); + time = (time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE; + size = (size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE; + + + if (dump_file + && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n size:%i time:%i\n", size, time); + if (ret_time) + *ret_time = time; + if (ret_size) + *ret_size = size; + return; +} + + +/* Translate all conditions from callee representation into caller representaiton and + symbolically evaulate predicate P into new predicate. */ + +static struct predicate +remap_predicate (struct inline_summary *info, struct inline_summary *callee_info, + struct predicate *p, + VEC (int, heap) *operand_map, + clause_t possible_truths) +{ + int i; + struct predicate out = true_predicate (); + + /* True predicate is easy. */ + if (p->clause[0] == 0) + return *p; + for (i = 0; p->clause[i]; i++) + { + clause_t clause = p->clause[i]; + int cond; + struct predicate clause_predicate = false_predicate (); + + for (cond = 0; cond < NUM_CONDITIONS; cond ++) + /* Do we have condition we can't disprove? */ + if (clause & possible_truths & (1 << cond)) + { + struct predicate cond_predicate; + /* Work out if the condition can translate to predicate in the + inlined function. */ + if (cond >= predicate_first_dynamic_condition) + { + struct condition *c; + + c = VEC_index (condition, callee_info->conds, + cond - predicate_first_dynamic_condition); + /* See if we can remap condition operand to caller's operand. + Otherwise give up. */ + if (!operand_map + || VEC_index (int, operand_map, c->operand_num) == -1) + cond_predicate = true_predicate (); + else + cond_predicate = add_condition (info, + VEC_index (int, operand_map, + c->operand_num), + c->code, c->val); + } + /* Fixed conditions remains same, construct single + condition predicate. */ + else + { + cond_predicate.clause[0] = 1 << cond; + cond_predicate.clause[1] = 0; + } + clause_predicate = or_predicates (&clause_predicate, &cond_predicate); + } + out = and_predicates (&out, &clause_predicate); + } + return out; +} + + +/* We inlined EDGE. Update summary of the function we inlined into. */ + +void +inline_merge_summary (struct cgraph_edge *edge) +{ + struct inline_summary *callee_info = inline_summary (edge->callee); + struct cgraph_node *to = (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to : edge->caller); + struct inline_summary *info = inline_summary (to); + clause_t clause = 0; /* not_inline is known to be false. */ + size_time_entry *e; + VEC (int, heap) *operand_map = NULL; + int i; + + if (ipa_node_params_vector && callee_info->conds + /* FIXME: it seems that we forget to get argument count in some cases, + probaby for previously indirect edges or so. + Removing the test leads to ICE on tramp3d. */ + && ipa_get_cs_argument_count (IPA_EDGE_REF (edge))) + { + struct ipa_edge_args *args = IPA_EDGE_REF (edge); + int count = ipa_get_cs_argument_count (args); + int i; + + clause = evaulate_conditions_for_edge (edge, true); + VEC_safe_grow_cleared (int, heap, operand_map, count); + for (i = 0; i < count; i++) + { + struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i); + int map = -1; + /* TODO: handle non-NOPs when merging. */ + if (jfunc->type == IPA_JF_PASS_THROUGH + && jfunc->value.pass_through.operation == NOP_EXPR) + map = jfunc->value.pass_through.formal_id; + VEC_replace (int, operand_map, i, map); + } + } + for (i = 0; VEC_iterate (size_time_entry, callee_info->entry, i, e); i++) + { + struct predicate p = remap_predicate (info, callee_info, + &e->predicate, operand_map, clause); + gcov_type add_time = ((gcov_type)e->time * edge->frequency + + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; + if (add_time > MAX_TIME) + add_time = MAX_TIME; + account_size_time (info, e->size, add_time, &p); + } + info->size = 0; + info->time = 0; + for (i = 0; VEC_iterate (size_time_entry, info->entry, i, e); i++) + info->size += e->size, info->time += e->time; + estimate_calls_size_and_time (to, &info->size, &info->time); + info->time = (info->time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE; + info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE; +} + + +/* Estimate the time cost for the caller when inlining EDGE. + Only to be called via estimate_edge_time, that handles the + caching mechanism. + + When caching, also update the cache entry. Compute both time and + size, since we always need both metrics eventually. */ + +int +do_estimate_edge_time (struct cgraph_edge *edge) +{ + int time; + int size; + gcov_type ret; + + gcc_checking_assert (edge->inline_failed); + estimate_callee_size_and_time (edge, true, &size, &time); + + ret = (((gcov_type)time - edge->call_stmt_time) * edge->frequency + + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; + if (ret > MAX_TIME) + ret = MAX_TIME; + + /* When caching, update the cache entry. */ + if (edge_growth_cache) + { + int ret_size; + if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache) + <= edge->uid) + VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache, + cgraph_edge_max_uid); + VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid)->time + = ret + (ret >= 0); + + ret_size = size - edge->call_stmt_size; + gcc_checking_assert (edge->call_stmt_size); + VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid)->size + = ret_size + (ret_size >= 0); + } + return ret; +} + + +/* Estimate the growth of the caller when inlining EDGE. + Only to be called via estimate_edge_size. */ + +int +do_estimate_edge_growth (struct cgraph_edge *edge) +{ + int size; + + /* When we do caching, use do_estimate_edge_time to populate the entry. */ + + if (edge_growth_cache) + { + do_estimate_edge_time (edge); + size = VEC_index (edge_growth_cache_entry, + edge_growth_cache, + edge->uid)->size; + gcc_checking_assert (size); + return size - (size > 0); + } + + /* Early inliner runs without caching, go ahead and do the dirty work. */ + gcc_checking_assert (edge->inline_failed); + estimate_callee_size_and_time (edge, true, &size, NULL); + gcc_checking_assert (edge->call_stmt_size); + return size - edge->call_stmt_size; } @@ -478,16 +1363,13 @@ estimate_size_after_inlining (struct cgraph_node *node, /* Estimate the growth caused by inlining NODE into all callees. */ int -estimate_growth (struct cgraph_node *node) +do_estimate_growth (struct cgraph_node *node) { int growth = 0; struct cgraph_edge *e; bool self_recursive = false; struct inline_summary *info = inline_summary (node); - if (info->estimated_growth != INT_MIN) - return info->estimated_growth; - for (e = node->callers; e; e = e->next_caller) { gcc_checking_assert (e->inline_failed); @@ -519,7 +1401,12 @@ estimate_growth (struct cgraph_node *node) * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY)) + 50) / 100; } - info->estimated_growth = growth; + if (node_growth_cache) + { + if ((int)VEC_length (int, node_growth_cache) <= node->uid) + VEC_safe_grow_cleared (int, heap, node_growth_cache, cgraph_max_uid); + VEC_replace (int, node_growth_cache, node->uid, growth + (growth >= 0)); + } return growth; } @@ -547,11 +1434,14 @@ inline_analyze_function (struct cgraph_node *node) push_cfun (DECL_STRUCT_FUNCTION (node->decl)); current_function_decl = node->decl; - compute_inline_parameters (node); + if (dump_file) + fprintf (dump_file, "\nAnalyzing function: %s/%u\n", + cgraph_node_name (node), node->uid); /* FIXME: We should remove the optimize check after we ensure we never run IPA passes when not optimizing. */ if (flag_indirect_inlining && optimize) inline_indirect_intraprocedural_analysis (node); + compute_inline_parameters (node, false); current_function_decl = NULL; pop_cfun (); @@ -586,6 +1476,87 @@ inline_generate_summary (void) } +/* Stream in inline summaries from the section. */ + +static void +inline_read_section (struct lto_file_decl_data *file_data, const char *data, + size_t len) +{ + const struct lto_function_header *header = + (const struct lto_function_header *) data; + const int32_t cfg_offset = sizeof (struct lto_function_header); + const int32_t main_offset = cfg_offset + header->cfg_size; + const int32_t string_offset = main_offset + header->main_size; + struct data_in *data_in; + struct lto_input_block ib; + unsigned int i, count2, j; + unsigned int f_count; + + LTO_INIT_INPUT_BLOCK (ib, (const char *) data + main_offset, 0, + header->main_size); + + data_in = + lto_data_in_create (file_data, (const char *) data + string_offset, + header->string_size, NULL); + f_count = lto_input_uleb128 (&ib); + for (i = 0; i < f_count; i++) + { + unsigned int index; + struct cgraph_node *node; + struct inline_summary *info; + lto_cgraph_encoder_t encoder; + struct bitpack_d bp; + + index = lto_input_uleb128 (&ib); + encoder = file_data->cgraph_node_encoder; + node = lto_cgraph_encoder_deref (encoder, index); + info = inline_summary (node); + + info->estimated_stack_size + = info->estimated_self_stack_size = lto_input_uleb128 (&ib); + info->size = info->self_size = lto_input_uleb128 (&ib); + info->time = info->self_time = lto_input_uleb128 (&ib); + + bp = lto_input_bitpack (&ib); + info->inlinable = bp_unpack_value (&bp, 1); + info->versionable = bp_unpack_value (&bp, 1); + + count2 = lto_input_uleb128 (&ib); + gcc_assert (!info->conds); + for (j = 0; j < count2; j++) + { + struct condition c; + c.operand_num = lto_input_uleb128 (&ib); + c.code = (enum tree_code) lto_input_uleb128 (&ib); + c.val = lto_input_tree (&ib, data_in); + VEC_safe_push (condition, gc, info->conds, &c); + } + count2 = lto_input_uleb128 (&ib); + gcc_assert (!info->entry); + for (j = 0; j < count2; j++) + { + struct size_time_entry e; + clause_t clause; + int k = 0; + + e.size = lto_input_uleb128 (&ib); + e.time = lto_input_uleb128 (&ib); + do + { + clause = e.predicate.clause[k++] = lto_input_uleb128 (&ib); + } + while (clause); + + VEC_safe_push (size_time_entry, gc, info->entry, &e); + } + } + + lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data, + len); + lto_data_in_delete (data_in); +} + + /* Read inline summary. Jump functions are shared among ipa-cp and inliner, so when ipa-cp is active, we don't need to write them twice. */ @@ -603,46 +1574,8 @@ inline_read_summary (void) { size_t len; const char *data = lto_get_section_data (file_data, LTO_section_inline_summary, NULL, &len); - - struct lto_input_block *ib - = lto_create_simple_input_block (file_data, - LTO_section_inline_summary, - &data, &len); - if (ib) - { - unsigned int i; - unsigned int f_count = lto_input_uleb128 (ib); - - for (i = 0; i < f_count; i++) - { - unsigned int index; - struct cgraph_node *node; - struct inline_summary *info; - lto_cgraph_encoder_t encoder; - struct bitpack_d bp; - - index = lto_input_uleb128 (ib); - encoder = file_data->cgraph_node_encoder; - node = lto_cgraph_encoder_deref (encoder, index); - info = inline_summary (node); - - info->estimated_stack_size - = info->estimated_self_stack_size = lto_input_uleb128 (ib); - info->size = info->self_size = lto_input_uleb128 (ib); - info->size_inlining_benefit = lto_input_uleb128 (ib); - info->time = info->self_time = lto_input_uleb128 (ib); - info->time_inlining_benefit = lto_input_uleb128 (ib); - info->estimated_growth = INT_MIN; - - bp = lto_input_bitpack (ib); - info->inlinable = bp_unpack_value (&bp, 1); - info->versionable = bp_unpack_value (&bp, 1); - } - - lto_destroy_simple_input_block (file_data, - LTO_section_inline_summary, - ib, data, len); - } + if (data) + inline_read_section (file_data, data, len); else /* Fatal error here. We do not want to support compiling ltrans units with different version of compiler or different flags than the WPA unit, so @@ -669,8 +1602,7 @@ inline_write_summary (cgraph_node_set set, varpool_node_set vset ATTRIBUTE_UNUSED) { struct cgraph_node *node; - struct lto_simple_output_block *ob - = lto_create_simple_output_block (LTO_section_inline_summary); + struct output_block *ob = create_output_block (LTO_section_inline_summary); lto_cgraph_encoder_t encoder = ob->decl_state->cgraph_node_encoder; unsigned int count = 0; int i; @@ -687,6 +1619,10 @@ inline_write_summary (cgraph_node_set set, { struct inline_summary *info = inline_summary (node); struct bitpack_d bp; + int i; + size_time_entry *e; + struct condition *c; + lto_output_uleb128_stream (ob->main_stream, lto_cgraph_encoder_encode (encoder, node)); @@ -695,18 +1631,42 @@ inline_write_summary (cgraph_node_set set, lto_output_sleb128_stream (ob->main_stream, info->self_size); lto_output_sleb128_stream (ob->main_stream, - info->size_inlining_benefit); - lto_output_sleb128_stream (ob->main_stream, info->self_time); - lto_output_sleb128_stream (ob->main_stream, - info->time_inlining_benefit); bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, info->inlinable, 1); bp_pack_value (&bp, info->versionable, 1); lto_output_bitpack (&bp); + lto_output_uleb128_stream (ob->main_stream, + VEC_length (condition, info->conds)); + for (i = 0; VEC_iterate (condition, info->conds, i, c); i++) + { + lto_output_uleb128_stream (ob->main_stream, + c->operand_num); + lto_output_uleb128_stream (ob->main_stream, + c->code); + lto_output_tree (ob, c->val, true); + } + lto_output_uleb128_stream (ob->main_stream, + VEC_length (size_time_entry, info->entry)); + for (i = 0; + VEC_iterate (size_time_entry, info->entry, i, e); + i++) + { + int j; + lto_output_uleb128_stream (ob->main_stream, + e->time); + lto_output_uleb128_stream (ob->main_stream, + e->size); + for (j = 0; e->predicate.clause[j]; j++) + lto_output_uleb128_stream (ob->main_stream, + e->predicate.clause[j]); + lto_output_uleb128_stream (ob->main_stream, 0); + } } } - lto_destroy_simple_output_block (ob); + lto_output_1_stream (ob->main_stream, 0); + produce_asm (ob, NULL); + destroy_output_block (ob); if (flag_indirect_inlining && !flag_ipa_cp) ipa_prop_write_jump_functions (set); @@ -727,5 +1687,6 @@ inline_free_summary (void) if (node_duplication_hook_holder) cgraph_remove_node_duplication_hook (node_duplication_hook_holder); node_duplication_hook_holder = NULL; - VEC_free (inline_summary_t, heap, inline_summary_vec); + VEC_free (inline_summary_t, gc, inline_summary_vec); + inline_summary_vec = NULL; } diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c index 25ad84a..1fdb6d0 100644 --- a/gcc/ipa-inline-transform.c +++ b/gcc/ipa-inline-transform.c @@ -175,7 +175,6 @@ inline_call (struct cgraph_edge *e, bool update_original, int old_size = 0, new_size = 0; struct cgraph_node *to = NULL; struct cgraph_edge *curr = e; - struct inline_summary *info; /* Don't inline inlined edges. */ gcc_assert (e->inline_failed); @@ -185,18 +184,15 @@ inline_call (struct cgraph_edge *e, bool update_original, e->inline_failed = CIF_OK; DECL_POSSIBLY_INLINED (e->callee->decl) = true; + to = e->caller; + if (to->global.inlined_to) + to = to->global.inlined_to; + old_size = inline_summary (to)->size; + inline_merge_summary (e); + new_size = inline_summary (to)->size; + clone_inlined_nodes (e, true, update_original, overall_size); - /* Now update size of caller and all functions caller is inlined into. */ - for (;e && !e->inline_failed; e = e->caller->callers) - { - to = e->caller; - info = inline_summary (to); - old_size = info->size; - new_size = estimate_size_after_inlining (to, curr); - info->size = new_size; - info->time = estimate_time_after_inlining (to, curr); - } gcc_assert (curr->callee->global.inlined_to == to); if (overall_size && new_size > old_size) *overall_size += new_size - old_size; diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 48d3898..02cc773 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -117,7 +117,7 @@ along with GCC; see the file COPYING3. If not see /* Statistics we collect about inlining algorithm. */ static int overall_size; -static gcov_type max_count, max_benefit; +static gcov_type max_count; /* Return false when inlining edge E would lead to violating limits on function unit growth or stack usage growth. @@ -633,27 +633,23 @@ static int edge_badness (struct cgraph_edge *edge, bool dump) { gcov_type badness; - int growth; + int growth, time_growth; struct inline_summary *callee_info = inline_summary (edge->callee); if (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)) return INT_MIN; growth = estimate_edge_growth (edge); + time_growth = estimate_edge_time (edge); if (dump) { fprintf (dump_file, " Badness calculation for %s -> %s\n", cgraph_node_name (edge->caller), cgraph_node_name (edge->callee)); - fprintf (dump_file, " growth %i, time %i-%i, size %i-%i\n", + fprintf (dump_file, " growth size %i, time %i\n", growth, - callee_info->time, - callee_info->time_inlining_benefit - + edge->call_stmt_time, - callee_info->size, - callee_info->size_inlining_benefit - + edge->call_stmt_size); + time_growth); } /* Always prefer inlining saving code size. */ @@ -669,11 +665,16 @@ edge_badness (struct cgraph_edge *edge, bool dump) So we optimize for overall number of "executed" inlined calls. */ else if (max_count) { + int benefitperc; + benefitperc = (((gcov_type)callee_info->time + * edge->frequency / CGRAPH_FREQ_BASE - time_growth) * 100 + / (callee_info->time + 1) + 1); + benefitperc = MIN (benefitperc, 100); + benefitperc = MAX (benefitperc, 0); badness = ((int) - ((double) edge->count * INT_MIN / max_count / (max_benefit + 1)) * - (callee_info->time_inlining_benefit - + edge->call_stmt_time + 1)) / growth; + ((double) edge->count * INT_MIN / max_count / 100) * + benefitperc) / growth; /* Be sure that insanity of the profile won't lead to increasing counts in the scalling and thus to overflow in the computation above. */ @@ -685,9 +686,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) " * Relative benefit %f\n", (int) badness, (double) badness / INT_MIN, (double) edge->count / max_count, - (double) (inline_summary (edge->callee)-> - time_inlining_benefit - + edge->call_stmt_time + 1) / (max_benefit + 1)); + (double) benefitperc); } } @@ -706,11 +705,11 @@ edge_badness (struct cgraph_edge *edge, bool dump) int benefitperc; int growth_for_all; badness = growth * 10000; - benefitperc = - 100 * (callee_info->time_inlining_benefit - + edge->call_stmt_time) - / (callee_info->time + 1) + 1; + benefitperc = (((gcov_type)callee_info->time + * edge->frequency / CGRAPH_FREQ_BASE - time_growth) * 100 + / (callee_info->time + 1) + 1); benefitperc = MIN (benefitperc, 100); + benefitperc = MAX (benefitperc, 0); div *= benefitperc; /* Decrease badness if call is nested. */ @@ -822,7 +821,7 @@ update_caller_keys (fibheap_t heap, struct cgraph_node *node, return; if (!bitmap_set_bit (updated_nodes, node->uid)) return; - inline_summary (node)->estimated_growth = INT_MIN; + reset_node_growth_cache (node); /* See if there is something to do. */ for (edge = node->callers; edge; edge = edge->next_caller) @@ -834,6 +833,7 @@ update_caller_keys (fibheap_t heap, struct cgraph_node *node, for (; edge; edge = edge->next_caller) if (edge->inline_failed) { + reset_edge_growth_cache (edge); if (can_inline_edge_p (edge, false) && want_inline_small_function_p (edge, false)) update_edge_key (heap, edge); @@ -857,7 +857,7 @@ update_callee_keys (fibheap_t heap, struct cgraph_node *node, { struct cgraph_edge *e = node->callees; - inline_summary (node)->estimated_growth = INT_MIN; + reset_node_growth_cache (node); if (!e) return; @@ -866,12 +866,13 @@ update_callee_keys (fibheap_t heap, struct cgraph_node *node, e = e->callee->callees; else { + reset_edge_growth_cache (e); if (e->inline_failed && inline_summary (e->callee)->inlinable && cgraph_function_body_availability (e->callee) >= AVAIL_AVAILABLE && !bitmap_bit_p (updated_nodes, e->callee->uid)) { - inline_summary (node)->estimated_growth = INT_MIN; + reset_node_growth_cache (node); update_edge_key (heap, e); } if (e->next_callee) @@ -899,7 +900,7 @@ update_all_callee_keys (fibheap_t heap, struct cgraph_node *node, { struct cgraph_edge *e = node->callees; - inline_summary (node)->estimated_growth = INT_MIN; + reset_node_growth_cache (node); if (!e) return; @@ -1131,7 +1132,7 @@ inline_small_functions (void) metrics. */ max_count = 0; - max_benefit = 0; + initialize_growth_caches (); for (node = cgraph_nodes; node; node = node->next) if (node->analyzed @@ -1139,20 +1140,12 @@ inline_small_functions (void) { struct inline_summary *info = inline_summary (node); - info->estimated_growth = INT_MIN; - if (!DECL_EXTERNAL (node->decl)) initial_size += info->size; for (edge = node->callers; edge; edge = edge->next_caller) - { - int benefit = (info->time_inlining_benefit - + edge->call_stmt_time); - if (max_count < edge->count) - max_count = edge->count; - if (max_benefit < benefit) - max_benefit = benefit; - } + if (max_count < edge->count) + max_count = edge->count; } overall_size = initial_size; @@ -1354,14 +1347,15 @@ inline_small_functions (void) } } + free_growth_caches (); if (new_indirect_edges) VEC_free (cgraph_edge_p, heap, new_indirect_edges); fibheap_delete (heap); if (dump_file) fprintf (dump_file, "Unit growth for small function inlining: %i->%i (%i%%)\n", - overall_size, initial_size, - overall_size * 100 / (initial_size + 1) - 100); + initial_size, overall_size, + initial_size ? overall_size * 100 / (initial_size) - 100: 0); BITMAP_FREE (updated_nodes); } @@ -1369,7 +1363,7 @@ inline_small_functions (void) at IPA inlining time. */ static void -flatten_function (struct cgraph_node *node) +flatten_function (struct cgraph_node *node, bool early) { struct cgraph_edge *e; @@ -1398,14 +1392,14 @@ flatten_function (struct cgraph_node *node) it in order to fully flatten the leaves. */ if (!e->inline_failed) { - flatten_function (e->callee); + flatten_function (e->callee, early); continue; } /* Flatten attribute needs to be processed during late inlining. For extra code quality we however do flattening during early optimization, too. */ - if (cgraph_state != CGRAPH_STATE_IPA_SSA + if (!early ? !can_inline_edge_p (e, true) : !can_early_inline_edge_p (e)) continue; @@ -1435,7 +1429,7 @@ flatten_function (struct cgraph_node *node) inline_call (e, true, NULL, NULL); if (e->callee != orig_callee) orig_callee->aux = (void *) node; - flatten_function (e->callee); + flatten_function (e->callee, early); if (e->callee != orig_callee) orig_callee->aux = NULL; } @@ -1488,7 +1482,7 @@ ipa_inline (void) if (dump_file) fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node)); - flatten_function (node); + flatten_function (node, false); } } @@ -1696,7 +1690,7 @@ early_inliner (void) if (dump_file) fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node)); - flatten_function (node); + flatten_function (node, true); inlined = true; } else diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index 48415e6..4fe4489 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -19,9 +19,60 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ -/* Function inlining information. */ +/* Representation of inline parameters that do depend on context function is + inlined into (i.e. known constant values of function parameters. + + Conditions that are interesting for function body are collected into CONDS + vector. They are of simple for function_param OP VAL, where VAL is + IPA invariant. The conditions are then refered by predicates. */ + +typedef struct GTY(()) condition + { + tree val; + int operand_num; + enum tree_code code; + } condition; + +DEF_VEC_O (condition); +DEF_VEC_ALLOC_O (condition, gc); + +typedef VEC(condition,gc) *conditions; + +/* Representation of predicates i.e. formulas using conditions defined + above. Predicates are simple logical formulas in conjunctive-disjunctive + form. + + Predicate is array of clauses terminated by 0. Every clause must be true + in order to make predicate true. + Clauses are represented as bitmaps of conditions. One of conditions + must be true in order for clause to be true. */ + +#define MAX_CLAUSES 8 +typedef int clause_t; +struct GTY(()) predicate +{ + clause_t clause[MAX_CLAUSES + 1]; +}; + +/* Represnetation of function body size and time depending on the inline + context. We keep simple array of record, every containing of predicate + and time/size to account. -struct inline_summary + We keep values scaled up, so fractional sizes and times can be + accounted. */ +#define INLINE_SIZE_SCALE 2 +#define INLINE_TIME_SCALE (CGRAPH_FREQ_BASE * 2) +typedef struct GTY(()) size_time_entry +{ + struct predicate predicate; + int size; + int time; +} size_time_entry; +DEF_VEC_O (size_time_entry); +DEF_VEC_ALLOC_O (size_time_entry, gc); + +/* Function inlining information. */ +struct GTY(()) inline_summary { /* Information about the function body itself. */ @@ -29,12 +80,8 @@ struct inline_summary HOST_WIDE_INT estimated_self_stack_size; /* Size of the function body. */ int self_size; - /* How many instructions are likely going to disappear after inlining. */ - int size_inlining_benefit; - /* Estimated time spent executing the function body. */ + /* Time of the function body. */ int self_time; - /* How much time is going to be saved by inlining. */ - int time_inlining_benefit; /* False when there something makes inlining impossible (such as va_arg). */ unsigned inlinable : 1; @@ -53,15 +100,25 @@ struct inline_summary /* Estimated size of the function after inlining. */ int time; int size; - /* Cached estimated growth after inlining. - INT_MIN if not computed. */ - int estimated_growth; + + conditions conds; + VEC(size_time_entry,gc) *entry; }; typedef struct inline_summary inline_summary_t; DEF_VEC_O(inline_summary_t); -DEF_VEC_ALLOC_O(inline_summary_t,heap); -extern VEC(inline_summary_t,heap) *inline_summary_vec; +DEF_VEC_ALLOC_O(inline_summary_t,gc); +extern GTY(()) VEC(inline_summary_t,gc) *inline_summary_vec; + +typedef struct edge_growth_cache_entry +{ + int time, size; +} edge_growth_cache_entry; +DEF_VEC_O(edge_growth_cache_entry); +DEF_VEC_ALLOC_O(edge_growth_cache_entry,heap); + +extern VEC(int,heap) *node_growth_cache; +extern VEC(edge_growth_cache_entry,heap) *edge_growth_cache; /* In ipa-inline-analysis.c */ void debug_inline_summary (struct cgraph_node *); @@ -73,7 +130,13 @@ void inline_free_summary (void); void initialize_inline_failed (struct cgraph_edge *); int estimate_time_after_inlining (struct cgraph_node *, struct cgraph_edge *); int estimate_size_after_inlining (struct cgraph_node *, struct cgraph_edge *); -int estimate_growth (struct cgraph_node *); +int do_estimate_growth (struct cgraph_node *); +void inline_merge_summary (struct cgraph_edge *edge); +int do_estimate_edge_growth (struct cgraph_edge *edge); +int do_estimate_edge_time (struct cgraph_edge *edge); +void initialize_growth_caches (void); +void free_growth_caches (void); +void compute_inline_parameters (struct cgraph_node *, bool); /* In ipa-inline-transform.c */ bool inline_call (struct cgraph_edge *, bool, VEC (cgraph_edge_p, heap) **, int *); @@ -89,16 +152,71 @@ inline_summary (struct cgraph_node *node) return VEC_index (inline_summary_t, inline_summary_vec, node->uid); } -/* Estimate the growth of the caller when inlining EDGE. */ + +/* Return estimated unit growth after inlning all calls to NODE. + Quick accesors to the inline growth caches. + For convenience we keep zero 0 as unknown. Because growth + can be both positive and negative, we simply increase positive + growths by 1. */ +static inline int +estimate_growth (struct cgraph_node *node) +{ + int ret; + if ((int)VEC_length (int, node_growth_cache) <= node->uid + || !(ret = VEC_index (int, node_growth_cache, node->uid))) + return do_estimate_growth (node); + return ret - (ret > 0); +} + + +/* Return estimated callee growth after inlining EDGE. */ static inline int estimate_edge_growth (struct cgraph_edge *edge) { - int call_stmt_size; - struct inline_summary *info = inline_summary (edge->callee); - call_stmt_size = edge->call_stmt_size; - gcc_checking_assert (call_stmt_size); - return (info->size - - info->size_inlining_benefit - - call_stmt_size); + int ret; + if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache) <= edge->uid + || !(ret = VEC_index (edge_growth_cache_entry, + edge_growth_cache, + edge->uid)->size)) + return do_estimate_edge_growth (edge); + return ret - (ret > 0); +} + + +/* Return estimated callee runtime increase after inlning + EDGE. */ + +static inline int +estimate_edge_time (struct cgraph_edge *edge) +{ + int ret; + if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache) <= edge->uid + || !(ret = VEC_index (edge_growth_cache_entry, + edge_growth_cache, + edge->uid)->size)) + return do_estimate_edge_time (edge); + return ret - (ret > 0); +} + + +/* Reset cached value for NODE. */ + +static inline void +reset_node_growth_cache (struct cgraph_node *node) +{ + if ((int)VEC_length (int, node_growth_cache) > node->uid) + VEC_replace (int, node_growth_cache, node->uid, 0); +} + +/* Reset cached value for EDGE. */ + +static inline void +reset_edge_growth_cache (struct cgraph_edge *edge) +{ + if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache) > edge->uid) + { + struct edge_growth_cache_entry zero = {0, 0}; + VEC_replace (edge_growth_cache_entry, edge_growth_cache, edge->uid, &zero); + } } diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index d738654..afec188 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -126,7 +126,7 @@ ipa_pop_func_from_list (struct ipa_func_list **wl) /* Return index of the formal whose tree is PTREE in function which corresponds to INFO. */ -static int +int ipa_get_param_decl_index (struct ipa_node_params *info, tree ptree) { int i, count; @@ -2986,3 +2986,66 @@ ipa_update_after_lto_read (void) ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee)); } } + +/* Given the jump function JFUNC, compute the lattice LAT that describes the + value coming down the callsite. INFO describes the caller node so that + pass-through jump functions can be evaluated. */ +void +ipa_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, + struct ipa_jump_func *jfunc) +{ + if (jfunc->type == IPA_JF_CONST) + { + lat->type = IPA_CONST_VALUE; + lat->constant = jfunc->value.constant; + } + else if (jfunc->type == IPA_JF_PASS_THROUGH) + { + struct ipcp_lattice *caller_lat; + tree cst; + + caller_lat = ipa_get_lattice (info, jfunc->value.pass_through.formal_id); + lat->type = caller_lat->type; + if (caller_lat->type != IPA_CONST_VALUE) + return; + cst = caller_lat->constant; + + if (jfunc->value.pass_through.operation != NOP_EXPR) + { + tree restype; + if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) + == tcc_comparison) + restype = boolean_type_node; + else + restype = TREE_TYPE (cst); + cst = fold_binary (jfunc->value.pass_through.operation, + restype, cst, jfunc->value.pass_through.operand); + } + if (!cst || !is_gimple_ip_invariant (cst)) + lat->type = IPA_BOTTOM; + lat->constant = cst; + } + else if (jfunc->type == IPA_JF_ANCESTOR) + { + struct ipcp_lattice *caller_lat; + tree t; + + caller_lat = ipa_get_lattice (info, jfunc->value.ancestor.formal_id); + lat->type = caller_lat->type; + if (caller_lat->type != IPA_CONST_VALUE) + return; + if (TREE_CODE (caller_lat->constant) != ADDR_EXPR) + { + /* This can happen when the constant is a NULL pointer. */ + lat->type = IPA_BOTTOM; + return; + } + t = TREE_OPERAND (caller_lat->constant, 0); + t = build_ref_for_offset (EXPR_LOCATION (t), t, + jfunc->value.ancestor.offset, + jfunc->value.ancestor.type, NULL, false); + lat->constant = build_fold_addr_expr (t); + } + else + lat->type = IPA_BOTTOM; +} diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 9244d5a..3ae0d1b 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -515,9 +515,20 @@ void ipa_dump_param_adjustments (FILE *, ipa_parm_adjustment_vec, tree); void ipa_prop_write_jump_functions (cgraph_node_set set); void ipa_prop_read_jump_functions (void); void ipa_update_after_lto_read (void); +int ipa_get_param_decl_index (struct ipa_node_params *, tree); +void ipa_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, + struct ipa_jump_func *jfunc); /* From tree-sra.c: */ tree build_ref_for_offset (location_t, tree, HOST_WIDE_INT, tree, gimple_stmt_iterator *, bool); +/* Return the lattice corresponding to the Ith formal parameter of the function + described by INFO. */ +static inline struct ipcp_lattice * +ipa_get_lattice (struct ipa_node_params *info, int i) +{ + return &(info->params[i].ipcp_lattice); +} + #endif /* IPA_PROP_H */ diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c index 47109ab..9579d41 100644 --- a/gcc/ipa-split.c +++ b/gcc/ipa-split.c @@ -1254,7 +1254,7 @@ split_function (struct split_point *split_point) } free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); - compute_inline_parameters (node); + compute_inline_parameters (node, true); } /* Execute function splitting pass. */ diff --git a/gcc/params.def b/gcc/params.def index 5e8be21..fa89a52 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -188,7 +188,7 @@ DEFPARAM(PARAM_IPCP_UNIT_GROWTH, DEFPARAM(PARAM_EARLY_INLINING_INSNS, "early-inlining-insns", "Maximal estimated growth of function body caused by early inlining of single call", - 10, 0, 0) + 11, 0, 0) DEFPARAM(PARAM_LARGE_STACK_FRAME, "large-stack-frame", "The size of stack frame to be considered large", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 639886a..40b9e6d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2011-04-22 Jan Hubicka + + * gcc.dg/tree-ssa/pr38699.c: Fix testcase. + 2011-04-22 Jakub Jelinek PR tree-optimization/48717 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr38699.c b/gcc/testsuite/gcc.dg/tree-ssa/pr38699.c index 6845324..21b3351 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr38699.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr38699.c @@ -17,6 +17,7 @@ #define PORTC _SFR_IO8(0x15) #define PORTD _SFR_IO8(0x12) + static void delay_wait_us( unsigned char timeout ) { __asm__ __volatile__ ("wdr"); @@ -27,8 +28,12 @@ static void delay_wait_us( unsigned char timeout ) { while(!(TIFR & (1 << (TOV0)))); } +/* The original testcase was multiplying by 1000. Gcc is now smart enough + to work out that actual parameter is 5000 that is not what testcase was + about. Obstructate the code somewhat then. */ +int a; static void delay_wait_us_ms( unsigned char timeout ) { - delay_wait_us( timeout * 1000 ); + delay_wait_us( timeout * a ); } diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 56a9346..e13a9f8 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -4366,7 +4366,7 @@ convert_callers (struct cgraph_node *node, tree old_decl, for (cs = node->callers; cs; cs = cs->next_caller) if (bitmap_set_bit (recomputed_callers, cs->caller->uid) && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (cs->caller->decl))) - compute_inline_parameters (cs->caller); + compute_inline_parameters (cs->caller, true); BITMAP_FREE (recomputed_callers); current_function_decl = old_cur_fndecl; -- 2.7.4