From 71e37927a59cb6c7a102008d82a23ccedfae5864 Mon Sep 17 00:00:00 2001 From: hubicka Date: Wed, 1 Apr 2015 07:41:17 +0000 Subject: [PATCH] * lto-cgraph.c (lto_output_node, input_overwrite_node): Stream split_part. * ipa-inline.c (edge_badness): Add wrapper penalty. (sum_callers): Move up. (inline_small_functions): Set single_caller. * ipa-inline.h (inline_summary): Add single_caller. * ipa-split.c (split_function): Set split_part. (cgraph_node::create_clone): Do not shadow decl; copy split_part. * cgraph.h (cgraph_node): Add split_part. * gcc.dg/ipa/inlinehint-4.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@221806 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 12 ++++ gcc/cgraph.h | 2 + gcc/ipa-inline.c | 117 ++++++++++++++++++++++++++------ gcc/ipa-inline.h | 3 + gcc/ipa-split.c | 2 + gcc/lto-cgraph.c | 2 + gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.dg/ipa/inlinehint-4.c | 40 +++++++++++ 8 files changed, 161 insertions(+), 21 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f707096..9c39d76 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2015-03-31 Jan Hubicka + + * lto-cgraph.c (lto_output_node, input_overwrite_node): Stream + split_part. + * ipa-inline.c (edge_badness): Add wrapper penalty. + (sum_callers): Move up. + (inline_small_functions): Set single_caller. + * ipa-inline.h (inline_summary): Add single_caller. + * ipa-split.c (split_function): Set split_part. + (cgraph_node::create_clone): Do not shadow decl; copy split_part. + * cgraph.h (cgraph_node): Add split_part. + 2015-03-31 Uros Bizjak PR target/58945 diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 650e689..cf8c7b6 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1319,6 +1319,8 @@ public: unsigned merged : 1; /* True if function was created to be executed in parallel. */ unsigned parallelized_function : 1; + /* True if function is part split out by ipa-split. */ + unsigned split_part : 1; private: /* Worker for call_for_symbol_and_aliases. */ diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index bc32846..77d6d85 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -1088,6 +1088,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count) { sreal numerator, denominator; + int overall_growth; numerator = (compute_uninlined_call_time (callee_info, edge) - compute_inlined_call_time (edge, edge_time)); @@ -1098,8 +1099,74 @@ edge_badness (struct cgraph_edge *edge, bool dump) else if (opt_for_fn (caller->decl, flag_branch_probabilities)) numerator = numerator >> 11; denominator = growth; - if (callee_info->growth > 0) - denominator *= callee_info->growth * callee_info->growth; + + overall_growth = callee_info->growth; + + /* Look for inliner wrappers of the form: + + inline_caller () + { + do_fast_job... + if (need_more_work) + noninline_callee (); + } + Withhout panilizing this case, we usually inline noninline_callee + into the inline_caller because overall_growth is small preventing + further inlining of inline_caller. + + Penalize only callgraph edges to functions with small overall + growth ... + */ + if (growth > overall_growth + /* ... and having only one caller which is not inlined ... */ + && callee_info->single_caller + && !edge->caller->global.inlined_to + /* ... and edges executed only conditionally ... */ + && edge->frequency < CGRAPH_FREQ_BASE + /* ... consider case where callee is not inline but caller is ... */ + && ((!DECL_DECLARED_INLINE_P (edge->callee->decl) + && DECL_DECLARED_INLINE_P (caller->decl)) + /* ... or when early optimizers decided to split and edge + frequency still indicates splitting is a win ... */ + || (callee->split_part && !caller->split_part + && edge->frequency + < CGRAPH_FREQ_BASE + * PARAM_VALUE + (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100 + /* ... and do not overwrite user specified hints. */ + && (!DECL_DECLARED_INLINE_P (edge->callee->decl) + || DECL_DECLARED_INLINE_P (caller->decl))))) + { + struct inline_summary *caller_info = inline_summaries->get (caller); + int caller_growth = caller_info->growth; + + /* Only apply the penalty when caller looks like inline candidate, + and it is not called once and. */ + if (!caller_info->single_caller && overall_growth < caller_growth + && caller_info->inlinable + && caller_info->size + < (DECL_DECLARED_INLINE_P (caller->decl) + ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO)) + { + if (dump) + fprintf (dump_file, + " Wrapper penalty. Increasing growth %i to %i\n", + overall_growth, caller_growth); + overall_growth = caller_growth; + } + } + if (overall_growth > 0) + { + /* Strongly preffer functions with few callers that can be inlined + fully. The square root here leads to smaller binaries at average. + Watch however for extreme cases and return to linear function + when growth is large. */ + if (overall_growth < 256) + overall_growth *= overall_growth; + else + overall_growth += 256 * 256 - 256; + denominator *= overall_growth; + } badness = - numerator / denominator; @@ -1109,13 +1176,15 @@ edge_badness (struct cgraph_edge *edge, bool dump) " %f: guessed profile. frequency %f, count %"PRId64 " caller count %"PRId64 " time w/o inlining %f, time w inlining %f" - " overall growth %i (current) %i (original)\n", - badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE, + " overall growth %i (current) %i (original)" + " %i (compensated)\n", + badness.to_double (), + (double)edge->frequency / CGRAPH_FREQ_BASE, edge->count, caller->count, compute_uninlined_call_time (callee_info, edge).to_double (), compute_inlined_call_time (edge, edge_time).to_double (), estimate_growth (callee), - callee_info->growth); + callee_info->growth, overall_growth); } } /* When function local profile is not available or it does not give @@ -1133,8 +1202,8 @@ edge_badness (struct cgraph_edge *edge, bool dump) else badness = badness << nest; if (dump) - fprintf (dump_file, " %f: no profile. nest %i\n", badness.to_double (), - nest); + fprintf (dump_file, " %f: no profile. nest %i\n", + badness.to_double (), nest); } gcc_checking_assert (badness != 0); @@ -1649,6 +1718,20 @@ inline_account_function_p (struct cgraph_node *node) && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED); } +/* Count number of callers of NODE and store it into DATA (that + points to int. Worker for cgraph_for_node_and_aliases. */ + +static bool +sum_callers (struct cgraph_node *node, void *data) +{ + struct cgraph_edge *e; + int *num_calls = (int *)data; + + for (e = node->callers; e; e = e->next_caller) + (*num_calls)++; + return false; +} + /* We use greedy algorithm for inlining of small functions: All inline candidates are put into prioritized heap ordered in increasing badness. @@ -1693,6 +1776,12 @@ inline_small_functions (void) if (inline_account_function_p (node)) initial_size += info->size; info->growth = estimate_growth (node); + + int num_calls = 0; + node->call_for_symbol_and_aliases (sum_callers, &num_calls, + true); + if (num_calls == 1) + info->single_caller = true; if (dfs && dfs->next_cycle) { struct cgraph_node *n2; @@ -2085,20 +2174,6 @@ flatten_function (struct cgraph_node *node, bool early) inline_update_overall_summary (node); } -/* Count number of callers of NODE and store it into DATA (that - points to int. Worker for cgraph_for_node_and_aliases. */ - -static bool -sum_callers (struct cgraph_node *node, void *data) -{ - struct cgraph_edge *e; - int *num_calls = (int *)data; - - for (e = node->callers; e; e = e->next_caller) - (*num_calls)++; - return false; -} - /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases. DATA points to number of calls originally found so we avoid infinite recursion. */ diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index ed4d66f..85041f6 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -129,6 +129,9 @@ struct GTY(()) inline_summary /* True when function contains cilk spawn (and thus we can not inline into it). */ unsigned contains_cilk_spawn : 1; + /* True wen there is only one caller of the function before small function + inlining. */ + unsigned int single_caller : 1; /* Information about function that will result after applying all the inline decisions present in the callgraph. Generally kept up to diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c index 5d5db0e..a28f3a1 100644 --- a/gcc/ipa-split.c +++ b/gcc/ipa-split.c @@ -1402,6 +1402,8 @@ split_function (basic_block return_bb, struct split_point *split_point, (vNULL, NULL, args_to_skip, !split_part_return_p, split_point->split_bbs, split_point->entry_bb, "part"); + node->split_part = true; + /* Let's take a time profile for splitted function. */ node->tp_first_run = cur_node->tp_first_run + 1; diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 088de86..fa18d36 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -578,6 +578,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, bp_pack_enum (&bp, ld_plugin_symbol_resolution, LDPR_NUM_KNOWN, node->resolution); bp_pack_value (&bp, node->instrumentation_clone, 1); + bp_pack_value (&bp, node->split_part, 1); streamer_write_bitpack (&bp); streamer_write_data_stream (ob->main_stream, section, strlen (section) + 1); @@ -1214,6 +1215,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data, node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution, LDPR_NUM_KNOWN); node->instrumentation_clone = bp_unpack_value (bp, 1); + node->split_part = bp_unpack_value (bp, 1); gcc_assert (flag_ltrans || (!node->in_other_partition && !node->used_from_other_partition)); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c9739af..0dd0421 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-03-31 Jan Hubicka + + * gcc.dg/ipa/inlinehint-4.c: New testcase. + 2015-03-31 Alex Velenko * gcc.target/arm/pr45701-1.c (history_expand_line_internal): Add an diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c new file mode 100644 index 0000000..52d2f1a --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c @@ -0,0 +1,40 @@ +/* { dg-options "-O3 -fdump-ipa-inline-details -fno-early-inlining --param large-unit-insns=1" } */ +/* { dg-add-options bind_pic_locally } */ +int *hashval; +int *hash; +int hash_size; + +static int +lookup_slow (int val) +{ + int i = val % hash_size; + while (hashval[i] && hashval[i] != val) + i++; + return hash[i]; +} + +static inline int +lookup (int val) +{ + static int cache, cache_val; + if (val == cache_val) + return cache; + else + { + cache_val = val; + cache = lookup_slow (val); + return cache; + } +} + +int +test (int i) +{ + return lookup (i) + lookup (2 * i) + lookup (3 * i) + lookup (4 * i) + + lookup (5 * i) + lookup (6 * i) + lookup (7 * i) + lookup (8 * i) + + lookup (9 * i); +} +/* { dg-final { scan-ipa-dump "Wrapper penalty" "inline" } } */ +/* { dg-final { scan-ipa-dump-not "Inlining lookup_slow to lookup" "inline" } } */ +/* { dg-final { scan-ipa-dump "Inlining lookup to test" "inline" } } */ +/* { dg-final { cleanup-ipa-dump "inline" } } */ -- 2.7.4