From f5b25e15165adde1356af42e9066ab75c5b37a19 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 16 Jan 2020 23:55:44 +0100 Subject: [PATCH] Make profile estimation more precise While analyzing code size regression in SPEC2k GCC binary I noticed that we perform some inline decisions because we think that number of executions are very high. In particular there was inline decision inlining gen_rtx_fmt_ee to find_reloads believing that it is called 4 billion times. This turned out to be cummulation of roundoff errors in propagate_freq which was bit mechanically updated from original sreals to C++ sreals and later to new probabilities. This led us to estimate that a loopback edge is reached with probability 2.3 which was capped to 1-1/10000 and since this happened in nested loop it quickly escalated to large values. Originally capping to REG_BR_PROB_BASE avoided such problems but now we have much higher range. This patch avoids going from probabilites to REG_BR_PROB_BASE so precision is kept. In addition it makes the propagation to not estimate more than param-max-predicted-loop-iterations. The first change makes the cap to not be triggered on the gcc build, but it is still better to be safe than sorry. * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of dump. * params.opt: (max-predicted-iterations): Set bounds. * predict.c (real_almost_one, real_br_prob_base, real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove. (propagate_freq): Add max_cyclic_prob parameter; cap cyclic probabilities; do not truncate to reg_br_prob_bases. (estimate_loops_at_level): Pass max_cyclic_prob. (estimate_loops): Compute max_cyclic_prob. (estimate_bb_frequencies): Do not initialize real_*; update calculation of back edge prob. * profile-count.c (profile_probability::to_sreal): New. * profile-count.h (class sreal): Move up in file. (profile_probability::to_sreal): Declare. --- gcc/ChangeLog | 17 +++++++++ gcc/ipa-fnsummary.c | 2 +- gcc/params.opt | 2 +- gcc/predict.c | 101 +++++++++++++++++++++++----------------------------- gcc/profile-count.c | 9 +++++ gcc/profile-count.h | 5 +-- 6 files changed, 76 insertions(+), 60 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b98ead8..907d0f7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2020-01-16 Jan Hubicka + + * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of + dump. + * params.opt: (max-predicted-iterations): Set bounds. + * predict.c (real_almost_one, real_br_prob_base, + real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove. + (propagate_freq): Add max_cyclic_prob parameter; cap cyclic + probabilities; do not truncate to reg_br_prob_bases. + (estimate_loops_at_level): Pass max_cyclic_prob. + (estimate_loops): Compute max_cyclic_prob. + (estimate_bb_frequencies): Do not initialize real_*; update calculation + of back edge prob. + * profile-count.c (profile_probability::to_sreal): New. + * profile-count.h (class sreal): Move up in file. + (profile_probability::to_sreal): Declare. + 2020-01-16 Stam Markianos-Wright * config/arm/arm.c diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 4e1c587..dbd53f1 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, gcc_assert (*size == old_size); if (time && (*time - old_time > 1 || *time - old_time < -1) && dump_file) - fprintf (dump_file, "Time mismatch in call summary %f!=%f", + fprintf (dump_file, "Time mismatch in call summary %f!=%f\n", old_time.to_double (), time->to_double ()); } diff --git a/gcc/params.opt b/gcc/params.opt index 31cc200..f02c769 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32) Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant. -param=max-predicted-iterations= -Common Joined UInteger Var(param_max_predicted_iterations) Init(100) Param Optimization +Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(1, 65536) Param Optimization The maximum number of loop iterations we predict statically. -param=max-reload-search-insns= diff --git a/gcc/predict.c b/gcc/predict.c index 02c5fe0..c3aed9e 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -76,10 +76,6 @@ enum predictor_reason static const char *reason_messages[] = {"", " (ignored)", " (single edge duplicate)", " (edge pair duplicate)"}; -/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, - 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */ -static sreal real_almost_one, real_br_prob_base, - real_inv_br_prob_base, real_one_half, real_bb_freq_max; static void combine_predictions_for_insn (rtx_insn *, basic_block); static void dump_prediction (FILE *, enum br_predictor, int, basic_block, @@ -3266,7 +3262,8 @@ public: TOVISIT, starting in HEAD. */ static void -propagate_freq (basic_block head, bitmap tovisit) +propagate_freq (basic_block head, bitmap tovisit, + sreal max_cyclic_prob) { basic_block bb; basic_block last; @@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit) FOR_EACH_EDGE (e, ei, bb->preds) if (EDGE_INFO (e)->back_edge) - { - cyclic_probability += EDGE_INFO (e)->back_edge_prob; - } + cyclic_probability += EDGE_INFO (e)->back_edge_prob; else if (!(e->flags & EDGE_DFS_BACK)) { - /* frequency += (e->probability - * BLOCK_INFO (e->src)->frequency / - REG_BR_PROB_BASE); */ - /* FIXME: Graphite is producing edges with no profile. Once this is fixed, drop this. */ sreal tmp = e->probability.initialized_p () ? - e->probability.to_reg_br_prob_base () : 0; - tmp *= BLOCK_INFO (e->src)->frequency; - tmp *= real_inv_br_prob_base; - frequency += tmp; + e->probability.to_sreal () : 0; + frequency += tmp * BLOCK_INFO (e->src)->frequency; } if (cyclic_probability == 0) @@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit) } else { - if (cyclic_probability > real_almost_one) - cyclic_probability = real_almost_one; - - /* BLOCK_INFO (bb)->frequency = frequency - / (1 - cyclic_probability) */ - - cyclic_probability = sreal (1) - cyclic_probability; - BLOCK_INFO (bb)->frequency = frequency / cyclic_probability; + if (cyclic_probability > max_cyclic_prob) + { + if (dump_file) + fprintf (dump_file, + "cyclic probability of bb %i is %f (capped to %f)" + "; turning freq %f", + bb->index, cyclic_probability.to_double (), + max_cyclic_prob.to_double (), + frequency.to_double ()); + + cyclic_probability = max_cyclic_prob; + } + else if (dump_file) + fprintf (dump_file, + "cyclic probability of bb %i is %f; turning freq %f", + bb->index, cyclic_probability.to_double (), + frequency.to_double ()); + + BLOCK_INFO (bb)->frequency = frequency + / (sreal (1) - cyclic_probability); + if (dump_file) + fprintf (dump_file, " to %f\n", + BLOCK_INFO (bb)->frequency.to_double ()); } } @@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit) e = find_edge (bb, head); if (e) { - /* EDGE_INFO (e)->back_edge_prob - = ((e->probability * BLOCK_INFO (bb)->frequency) - / REG_BR_PROB_BASE); */ - /* FIXME: Graphite is producing edges with no profile. Once this is fixed, drop this. */ sreal tmp = e->probability.initialized_p () ? - e->probability.to_reg_br_prob_base () : 0; - tmp *= BLOCK_INFO (bb)->frequency; - EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; + e->probability.to_sreal () : 0; + EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency; } /* Propagate to successor blocks. */ @@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit) /* Estimate frequencies in loops at same nest level. */ static void -estimate_loops_at_level (class loop *first_loop) +estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob) { class loop *loop; @@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop) unsigned i; auto_bitmap tovisit; - estimate_loops_at_level (loop->inner); + estimate_loops_at_level (loop->inner, max_cyclic_prob); /* Find current loop back edge and mark it. */ e = loop_latch_edge (loop); @@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop) for (i = 0; i < loop->num_nodes; i++) bitmap_set_bit (tovisit, bbs[i]->index); free (bbs); - propagate_freq (loop->header, tovisit); + propagate_freq (loop->header, tovisit, max_cyclic_prob); } } @@ -3428,17 +3427,18 @@ estimate_loops (void) { auto_bitmap tovisit; basic_block bb; + sreal max_cyclic_prob = (sreal)1 - (sreal)1 / param_max_predicted_iterations; /* Start by estimating the frequencies in the loops. */ if (number_of_loops (cfun) > 1) - estimate_loops_at_level (current_loops->tree_root->inner); + estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob); /* Now propagate the frequencies through all the blocks. */ FOR_ALL_BB_FN (bb, cfun) { bitmap_set_bit (tovisit, bb->index); } - propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit); + propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob); } /* Drop the profile for NODE to guessed, and update its frequency based on @@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force) if (force || profile_status_for_fn (cfun) != PROFILE_READ || !update_max_bb_count ()) { - static int real_values_initialized = 0; - - if (!real_values_initialized) - { - real_values_initialized = 1; - real_br_prob_base = REG_BR_PROB_BASE; - /* Scaling frequencies up to maximal profile count may result in - frequent overflows especially when inlining loops. - Small scalling results in unnecesary precision loss. Stay in - the half of the (exponential) range. */ - real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2); - real_one_half = sreal (1, -1); - real_inv_br_prob_base = sreal (1) / real_br_prob_base; - real_almost_one = sreal (1) - real_inv_br_prob_base; - } mark_dfs_back_edges (); @@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force) this is fixed, drop this. */ if (e->probability.initialized_p ()) EDGE_INFO (e)->back_edge_prob - = e->probability.to_reg_br_prob_base (); + = e->probability.to_sreal (); else - EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; - EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; + /* back_edge_prob = 0.5 */ + EDGE_INFO (e)->back_edge_prob = sreal (1, -1); } } @@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force) if (freq_max < BLOCK_INFO (bb)->frequency) freq_max = BLOCK_INFO (bb)->frequency; - freq_max = real_bb_freq_max / freq_max; + /* Scaling frequencies up to maximal profile count may result in + frequent overflows especially when inlining loops. + Small scalling results in unnecesary precision loss. Stay in + the half of the (exponential) range. */ + freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max; if (freq_max < 16) freq_max = 16; profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { - sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; + sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1); profile_count count = profile_count::from_gcov_type (tmp.to_int ()); /* If we have profile feedback in which this function was never diff --git a/gcc/profile-count.c b/gcc/profile-count.c index d463ddd..0c79229 100644 --- a/gcc/profile-count.c +++ b/gcc/profile-count.c @@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1, else return *this * even () + other * even (); } + +/* Return probability as sreal in range [0, 1]. */ + +sreal +profile_probability::to_sreal () const +{ + gcc_checking_assert (initialized_p ()); + return ((sreal)m_val) >> (n_bits - 2); +} diff --git a/gcc/profile-count.h b/gcc/profile-count.h index 987d3ce..09217a8 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see struct function; struct profile_count; +class sreal; /* Quality of the profile count. Because gengtype does not support enums inside of classes, this is in global namespace. */ @@ -614,6 +615,8 @@ public: profile_probability other, profile_count count2) const; + /* Return probability as sreal. */ + sreal to_sreal () const; /* LTO streaming support. */ static profile_probability stream_in (class lto_input_block *); void stream_out (struct output_block *); @@ -674,8 +677,6 @@ public: */ -class sreal; - struct GTY(()) profile_count { public: -- 2.7.4