From 09bac500fc57a399ef7df847f29c21b956280306 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 28 Jul 2005 07:41:29 +0000 Subject: [PATCH] cfg.c (update_bb_profile_for_threading): Use RDIV. * cfg.c (update_bb_profile_for_threading): Use RDIV. (scale_bbs_frequencies_int): Likewise, assert for possible overflow. (scale_bbs_frequencies_gcov_type): Be more curefull about overflows and roundoff errors. * tree-cfg.c (tree_duplicate_sese_region): Use counts for updating profile when available. * update-loopch.c: New testcase. From-SVN: r102466 --- gcc/ChangeLog | 11 +++++- gcc/cfg.c | 53 ++++++++++++++++++++------ gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.dg/tree-prof/update-loopch.c | 21 ++++++++++ gcc/tree-cfg.c | 48 +++++++++++++++++------ 5 files changed, 112 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-prof/update-loopch.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 264b072..c6b4ecb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,13 @@ -2005-07-28 Jan Beulich +2005-07-28 Jan Hubicka + + * cfg.c (update_bb_profile_for_threading): Use RDIV. + (scale_bbs_frequencies_int): Likewise, assert for possible overflow. + (scale_bbs_frequencies_gcov_type): Be more curefull about overflows and + roundoff errors. + * tree-cfg.c (tree_duplicate_sese_region): Use counts for updating + profile when available. + +2005-07-28 Jan Beulich * config/ia64/ia64.c (ia64_load_pair_ok): New. (ia64_print_operand): Describe and handle 'X'. diff --git a/gcc/cfg.c b/gcc/cfg.c index e6af6cf..9644132 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -893,11 +893,11 @@ update_bb_profile_for_threading (basic_block bb, int edge_frequency, } else if (prob != REG_BR_PROB_BASE) { - int scale = 65536 * REG_BR_PROB_BASE / prob; + int scale = RDIV (65536 * REG_BR_PROB_BASE, prob); FOR_EACH_EDGE (c, ei, bb->succs) { - c->probability = (c->probability * scale) / 65536; + c->probability = RDIV (c->probability * scale, 65536); if (c->probability > REG_BR_PROB_BASE) c->probability = REG_BR_PROB_BASE; } @@ -925,16 +925,23 @@ scale_bbs_frequencies_int (basic_block *bbs, int nbbs, int num, int den) num = 0; if (num > den) return; + /* Assume that the users are producing the fraction from frequencies + that never grow far enought to risk arithmetic overflow. */ + gcc_assert (num < 65536); for (i = 0; i < nbbs; i++) { edge_iterator ei; - bbs[i]->frequency = (bbs[i]->frequency * num) / den; + bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); bbs[i]->count = RDIV (bbs[i]->count * num, den); FOR_EACH_EDGE (e, ei, bbs[i]->succs) - e->count = (e->count * num) /den; + e->count = RDIV (e->count * num, den); } } +/* numbers smaller than this value are safe to multiply without getting + 64bit overflow. */ +#define MAX_SAFE_MULTIPLIER (1 << (sizeof (HOST_WIDEST_INT) * 4 - 1)) + /* Multiply all frequencies of basic blocks in array BBS of length NBBS by NUM/DEN, in gcov_type arithmetic. More accurate than previous function but considerably slower. */ @@ -944,15 +951,37 @@ scale_bbs_frequencies_gcov_type (basic_block *bbs, int nbbs, gcov_type num, { int i; edge e; + gcov_type fraction = RDIV (num * 65536, den); - for (i = 0; i < nbbs; i++) - { - edge_iterator ei; - bbs[i]->frequency = (bbs[i]->frequency * num) / den; - bbs[i]->count = RDIV (bbs[i]->count * num, den); - FOR_EACH_EDGE (e, ei, bbs[i]->succs) - e->count = (e->count * num) /den; - } + gcc_assert (fraction >= 0); + + if (num < MAX_SAFE_MULTIPLIER) + for (i = 0; i < nbbs; i++) + { + edge_iterator ei; + bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); + if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) + bbs[i]->count = RDIV (bbs[i]->count * num, den); + else + bbs[i]->count = RDIV (bbs[i]->count * fraction, 65536); + FOR_EACH_EDGE (e, ei, bbs[i]->succs) + if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) + e->count = RDIV (e->count * num, den); + else + e->count = RDIV (e->count * fraction, 65536); + } + else + for (i = 0; i < nbbs; i++) + { + edge_iterator ei; + if (sizeof (gcov_type) > sizeof (int)) + bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); + else + bbs[i]->frequency = RDIV (bbs[i]->frequency * fraction, 65536); + bbs[i]->count = RDIV (bbs[i]->count * fraction, 65536); + FOR_EACH_EDGE (e, ei, bbs[i]->succs) + e->count = RDIV (e->count * fraction, 65536); + } } /* Data structures used to maintain mapping between basic blocks and diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ddf272d..4eb0e04 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2005-07-28 Jan Hubicka + + * update-loopch.c: New testcase. + 2005-07-27 James A. Morrison PR rtl-optimization/23047 diff --git a/gcc/testsuite/gcc.dg/tree-prof/update-loopch.c b/gcc/testsuite/gcc.dg/tree-prof/update-loopch.c new file mode 100644 index 0000000..1e5ccaa --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-prof/update-loopch.c @@ -0,0 +1,21 @@ +/* { dg-options "-O2 -fdump-tree_profile -fdump-tree-optimized" } */ +int max = 33333; +int a[8]; +int +main () +{ + int i; + for (i = 0; i < max; i++) + { + a[i % 8]++; + } + return 0; +} +/* Loop header copying will peel away the initial conditional, so the loop body + is once reached directly from entry point of function, rest via loopback + edge. */ +/* { dg-final-use { scan-tree-dump-not "count:33333" "tree_profile"} } */ +/* { dg-final-use { scan-tree-dump "count:33332" "optimized"} } */ +/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */ +/* { dg-final-use { cleanup-tree-dump "tree_profile" } } */ +/* { dg-final-use { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 5e06476..bf25f83 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4246,7 +4246,8 @@ tree_duplicate_sese_region (edge entry, edge exit, edge exit_copy; basic_block *doms; edge redirected; - int total_freq, entry_freq; + int total_freq = 0, entry_freq = 0; + gcov_type total_count = 0, entry_count = 0; if (!can_copy_bbs_p (region, n_region)) return false; @@ -4300,19 +4301,42 @@ tree_duplicate_sese_region (edge entry, edge exit, n_doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region, doms); - total_freq = entry->dest->frequency; - entry_freq = EDGE_FREQUENCY (entry); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (total_freq == 0) - total_freq = 1; - else if (entry_freq > total_freq) - entry_freq = total_freq; + if (entry->dest->count) + { + total_count = entry->dest->count; + entry_count = entry->count; + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. */ + if (entry_count > total_count) + entry_count = total_count; + } + else + { + total_freq = entry->dest->frequency; + entry_freq = EDGE_FREQUENCY (entry); + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. */ + if (total_freq == 0) + total_freq = 1; + else if (entry_freq > total_freq) + entry_freq = total_freq; + } copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop); - scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); + if (total_count) + { + scale_bbs_frequencies_gcov_type (region, n_region, + total_count - entry_count, + total_count); + scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count, + total_count); + } + else + { + scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, + total_freq); + scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); + } if (copying_header) { -- 2.7.4