From 478baf913e79d1d3219b513b494943c830850593 Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Wed, 23 Mar 2016 07:20:16 -0600 Subject: [PATCH] re PR tree-optimization/64058 (Performance degradation after r216304) PR tree-optimization/64058 * tree-ssa-coalesce.c (struct coalesce_pair): Add new field CONFLICT_COUNT. (struct ssa_conflicts): Move up earlier in the file. (conflicts_, var_map_): New static variables. (initialize_conflict_count): New function to initialize the CONFLICT_COUNT field for each conflict pair. (compare_pairs): Lazily initialize the conflict count and use it as the first tie-breaker. (sort_coalesce_list): Add new arguments conflicts, map. Initialize and wipe conflicts_ and map_ around the call to qsort. Remove special case for 2 coalesce pairs. * bitmap.c (bitmap_count_unique_bits): New function. (bitmap_count_bits_in_word): New function, extracted from bitmap_count_bits. (bitmap_count_bits): Use bitmap_count_bits_in_word. * bitmap.h (bitmap_count_unique_bits): Declare it. From-SVN: r234425 --- gcc/ChangeLog | 20 ++++++++ gcc/bitmap.c | 63 +++++++++++++++++++++---- gcc/bitmap.h | 3 ++ gcc/tree-ssa-coalesce.c | 123 +++++++++++++++++++++++++++++++++++------------- 4 files changed, 168 insertions(+), 41 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 21366dc..f9b2af2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2016-03-23 Jeff Law + + PR tree-optimization/64058 + * tree-ssa-coalesce.c (struct coalesce_pair): Add new field + CONFLICT_COUNT. + (struct ssa_conflicts): Move up earlier in the file. + (conflicts_, var_map_): New static variables. + (initialize_conflict_count): New function to initialize the + CONFLICT_COUNT field for each conflict pair. + (compare_pairs): Lazily initialize the conflict count and use it + as the first tie-breaker. + (sort_coalesce_list): Add new arguments conflicts, map. Initialize + and wipe conflicts_ and map_ around the call to qsort. Remove + special case for 2 coalesce pairs. + * bitmap.c (bitmap_count_unique_bits): New function. + (bitmap_count_bits_in_word): New function, extracted from + bitmap_count_bits. + (bitmap_count_bits): Use bitmap_count_bits_in_word. + * bitmap.h (bitmap_count_unique_bits): Declare it. + 2016-03-23 Ilya Enkovich PR target/69917 diff --git a/gcc/bitmap.c b/gcc/bitmap.c index ac20ae5..0c05512 100644 --- a/gcc/bitmap.c +++ b/gcc/bitmap.c @@ -662,6 +662,26 @@ bitmap_popcount (BITMAP_WORD a) return ret; } #endif + +/* Count and return the number of bits set in the bitmap word BITS. */ +static unsigned long +bitmap_count_bits_in_word (const BITMAP_WORD *bits) +{ + unsigned long count = 0; + + for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) + { +#if GCC_VERSION >= 3400 + /* Note that popcountl matches BITMAP_WORD in type, so the actual size + of BITMAP_WORD is not material. */ + count += __builtin_popcountl (bits[ix]); +#else + count += bitmap_popcount (bits[ix]); +#endif + } + return count; +} + /* Count the number of bits set in the bitmap, and return it. */ unsigned long @@ -669,19 +689,44 @@ bitmap_count_bits (const_bitmap a) { unsigned long count = 0; const bitmap_element *elt; - unsigned ix; for (elt = a->first; elt; elt = elt->next) + count += bitmap_count_bits_in_word (elt->bits); + + return count; +} + +/* Count the number of unique bits set in A and B and return it. */ + +unsigned long +bitmap_count_unique_bits (const_bitmap a, const_bitmap b) +{ + unsigned long count = 0; + const bitmap_element *elt_a, *elt_b; + + for (elt_a = a->first, elt_b = b->first; elt_a && elt_b; ) { - for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) + /* If we're at different indices, then count all the bits + in the lower element. If we're at the same index, then + count the bits in the IOR of the two elements. */ + if (elt_a->indx < elt_b->indx) { -#if GCC_VERSION >= 3400 - /* Note that popcountl matches BITMAP_WORD in type, so the actual size - of BITMAP_WORD is not material. */ - count += __builtin_popcountl (elt->bits[ix]); -#else - count += bitmap_popcount (elt->bits[ix]); -#endif + count += bitmap_count_bits_in_word (elt_a->bits); + elt_a = elt_a->next; + } + else if (elt_b->indx < elt_a->indx) + { + count += bitmap_count_bits_in_word (elt_b->bits); + elt_b = elt_b->next; + } + else + { + BITMAP_WORD bits[BITMAP_ELEMENT_WORDS]; + for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) + bits[ix] = elt_a->bits[ix] | elt_b->bits[ix]; + count += bitmap_count_bits_in_word (bits); + elt_a = elt_a->next; + elt_b = elt_b->next; } } return count; diff --git a/gcc/bitmap.h b/gcc/bitmap.h index 805e37e..1115711 100644 --- a/gcc/bitmap.h +++ b/gcc/bitmap.h @@ -280,6 +280,9 @@ extern bool bitmap_single_bit_set_p (const_bitmap); /* Count the number of bits set in the bitmap. */ extern unsigned long bitmap_count_bits (const_bitmap); +/* Count the number of unique bits set across the two bitmaps. */ +extern unsigned long bitmap_count_unique_bits (const_bitmap, const_bitmap); + /* Boolean operations on bitmaps. The _into variants are two operand versions that modify the first source operand. The other variants are three operand versions that to not destroy the source bitmaps. diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c index e49876e..57fc653 100644 --- a/gcc/tree-ssa-coalesce.c +++ b/gcc/tree-ssa-coalesce.c @@ -51,12 +51,40 @@ struct coalesce_pair int second_element; int cost; + /* A count of the number of unique partitions this pair would conflict + with if coalescing was successful. This is the secondary sort key, + given two pairs with equal costs, we will prefer the pair with a smaller + conflict set. + + This is lazily initialized when we discover two coalescing pairs have + the same primary cost. + + Note this is not updated and propagated as pairs are coalesced. */ + int conflict_count; + /* The order in which coalescing pairs are discovered is recorded in this field, which is used as the final tie breaker when sorting coalesce pairs. */ int index; }; +/* This represents a conflict graph. Implemented as an array of bitmaps. + A full matrix is used for conflicts rather than just upper triangular form. + this make sit much simpler and faster to perform conflict merges. */ + +struct ssa_conflicts +{ + bitmap_obstack obstack; /* A place to allocate our bitmaps. */ + vec conflicts; +}; + +/* The narrow API of the qsort comparison function doesn't allow easy + access to additional arguments. So we have two globals (ick) to hold + the data we need. They're initialized before the call to qsort and + wiped immediately after. */ +static ssa_conflicts *conflicts_; +static var_map map_; + /* Coalesce pair hashtable helpers. */ struct coalesce_pair_hasher : nofree_ptr_hash @@ -303,6 +331,7 @@ find_coalesce_pair (coalesce_list *cl, int p1, int p2, bool create) pair->second_element = p.second_element; pair->cost = 0; pair->index = num_coalesce_pairs (cl); + pair->conflict_count = 0; *slot = pair; } @@ -345,21 +374,66 @@ add_coalesce (coalesce_list *cl, int p1, int p2, int value) } } +/* Compute and record how many unique conflicts would exist for the + representative partition for each coalesce pair in CL. + + CONFLICTS is the conflict graph and MAP is the current partition view. */ + +static void +initialize_conflict_count (coalesce_pair *p, + ssa_conflicts *conflicts, + var_map map) +{ + int p1 = var_to_partition (map, ssa_name (p->first_element)); + int p2 = var_to_partition (map, ssa_name (p->second_element)); + + /* 4 cases. If both P1 and P2 have conflicts, then build their + union and count the members. Else handle the degenerate cases + in the obvious ways. */ + if (conflicts->conflicts[p1] && conflicts->conflicts[p2]) + p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1], + conflicts->conflicts[p2]); + else if (conflicts->conflicts[p1]) + p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]); + else if (conflicts->conflicts[p2]) + p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]); + else + p->conflict_count = 0; +} + /* Comparison function to allow qsort to sort P1 and P2 in Ascending order. */ static int compare_pairs (const void *p1, const void *p2) { - const coalesce_pair *const *const pp1 = (const coalesce_pair *const *) p1; - const coalesce_pair *const *const pp2 = (const coalesce_pair *const *) p2; + coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1; + coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2; int result; result = (* pp1)->cost - (* pp2)->cost; - /* And if everything else is equal, then sort based on which - coalesce pair was found first. */ + /* We use the size of the resulting conflict set as the secondary sort key. + Given two equal costing coalesce pairs, we want to prefer the pair that + has the smaller conflict set. */ if (result == 0) - result = (*pp2)->index - (*pp1)->index; + { + if (flag_expensive_optimizations) + { + /* Lazily initialize the conflict counts as it's fairly expensive + to compute. */ + if ((*pp2)->conflict_count == 0) + initialize_conflict_count (*pp2, conflicts_, map_); + if ((*pp1)->conflict_count == 0) + initialize_conflict_count (*pp1, conflicts_, map_); + + result = (*pp2)->conflict_count - (*pp1)->conflict_count; + } + + /* And if everything else is equal, then sort based on which + coalesce pair was found first. */ + if (result == 0) + result = (*pp2)->index - (*pp1)->index; + } return result; } @@ -374,7 +448,7 @@ compare_pairs (const void *p1, const void *p2) in order from most important coalesce to least important. */ static void -sort_coalesce_list (coalesce_list *cl) +sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map map) { unsigned x, num; coalesce_pair *p; @@ -400,19 +474,14 @@ sort_coalesce_list (coalesce_list *cl) if (num == 1) return; - /* If there are only 2, just pick swap them if the order isn't correct. */ - if (num == 2) - { - if (cl->sorted[0]->cost > cl->sorted[1]->cost) - std::swap (cl->sorted[0], cl->sorted[1]); - return; - } - - /* Only call qsort if there are more than 2 items. - ??? Maybe std::sort will do better, provided that compare_pairs - can be inlined. */ - if (num > 2) - qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs); + /* We don't want to depend on qsort_r, so we have to stuff away + additional data into globals so it can be referenced in + compare_pairs. */ + conflicts_ = conflicts; + map_ = map; + qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs); + conflicts_ = NULL; + map_ = NULL; } @@ -437,7 +506,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl) print_generic_expr (f, var1, TDF_SLIM); fprintf (f, " <-> "); print_generic_expr (f, var2, TDF_SLIM); - fprintf (f, " (%1d), ", node->cost); + fprintf (f, " (%1d, %1d), ", node->cost, node->conflict_count); fprintf (f, "\n"); } } @@ -447,7 +516,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl) for (x = cl->num_sorted - 1 ; x >=0; x--) { node = cl->sorted[x]; - fprintf (f, "(%d) ", node->cost); + fprintf (f, "(%d, %d) ", node->cost, node->conflict_count); var = ssa_name (node->first_element); print_generic_expr (f, var, TDF_SLIM); fprintf (f, " <-> "); @@ -459,16 +528,6 @@ dump_coalesce_list (FILE *f, coalesce_list *cl) } -/* This represents a conflict graph. Implemented as an array of bitmaps. - A full matrix is used for conflicts rather than just upper triangular form. - this make sit much simpler and faster to perform conflict merges. */ - -struct ssa_conflicts -{ - bitmap_obstack obstack; /* A place to allocate our bitmaps. */ - vec conflicts; -}; - /* Return an empty new conflict graph for SIZE elements. */ static inline ssa_conflicts * @@ -1800,7 +1859,7 @@ coalesce_ssa_name (void) if (dump_file && (dump_flags & TDF_DETAILS)) ssa_conflicts_dump (dump_file, graph); - sort_coalesce_list (cl); + sort_coalesce_list (cl, graph, map); if (dump_file && (dump_flags & TDF_DETAILS)) { -- 2.7.4