From cad9aa150ba3ff8c1dc34bf428fc7146dce463b0 Mon Sep 17 00:00:00 2001 From: Maxim Kuvyrkov Date: Tue, 27 Jul 2010 19:48:15 +0000 Subject: [PATCH] re PR target/42495 (redundant memory load) PR target/42495 PR middle-end/42574 * basic-block.h (get_dominated_to_depth): Declare. * dominance.c (get_dominated_to_depth): New function, use get_all_dominated_blocks as a base. (get_all_dominated_blocks): Use get_dominated_to_depth. * gcse.c (occr_t, VEC (occr_t, heap)): Define. (hoist_exprs): Remove. (alloc_code_hoist_mem, free_code_hoist_mem): Update. (compute_code_hoist_vbeinout): Add debug print outs. (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid quadratic behavior. * params.h (MAX_HOIST_DEPTH): New macro. * doc/invoke.texi (max-hoist-depth): Document. From-SVN: r162597 --- gcc/ChangeLog | 20 ++++ gcc/basic-block.h | 2 + gcc/doc/invoke.texi | 6 ++ gcc/dominance.c | 22 ++++- gcc/gcse.c | 276 +++++++++++++++++++++++++++------------------------- gcc/params.def | 8 ++ gcc/params.h | 2 + 7 files changed, 203 insertions(+), 133 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9a28043..8c7db8f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,25 @@ 2010-07-27 Maxim Kuvyrkov + PR target/42495 + PR middle-end/42574 + * basic-block.h (get_dominated_to_depth): Declare. + * dominance.c (get_dominated_to_depth): New function, use + get_all_dominated_blocks as a base. + (get_all_dominated_blocks): Use get_dominated_to_depth. + + * gcse.c (occr_t, VEC (occr_t, heap)): Define. + (hoist_exprs): Remove. + (alloc_code_hoist_mem, free_code_hoist_mem): Update. + (compute_code_hoist_vbeinout): Add debug print outs. + (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. + + * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid + quadratic behavior. + * params.h (MAX_HOIST_DEPTH): New macro. + * doc/invoke.texi (max-hoist-depth): Document. + +2010-07-27 Maxim Kuvyrkov + PR rtl-optimization/40956 * config/arm/arm.c (thumb1_size_rtx_costs): Fix cost of simple constants. diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 135c0c2..1bf192d 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -854,6 +854,8 @@ extern VEC (basic_block, heap) *get_dominated_by (enum cdi_direction, basic_bloc extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction, basic_block *, unsigned); +extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction, + basic_block, int); extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction, basic_block); extern void add_to_dominance_info (enum cdi_direction, basic_block); diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0db0f59..78f81e2 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -8260,6 +8260,12 @@ the more aggressive code hoisting will be. Specifying 0 will allow all expressions to travel unrestricted distances. The default value is 3. +@item max-hoist-depth +The depth of search in the dominator tree for expressions to hoist. +This is used to avoid quadratic behavior in hoisting algorithm. +The value of 0 will avoid limiting the search, but may slow down compilation +of huge functions. The default value is 30. + @item max-unrolled-insns The maximum number of instructions that a loop should have if that loop is unrolled, and if the loop is unrolled, it determines how many times diff --git a/gcc/dominance.c b/gcc/dominance.c index 2e5f3ee..688c87b 100644 --- a/gcc/dominance.c +++ b/gcc/dominance.c @@ -784,16 +784,20 @@ get_dominated_by_region (enum cdi_direction dir, basic_block *region, } /* Returns the list of basic blocks including BB dominated by BB, in the - direction DIR. The vector will be sorted in preorder. */ + direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will + produce a vector containing all dominated blocks. The vector will be sorted + in preorder. */ VEC (basic_block, heap) * -get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) +get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth) { VEC(basic_block, heap) *bbs = NULL; unsigned i; + unsigned next_level_start; i = 0; VEC_safe_push (basic_block, heap, bbs, bb); + next_level_start = 1; /* = VEC_length (basic_block, bbs); */ do { @@ -804,12 +808,24 @@ get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) son; son = next_dom_son (dir, son)) VEC_safe_push (basic_block, heap, bbs, son); + + if (i == next_level_start && --depth) + next_level_start = VEC_length (basic_block, bbs); } - while (i < VEC_length (basic_block, bbs)); + while (i < next_level_start); return bbs; } +/* Returns the list of basic blocks including BB dominated by BB, in the + direction DIR. The vector will be sorted in preorder. */ + +VEC (basic_block, heap) * +get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) +{ + return get_dominated_to_depth (dir, bb, 0); +} + /* Redirect all edges pointing to BB to TO. */ void redirect_immediate_dominators (enum cdi_direction dir, basic_block bb, diff --git a/gcc/gcse.c b/gcc/gcse.c index fc1013b..a60310f 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -329,6 +329,10 @@ struct occr char copied_p; }; +typedef struct occr *occr_t; +DEF_VEC_P (occr_t); +DEF_VEC_ALLOC_P (occr_t, heap); + /* Expression and copy propagation hash tables. Each hash table is an array of buckets. ??? It is known that if it were an array of entries, structure elements @@ -4163,9 +4167,6 @@ add_label_notes (rtx x, rtx insn) static sbitmap *hoist_vbein; static sbitmap *hoist_vbeout; -/* Hoistable expressions. */ -static sbitmap *hoist_exprs; - /* ??? We could compute post dominators and run this algorithm in reverse to perform tail merging, doing so would probably be more effective than the tail merging code in jump.c. @@ -4184,7 +4185,6 @@ alloc_code_hoist_mem (int n_blocks, int n_exprs) hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); - hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); } /* Free vars used for code hoisting analysis. */ @@ -4198,7 +4198,6 @@ free_code_hoist_mem (void) sbitmap_vector_free (hoist_vbein); sbitmap_vector_free (hoist_vbeout); - sbitmap_vector_free (hoist_exprs); free_dominance_info (CDI_DOMINATORS); } @@ -4249,7 +4248,17 @@ compute_code_hoist_vbeinout (void) } if (dump_file) - fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); + { + fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); + + FOR_EACH_BB (bb) + { + fprintf (dump_file, "vbein (%d): ", bb->index); + dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); + fprintf (dump_file, "vbeout(%d): ", bb->index); + dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); + } + } } /* Top level routine to do the dataflow analysis needed by code hoisting. */ @@ -4352,6 +4361,8 @@ static int hoist_code (void) { basic_block bb, dominated; + VEC (basic_block, heap) *dom_tree_walk; + unsigned int dom_tree_walk_index; VEC (basic_block, heap) *domby; unsigned int i,j; struct expr **index_map; @@ -4360,8 +4371,6 @@ hoist_code (void) int *bb_size; int changed = 0; - sbitmap_vector_zero (hoist_exprs, last_basic_block); - /* Compute a mapping from expression number (`bitmap_index') to hash table entry. */ @@ -4400,34 +4409,72 @@ hoist_code (void) bb_size[bb->index] = to_head; } + gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 + && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest + == ENTRY_BLOCK_PTR->next_bb)); + + dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, + ENTRY_BLOCK_PTR->next_bb); + /* Walk over each basic block looking for potentially hoistable expressions, nothing gets hoisted from the entry block. */ - FOR_EACH_BB (bb) + for (dom_tree_walk_index = 0; + VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb); + dom_tree_walk_index++) { - int found = 0; - int insn_inserted_p; + domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); + + if (VEC_length (basic_block, domby) == 0) + continue; - domby = get_dominated_by (CDI_DOMINATORS, bb); /* Examine each expression that is very busy at the exit of this block. These are the potentially hoistable expressions. */ for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) { - int hoistable = 0; - if (TEST_BIT (hoist_vbeout[bb->index], i)) { + /* Current expression. */ + struct expr *expr = index_map[i]; + /* Number of occurences of EXPR that can be hoisted to BB. */ + int hoistable = 0; + /* Basic blocks that have occurences reachable from BB. */ + bitmap_head _from_bbs, *from_bbs = &_from_bbs; + /* Occurences reachable from BB. */ + VEC (occr_t, heap) *occrs_to_hoist = NULL; + /* We want to insert the expression into BB only once, so + note when we've inserted it. */ + int insn_inserted_p; + occr_t occr; + + bitmap_initialize (from_bbs, 0); + /* If an expression is computed in BB and is available at end of BB, hoist all occurences dominated by BB to BB. */ if (TEST_BIT (comp[bb->index], i)) - hoistable++; + { + occr = find_occr_in_bb (expr->antic_occr, bb); + + if (occr) + { + /* An occurence might've been already deleted + while processing a dominator of BB. */ + if (occr->deleted_p) + gcc_assert (MAX_HOIST_DEPTH > 1); + else + { + gcc_assert (NONDEBUG_INSN_P (occr->insn)); + hoistable++; + } + } + else + hoistable++; + } /* We've found a potentially hoistable expression, now we look at every block BB dominates to see if it computes the expression. */ for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) { - struct expr *expr = index_map[i]; - struct occr *occr = NULL; int max_distance; /* Ignore self dominance. */ @@ -4439,22 +4486,25 @@ hoist_code (void) if (!TEST_BIT (antloc[dominated->index], i)) continue; - max_distance = expr->max_distance; - if (max_distance > 0) - { - struct occr *occr; - - occr = find_occr_in_bb (expr->antic_occr, dominated); - gcc_assert (occr); - - gcc_assert (NONDEBUG_INSN_P (occr->insn)); + occr = find_occr_in_bb (expr->antic_occr, dominated); + gcc_assert (occr); - /* Adjust MAX_DISTANCE to account for the fact that - OCCR won't have to travel all of DOMINATED, but - only part of it. */ - max_distance += (bb_size[dominated->index] - - to_bb_head[INSN_UID (occr->insn)]); + /* An occurence might've been already deleted + while processing a dominator of BB. */ + if (occr->deleted_p) + { + gcc_assert (MAX_HOIST_DEPTH > 1); + continue; } + gcc_assert (NONDEBUG_INSN_P (occr->insn)); + + max_distance = expr->max_distance; + if (max_distance > 0) + /* Adjust MAX_DISTANCE to account for the fact that + OCCR won't have to travel all of DOMINATED, but + only part of it. */ + max_distance += (bb_size[dominated->index] + - to_bb_head[INSN_UID (occr->insn)]); /* Note if the expression would reach the dominated block unimpared if it was placed at the end of BB. @@ -4463,11 +4513,16 @@ hoist_code (void) from a dominated block into BB. */ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, max_distance, bb_size)) - hoistable++; + { + hoistable++; + VEC_safe_push (occr_t, heap, + occrs_to_hoist, occr); + bitmap_set_bit (from_bbs, dominated->index); + } } /* If we found more than one hoistable occurrence of this - expression, then note it in the bitmap of expressions to + expression, then note it in the vector of expressions to hoist. It makes no sense to hoist things which are computed in only one BB, and doing so tends to pessimize register allocation. One could increase this value to try harder @@ -4478,115 +4533,76 @@ hoist_code (void) to nullify any benefit we get from code hoisting. */ if (hoistable > 1 && dbg_cnt (hoist_insn)) { - SET_BIT (hoist_exprs[bb->index], i); - found = 1; + /* If (hoistable != VEC_length), then there is + an occurence of EXPR in BB itself. Don't waste + time looking for LCA in this case. */ + if ((unsigned) hoistable + == VEC_length (occr_t, occrs_to_hoist)) + { + basic_block lca; + + lca = nearest_common_dominator_for_set (CDI_DOMINATORS, + from_bbs); + if (lca != bb) + /* Punt, it's better to hoist these occurences to + LCA. */ + VEC_free (occr_t, heap, occrs_to_hoist); + } } - } - } - /* If we found nothing to hoist, then quit now. */ - if (! found) - { - VEC_free (basic_block, heap, domby); - continue; - } + else + /* Punt, no point hoisting a single occurence. */ + VEC_free (occr_t, heap, occrs_to_hoist); - /* Loop over all the hoistable expressions. */ - for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++) - { - /* We want to insert the expression into BB only once, so - note when we've inserted it. */ - insn_inserted_p = 0; + insn_inserted_p = 0; - /* These tests should be the same as the tests above. */ - if (TEST_BIT (hoist_exprs[bb->index], i)) - { - /* We've found a potentially hoistable expression, now - we look at every block BB dominates to see if it - computes the expression. */ - for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) + /* Walk through occurences of I'th expressions we want + to hoist to BB and make the transformations. */ + for (j = 0; + VEC_iterate (occr_t, occrs_to_hoist, j, occr); + j++) { - struct expr *expr = index_map[i]; - int max_distance; - - /* Ignore self dominance. */ - if (bb == dominated) - continue; - - /* We've found a dominated block, now see if it computes - the busy expression and whether or not moving that - expression to the "beginning" of that block is safe. */ - if (!TEST_BIT (antloc[dominated->index], i)) - continue; - - max_distance = expr->max_distance; - if (max_distance > 0) + rtx insn; + rtx set; + + gcc_assert (!occr->deleted_p); + + insn = occr->insn; + set = single_set (insn); + gcc_assert (set); + + /* Create a pseudo-reg to store the result of reaching + expressions into. Get the mode for the new pseudo + from the mode of the original destination pseudo. + + It is important to use new pseudos whenever we + emit a set. This will allow reload to use + rematerialization for such registers. */ + if (!insn_inserted_p) + expr->reaching_reg + = gen_reg_rtx_and_attrs (SET_DEST (set)); + + gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), + insn); + delete_insn (insn); + occr->deleted_p = 1; + changed = 1; + gcse_subst_count++; + + if (!insn_inserted_p) { - occr = find_occr_in_bb (expr->antic_occr, dominated); - gcc_assert (occr); - - gcc_assert (NONDEBUG_INSN_P (occr->insn)); - - /* Adjust MAX_DISTANCE to account for the fact that - OCCR won't have to travel all of DOMINATED, but - only part of it. */ - max_distance += (bb_size[dominated->index] - - to_bb_head[INSN_UID (occr->insn)]); - } - - /* The expression is computed in the dominated block and - it would be safe to compute it at the start of the - dominated block. Now we have to determine if the - expression would reach the dominated block if it was - placed at the end of BB. - Note: the fact that hoist_exprs has i-th bit set means - that /some/, not necesserilly all, occurences from - the dominated blocks can be hoisted to BB. Here we check - if a specific occurence can be hoisted to BB. */ - if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, - max_distance, bb_size)) - { - rtx insn; - rtx set; - - if (!occr) - { - occr = find_occr_in_bb (expr->antic_occr, dominated); - gcc_assert (occr); - } - - insn = occr->insn; - set = single_set (insn); - gcc_assert (set); - - /* Create a pseudo-reg to store the result of reaching - expressions into. Get the mode for the new pseudo - from the mode of the original destination pseudo. - - It is important to use new pseudos whenever we - emit a set. This will allow reload to use - rematerialization for such registers. */ - if (!insn_inserted_p) - expr->reaching_reg - = gen_reg_rtx_and_attrs (SET_DEST (set)); - - gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn); - delete_insn (insn); - occr->deleted_p = 1; - changed = 1; - gcse_subst_count++; - - if (!insn_inserted_p) - { - insert_insn_end_basic_block (index_map[i], bb); - insn_inserted_p = 1; - } + insert_insn_end_basic_block (expr, bb); + insn_inserted_p = 1; } } + + VEC_free (occr_t, heap, occrs_to_hoist); + bitmap_clear (from_bbs); } } VEC_free (basic_block, heap, domby); } + VEC_free (basic_block, heap, dom_tree_walk); free (bb_size); free (to_bb_head); free (index_map); diff --git a/gcc/params.def b/gcc/params.def index ada50f6..1d4e687 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -240,6 +240,14 @@ DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST, "Cost at which GCSE optimizations will not constraint the distance an expression can travel", 3, 0, 0) +/* How deep from a given basic block the dominator tree should be searched + for expressions to hoist to the block. The value of 0 will avoid limiting + the search. */ +DEFPARAM(PARAM_MAX_HOIST_DEPTH, + "max-hoist-depth", + "Maximum depth of search in the dominator tree for expressions to hoist", + 30, 0, 0) + /* This parameter limits the number of insns in a loop that will be unrolled, and by how much the loop is unrolled. diff --git a/gcc/params.h b/gcc/params.h index 174edc1..aa96c81 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -129,6 +129,8 @@ typedef enum compiler_param PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO) #define GCSE_UNRESTRICTED_COST \ PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST) +#define MAX_HOIST_DEPTH \ + PARAM_VALUE (PARAM_MAX_HOIST_DEPTH) #define MAX_UNROLLED_INSNS \ PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) #define MAX_SMS_LOOP_NUMBER \ -- 2.7.4