From 4efd8968f8bf48265f18022b4eda5e7a99c24445 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 17 Jul 2019 11:21:49 +0000 Subject: [PATCH] re PR tree-optimization/91178 (Infinite recursion in split_constant_offset in slp after r260289) 2019-07-17 Richard Biener PR tree-optimization/91178 * tree-ssa.c (release_defs_bitset): Iterate from higher to lower SSA names to avoid quadratic behavior in the common case. * tree-data-ref.c (split_constant_offset): Add limit argument and pass it down. Initialize it from PARAM_SSA_NAME_DEF_CHAIN_LIMIT. (split_constant_offset_1): Add limit argument and use it to limit SSA def walking. Optimize the common plus/minus case. From-SVN: r273550 --- gcc/ChangeLog | 10 ++++++++++ gcc/tree-data-ref.c | 39 +++++++++++++++++++++++++++------------ gcc/tree-ssa.c | 36 +++++++++++++++++++++--------------- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b15db2d..f13d8a6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,6 +1,16 @@ 2019-07-17 Richard Biener PR tree-optimization/91178 + * tree-ssa.c (release_defs_bitset): Iterate from higher to + lower SSA names to avoid quadratic behavior in the common case. + * tree-data-ref.c (split_constant_offset): Add limit argument + and pass it down. Initialize it from PARAM_SSA_NAME_DEF_CHAIN_LIMIT. + (split_constant_offset_1): Add limit argument and use it to + limit SSA def walking. Optimize the common plus/minus case. + +2019-07-17 Richard Biener + + PR tree-optimization/91178 * tree-vect-stmts.c (get_group_load_store_type): For SLP loads with a gap larger than the vector size always use VMAT_STRIDED_SLP. diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index df1a7b8..7f75b7e 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -583,7 +583,8 @@ debug_ddrs (vec ddrs) static void split_constant_offset (tree exp, tree *var, tree *off, - hash_map > &cache); + hash_map > &cache, + unsigned *limit); /* Helper function for split_constant_offset. Expresses OP0 CODE OP1 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero @@ -594,7 +595,8 @@ split_constant_offset (tree exp, tree *var, tree *off, static bool split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, tree *var, tree *off, - hash_map > &cache) + hash_map > &cache, + unsigned *limit) { tree var0, var1; tree off0, off1; @@ -615,8 +617,15 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, /* FALLTHROUGH */ case PLUS_EXPR: case MINUS_EXPR: - split_constant_offset (op0, &var0, &off0, cache); - split_constant_offset (op1, &var1, &off1, cache); + if (TREE_CODE (op1) == INTEGER_CST) + { + split_constant_offset (op0, &var0, &off0, cache, limit); + *var = var0; + *off = size_binop (ocode, off0, fold_convert (ssizetype, op1)); + return true; + } + split_constant_offset (op0, &var0, &off0, cache, limit); + split_constant_offset (op1, &var1, &off1, cache, limit); *var = fold_build2 (code, type, var0, var1); *off = size_binop (ocode, off0, off1); return true; @@ -625,7 +634,7 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, if (TREE_CODE (op1) != INTEGER_CST) return false; - split_constant_offset (op0, &var0, &off0, cache); + split_constant_offset (op0, &var0, &off0, cache, limit); *var = fold_build2 (MULT_EXPR, type, var0, op1); *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1)); return true; @@ -649,7 +658,7 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, if (poffset) { - split_constant_offset (poffset, &poffset, &off1, cache); + split_constant_offset (poffset, &poffset, &off1, cache, limit); off0 = size_binop (PLUS_EXPR, off0, off1); if (POINTER_TYPE_P (TREE_TYPE (base))) base = fold_build_pointer_plus (base, poffset); @@ -719,11 +728,15 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, e = std::make_pair (op0, ssize_int (0)); } + if (*limit == 0) + return false; + --*limit; + var0 = gimple_assign_rhs1 (def_stmt); var1 = gimple_assign_rhs2 (def_stmt); bool res = split_constant_offset_1 (type, var0, subcode, var1, - var, off, cache); + var, off, cache, limit); if (res && use_cache) *cache.get (op0) = std::make_pair (*var, *off); return res; @@ -746,7 +759,7 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, /* Split the unconverted operand and try to prove that wrapping isn't a problem. */ tree tmp_var, tmp_off; - split_constant_offset (op0, &tmp_var, &tmp_off, cache); + split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit); /* See whether we have an SSA_NAME whose range is known to be [A, B]. */ @@ -781,7 +794,7 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, *off = wide_int_to_tree (ssizetype, diff); } else - split_constant_offset (op0, &var0, off, cache); + split_constant_offset (op0, &var0, off, cache, limit); *var = fold_convert (type, var0); return true; } @@ -798,7 +811,8 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, static void split_constant_offset (tree exp, tree *var, tree *off, - hash_map > &cache) + hash_map > &cache, + unsigned *limit) { tree type = TREE_TYPE (exp), op0, op1, e, o; enum tree_code code; @@ -812,7 +826,7 @@ split_constant_offset (tree exp, tree *var, tree *off, code = TREE_CODE (exp); extract_ops_from_tree (exp, &code, &op0, &op1); - if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache)) + if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit)) { *var = e; *off = o; @@ -822,10 +836,11 @@ split_constant_offset (tree exp, tree *var, tree *off, void split_constant_offset (tree exp, tree *var, tree *off) { + unsigned limit = PARAM_VALUE (PARAM_SSA_NAME_DEF_CHAIN_LIMIT); static hash_map > *cache; if (!cache) cache = new hash_map > (37); - split_constant_offset (exp, var, off, *cache); + split_constant_offset (exp, var, off, *cache, &limit); cache->empty (); } diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c index 16eaa8e..b4b5c90 100644 --- a/gcc/tree-ssa.c +++ b/gcc/tree-ssa.c @@ -559,20 +559,25 @@ release_defs_bitset (bitmap toremove) /* Performing a topological sort is probably overkill, this will most likely run in slightly superlinear time, rather than the - pathological quadratic worst case. */ + pathological quadratic worst case. + But iterate from max SSA name version to min one because + that mimics allocation order during code generation behavior best. + Use an array for this which we compact on-the-fly with a NULL + marker moving towards the end of the vector. */ + auto_vec names; + names.reserve (bitmap_count_bits (toremove) + 1); + names.quick_push (NULL_TREE); + EXECUTE_IF_SET_IN_BITMAP (toremove, 0, j, bi) + names.quick_push (ssa_name (j)); + + bitmap_tree_view (toremove); while (!bitmap_empty_p (toremove)) { - unsigned to_remove_bit = -1U; - EXECUTE_IF_SET_IN_BITMAP (toremove, 0, j, bi) + j = names.length () - 1; + for (unsigned i = names.length () - 1; names[i];) { - if (to_remove_bit != -1U) - { - bitmap_clear_bit (toremove, to_remove_bit); - to_remove_bit = -1U; - } - bool remove_now = true; - tree var = ssa_name (j); + tree var = names[i]; gimple *stmt; imm_use_iterator uit; @@ -617,14 +622,15 @@ release_defs_bitset (bitmap toremove) gsi_remove (&gsi, true); release_defs (def); } - - to_remove_bit = j; + bitmap_clear_bit (toremove, SSA_NAME_VERSION (var)); } + else + --i; + if (--j != i) + names[i] = names[j]; } - if (to_remove_bit != -1U) - bitmap_clear_bit (toremove, to_remove_bit); } - + bitmap_list_view (toremove); } /* Disable warnings about missing quoting in GCC diagnostics for -- 2.7.4