From fc7b4248172561a9ee310e2d43d8a485a5c9e108 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 11 Dec 2020 10:52:58 +0100 Subject: [PATCH] tree-optimization/98235 - limit SLP discovery With following backedges and the SLP discovery cache not being permute aware we have to put some discovery limits in place again. That's also the opportunity to ditch the separate limit on the number of permutes we try, so the patch limits the overall work done (as in vect_build_slp_tree cache misses) to what we compute as max_tree_size which is based on the number of scalar stmts in the vectorized region. Note the limit is global and there's no attempt to divide the allowed work evenly amongst opportunities, so one degenerate can eat it all up. That's probably only relevant for BB vectorization where the limit is based on up to the size of the whole function. 2020-12-11 Richard Biener PR tree-optimization/98235 * tree-vect-slp.c (vect_build_slp_tree): Exchange npermutes for limit. Decrement that for each cache miss and fail discovery when it reaches zero. (vect_build_slp_tree_2): Remove npermutes handling and simply pass down limit. (vect_build_slp_instance): Use pass down limit. (vect_analyze_slp_instance): Likewise. (vect_analyze_slp): Base the SLP discovery limit on max_tree_size and pass it down. * gcc.dg/torture/pr98235.c: New testcase. --- gcc/testsuite/gcc.dg/torture/pr98235.c | 34 ++++++++++++++++ gcc/tree-vect-slp.c | 74 ++++++++++++++++++++-------------- 2 files changed, 77 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr98235.c diff --git a/gcc/testsuite/gcc.dg/torture/pr98235.c b/gcc/testsuite/gcc.dg/torture/pr98235.c new file mode 100644 index 0000000..5f59013 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr98235.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fallow-store-data-races" } */ + +char tcube[3][9]; +int cur_move; +void perm_cube(void) { + int i, j, k, tmp; + for (; i < cur_move; i++) + while (k-- >= 0) + switch (j) { + case 0: + tmp = tcube[0][6]; + tcube[2][8] = tcube[0][8]; + tcube[0][8] = tmp; + tmp = tcube[0][5]; + tcube[0][5] = tcube[1][8]; + tcube[1][8] = tcube[2][5]; + tcube[2][5] = tcube[1][2]; + tcube[1][2] = tcube[2][1]; + tcube[2][1] = tcube[1][0]; + tcube[0][6] = tmp; + tmp = tcube[0][3]; + tcube[0][3] = tcube[1][0]; + tcube[1][0] = tcube[2][3]; + tcube[2][3] = tcube[1][6]; + tcube[1][6] = tmp; + break; + case 5: + tmp = tcube[2][0]; + tcube[2][0] = tcube[2][2]; + tcube[2][2] = tcube[2][8]; + tcube[2][3] = tmp; + } +} diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index e93e9c7..2d55885 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1375,14 +1375,14 @@ static slp_tree vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, vec stmts, unsigned int group_size, poly_uint64 *max_nunits, - bool *matches, unsigned *npermutes, unsigned *tree_size, + bool *matches, unsigned *limit, unsigned *tree_size, scalar_stmts_to_slp_tree_map_t *bst_map); static slp_tree vect_build_slp_tree (vec_info *vinfo, vec stmts, unsigned int group_size, poly_uint64 *max_nunits, - bool *matches, unsigned *npermutes, unsigned *tree_size, + bool *matches, unsigned *limit, unsigned *tree_size, scalar_stmts_to_slp_tree_map_t *bst_map) { if (slp_tree *leader = bst_map->get (stmts)) @@ -1405,10 +1405,26 @@ vect_build_slp_tree (vec_info *vinfo, SLP_TREE_SCALAR_STMTS (res) = stmts; bst_map->put (stmts.copy (), res); + if (*limit == 0) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery limit exceeded\n"); + bool existed_p = bst_map->put (stmts, NULL); + gcc_assert (existed_p); + /* Mark the node invalid so we can detect those when still in use + as backedge destinations. */ + SLP_TREE_SCALAR_STMTS (res) = vNULL; + SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def; + vect_free_slp_tree (res); + return NULL; + } + --*limit; + poly_uint64 this_max_nunits = 1; slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size, &this_max_nunits, - matches, npermutes, tree_size, bst_map); + matches, limit, tree_size, bst_map); if (!res_) { bool existed_p = bst_map->put (stmts, NULL); @@ -1441,7 +1457,7 @@ static slp_tree vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, vec stmts, unsigned int group_size, poly_uint64 *max_nunits, - bool *matches, unsigned *npermutes, unsigned *tree_size, + bool *matches, unsigned *limit, unsigned *tree_size, scalar_stmts_to_slp_tree_map_t *bst_map) { unsigned nops, i, this_tree_size = 0; @@ -1687,7 +1703,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, group_size, &this_max_nunits, - matches, npermutes, + matches, limit, &this_tree_size, bst_map)) != NULL) { oprnd_info->def_stmts = vNULL; @@ -1708,12 +1724,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, && is_gimple_assign (stmt_info->stmt) /* Swapping operands for reductions breaks assumptions later on. */ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def - && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def - /* Do so only if the number of not successful permutes was nor more - than a cut-ff as re-trying the recursive match on - possibly each level of the tree would expose exponential - behavior. */ - && *npermutes < 4) + && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) { /* See whether we can swap the matching or the non-matching stmt operands. */ @@ -1759,17 +1770,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, bool *tem = XALLOCAVEC (bool, group_size); if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, group_size, &this_max_nunits, - tem, npermutes, + tem, limit, &this_tree_size, bst_map)) != NULL) { oprnd_info->def_stmts = vNULL; children.safe_push (child); continue; } - /* We do not undo the swapping here since it might still be - the better order for the second operand in case we build - the first one from scalars below. */ - ++*npermutes; } fail: @@ -2213,7 +2220,7 @@ static bool vect_analyze_slp_instance (vec_info *vinfo, scalar_stmts_to_slp_tree_map_t *bst_map, stmt_vec_info stmt_info, slp_instance_kind kind, - unsigned max_tree_size); + unsigned max_tree_size, unsigned *limit); /* Analyze an SLP instance starting from SCALAR_STMTS which are a group of KIND. Return true if successful. */ @@ -2223,7 +2230,7 @@ vect_build_slp_instance (vec_info *vinfo, slp_instance_kind kind, vec &scalar_stmts, stmt_vec_info root_stmt_info, - unsigned max_tree_size, + unsigned max_tree_size, unsigned *limit, scalar_stmts_to_slp_tree_map_t *bst_map, /* ??? We need stmt_info for group splitting. */ stmt_vec_info stmt_info_) @@ -2240,12 +2247,11 @@ vect_build_slp_instance (vec_info *vinfo, /* Build the tree for the SLP instance. */ unsigned int group_size = scalar_stmts.length (); bool *matches = XALLOCAVEC (bool, group_size); - unsigned npermutes = 0; poly_uint64 max_nunits = 1; unsigned tree_size = 0; unsigned i; slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, - &max_nunits, matches, &npermutes, + &max_nunits, matches, limit, &tree_size, bst_map); if (node != NULL) { @@ -2413,7 +2419,8 @@ vect_build_slp_instance (vec_info *vinfo, stmt_vec_info rest = vect_split_slp_store_group (stmt_info, group1_size); bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info, - kind, max_tree_size); + kind, max_tree_size, + limit); /* Split the rest at the failure point and possibly re-analyze the remaining matching part if it has at least two lanes. */ @@ -2425,13 +2432,15 @@ vect_build_slp_instance (vec_info *vinfo, rest = vect_split_slp_store_group (rest, i - group1_size); if (i - group1_size > 1) res |= vect_analyze_slp_instance (vinfo, bst_map, rest2, - kind, max_tree_size); + kind, max_tree_size, + limit); } /* Re-analyze the non-matching tail if it has at least two lanes. */ if (i + 1 < group_size) res |= vect_analyze_slp_instance (vinfo, bst_map, - rest, kind, max_tree_size); + rest, kind, max_tree_size, + limit); return res; } } @@ -2456,10 +2465,10 @@ vect_build_slp_instance (vec_info *vinfo, DR_GROUP_GAP (stmt_info) = 0; bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info, - kind, max_tree_size); + kind, max_tree_size, limit); if (i + 1 < group_size) res |= vect_analyze_slp_instance (vinfo, bst_map, - rest, kind, max_tree_size); + rest, kind, max_tree_size, limit); return res; } @@ -2484,7 +2493,7 @@ vect_analyze_slp_instance (vec_info *vinfo, scalar_stmts_to_slp_tree_map_t *bst_map, stmt_vec_info stmt_info, slp_instance_kind kind, - unsigned max_tree_size) + unsigned max_tree_size, unsigned *limit) { unsigned int i; vec scalar_stmts; @@ -2556,7 +2565,7 @@ vect_analyze_slp_instance (vec_info *vinfo, bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts, kind == slp_inst_kind_ctor ? stmt_info : NULL, - max_tree_size, bst_map, + max_tree_size, limit, bst_map, kind == slp_inst_kind_store ? stmt_info : NULL); @@ -2577,6 +2586,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) DUMP_VECT_SCOPE ("vect_analyze_slp"); + unsigned limit = max_tree_size; + scalar_stmts_to_slp_tree_map_t *bst_map = new scalar_stmts_to_slp_tree_map_t (); @@ -2585,7 +2596,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) vect_analyze_slp_instance (vinfo, bst_map, first_element, STMT_VINFO_GROUPED_ACCESS (first_element) ? slp_inst_kind_store : slp_inst_kind_ctor, - max_tree_size); + max_tree_size, &limit); if (bb_vec_info bb_vinfo = dyn_cast (vinfo)) { @@ -2595,7 +2606,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind, bb_vinfo->roots[i].stmts, bb_vinfo->roots[i].root, - max_tree_size, bst_map, NULL)) + max_tree_size, &limit, bst_map, NULL)) bb_vinfo->roots[i].stmts = vNULL; } } @@ -2609,7 +2620,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) ; else if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, slp_inst_kind_reduc_chain, - max_tree_size)) + max_tree_size, &limit)) { /* Dissolve reduction chain group. */ stmt_vec_info vinfo = first_element; @@ -2630,7 +2641,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) /* Find SLP sequences starting from groups of reductions. */ if (loop_vinfo->reductions.length () > 1) vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0], - slp_inst_kind_reduc_group, max_tree_size); + slp_inst_kind_reduc_group, max_tree_size, + &limit); } /* The map keeps a reference on SLP nodes built, release that. */ -- 2.7.4