From 46a58c779af3055a4b10b285a1f4be28abe4351c Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 4 Sep 2020 14:35:39 +0200 Subject: [PATCH] tree-optimization/96920 - another ICE when vectorizing nested cycles This refines the previous fix for PR96698 by re-doing how and where we arrange for setting vectorized cycle PHI backedge values. 2020-09-04 Richard Biener PR tree-optimization/96698 PR tree-optimization/96920 * tree-vectorizer.h (loop_vec_info::reduc_latch_defs): Remove. (loop_vec_info::reduc_latch_slp_defs): Likewise. * tree-vect-stmts.c (vect_transform_stmt): Remove vectorized cycle PHI latch code. * tree-vect-loop.c (maybe_set_vectorized_backedge_value): New helper to set vectorized cycle PHI latch values. (vect_transform_loop): Walk over all PHIs again after vectorizing them, calling maybe_set_vectorized_backedge_value. Call maybe_set_vectorized_backedge_value for each vectorized stmt. Remove delayed update code. * tree-vect-slp.c (vect_analyze_slp_instance): Initialize SLP instance reduc_phis member. (vect_schedule_slp): Set vectorized cycle PHI latch values. * gfortran.dg/vect/pr96920.f90: New testcase. * gcc.dg/vect/pr96920.c: Likewise. --- gcc/testsuite/gcc.dg/vect/pr96920.c | 20 ++++++ gcc/testsuite/gfortran.dg/vect/pr96920.f90 | 37 +++++++++++ gcc/tree-vect-loop.c | 102 +++++++++++++++++++---------- gcc/tree-vect-slp.c | 20 ++++++ gcc/tree-vect-stmts.c | 27 -------- gcc/tree-vectorizer.h | 5 -- 6 files changed, 144 insertions(+), 67 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr96920.c create mode 100644 gcc/testsuite/gfortran.dg/vect/pr96920.f90 diff --git a/gcc/testsuite/gcc.dg/vect/pr96920.c b/gcc/testsuite/gcc.dg/vect/pr96920.c new file mode 100644 index 0000000..af5da4a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr96920.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ + +int a[1024]; +int b[2048]; + +void foo (int x, int y) +{ + for (int i = 0; i < 1024; ++i) + { + int tem0 = b[2*i]; + int tem1 = b[2*i+1]; + for (int j = 0; j < 32; ++j) + { + int tem = tem0; + tem0 = tem1; + tem1 = tem; + a[i] += tem0; + } + } +} diff --git a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 b/gcc/testsuite/gfortran.dg/vect/pr96920.f90 new file mode 100644 index 0000000..e1dc0ad --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90 @@ -0,0 +1,37 @@ +! { dg-do compile } + subroutine ice(npoint, nterm, x, g) + implicit none + integer norder + parameter (norder=10) + integer j + integer k + integer ii + integer nterm + integer npoint + real b(norder) + real c(norder) + real d(norder) + real x(npoint) + real g(npoint) + real gg + real prev + real prev2 + + j = 1 + 100 continue + j = j+1 + if (nterm == j) then + do ii=1,npoint + k = nterm + gg= d(k) + prev= 0.0 + do k=k-1,1,-1 + prev2= prev + prev= gg + gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2 + enddo + g(ii) = gg + enddo + endif + go to 100 + end diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 3857638..9799b3d 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -8555,6 +8555,42 @@ scale_profile_for_vect_loop (class loop *loop, unsigned vf) scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob); } +/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI + latch edge values originally defined by it. */ + +static void +maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo, + stmt_vec_info def_stmt_info) +{ + tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt); + if (!def || TREE_CODE (def) != SSA_NAME) + return; + stmt_vec_info phi_info; + imm_use_iterator iter; + use_operand_p use_p; + FOR_EACH_IMM_USE_FAST (use_p, iter, def) + if (gphi *phi = dyn_cast (USE_STMT (use_p))) + if (gimple_bb (phi)->loop_father->header == gimple_bb (phi) + && (phi_info = loop_vinfo->lookup_stmt (phi)) + && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info)) + && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION + && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION) + { + loop_p loop = gimple_bb (phi)->loop_father; + edge e = loop_latch_edge (loop); + if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def) + { + vec &phi_defs = STMT_VINFO_VEC_STMTS (phi_info); + vec &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info); + gcc_assert (phi_defs.length () == latch_defs.length ()); + for (unsigned i = 0; i < phi_defs.length (); ++i) + add_phi_arg (as_a (phi_defs[i]), + gimple_get_lhs (latch_defs[i]), e, + gimple_phi_arg_location (phi, e->dest_idx)); + } + } +} + /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI. When vectorizing STMT_INFO as a store, set *SEEN_STORE to its stmt_vec_info. */ @@ -8933,7 +8969,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) - { + { gphi *phi = si.phi (); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -8968,6 +9004,27 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) } } + for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); + gsi_next (&si)) + { + gphi *phi = si.phi (); + stmt_info = loop_vinfo->lookup_stmt (phi); + if (!stmt_info) + continue; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + continue; + + if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) + && ! PURE_SLP_STMT (stmt_info)) + maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info); + } + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);) { @@ -9005,9 +9062,16 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) = STMT_VINFO_RELATED_STMT (stmt_info); vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si, &seen_store); + maybe_set_vectorized_backedge_value (loop_vinfo, + pat_stmt_info); + } + else + { + vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, + &seen_store); + maybe_set_vectorized_backedge_value (loop_vinfo, + stmt_info); } - vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, - &seen_store); } gsi_next (&si); if (seen_store) @@ -9025,38 +9089,6 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) } } - /* Fill in backedge defs of reductions. */ - for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i) - { - stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i]; - stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); - vec &phi_info - = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info)); - vec &vec_stmt - = STMT_VINFO_VEC_STMTS (stmt_info); - gcc_assert (phi_info.length () == vec_stmt.length ()); - gphi *phi - = dyn_cast (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt); - edge e = loop_latch_edge (gimple_bb (phi_info[0])->loop_father); - for (unsigned j = 0; j < phi_info.length (); ++j) - add_phi_arg (as_a (phi_info[j]), - gimple_get_lhs (vec_stmt[j]), e, - gimple_phi_arg_location (phi, e->dest_idx)); - } - for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i) - { - slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first; - slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second; - gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); - e = loop_latch_edge (gimple_bb (phi)->loop_father); - gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () - == SLP_TREE_VEC_STMTS (slp_node).length ()); - for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j) - add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[j]), - vect_get_slp_vect_def (slp_node, j), - e, gimple_phi_arg_location (phi, e->dest_idx)); - } - /* Stub out scalar statements that must not survive vectorization. Doing this here helps with grouped statements, or statements that are involved in patterns. */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 15e5f27..811d1b2 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2244,6 +2244,7 @@ vect_analyze_slp_instance (vec_info *vinfo, SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; SLP_INSTANCE_LOADS (new_instance) = vNULL; SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL; + new_instance->reduc_phis = NULL; vect_gather_slp_loads (new_instance, node); if (dump_enabled_p ()) @@ -4565,6 +4566,25 @@ vect_schedule_slp (vec_info *vinfo) stmt_vec_info store_info; unsigned int j; + /* For reductions set the latch values of the vectorized PHIs. */ + if (instance->reduc_phis + && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE + (instance->reduc_phis)) != FOLD_LEFT_REDUCTION + && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE + (instance->reduc_phis)) != EXTRACT_LAST_REDUCTION) + { + slp_tree slp_node = root; + slp_tree phi_node = instance->reduc_phis; + gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); + edge e = loop_latch_edge (gimple_bb (phi)->loop_father); + gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () + == SLP_TREE_VEC_STMTS (slp_node).length ()); + for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j) + add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[j]), + vect_get_slp_vect_def (slp_node, j), + e, gimple_phi_arg_location (phi, e->dest_idx)); + } + /* Remove scalar call stmts. Do not do this for basic-block vectorization as not all uses may be vectorized. ??? Why should this be necessary? DCE should be able to diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 7e072a2..c733967 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10921,33 +10921,6 @@ vect_transform_stmt (vec_info *vinfo, if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) return is_store; - /* If this stmt defines a value used on a backedge, record it so - we can update the vectorized PHIs later. */ - stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); - stmt_vec_info reduc_info; - if (STMT_VINFO_REDUC_DEF (orig_stmt_info) - && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info - && (reduc_info = info_for_reduction (vinfo, orig_stmt_info)) - && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION - && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION) - { - gphi *phi; - edge e; - if (!slp_node - && (phi = dyn_cast - (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt)) - && dominated_by_p (CDI_DOMINATORS, - gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)) - && (e = loop_latch_edge (gimple_bb (phi)->loop_father)) - && (PHI_ARG_DEF_FROM_EDGE (phi, e) - == gimple_get_lhs (orig_stmt_info->stmt))) - as_a (vinfo)->reduc_latch_defs.safe_push (stmt_info); - else if (slp_node - && slp_node != slp_node_instance->reduc_phis) - as_a (vinfo)->reduc_latch_slp_defs.safe_push - (std::make_pair (slp_node, slp_node_instance->reduc_phis)); - } - /* Handle stmts whose DEF is used outside the loop-nest that is being vectorized. */ if (is_a (vinfo)) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f36e2ad..8551b68 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -627,11 +627,6 @@ public: stmt in the chain. */ auto_vec reduction_chains; - /* The vectorized stmts defining the latch values of the reduction - they are involved with. */ - auto_vec reduc_latch_defs; - auto_vec > reduc_latch_slp_defs; - /* Cost vector for a single scalar iteration. */ auto_vec scalar_cost_vec; -- 2.7.4