From 24c271effea34f9832d5daafcd3e5178ba6ae678 Mon Sep 17 00:00:00 2001 From: irar Date: Sun, 16 Oct 2011 10:47:12 +0000 Subject: [PATCH] * tree-vect-stmts.c (vectorizable_load): For SLP without permutation treat the first load of the node as the first element in its interleaving chain. * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if necessary and possible. (vect_build_slp_tree): Add new argument. Allow load groups of any size in basic blocks. Keep all the loads for further permutation check. Use the new argument to determine if there is a permutation. Update the recursive calls. (vect_supported_load_permutation_p): Allow subchains of interleaving chains in basic block vectorization. (vect_analyze_slp_instance): Update the call to vect_build_slp_tree. Check load permutation based on the new parameter. (vect_schedule_slp_instance): Don't start from the first element in interleaving chain unless the loads are permuted. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180055 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 18 ++++ gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/vect/bb-slp-29.c | 59 +++++++++++ gcc/tree-vect-slp.c | 181 +++++++++++++++++++++++++++------- gcc/tree-vect-stmts.c | 5 + 5 files changed, 233 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-29.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3334608..969bc1e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2011-10-16 Ira Rosen + + * tree-vect-stmts.c (vectorizable_load): For SLP without permutation + treat the first load of the node as the first element in its + interleaving chain. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if + necessary and possible. + (vect_build_slp_tree): Add new argument. Allow load groups of any size + in basic blocks. Keep all the loads for further permutation check. + Use the new argument to determine if there is a permutation. Update + the recursive calls. + (vect_supported_load_permutation_p): Allow subchains of interleaving + chains in basic block vectorization. + (vect_analyze_slp_instance): Update the call to vect_build_slp_tree. + Check load permutation based on the new parameter. + (vect_schedule_slp_instance): Don't start from the first element in + interleaving chain unless the loads are permuted. + 2011-10-15 Jan Hubicka PR target/48668 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ce5705e..dbe22fe 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2011-10-16 Ira Rosen + + * gcc.dg/vect/bb-slp-29.c: New test. + 2011-10-15 Paolo Carlini PR c++/50732 diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-29.c b/gcc/testsuite/gcc.dg/vect/bb-slp-29.c new file mode 100644 index 0000000..e37b96d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-29.c @@ -0,0 +1,59 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +#define A 3 +#define B 4 +#define N 256 + +short src[N], dst[N]; + +void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) +{ + int i; + h /= 16; + for (i = 0; i < h; i++) + { + dst[0] = A*src[0] + B*src[1]; + dst[1] = A*src[1] + B*src[2]; + dst[2] = A*src[2] + B*src[3]; + dst[3] = A*src[3] + B*src[4]; + dst[4] = A*src[4] + B*src[5]; + dst[5] = A*src[5] + B*src[6]; + dst[6] = A*src[6] + B*src[7]; + dst[7] = A*src[7] + B*src[8]; + dst += stride; + src += stride; + if (dummy == 32) + abort (); + } +} + + +int main (void) +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) + { + dst[i] = 0; + src[i] = i; + } + + foo (dst, src, N, 8, 0); + + for (i = 0; i < N/2; i++) + { + if (dst[i] != A * src[i] + B * src[i+1]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 852d94c..4fdc854 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -116,13 +116,15 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, { tree oprnd; unsigned int i, number_of_oprnds; - tree def; + tree def[2]; gimple def_stmt; enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; stmt_vec_info stmt_info = vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); enum gimple_rhs_class rhs_class; struct loop *loop = NULL; + enum tree_code rhs_code; + bool different_types = false; if (loop_vinfo) loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -134,7 +136,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, { oprnd = gimple_op (stmt, i + 1); - if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, + if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], &dt[i]) || (!def_stmt && dt[i] != vect_constant_def)) { @@ -189,11 +191,11 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, switch (gimple_code (def_stmt)) { case GIMPLE_PHI: - def = gimple_phi_result (def_stmt); + def[i] = gimple_phi_result (def_stmt); break; case GIMPLE_ASSIGN: - def = gimple_assign_lhs (def_stmt); + def[i] = gimple_assign_lhs (def_stmt); break; default: @@ -207,8 +209,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, { /* op0 of the first stmt of the group - store its info. */ *first_stmt_dt0 = dt[i]; - if (def) - *first_stmt_def0_type = TREE_TYPE (def); + if (def[i]) + *first_stmt_def0_type = TREE_TYPE (def[i]); else *first_stmt_const_oprnd = oprnd; @@ -228,8 +230,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, { /* op1 of the first stmt of the group - store its info. */ *first_stmt_dt1 = dt[i]; - if (def) - *first_stmt_def1_type = TREE_TYPE (def); + if (def[i]) + *first_stmt_def1_type = TREE_TYPE (def[i]); else { /* We assume that the stmt contains only one constant @@ -255,24 +257,56 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, && ((*first_stmt_dt0 != dt[i] && !(*first_stmt_dt0 == vect_reduction_def && dt[i] == vect_internal_def)) - || (*first_stmt_def0_type && def + || (*first_stmt_def0_type && def[0] && !types_compatible_p (*first_stmt_def0_type, - TREE_TYPE (def))))) + TREE_TYPE (def[0]))))) || (i == 1 && ((*first_stmt_dt1 != dt[i] && !(*first_stmt_dt1 == vect_reduction_def && dt[i] == vect_internal_def)) - || (*first_stmt_def1_type && def + || (*first_stmt_def1_type && def[1] && !types_compatible_p (*first_stmt_def1_type, - TREE_TYPE (def))))) - || (!def + TREE_TYPE (def[1]))))) + || (!def[i] && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), - TREE_TYPE (oprnd)))) + TREE_TYPE (oprnd))) + || different_types) { - if (vect_print_dump_info (REPORT_SLP)) - fprintf (vect_dump, "Build SLP failed: different types "); + if (i != number_of_oprnds - 1) + different_types = true; + else + { + if (is_gimple_assign (stmt) + && (rhs_code = gimple_assign_rhs_code (stmt)) + && TREE_CODE_CLASS (rhs_code) == tcc_binary + && commutative_tree_code (rhs_code) + && *first_stmt_dt0 == dt[1] + && *first_stmt_dt1 == dt[0] + && def[0] && def[1] + && !(*first_stmt_def0_type + && !types_compatible_p (*first_stmt_def0_type, + TREE_TYPE (def[1]))) + && !(*first_stmt_def1_type + && !types_compatible_p (*first_stmt_def1_type, + TREE_TYPE (def[0])))) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Swapping operands of "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), + gimple_assign_rhs2_ptr (stmt)); + } + else + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: different types "); - return false; + return false; + } + } } } } @@ -286,10 +320,10 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, case vect_internal_def: case vect_reduction_def: - if (i == 0) + if ((i == 0 && !different_types) || (i == 1 && different_types)) VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); else - VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); + VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); break; default: @@ -297,7 +331,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (vect_print_dump_info (REPORT_SLP)) { fprintf (vect_dump, "Build SLP failed: illegal type of def "); - print_generic_expr (vect_dump, def, TDF_SLIM); + print_generic_expr (vect_dump, def[i], TDF_SLIM); } return false; @@ -320,7 +354,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, int ncopies_for_cost, unsigned int *max_nunits, VEC (int, heap) **load_permutation, VEC (slp_tree, heap) **loads, - unsigned int vectorization_factor) + unsigned int vectorization_factor, bool *loads_permuted) { VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); @@ -531,7 +565,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, /* Check that the size of interleaved loads group is not greater than the SLP group size. */ - if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) + if (loop_vinfo + && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) { if (vect_print_dump_info (REPORT_SLP)) { @@ -652,19 +687,22 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, /* Strided loads were reached - stop the recursion. */ if (stop_recursion) { + VEC_safe_push (slp_tree, heap, *loads, *node); if (permutation) { - VEC_safe_push (slp_tree, heap, *loads, *node); + + *loads_permuted = true; *inside_cost += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) * group_size; } else - { - /* We don't check here complex numbers chains, so we keep them in - LOADS for further check in vect_supported_load_permutation_p. */ + { + /* We don't check here complex numbers chains, so we set + LOADS_PERMUTED for further check in + vect_supported_load_permutation_p. */ if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) - VEC_safe_push (slp_tree, heap, *loads, *node); + *loads_permuted = true; } return true; @@ -683,7 +721,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, inside_cost, outside_cost, ncopies_for_cost, max_nunits, load_permutation, loads, - vectorization_factor)) + vectorization_factor, loads_permuted)) return false; SLP_TREE_LEFT (*node) = left_node; @@ -701,7 +739,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, inside_cost, outside_cost, ncopies_for_cost, max_nunits, load_permutation, loads, - vectorization_factor)) + vectorization_factor, loads_permuted)) return false; SLP_TREE_RIGHT (*node) = right_node; @@ -887,8 +925,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, bool supported, bad_permutation = false; sbitmap load_index; slp_tree node, other_complex_node; - gimple stmt, first = NULL, other_node_first; + gimple stmt, first = NULL, other_node_first, load, next_load, first_load; unsigned complex_numbers = 0; + struct data_reference *dr; + bb_vec_info bb_vinfo; /* FORNOW: permutations are only supported in SLP. */ if (!slp_instn) @@ -1050,6 +1090,76 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, } } + /* In basic block vectorization we allow any subchain of an interleaving + chain. + FORNOW: not supported in loop SLP because of realignment compications. */ + bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); + bad_permutation = false; + /* Check that for every node in the instance teh loads form a subchain. */ + if (bb_vinfo) + { + FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) + { + next_load = NULL; + first_load = NULL; + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) + { + if (!first_load) + first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)); + else if (first_load + != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))) + { + bad_permutation = true; + break; + } + + if (j != 0 && next_load != load) + { + bad_permutation = true; + break; + } + + next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); + } + + if (bad_permutation) + break; + } + + /* Check that the alignment of the first load in every subchain, i.e., + the first statement in every load node, is supported. */ + if (!bad_permutation) + { + FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) + { + first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); + if (first_load + != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load))) + { + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); + if (vect_supportable_dr_alignment (dr, false) + == dr_unaligned_unsupported) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "unsupported unaligned load "); + print_gimple_stmt (vect_dump, first_load, 0, + TDF_SLIM); + } + bad_permutation = true; + break; + } + } + } + + if (!bad_permutation) + { + VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); + return true; + } + } + } + /* FORNOW: the only supported permutation is 0..01..1.. of length equal to GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as well (unless it's reduction). */ @@ -1159,6 +1269,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, VEC (int, heap) *load_permutation; VEC (slp_tree, heap) *loads; struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + bool loads_permuted = false; if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) { @@ -1250,7 +1361,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, &inside_cost, &outside_cost, ncopies_for_cost, &max_nunits, &load_permutation, &loads, - vectorization_factor)) + vectorization_factor, &loads_permuted)) { /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) @@ -1275,7 +1386,8 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, SLP_INSTANCE_LOADS (new_instance) = loads; SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; - if (VEC_length (slp_tree, loads)) + + if (loads_permuted) { if (!vect_supported_load_permutation_p (new_instance, group_size, load_permutation)) @@ -2567,10 +2679,11 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, /* Loads should be inserted before the first load. */ if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) && STMT_VINFO_STRIDED_ACCESS (stmt_info) - && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) + && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) + && SLP_INSTANCE_LOAD_PERMUTATION (instance)) si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); else if (is_pattern_stmt_p (stmt_info)) - si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); else si = gsi_for_stmt (stmt); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index d986ff8..0df3c0c 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4241,6 +4241,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (strided_load) { first_stmt = GROUP_FIRST_ELEMENT (stmt_info); + if (slp + && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) + && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); + /* Check if the chain of loads is already vectorized. */ if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) { -- 2.7.4