From d2ad9321b602192cc1a143bab0cf984986a6ca05 Mon Sep 17 00:00:00 2001 From: irar Date: Mon, 12 Nov 2007 09:32:13 +0000 Subject: [PATCH] PR tree-optimization/33953 * tree-vect-transform.c (vectorizable_operation): In case of SLP, allocate vec_oprnds1 according to the number of created vector statements. In case of shift with scalar argument, store scalar operand for every vector statement to be created for the SLP node. Fix a comment. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@130096 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 9 +++++++ gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.dg/vect/pr33953.c | 35 ++++++++++++++++++++++++++ gcc/tree-vect-transform.c | 49 ++++++++++++++++++++++++++++--------- 4 files changed, 87 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr33953.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0d0f62b..1093ef3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2007-11-12 Ira Rosen + + PR tree-optimization/33953 + * tree-vect-transform.c (vectorizable_operation): In case of SLP, + allocate vec_oprnds1 according to the number of created vector + statements. In case of shift with scalar argument, store scalar operand + for every vector statement to be created for the SLP node. Fix a + comment. + 2007-11-10 Steven Bosscher * gcse.c (CUID_INSN): Remove. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b55f3c0..476b82b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2007-11-12 Ira Rosen + + PR tree-optimization/33953 + * gcc.dg/vect/pr33953.c: New testcase. + 2007-11-11 Tom Tromey PR c++/17577: diff --git a/gcc/testsuite/gcc.dg/vect/pr33953.c b/gcc/testsuite/gcc.dg/vect/pr33953.c new file mode 100644 index 0000000..ef1e248 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr33953.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +typedef unsigned int UINT32; + +void blockmove_NtoN_blend_noremap32 (const UINT32 *srcdata, int srcwidth, + int srcheight, int srcmodulo, + UINT32 *dstdata, int dstmodulo, + int srcshift) +{ + UINT32 *end; + + while (srcheight) + { + while (dstdata <= end - 8) + { + dstdata[0] |= srcdata[0] << srcshift; + dstdata[1] |= srcdata[1] << srcshift; + dstdata[2] |= srcdata[2] << srcshift; + dstdata[3] |= srcdata[3] << srcshift; + dstdata[4] |= srcdata[4] << srcshift; + dstdata[5] |= srcdata[5] << srcshift; + dstdata[6] |= srcdata[6] << srcshift; + dstdata[7] |= srcdata[7] << srcshift; + dstdata += 8; + srcdata += 8; + } + } +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + + diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 736f0c9..faf3b3a 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -3785,6 +3785,8 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, int j, i; VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; tree vop0, vop1; + unsigned int k; + bool scalar_shift_arg = false; /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies this, so we can safely override NCOPIES with 1 here. */ @@ -3901,14 +3903,18 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, /* Invariant argument is needed for a vector shift by a scalar shift operand. */ optab_op2_mode = insn_data[icode].operand[2].mode; - if (! (VECTOR_MODE_P (optab_op2_mode) - || dt[1] == vect_constant_def - || dt[1] == vect_invariant_def)) + if (!VECTOR_MODE_P (optab_op2_mode)) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "operand mode requires invariant argument."); - return false; - } + if (dt[1] != vect_constant_def && dt[1] != vect_invariant_def) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "operand mode requires invariant" + " argument."); + return false; + } + + scalar_shift_arg = true; + } } if (!vec_stmt) /* transformation not required. */ @@ -3928,10 +3934,21 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, /* Handle def. */ vec_dest = vect_create_destination_var (scalar_dest, vectype); + /* Allocate VECs for vector operands. In case of SLP, vector operands are + created in the previous stages of the recursion, so no allocation is + needed, except for the case of shift with scalar shift argument. In that + case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to + be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE. + In case of loop-based vectorization we allocate VECs of size 1. We + allocate VEC_OPRNDS1 only in case of binary operation. */ if (!slp_node) - vec_oprnds0 = VEC_alloc (tree, heap, 1); - if (op_type == binary_op) - vec_oprnds1 = VEC_alloc (tree, heap, 1); + { + vec_oprnds0 = VEC_alloc (tree, heap, 1); + if (op_type == binary_op) + vec_oprnds1 = VEC_alloc (tree, heap, 1); + } + else if (scalar_shift_arg) + vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); /* In case the vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate @@ -4006,10 +4023,20 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, fprintf (vect_dump, "operand 1 using scalar mode."); vec_oprnd1 = op1; VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + if (slp_node) + { + /* Store vec_oprnd1 for every vector stmt to be created + for SLP_NODE. We check during the analysis that all the + shift arguments are the same. + TODO: Allow different constants for different vector + stmts generated for an SLP instance. */ + for (k = 0; k < slp_node->vec_stmts_size - 1; k++) + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + } } } - /* vec_oprnd is available if operand 1 should be of a scalar-type + /* vec_oprnd1 is available if operand 1 should be of a scalar-type (a special case for certain kind of vector shifts); otherwise, operand 1 should be of a vector type (the usual case). */ if (op_type == binary_op && !vec_oprnd1) -- 2.7.4