re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value...
authorRichard Biener <rguenther@suse.de>
Tue, 4 Dec 2018 08:23:40 +0000 (08:23 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Tue, 4 Dec 2018 08:23:40 +0000 (08:23 +0000)
2018-12-04  Richard Biener  <rguenther@suse.de>

PR tree-optimization/88315
* tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
and fix initialization vector for SAD and DOT_PROD SLP reductions.

* gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
initial value.

From-SVN: r266771

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c
gcc/tree-vect-loop.c

index 19d130b..335caf6 100644 (file)
@@ -1,3 +1,9 @@
+2018-12-04  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/88315
+       * tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
+       and fix initialization vector for SAD and DOT_PROD SLP reductions.
+
 2018-12-03  Sandra Loosemore  <sandra@codesourcery.com>
 
        PR c/59039
index f6bd456..edd24f2 100644 (file)
@@ -1,3 +1,9 @@
+2018-12-04  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/88315
+       * gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
+       initial value.
+
 2018-12-03  Jakub Jelinek  <jakub@redhat.com>
 
        PR middle-end/64242
index d921c7c..5f7a3e0 100644 (file)
@@ -12,7 +12,7 @@ extern void abort (void);
 int __attribute__((noinline,noclone))
 foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
 {
-  int i_sum = 0;
+  int i_sum = 5;
   for( int y = 0; y < 16; y++ )
     {
       i_sum += abs ( pix1[0] - pix2[0] );
@@ -52,7 +52,7 @@ main ()
       __asm__ volatile ("");
     }
 
-  if (foo (X, Y, 16) != 32512)
+  if (foo (X, Y, 16) != 32512 + 5)
     abort ();
 
   return 0;
index 633c315..fa926f4 100644 (file)
@@ -4100,12 +4100,8 @@ get_initial_defs_for_reduction (slp_tree slp_node,
   unsigned HOST_WIDE_INT nunits;
   unsigned j, number_of_places_left_in_vector;
   tree vector_type;
-  tree vop;
-  int group_size = stmts.length ();
-  unsigned int vec_num, i;
-  unsigned number_of_copies = 1;
-  vec<tree> voprnds;
-  voprnds.create (number_of_vectors);
+  unsigned int group_size = stmts.length ();
+  unsigned int i;
   struct loop *loop;
   auto_vec<tree, 16> permute_results;
 
@@ -4138,115 +4134,78 @@ get_initial_defs_for_reduction (slp_tree slp_node,
   if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
     nunits = group_size;
 
-  number_of_copies = nunits * number_of_vectors / group_size;
-
   number_of_places_left_in_vector = nunits;
   bool constant_p = true;
   tree_vector_builder elts (vector_type, nunits, 1);
   elts.quick_grow (nunits);
-  for (j = 0; j < number_of_copies; j++)
+  for (j = 0; j < nunits * number_of_vectors; ++j)
     {
-      for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
-        {
-         tree op;
-         /* Get the def before the loop.  In reduction chain we have only
-            one initial value.  */
-         if ((j != (number_of_copies - 1)
-              || (reduc_chain && i != 0))
-             && neutral_op)
-           op = neutral_op;
-         else
-           op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
+      tree op;
+      i = j % group_size;
+      stmt_vinfo = stmts[i];
 
-          /* Create 'vect_ = {op0,op1,...,opn}'.  */
-          number_of_places_left_in_vector--;
-         elts[number_of_places_left_in_vector] = op;
-         if (!CONSTANT_CLASS_P (op))
-           constant_p = false;
+      /* Get the def before the loop.  In reduction chain we have only
+        one initial value.  Else we have as many as PHIs in the group.  */
+      if (reduc_chain)
+       op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
+      else if (((vec_oprnds->length () + 1) * nunits
+               - number_of_places_left_in_vector >= group_size)
+              && neutral_op)
+       op = neutral_op;
+      else
+       op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
 
-          if (number_of_places_left_in_vector == 0)
-            {
-             gimple_seq ctor_seq = NULL;
-             tree init;
-             if (constant_p && !neutral_op
-                 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
-                 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
-               /* Build the vector directly from ELTS.  */
-               init = gimple_build_vector (&ctor_seq, &elts);
-             else if (neutral_op)
-               {
-                 /* Build a vector of the neutral value and shift the
-                    other elements into place.  */
-                 init = gimple_build_vector_from_val (&ctor_seq, vector_type,
-                                                      neutral_op);
-                 int k = nunits;
-                 while (k > 0 && elts[k - 1] == neutral_op)
-                   k -= 1;
-                 while (k > 0)
-                   {
-                     k -= 1;
-                     init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
-                                          vector_type, init, elts[k]);
-                   }
-               }
-             else
+      /* Create 'vect_ = {op0,op1,...,opn}'.  */
+      number_of_places_left_in_vector--;
+      elts[nunits - number_of_places_left_in_vector - 1] = op;
+      if (!CONSTANT_CLASS_P (op))
+       constant_p = false;
+
+      if (number_of_places_left_in_vector == 0)
+       {
+         gimple_seq ctor_seq = NULL;
+         tree init;
+         if (constant_p && !neutral_op
+             ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
+             : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+           /* Build the vector directly from ELTS.  */
+           init = gimple_build_vector (&ctor_seq, &elts);
+         else if (neutral_op)
+           {
+             /* Build a vector of the neutral value and shift the
+                other elements into place.  */
+             init = gimple_build_vector_from_val (&ctor_seq, vector_type,
+                                                  neutral_op);
+             int k = nunits;
+             while (k > 0 && elts[k - 1] == neutral_op)
+               k -= 1;
+             while (k > 0)
                {
-                 /* First time round, duplicate ELTS to fill the
-                    required number of vectors, then cherry pick the
-                    appropriate result for each iteration.  */
-                 if (vec_oprnds->is_empty ())
-                   duplicate_and_interleave (&ctor_seq, vector_type, elts,
-                                             number_of_vectors,
-                                             permute_results);
-                 init = permute_results[number_of_vectors - j - 1];
+                 k -= 1;
+                 init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
+                                      vector_type, init, elts[k]);
                }
-             if (ctor_seq != NULL)
-               gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
-             voprnds.quick_push (init);
-
-              number_of_places_left_in_vector = nunits;
-             elts.new_vector (vector_type, nunits, 1);
-             elts.quick_grow (nunits);
-             constant_p = true;
-            }
-        }
-    }
-
-  /* Since the vectors are created in the reverse order, we should invert
-     them.  */
-  vec_num = voprnds.length ();
-  for (j = vec_num; j != 0; j--)
-    {
-      vop = voprnds[j - 1];
-      vec_oprnds->quick_push (vop);
-    }
-
-  voprnds.release ();
-
-  /* In case that VF is greater than the unrolling factor needed for the SLP
-     group of stmts, NUMBER_OF_VECTORS to be created is greater than
-     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
-     to replicate the vectors.  */
-  tree neutral_vec = NULL;
-  while (number_of_vectors > vec_oprnds->length ())
-    {
-      if (neutral_op)
-        {
-          if (!neutral_vec)
+           }
+         else
            {
-             gimple_seq ctor_seq = NULL;
-             neutral_vec = gimple_build_vector_from_val
-               (&ctor_seq, vector_type, neutral_op);
-             if (ctor_seq != NULL)
-               gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
+             /* First time round, duplicate ELTS to fill the
+                required number of vectors, then cherry pick the
+                appropriate result for each iteration.  */
+             if (vec_oprnds->is_empty ())
+               duplicate_and_interleave (&ctor_seq, vector_type, elts,
+                                         number_of_vectors,
+                                         permute_results);
+             init = permute_results[number_of_vectors - j - 1];
            }
-          vec_oprnds->quick_push (neutral_vec);
-        }
-      else
-        {
-          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
-            vec_oprnds->quick_push (vop);
-        }
+         if (ctor_seq != NULL)
+           gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
+         vec_oprnds->quick_push (init);
+
+         number_of_places_left_in_vector = nunits;
+         elts.new_vector (vector_type, nunits, 1);
+         elts.quick_grow (nunits);
+         constant_p = true;
+       }
     }
 }