2015-05-28  Richard Biener  <rguenther@suse.de>
author    rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>  Thu, 28 May 2015 13:29:41 +0000 (13:29 +0000)
committer rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>  Thu, 28 May 2015 13:29:41 +0000 (13:29 +0000)
* tree-vect-loop.c (vect_fixup_reduc_chain): New function.
(vect_fixup_scalar_cycles_with_patterns): Likewise.
(vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
after pattern recog.
(vect_create_epilog_for_reduction): Properly handle reductions
with patterns.
(vectorizable_reduction): Likewise.
* tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
reduction chains.
(vect_get_constant_vectors): Create the correct number of
initial values for reductions.
(vect_schedule_slp_instance): Properly handle type-changing
reduction chains.
* tree-vect-stmts.c (vect_analyze_stmt): Adjust.

* gcc.dg/vect/slp-reduc-sad.c: New testcase.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223818 138bc75d-0d04-0410-961f-82ee72b054a4
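
As context for the ChangeLog entries above, here is a hedged sketch of the kind of scalar cycle this patch targets (a condensed, 4-element variant of the new testcase; the function name and unroll factor are illustrative).  Each statement of the unrolled accumulation is replaced by vect_recog_sad_pattern with a pattern statement, so the reduction chain must afterwards be re-linked to those pattern statements:

/* A reduction chain: i_sum is accumulated by several scalar statements
   per iteration, each of which pattern recognition rewrites (here into
   a SAD pattern).  Condensed from gcc.dg/vect/slp-reduc-sad.c.  */
int
sad4 (unsigned char *pix1, unsigned char *pix2, int n)
{
  int i_sum = 0;
  for (int y = 0; y < n; y++)
    {
      i_sum += __builtin_abs (pix1[0] - pix2[0]);
      i_sum += __builtin_abs (pix1[1] - pix2[1]);
      i_sum += __builtin_abs (pix1[2] - pix2[2]);
      i_sum += __builtin_abs (pix1[3] - pix2[3]);
      pix1 += 4;
      pix2 += 4;
    }
  return i_sum;
}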

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c [new file with mode: 0644]
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9e81104..2a2829d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,22 @@
 2015-05-28  Richard Biener  <rguenther@suse.de>
 
+       * tree-vect-loop.c (vect_fixup_reduc_chain): New function.
+       (vect_fixup_scalar_cycles_with_patterns): Likewise.
+       (vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
+       after pattern recog.
+       (vect_create_epilog_for_reduction): Properly handle reductions
+       with patterns.
+       (vectorizable_reduction): Likewise.
+       * tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
+       reduction chains.
+       (vect_get_constant_vectors): Create the correct number of
+       initial values for reductions.
+       (vect_schedule_slp_instance): Properly handle type-changing
+       reduction chains.
+       * tree-vect-stmts.c (vect_analyze_stmt): Adjust.
+
+2015-05-28  Richard Biener  <rguenther@suse.de>
+
        PR tree-optimization/66142
        * tree-ssa-sccvn.c (vn_reference_lookup_3): Handle non-GIMPLE
        values better in memcpy destination handling.  Handle non-aliasing
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ddc74fc..1a2b185 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
 2015-05-28  Richard Biener  <rguenther@suse.de>
 
+       * gcc.dg/vect/slp-reduc-sad.c: New testcase.
+
+2015-05-28  Richard Biener  <rguenther@suse.de>
+
        PR tree-optimization/66142
        * gcc.dg/tree-ssa/ssa-fre-44.c: Fixup.
 
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c
new file mode 100644
index 0000000..472544f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_usad_char } */
+
+#include "tree-vect.h"
+
+typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+
+extern int abs (int);
+extern void abort (void);
+
+int __attribute__((noinline,noclone))
+foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
+{
+  int i_sum = 0;
+  for( int y = 0; y < 16; y++ )
+    {
+      i_sum += abs ( pix1[0] - pix2[0] );
+      i_sum += abs ( pix1[1] - pix2[1] );
+      i_sum += abs ( pix1[2] - pix2[2] );
+      i_sum += abs ( pix1[3] - pix2[3] );
+      i_sum += abs ( pix1[4] - pix2[4] );
+      i_sum += abs ( pix1[5] - pix2[5] );
+      i_sum += abs ( pix1[6] - pix2[6] );
+      i_sum += abs ( pix1[7] - pix2[7] );
+      i_sum += abs ( pix1[8] - pix2[8] );
+      i_sum += abs ( pix1[9] - pix2[9] );
+      i_sum += abs ( pix1[10] - pix2[10] );
+      i_sum += abs ( pix1[11] - pix2[11] );
+      i_sum += abs ( pix1[12] - pix2[12] );
+      i_sum += abs ( pix1[13] - pix2[13] );
+      i_sum += abs ( pix1[14] - pix2[14] );
+      i_sum += abs ( pix1[15] - pix2[15] );
+      pix1 += 16;
+      pix2 += i_stride_pix2;
+    }
+  return i_sum; 
+}
+
+int
+main ()
+{
+  check_vect ();
+
+  uint8_t X[16*16];
+  uint8_t Y[16*16];
+
+  for (int i = 0; i < 16*16; ++i)
+    {
+      X[i] = i;
+      Y[i] = 16*16 - i;
+      __asm__ volatile ("");
+    }
+
+  if (foo (X, Y, 16) != 32512)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 8fe4dc6..71df11c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -828,6 +828,45 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+/* Transfer group and reduction information from STMT to its pattern stmt.  */
+
+static void
+vect_fixup_reduc_chain (gimple stmt)
+{
+  gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  gimple stmtp;
+  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
+             && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
+  do
+    {
+      stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
+      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+      if (stmt)
+       GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
+         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+    }
+  while (stmt);
+  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
+}
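
A hedged illustration of the re-linking performed above (statement names S1..S3 and P1..P3 are hypothetical):

/* Suppose the scalar reduction chain is  S1 -> S2 -> S3  via
   GROUP_NEXT_ELEMENT, with GROUP_FIRST_ELEMENT == S1, and pattern
   recognition related each Si to a pattern stmt Pi.  After
   vect_fixup_reduc_chain:
     GROUP_FIRST_ELEMENT (Pi) == P1  for every element,
     GROUP_NEXT_ELEMENT (P1) == P2,  GROUP_NEXT_ELEMENT (P2) == P3,
   GROUP_SIZE is copied to P1, and the last pattern stmt P3 is marked
   vect_reduction_def, mirroring how the last scalar stmt was marked.  */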
+
+/* Fixup scalar cycles that now have their stmts detected as patterns.  */
+
+static void
+vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
+{
+  gimple first;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
+    if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
+      {
+       vect_fixup_reduc_chain (first);
+       LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
+      }
+}
 
 /* Function vect_get_loop_niters.
 
@@ -1708,6 +1747,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 
   vect_pattern_recog (loop_vinfo, NULL);
 
+  vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
+
   /* Analyze the access patterns of the data-refs in the loop (consecutive,
      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
 
@@ -4573,8 +4614,12 @@ vect_finalize_reduction:
      exit phi node.  */
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
-      scalar_dest = gimple_assign_lhs (
-                       SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
+      gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+      /* Handle reduction patterns.  */
+      if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
+       dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
+
+      scalar_dest = gimple_assign_lhs (dest_stmt);
       group_size = 1;
     }
 
@@ -4875,12 +4920,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   auto_vec<gimple> phis;
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
+  bool first_p = true;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
      and has reduction properties.  */
-  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
-    stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+    {
+      stmt = GROUP_FIRST_ELEMENT (stmt_info);
+      first_p = false;
+    }
 
   if (nested_in_vect_loop_p (loop, stmt))
     {
@@ -4903,8 +4953,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   /* Make sure it was already recognized as a reduction computation.  */
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
-      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
+  if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
+      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
     return false;
 
   /* 2. Has this been recognized as a reduction pattern?
@@ -4914,7 +4964,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
      the STMT_VINFO_RELATED_STMT field records the last stmt in
      the original sequence that constitutes the pattern.  */
 
-  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+  orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
   if (orig_stmt)
     {
       orig_stmt_info = vinfo_for_stmt (orig_stmt);
@@ -5040,20 +5090,16 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
+                                        !nested_cycle, &dummy);
   if (orig_stmt)
-    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
-                                                       reduc_def_stmt,
-                                                       !nested_cycle,
-                                                       &dummy));
+    gcc_assert (tmp == orig_stmt
+               || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
   else
-    {
-      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-                                             !nested_cycle, &dummy);
-      /* We changed STMT to be the first stmt in reduction chain, hence we
-         check that in this case the first element in the chain is STMT.  */
-      gcc_assert (stmt == tmp
-                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
-    }
+    /* We changed STMT to be the first stmt in reduction chain, hence we
+       check that in this case the first element in the chain is STMT.  */
+    gcc_assert (stmt == tmp
+               || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
 
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
@@ -5267,8 +5313,9 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (!vec_stmt) /* transformation not required.  */
     {
-      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
-                                     reduc_index))
+      if (first_p
+         && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
+                                        reduc_index))
         return false;
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
@@ -5324,11 +5371,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   prev_stmt_info = NULL;
   prev_phi_info = NULL;
   if (slp_node)
-    {
-      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out) 
-                  == TYPE_VECTOR_SUBPARTS (vectype_in));
-    }
+    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   else
     {
       vec_num = 1;
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 9137144..f38191d 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1793,6 +1793,11 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
             scalar_stmts.safe_push (next);
           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
         }
+      /* Mark the first element of the reduction chain as reduction to properly
+        transform the node.  In the reduction analysis phase only the last
+        element of the chain is marked as reduction.  */
+      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
+       STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
     }
   else
     {
@@ -2738,7 +2743,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
-  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
+  number_of_copies = nunits * number_of_vectors / group_size;
 
   number_of_places_left_in_vector = nunits;
   elts = XALLOCAVEC (tree, nunits);
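
A hedged worked example of the new count, reusing the numbers from the comment above: with group_size = 8, nunits = 4 and number_of_vectors = 2,

  number_of_copies = nunits * number_of_vectors / group_size
                   = 4 * 2 / 8
                   = 1,

so each scalar initial value is used exactly once and the eight values fill the two vectors {s1, s2, s3, s4} and {s5, s6, s7, s8}.  The point of the change is that the count is now derived from the number of vectors the node actually needs, which for reductions can differ from what the old least_common_multiple computation produced.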
@@ -3383,8 +3388,14 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
      for the scalar stmts in each node of the SLP tree.  Number of vector
      elements in one vector iteration is the number of scalar elements in
      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
-     size.  */
-  vec_stmts_size = (vectorization_factor * group_size) / nunits;
+     size.
+     Unless this is a SLP reduction in which case the number of vector
+     stmts is equal to the number of vector stmts of the children.  */
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
+  else
+    vec_stmts_size = (vectorization_factor * group_size) / nunits;
 
   if (!SLP_TREE_VEC_STMTS (node).exists ())
     {
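
A hedged worked example for the two branches above (numbers illustrative): for an ordinary grouped access with vectorization_factor = 4, group_size = 8 and nunits = 16, vec_stmts_size = (4 * 8) / 16 = 2.  For a type-changing SLP reduction chain, e.g. unsigned char inputs accumulated into an int vector as in the SAD testcase, the input and output vector types have different numbers of lanes (note the removed TYPE_VECTOR_SUBPARTS assert in vectorizable_reduction above), so that quotient would miscount; taking SLP_TREE_NUMBER_OF_VEC_STMTS of the first child keeps the node's count consistent with what its children produce.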
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 06a59cb..6b018e5 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -7310,9 +7310,11 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 
       case vect_reduction_def:
       case vect_nested_cycle:
-         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
-                     || relevance == vect_used_in_outer_by_reduction
-                     || relevance == vect_unused_in_scope));
+         gcc_assert (!bb_vinfo
+                    && (relevance == vect_used_in_outer
+                        || relevance == vect_used_in_outer_by_reduction
+                        || relevance == vect_used_by_reduction
+                        || relevance == vect_unused_in_scope));
          break;
 
       case vect_induction_def: