re PR tree-optimization/66036 (strided group loads are not vectorized)
author Richard Biener <rguenther@suse.de>
Fri, 8 May 2015 15:13:55 +0000 (15:13 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 8 May 2015 15:13:55 +0000 (15:13 +0000)
2015-05-08  Richard Biener  <rguenther@suse.de>

        PR tree-optimization/66036
* tree-vect-data-refs.c (vect_compute_data_ref_alignment):
Handle strided group loads.
(vect_verify_datarefs_alignment): Likewise.
(vect_enhance_data_refs_alignment): Likewise.
(vect_analyze_group_access): Likewise.
(vect_analyze_data_ref_access): Likewise.
(vect_analyze_data_ref_accesses): Likewise.
* tree-vect-stmts.c (vect_model_load_cost): Likewise.
(vectorizable_load): Likewise.

* gcc.dg/vect/slp-41.c: New testcase.

From-SVN: r222914

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/slp-41.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-stmts.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5f15755..f7f03b3 100644 (file)
@@ -1,3 +1,16 @@
+2015-05-08  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/66036
+       * tree-vect-data-refs.c (vect_compute_data_ref_alignment):
+       Handle strided group loads.
+       (vect_verify_datarefs_alignment): Likewise.
+       (vect_enhance_data_refs_alignment): Likewise.
+       (vect_analyze_group_access): Likewise.
+       (vect_analyze_data_ref_access): Likewise.
+       (vect_analyze_data_ref_accesses): Likewise.
+       * tree-vect-stmts.c (vect_model_load_cost): Likewise.
+       (vectorizable_load): Likewise.
+
 2015-05-08  Segher Boessenkool  <segher@kernel.crashing.org>
 
        * config/rs6000/rs6000.md: Require operand inequality in one
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c904070..2b6f663 100644 (file)
@@ -1,3 +1,8 @@
+2015-05-08  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/66036
+       * gcc.dg/vect/slp-41.c: New testcase.
+
 2015-05-08  Mikael Morin  <mikael@gcc.gnu.org>
 
        * gfortran.dg/elemental_optional_args_7.f90: New.
diff --git a/gcc/testsuite/gcc.dg/vect/slp-41.c b/gcc/testsuite/gcc.dg/vect/slp-41.c
new file mode 100644 (file)
index 0000000..7d487b4
--- /dev/null
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-require-effective-target vect_unpack } */
+/* { dg-require-effective-target vect_hw_misalign } */
+
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+testi (int *p, short *q, int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; ++i)
+    {
+      q[i*4+0] = p[i*stride+0];
+      q[i*4+1] = p[i*stride+1];
+      q[i*4+2] = p[i*stride+2];
+      q[i*4+3] = p[i*stride+3];
+    }
+}
+
+void __attribute__((noinline,noclone))
+testi2 (int *q, short *p, int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; ++i)
+    {
+      q[i*4+0] = p[i*stride+0];
+      q[i*4+1] = p[i*stride+1];
+      q[i*4+2] = p[i*stride+2];
+      q[i*4+3] = p[i*stride+3];
+    }
+}
+
+int ia[256];
+short sa[256];
+
+extern void abort (void);
+
+int main()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < 256; ++i)
+    {
+      ia[i] = sa[i] = i;
+       __asm__ volatile ("");
+    }
+  testi (ia, sa, 8, 32);
+  for (i = 0; i < 128; ++i)
+    if (sa[i] != ia[(i / 4) * 8 + i % 4])
+      abort ();
+
+  for (i = 0; i < 256; ++i)
+    {
+      ia[i] = sa[i] = i;
+       __asm__ volatile ("");
+    }
+  testi2 (ia, sa, 8, 32);
+  for (i = 0; i < 128; ++i)
+    if (ia[i] != sa[(i / 4) * 8 + i % 4])
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 0992d6c..7e93899 100644 (file)
@@ -649,7 +649,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   tree vectype;
   tree base, base_addr;
   bool base_aligned;
-  tree misalign;
+  tree misalign = NULL_TREE;
   tree aligned_to;
   unsigned HOST_WIDE_INT alignment;
 
@@ -665,10 +665,12 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 
   /* Strided loads perform only component accesses, misalignment information
      is irrelevant for them.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
     return true;
 
-  misalign = DR_INIT (dr);
+  if (tree_fits_shwi_p (DR_STEP (dr)))
+    misalign = DR_INIT (dr);
   aligned_to = DR_ALIGNED_TO (dr);
   base_addr = DR_BASE_ADDRESS (dr);
   vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -682,9 +684,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   if (loop && nested_in_vect_loop_p (loop, stmt))
     {
       tree step = DR_STEP (dr);
-      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
 
-      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
+      if (tree_fits_shwi_p (step)
+         && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
         {
           if (dump_enabled_p ())
             dump_printf_loc (MSG_NOTE, vect_location,
@@ -710,9 +712,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
   if (!loop)
     {
       tree step = DR_STEP (dr);
-      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
 
-      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+      if (tree_fits_shwi_p (step)
+         && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -942,7 +944,8 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 
       /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
 
       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
@@ -1409,7 +1412,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
       /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
 
       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
@@ -1701,7 +1705,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
-         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+             && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
            continue;
 
          save_misalignment = DR_MISALIGNMENT (dr);
@@ -1819,10 +1824,15 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;
 
-         /* Strided loads perform only component accesses, alignment is
-            irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-           continue;
+           {
+             /* Strided loads perform only component accesses, alignment is
+                irrelevant for them.  */
+             if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+               continue;
+             do_versioning = false;
+             break;
+           }
 
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
 
@@ -2035,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr)
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+  HOST_WIDE_INT dr_step = -1;
   HOST_WIDE_INT groupsize, last_accessed_element = 1;
   bool slp_impossible = false;
   struct loop *loop = NULL;
@@ -2045,7 +2055,13 @@ vect_analyze_group_access (struct data_reference *dr)
 
   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
      size of the interleaving group (including gaps).  */
-  groupsize = absu_hwi (dr_step) / type_size;
+  if (tree_fits_shwi_p (step))
+    {
+      dr_step = tree_to_shwi (step);
+      groupsize = absu_hwi (dr_step) / type_size;
+    }
+  else
+    groupsize = 0;
 
   /* Not consecutive access is possible only if it is a part of interleaving.  */
   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
@@ -2120,7 +2136,6 @@ vect_analyze_group_access (struct data_reference *dr)
       tree prev_init = DR_INIT (data_ref);
       gimple prev = stmt;
       HOST_WIDE_INT diff, gaps = 0;
-      unsigned HOST_WIDE_INT count_in_bytes;
 
       while (next)
         {
@@ -2185,30 +2200,12 @@ vect_analyze_group_access (struct data_reference *dr)
           count++;
         }
 
-      /* COUNT is the number of accesses found, we multiply it by the size of
-         the type to get COUNT_IN_BYTES.  */
-      count_in_bytes = type_size * count;
-
-      /* Check that the size of the interleaving (including gaps) is not
-         greater than STEP.  */
-      if (dr_step != 0
-         && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
-        {
-          if (dump_enabled_p ())
-            {
-              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                               "interleaving size is greater than step for ");
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                 DR_REF (dr));
-              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-            }
-          return false;
-        }
+      if (groupsize == 0)
+        groupsize = count + gaps;
 
-      /* Check that the size of the interleaving is equal to STEP for stores,
+      /* Check that the size of the interleaving is equal to count for stores,
          i.e., that there are no gaps.  */
-      if (dr_step != 0
-         && absu_hwi (dr_step) != count_in_bytes)
+      if (groupsize != count)
         {
           if (DR_IS_READ (dr))
             {
@@ -2227,26 +2224,6 @@ vect_analyze_group_access (struct data_reference *dr)
             }
         }
 
-      /* Check that STEP is a multiple of type size.  */
-      if (dr_step != 0
-         && (dr_step % type_size) != 0)
-        {
-          if (dump_enabled_p ())
-            {
-              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                               "step is not a multiple of type size: step ");
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
-              dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
-              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                 TYPE_SIZE_UNIT (scalar_type));
-              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-            }
-          return false;
-        }
-
-      if (groupsize == 0)
-        groupsize = count + gaps;
-
       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
@@ -2366,9 +2343,12 @@ vect_analyze_data_ref_access (struct data_reference *dr)
       return false;
     }
 
+
   /* Assume this is a DR handled by non-constant strided load case.  */
   if (TREE_CODE (step) != INTEGER_CST)
-    return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
+    return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+           && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
+               || vect_analyze_group_access (dr)));
 
   /* Not consecutive access - check if it's a part of interleaving group.  */
   return vect_analyze_group_access (dr);
@@ -2570,15 +2550,16 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
              || !gimple_assign_single_p (DR_STMT (drb)))
            break;
 
-         /* Check that the data-refs have the same constant size and step.  */
+         /* Check that the data-refs have the same constant size.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
-             || !tree_int_cst_equal (sza, szb)
-             || !tree_fits_shwi_p (DR_STEP (dra))
-             || !tree_fits_shwi_p (DR_STEP (drb))
-             || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
+             || !tree_int_cst_equal (sza, szb))
+           break;
+
+         /* Check that the data-refs have the same step.  */
+         if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
            break;
 
          /* Do not place the same access in the interleaving chain twice.  */
@@ -2611,11 +2592,15 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
                  != type_size_a))
            break;
 
-         /* The step (if not zero) is greater than the difference between
-            data-refs' inits.  This splits groups into suitable sizes.  */
-         HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
-         if (step != 0 && step <= (init_b - init_a))
-           break;
+         /* If the step (if not zero or non-constant) is greater than the
+            difference between data-refs' inits this splits groups into
+            suitable sizes.  */
+         if (tree_fits_shwi_p (DR_STEP (dra)))
+           {
+             HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
+             if (step != 0 && step <= (init_b - init_a))
+               break;
+           }
 
          if (dump_enabled_p ())
            {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 31f26e7..f82decb 100644 (file)
@@ -1112,7 +1112,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
      access is instead being provided by a load-and-permute operation,
      include the cost of the permutes.  */
-  if (!load_lanes_p && group_size > 1)
+  if (!load_lanes_p && group_size > 1
+      && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
     {
       /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
@@ -1127,15 +1128,14 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
     }
 
   /* The loads themselves.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
       /* N scalar loads plus gathering them into a vector.  */
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
       inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
-      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
-                                      stmt_info, 0, vect_body);
     }
   else
     vect_get_load_cost (first_dr, ncopies,
@@ -1143,6 +1143,9 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost, 
                        prologue_cost_vec, body_cost_vec, true);
+  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
+                                      stmt_info, 0, vect_body);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -5657,7 +5660,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gimple ptr_incr = NULL;
   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies;
-  int i, j, group_size, group_gap;
+  int i, j, group_size = -1, group_gap;
   tree msq = NULL_TREE, lsq;
   tree offset = NULL_TREE;
   tree byte_offset = NULL_TREE;
@@ -5790,9 +5793,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          return false;
        }
 
-      if (!slp && !PURE_SLP_STMT (stmt_info))
+      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+      if (!slp
+         && !PURE_SLP_STMT (stmt_info)
+         && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        {
-         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
@@ -5847,7 +5852,22 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        }
     }
   else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-    ;
+    {
+      if ((grouped_load
+          && (slp || PURE_SLP_STMT (stmt_info)))
+         && (group_size > nunits
+             || nunits % group_size != 0
+             /* ???  During analysis phase we are not called with the
+                slp node/instance we are in so whether we'll end up
+                with a permutation we don't know.  Still we don't
+                support load permutations.  */
+             || slp_perm))
+       {
+         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                          "unhandled strided group load\n");
+         return false;
+       }
+    }
   else
     {
       negative = tree_int_cst_compare (nested_in_vect_loop
@@ -6136,34 +6156,65 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       prev_stmt_info = NULL;
       running_off = offvar;
       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+      int nloads = nunits;
+      tree ltype = TREE_TYPE (vectype);
+      if (slp)
+       {
+         nloads = nunits / group_size;
+         if (group_size < nunits)
+           ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+         else
+           ltype = vectype;
+         ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+         ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+         gcc_assert (!slp_perm);
+       }
       for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;
 
-         vec_alloc (v, nunits);
-         for (i = 0; i < nunits; i++)
+         if (nloads > 1)
+           {
+             vec_alloc (v, nloads);
+             for (i = 0; i < nloads; i++)
+               {
+                 tree newref, newoff;
+                 gimple incr;
+                 newref = build2 (MEM_REF, ltype, running_off, alias_off);
+
+                 newref = force_gimple_operand_gsi (gsi, newref, true,
+                                                    NULL_TREE, true,
+                                                    GSI_SAME_STMT);
+                 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
+                 newoff = copy_ssa_name (running_off);
+                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                             running_off, stride_step);
+                 vect_finish_stmt_generation (stmt, incr, gsi);
+
+                 running_off = newoff;
+               }
+
+             vec_inv = build_constructor (vectype, v);
+             new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+             new_stmt = SSA_NAME_DEF_STMT (new_temp);
+           }
+         else
            {
-             tree newref, newoff;
-             gimple incr;
-             newref = build2 (MEM_REF, TREE_TYPE (vectype),
-                              running_off, alias_off);
-
-             newref = force_gimple_operand_gsi (gsi, newref, true,
-                                                NULL_TREE, true,
-                                                GSI_SAME_STMT);
-             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
-             newoff = copy_ssa_name (running_off);
-             incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+             new_stmt = gimple_build_assign (make_ssa_name (ltype),
+                                             build2 (MEM_REF, ltype,
+                                                     running_off, alias_off));
+             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+             tree newoff = copy_ssa_name (running_off);
+             gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
 
              running_off = newoff;
            }
 
-         vec_inv = build_constructor (vectype, v);
-         new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
-         new_stmt = SSA_NAME_DEF_STMT (new_temp);
-
+         if (slp)
+           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else