Check TYPE_OVERFLOW_WRAPS for parloops reductions
author vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 23 Jul 2015 12:17:52 +0000 (12:17 +0000)
committer vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 23 Jul 2015 12:17:52 +0000 (12:17 +0000)
2015-07-23  Tom de Vries  <tom@codesourcery.com>

* tree-parloops.c (gather_scalar_reductions): Add arg to call to
vect_force_simple_reduction.
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
(vect_is_simple_reduction_1): Add and handle
need_wrapping_integral_overflow parameter.
(vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
need_wrapping_integral_overflow parameter.
(vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
* tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.

* gcc.dg/autopar/outer-4.c: Add xfail.
* gcc.dg/autopar/outer-5.c: Same.
* gcc.dg/autopar/outer-6.c: Same.
* gcc.dg/autopar/reduc-2.c: Same.
* gcc.dg/autopar/reduc-2char.c: Same.
* gcc.dg/autopar/reduc-2short.c: Same.
* gcc.dg/autopar/reduc-8.c: Same.
* gcc.dg/autopar/uns-outer-4.c: New test.
* gcc.dg/autopar/uns-outer-5.c: New test.
* gcc.dg/autopar/uns-outer-6.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226107 138bc75d-0d04-0410-961f-82ee72b054a4

15 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/autopar/outer-4.c
gcc/testsuite/gcc.dg/autopar/outer-5.c
gcc/testsuite/gcc.dg/autopar/outer-6.c
gcc/testsuite/gcc.dg/autopar/reduc-2.c
gcc/testsuite/gcc.dg/autopar/reduc-2char.c
gcc/testsuite/gcc.dg/autopar/reduc-2short.c
gcc/testsuite/gcc.dg/autopar/reduc-8.c
gcc/testsuite/gcc.dg/autopar/uns-outer-4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/autopar/uns-outer-5.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/autopar/uns-outer-6.c [new file with mode: 0644]
gcc/tree-parloops.c
gcc/tree-vect-loop.c
gcc/tree-vectorizer.h

index 6e1b53e..81c60be 100644 (file)
@@ -1,3 +1,15 @@
+2015-07-23  Tom de Vries  <tom@codesourcery.com>
+
+       * tree-parloops.c (gather_scalar_reductions): Add arg to call to
+       vect_force_simple_reduction.
+       * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
+       (vect_is_simple_reduction_1): Add and handle
+       need_wrapping_integral_overflow parameter.
+       (vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
+       need_wrapping_integral_overflow parameter.
+       (vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
+       * tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.
+
 2015-07-23  Yuri Rumyantsev  <ysrumyan@gmail.com>
 
        PR tree-optimization/66926,66951
index 672be70..3a6374c 100644 (file)
@@ -1,3 +1,16 @@
+2015-07-23  Tom de Vries  <tom@codesourcery.com>
+
+       * gcc.dg/autopar/outer-4.c: Add xfail.
+       * gcc.dg/autopar/outer-5.c: Same.
+       * gcc.dg/autopar/outer-6.c: Same.
+       * gcc.dg/autopar/reduc-2.c: Same.
+       * gcc.dg/autopar/reduc-2char.c: Same.
+       * gcc.dg/autopar/reduc-2short.c: Same.
+       * gcc.dg/autopar/reduc-8.c: Same.
+       * gcc.dg/autopar/uns-outer-4.c: New test.
+       * gcc.dg/autopar/uns-outer-5.c: New test.
+       * gcc.dg/autopar/uns-outer-6.c: New test.
+
 2015-07-23  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/66952
index 6fd37c5..2027499 100644 (file)
@@ -32,4 +32,4 @@ int main(void)
 
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
index 6a0ae91..d6e0dd3 100644 (file)
@@ -45,4 +45,4 @@ int main(void)
 }
 
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
index 6bef7cc..726794c 100644 (file)
@@ -44,6 +44,6 @@ int main(void)
 
 
 /* Check that outer loop is parallelized.  */
-/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
index 3ad16e4..2f4883d 100644 (file)
@@ -63,6 +63,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */
 
index 072489f..14867f3 100644 (file)
@@ -60,7 +60,7 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
 
index 4dbbc8a..7c19cc5 100644 (file)
@@ -59,6 +59,6 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
 
index 16fb954..1d05c48 100644 (file)
@@ -84,5 +84,5 @@ int main (void)
 }
 
 
-/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-4.c
new file mode 100644 (file)
index 0000000..ef9fc2a
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int g_sum=0;
+unsigned int x[500][500];
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Double reduction is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum += x[i][j];
+
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  parloop (500);
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-5.c
new file mode 100644 (file)
index 0000000..a929e5d
--- /dev/null
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Inner cycle is currently not supported, outer loop is not
+     parallelized.  Inner reduction is detected, inner loop is
+     parallelized.  */
+  for (i = 0; i < N; i++)
+    {
+      sum = 0;
+      for (j = 0; j < N; j++)
+       sum += x[i][j];
+      y[i]=sum;
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c b/gcc/testsuite/gcc.dg/autopar/uns-outer-6.c
new file mode 100644 (file)
index 0000000..5c745f8
--- /dev/null
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
+
+void abort (void);
+
+unsigned int x[500][500];
+unsigned int y[500];
+unsigned int g_sum=0;
+
+
+void __attribute__((noinline))
+init (int i, int j)
+{
+  x[i][j]=1;
+}
+
+void __attribute__((noinline))
+parloop (int N)
+{
+  int i, j;
+  unsigned int sum;
+
+  /* Outer loop reduction, outerloop is parallelized.  */
+  sum=0;
+  for (i = 0; i < N; i++)
+    {
+      for (j = 0; j < N; j++)
+       y[i]=x[i][j];
+      sum += y[i];
+    }
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  int i, j;
+  for (i = 0; i < 500; i++)
+    for (j = 0; j < 500; j++)
+      init (i, j);
+
+  parloop (500);
+
+  return 0;
+}
+
+
+/* Check that outer loop is parallelized.  */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
index ec41834..88f22e8 100644 (file)
@@ -2376,9 +2376,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
       if (!simple_iv (loop, loop, res, &iv, true)
        && simple_loop_info)
        {
-           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
-                                                           phi, true,
-                                                           &double_reduc);
+          gimple reduc_stmt
+            = vect_force_simple_reduction (simple_loop_info, phi, true,
+                                           &double_reduc, true);
           if (reduc_stmt && !double_reduc)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
index 9145dbf..c31bfbd 100644 (file)
@@ -715,7 +715,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 
       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
-                                               &double_reduc);
+                                               &double_reduc, false);
       if (reduc_stmt)
         {
           if (double_reduc)
@@ -2339,7 +2339,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
 static gimple
 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
                            bool check_reduction, bool *double_reduc,
-                           bool modify)
+                           bool modify, bool need_wrapping_integral_overflow)
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@@ -2613,14 +2613,26 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
                        "reduction: unsafe fp math optimization: ");
       return NULL;
     }
-  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
-          && check_reduction)
+  else if (INTEGRAL_TYPE_P (type) && check_reduction)
     {
-      /* Changing the order of operations changes the semantics.  */
-      if (dump_enabled_p ())
-       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-                       "reduction: unsafe int math optimization: ");
-      return NULL;
+      if (TYPE_OVERFLOW_TRAPS (type))
+       {
+         /* Changing the order of operations changes the semantics.  */
+         if (dump_enabled_p ())
+           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+                           "reduction: unsafe int math optimization"
+                           " (overflow traps): ");
+         return NULL;
+       }
+      if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
+       {
+         /* Changing the order of operations changes the semantics.  */
+         if (dump_enabled_p ())
+           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+                           "reduction: unsafe int math optimization"
+                           " (overflow doesn't wrap): ");
+         return NULL;
+       }
     }
   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
     {
@@ -2749,10 +2761,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
 static gimple
 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+                         bool check_reduction, bool *double_reduc,
+                         bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-                                    double_reduc, false);
+                                    double_reduc, false,
+                                    need_wrapping_integral_overflow);
 }
 
 /* Wrapper around vect_is_simple_reduction_1, which will modify code
@@ -2761,10 +2775,12 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
 
 gimple
 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+                            bool check_reduction, bool *double_reduc,
+                            bool need_wrapping_integral_overflow)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
-                                    double_reduc, true);
+                                    double_reduc, true,
+                                    need_wrapping_integral_overflow);
 }
 
 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
@@ -5074,7 +5090,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-                                        !nested_cycle, &dummy);
+                                        !nested_cycle, &dummy, false);
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
                || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
index 48c1f8d..dfa8795 100644 (file)
@@ -1090,7 +1090,8 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
 /* In tree-vect-loop.c.  */
 /* FORNOW: Used in tree-parloops.c.  */
 extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
+extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *,
+                                          bool);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *);
 /* Drive for loop transformation stage.  */