Add missing target check for fully-masked fold-left reductions
author Richard Sandiford <richard.sandiford@arm.com>
Fri, 27 Dec 2019 16:54:54 +0000 (16:54 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Fri, 27 Dec 2019 16:54:54 +0000 (16:54 +0000)
The fold-left reduction code has a (rarely-used) fallback that handles
cases in which the loop is fully-masked and the target has no native
support for the reduction.  The fallback includes a VEC_COND_EXPR
between the reduction vector and a safe value, so we should check
whether that VEC_COND_EXPR is supported.

2019-12-27  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* tree-vect-loop.c (vectorizable_reduction): Check whether the
target supports the required VEC_COND_EXPR operation before
allowing the fallback handling of masked fold-left reductions.

gcc/testsuite/
* gcc.target/aarch64/sve/mixed_size_10.c: New test.

From-SVN: r279742

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/mixed_size_10.c [new file with mode: 0644]
gcc/tree-vect-loop.c

index 525a8f0..e30f6bb 100644 (file)
@@ -1,3 +1,9 @@
+2019-12-27  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-vect-loop.c (vectorizable_reduction): Check whether the
+       target supports the required VEC_COND_EXPR operation before
+       allowing the fallback handling of masked fold-left reductions.
+
 2019-12-24  Jiufu Guo  <guojiufu@linux.ibm.com>
 
        * config/rs6000/rs6000.c (rs6000_option_override_internal): Enable
index 0db2e22..d47dd3b 100644 (file)
@@ -1,3 +1,7 @@
+2019-12-27  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/mixed_size_10.c: New test.
+
 2019-12-26  Jakub Jelinek  <jakub@redhat.com>
 
        PR c++/92438
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_10.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_10.c
new file mode 100644 (file)
index 0000000..bf22428
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-options "-O3 -msve-vector-bits=256 -fno-tree-loop-distribution" } */
+
+float
+f (float *restrict x, double *restrict y)
+{
+  float res = 0.0;
+  for (int i = 0; i < 100; ++i)
+    {
+      res += x[i];
+      y[i] += y[i - 4] * 11;
+    }
+  return res;
+}
index 68699f2..9b0cbcc 100644 (file)
@@ -6718,6 +6718,18 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
                             " conditional operation is available.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
        }
+      else if (reduction_type == FOLD_LEFT_REDUCTION
+              && reduc_fn == IFN_LAST
+              && !expand_vec_cond_expr_p (vectype_in,
+                                          truth_type_for (vectype_in),
+                                          SSA_NAME))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "can't use a fully-masked loop because no"
+                            " conditional operation is available.\n");
+         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+       }
       else
        vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
                               vectype_in, NULL);