re PR middle-end/91272 ([SVE] Use fully-masked loops for CLASTB reductions)
author Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
Mon, 28 Oct 2019 14:50:58 +0000 (14:50 +0000)
committer Prathamesh Kulkarni <prathamesh3492@gcc.gnu.org>
Mon, 28 Oct 2019 14:50:58 +0000 (14:50 +0000)
2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

PR middle-end/91272
* tree-vect-stmts.c (vectorizable_condition): Support
EXTRACT_LAST_REDUCTION with fully-masked loops.

testsuite/
* gcc.target/aarch64/sve/clastb_1.c: Add dg-scan.
* gcc.target/aarch64/sve/clastb_2.c: Likewise.
* gcc.target/aarch64/sve/clastb_3.c: Likewise.
* gcc.target/aarch64/sve/clastb_4.c: Likewise.
* gcc.target/aarch64/sve/clastb_5.c: Likewise.
* gcc.target/aarch64/sve/clastb_6.c: Likewise.
* gcc.target/aarch64/sve/clastb_7.c: Likewise.
* gcc.target/aarch64/sve/clastb_8.c: Likewise.

From-SVN: r277524

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
gcc/tree-vect-stmts.c

index c9daad9..f3410eb 100644 (file)
@@ -1,3 +1,9 @@
+2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+       PR middle-end/91272
+       * tree-vect-stmts.c (vectorizable_condition): Support
+       EXTRACT_LAST_REDUCTION with fully-masked loops.
+
 2019-10-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/92252
index 5d1ddbe..f46cc93 100644 (file)
@@ -1,3 +1,15 @@
+2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+       PR middle-end/91272
+       * gcc.target/aarch64/sve/clastb_1.c: Add dg-scan.
+       * gcc.target/aarch64/sve/clastb_2.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_3.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_4.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_5.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_6.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_7.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_8.c: Likewise.
+
 2019-10-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/92252
index d4f9b0b..d3ea52d 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -17,4 +17,5 @@ condition_reduction (int *a, int min_v)
   return last;
 }
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
index 2c49bd3..c222b70 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #include <stdint.h>
 
@@ -23,4 +23,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
index 35344f4..5aaa71f 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint8_t
 
 #include "clastb_2.c"
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\tb[0-9]+, p[0-7], b[0-9]+, z[0-9]+\.b} } } */
index ce58abd..b4db170 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE int16_t
 
 #include "clastb_2.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
index 2b9783d..28d40a0 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint64_t
 
 #include "clastb_2.c"
 
-/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
index c47d303..38632a2 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -21,4 +21,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
index 3345f87..e5307d2 100644 (file)
@@ -1,7 +1,8 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE double
 #include "clastb_6.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
index d86a428..583fc8d 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -msve-vector-bits=256 --save-temps" } */
 
 #include <stdint.h>
 
@@ -19,6 +19,7 @@ TEST_TYPE (uint16_t);
 TEST_TYPE (uint32_t);
 TEST_TYPE (uint64_t);
 
+/* { dg-final { scan-tree-dump-times "using a fully-masked loop." 4 "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
index 19ac82f..36821ce 100644 (file)
@@ -10050,16 +10050,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                return false;
            }
        }
-      if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
-         && reduction_type == EXTRACT_LAST_REDUCTION)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't yet use a fully-masked loop for"
-                            " EXTRACT_LAST_REDUCTION.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-       }
       if (expand_vec_cond_expr_p (vectype, comp_vectype,
                                     cond_code))
        {
@@ -10089,31 +10079,31 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* Handle cond expr.  */
   for (j = 0; j < ncopies; j++)
     {
-      tree loop_mask = NULL_TREE;
       bool swap_cond_operands = false;
 
       /* See whether another part of the vectorized code applies a loop
         mask to the condition, or to its inverse.  */
 
+      vec_loop_masks *masks = NULL;
       if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
        {
-         scalar_cond_masked_key cond (cond_expr, ncopies);
-         if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-           {
-             vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-             loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
-           }
+         if (reduction_type == EXTRACT_LAST_REDUCTION)
+           masks = &LOOP_VINFO_MASKS (loop_vinfo);
          else
            {
-             bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
-             cond.code = invert_tree_comparison (cond.code, honor_nans);
+             scalar_cond_masked_key cond (cond_expr, ncopies);
              if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+               masks = &LOOP_VINFO_MASKS (loop_vinfo);
+             else
                {
-                 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-                 loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
-                                                 vectype, j);
-                 cond_code = cond.code;
-                 swap_cond_operands = true;
+                 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+                 cond.code = invert_tree_comparison (cond.code, honor_nans);
+                 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+                   {
+                     masks = &LOOP_VINFO_MASKS (loop_vinfo);
+                     cond_code = cond.code;
+                     swap_cond_operands = true;
+                   }
                }
            }
        }
@@ -10248,28 +10238,10 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
             vec != { 0, ... } (masked in the MASK_LOAD,
             unmasked in the VEC_COND_EXPR).  */
 
-         if (loop_mask)
-           {
-             if (COMPARISON_CLASS_P (vec_compare))
-               {
-                 tree tmp = make_ssa_name (vec_cmp_type);
-                 tree op0 = TREE_OPERAND (vec_compare, 0);
-                 tree op1 = TREE_OPERAND (vec_compare, 1);
-                 gassign *g = gimple_build_assign (tmp,
-                                                   TREE_CODE (vec_compare),
-                                                   op0, op1);
-                 vect_finish_stmt_generation (stmt_info, g, gsi);
-                 vec_compare = tmp;
-               }
-
-             tree tmp2 = make_ssa_name (vec_cmp_type);
-             gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
-                                               vec_compare, loop_mask);
-             vect_finish_stmt_generation (stmt_info, g, gsi);
-             vec_compare = tmp2;
-           }
+         /* Force vec_compare to be an SSA_NAME rather than a comparison,
+            in cases where that's necessary.  */
 
-         if (reduction_type == EXTRACT_LAST_REDUCTION)
+         if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
            {
              if (!is_gimple_val (vec_compare))
                {
@@ -10279,6 +10251,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                  vec_compare = vec_compare_name;
                }
+
              if (must_invert_cmp_result)
                {
                  tree vec_compare_name = make_ssa_name (vec_cmp_type);
@@ -10288,6 +10261,24 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                  vec_compare = vec_compare_name;
                }
+
+             if (masks)
+               {
+                 unsigned vec_num = vec_oprnds0.length ();
+                 tree loop_mask
+                   = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                         vectype, vec_num * j + i);
+                 tree tmp2 = make_ssa_name (vec_cmp_type);
+                 gassign *g
+                   = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
+                                          loop_mask);
+                 vect_finish_stmt_generation (stmt_info, g, gsi);
+                 vec_compare = tmp2;
+               }
+           }
+
+         if (reduction_type == EXTRACT_LAST_REDUCTION)
+           {
              gcall *new_stmt = gimple_build_call_internal
                (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
                 vec_then_clause);