tree-optimization/104010 - fix SLP scalar costing with patterns
author Richard Biener <rguenther@suse.de>
Wed, 13 Apr 2022 11:49:45 +0000 (13:49 +0200)
committer Richard Biener <rguenther@suse.de>
Tue, 19 Apr 2022 14:42:04 +0000 (16:42 +0200)
When doing BB vectorization, the scalar cost computation is derailed
by patterns, causing lanes to be considered live and thus not
costed on the scalar side.  For the testcase in PR104010 this
prevents a vectorization that GCC 11 still performed.  PR103941
shows similar cases of missed optimizations that are fixed by
this patch.

2022-04-13  Richard Biener  <rguenther@suse.de>

PR tree-optimization/104010
PR tree-optimization/103941
* tree-vect-slp.cc (vect_bb_slp_scalar_cost): When
we run into stmts in patterns continue walking those
for uses outside of the vectorized region instead of
marking the lane live.

* gcc.target/i386/pr103941-1.c: New testcase.
* gcc.target/i386/pr103941-2.c: Likewise.

gcc/testsuite/gcc.target/i386/pr103941-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr103941-2.c [new file with mode: 0644]
gcc/tree-vect-slp.cc

diff --git a/gcc/testsuite/gcc.target/i386/pr103941-1.c b/gcc/testsuite/gcc.target/i386/pr103941-1.c
new file mode 100644
index 0000000..524fdd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103941-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+unsigned char ur[16], ua[16], ub[16];
+
+void avgu_v2qi (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    ur[i] = (ua[i] + ub[i] + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler "pavgb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103941-2.c b/gcc/testsuite/gcc.target/i386/pr103941-2.c
new file mode 100644
index 0000000..972a32b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103941-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+void foo (int *c, float *x, float *y)
+{
+  c[0] = x[0] < y[0];
+  c[1] = x[1] < y[1];
+  c[2] = x[2] < y[2];
+  c[3] = x[3] < y[3];
+}
+
+/* { dg-final { scan-assembler "cmpltps" } } */
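diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4ac2b70..805dd7e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc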
@@ -5185,22 +5185,46 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
         the scalar cost.  */
       if (!STMT_VINFO_LIVE_P (stmt_info))
        {
-         FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+         auto_vec<gimple *, 8> worklist;
+         hash_set<gimple *> *worklist_visited = NULL;
+         worklist.quick_push (orig_stmt);
+         do
            {
-             imm_use_iterator use_iter;
-             gimple *use_stmt;
-             FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
-               if (!is_gimple_debug (use_stmt))
-                 {
-                   stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
-                   if (!use_stmt_info
-                       || !vectorized_scalar_stmts.contains (use_stmt_info))
+             gimple *work_stmt = worklist.pop ();
+             FOR_EACH_PHI_OR_STMT_DEF (def_p, work_stmt, op_iter, SSA_OP_DEF)
+               {
+                 imm_use_iterator use_iter;
+                 gimple *use_stmt;
+                 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter,
+                                        DEF_FROM_PTR (def_p))
+                   if (!is_gimple_debug (use_stmt))
                      {
-                       (*life)[i] = true;
-                       break;
+                       stmt_vec_info use_stmt_info
+                         = vinfo->lookup_stmt (use_stmt);
+                       if (!use_stmt_info
+                           || !vectorized_scalar_stmts.contains (use_stmt_info))
+                         {
+                           if (use_stmt_info
+                               && STMT_VINFO_IN_PATTERN_P (use_stmt_info))
+                             {
+                               /* For stmts participating in patterns we have
+                                  to check its uses recursively.  */
+                               if (!worklist_visited)
+                                 worklist_visited = new hash_set<gimple *> ();
+                               if (!worklist_visited->add (use_stmt))
+                                 worklist.safe_push (use_stmt);
+                               continue;
+                             }
+                           (*life)[i] = true;
+                           goto next_lane;
+                         }
                      }
-                 }
+               }
            }
+         while (!worklist.is_empty ());
+next_lane:
+         if (worklist_visited)
+           delete worklist_visited;
          if ((*life)[i])
            continue;
        }