Adjust BB vectorization SLP build heuristics
author     Richard Biener <rguenther@suse.de>
           Wed, 14 Oct 2020 13:37:51 +0000 (15:37 +0200)
committer  Richard Biener <rguenther@suse.de>
           Fri, 16 Oct 2020 11:44:37 +0000 (13:44 +0200)
This changes SLP def gathering to not fail due to mismatched
def type but instead demote the def to external.  This allows the
new testcase to be vectorized in full (with GCC 10 it is not
vectorized at all and with current trunk we vectorize only the
store).  This is important since, with BB vectorization being
applied to bigger pieces of code, the chance increases that we
mix internal and external defs for an operand that should end
up treated as external (built from scalars).
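
To illustrate what demoting the mismatched operand to an external def
means in practice, here is a minimal hand-written sketch of the shape
the new testcase can take, using GCC's generic vector extensions.  The
typedef and the function name are invented and this is not actual
vectorizer output; it only shows the load from b[] and the store to
a[] becoming vector code while the mixed x/y operand is built as a
vector from scalars.

typedef double v2df __attribute__ ((vector_size (16)));

double a[2], b[2];

void
foo_sketch (double x, double y)
{
  v2df vb = { b[0], b[1] };            /* vector load from b[]  */
  v2df vsum = vb + (v2df) { 1., 2. };  /* { b[0] + 1., b[1] + 2. }  */
  v2df vinv = { x, y + 3. };           /* external def built from scalars  */
  v2df vres = vsum * vinv;
  a[0] = vres[0];                      /* grouped store to a[]  */
  a[1] = vres[1];
}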

2020-10-16  Richard Biener  <rguenther@suse.de>

* tree-vect-slp.c (vect_get_and_check_slp_defs): For BB
vectorization, swap operands only if it helps; demote mismatches
to external.

* gcc.dg/vect/bb-slp-53.c: New testcase.

gcc/testsuite/gcc.dg/vect/bb-slp-53.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-53.c b/gcc/testsuite/gcc.dg/vect/bb-slp-53.c
new file mode 100644
index 0000000..f3b5f31
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-53.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2], b[2];
+
+void foo(double x, double y)
+{
+  double breakme1 = y + 3.;
+  double a1 = b[1] + 2.;
+  double breakme0 = x;
+  double a0 = b[0] + 1.;
+  a[0] = a0 * breakme0;
+  a[1] = a1 * breakme1;
+}
+
+/* We should vectorize the SLP opportunity starting from the
+   grouped store to a[] including the load from b[] at the
+   leaf even though the multiplication requires another
+   vector invariant to be built.  */
+/* { dg-final { scan-tree-dump "transform load" "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index cfb79e2..c3e6d67 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -564,8 +564,15 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
                      != (oprnd_info->first_dt != vect_reduction_def))))
            {
              /* Try swapping operands if we got a mismatch.  For BB
-                vectorization only in case that will improve things.  */
-             if (i == commutative_op && !swapped)
+                vectorization only in case it will clearly improve things.  */
+             if (i == commutative_op && !swapped
+                 && (!is_a <bb_vec_info> (vinfo)
+                     || (!vect_def_types_match ((*oprnds_info)[i+1]->first_dt,
+                                                dts[i+1])
+                         && (vect_def_types_match (oprnd_info->first_dt,
+                                                   dts[i+1])
+                             || vect_def_types_match
+                                  ((*oprnds_info)[i+1]->first_dt, dts[i])))))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
@@ -579,10 +586,22 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
                  continue;
                }
 
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "Build SLP failed: different types\n");
-             return 1;
+             if (is_a <bb_vec_info> (vinfo))
+               {
+                 /* Now for commutative ops we should see whether we can
+                    make the other operand matching.  */
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                    "treating operand as external\n");
+                 oprnd_info->first_dt = dt = vect_external_def;
+               }
+             else
+               {
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                    "Build SLP failed: different types\n");
+                 return 1;
+               }
            }
 
       /* Make sure to demote the overall operand to external.  */
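
Read on its own, the new BB swap guard says: only try swapping the
commutative operands when operand i+1 currently also mismatches its
first-lane def type, and the swap would make at least one of the two
operands match.  The following standalone model of that decision is a
sketch only; the enum, the helper and the function name are invented,
and kinds_match simplifies vect_def_types_match (which additionally
treats constant and external defs as compatible).

#include <stdbool.h>

/* Invented stand-in for vect_def_type.  */
enum def_kind { DEF_INTERNAL, DEF_EXTERNAL, DEF_CONSTANT };

/* Simplified stand-in for vect_def_types_match.  */
static bool
kinds_match (enum def_kind a, enum def_kind b)
{
  return a == b;
}

/* first_i / first_i1: def kinds recorded for operands i and i+1 in the
   first SLP lane; cur_i / cur_i1: def kinds of the current lane's
   operands.  Mirrors the bb_vec_info arm of the condition above.  */
static bool
bb_swap_is_profitable (enum def_kind first_i, enum def_kind first_i1,
                       enum def_kind cur_i, enum def_kind cur_i1)
{
  return (!kinds_match (first_i1, cur_i1)
          && (kinds_match (first_i, cur_i1)
              || kinds_match (first_i1, cur_i)));
}

For instance, with first-lane kinds (internal, external) and current
kinds (external, internal) the swap is taken, since after swapping both
operands match their first-lane kinds.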