Refactor SLP permute opt propagation
authorRichard Biener <rguenther@suse.de>
Tue, 29 Jun 2021 09:30:23 +0000 (11:30 +0200)
committerRichard Biener <rguenther@suse.de>
Tue, 29 Jun 2021 11:10:36 +0000 (13:10 +0200)
This rewrites the SLP permute opt propagation to elide the visited
bit for an incoming permute of -1 as well as allowing the initial
propagation to take more than one iteration before starting on
materialization.  As we still lack propagation in the reverse
direction I've added gcc.dg/vect/bb-slp-71.c and a stopgap to
restrict "any" permute handling to the supported cases.

2021-06-29  Richard Biener  <rguenther@suse.de>

* tree-vect-slp.c (slpg_vertex::visited): Remove.
(vect_slp_perms_eq): Handle -1 permutes.
(vect_optimize_slp): Rewrite permute propagation.

* gcc.dg/vect/pr67790.c: Un-XFAIL.
* gcc.dg/vect/bb-slp-71.c: New testcase.

gcc/testsuite/gcc.dg/vect/bb-slp-71.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/pr67790.c
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-71.c b/gcc/testsuite/gcc.dg/vect/bb-slp-71.c
new file mode 100644 (file)
index 0000000..6816511
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+int a[4], b[4];
+
+void __attribute__((noipa))
+foo(int x, int y)
+{
+  int tem0 = x + 1;
+  int tem1 = y + 2;
+  int tem2 = x + 3;
+  int tem3 = y + 4;
+  a[0] = tem0 + b[1];
+  a[1] = tem1 + b[0];
+  a[2] = tem2 + b[2];
+  a[3] = tem3 + b[3];
+}
+
+int main()
+{
+  check_vect ();
+
+  b[0] = 10;
+  b[1] = 14;
+  b[2] = 18;
+  b[3] = 22;
+  foo (-1, -3);
+  if (a[0] != 14 || a[1] != 9 || a[2] != 20 || a[3] != 23)
+    __builtin_abort ();
+  return 0;
+}
index 0555d41..32eacd9 100644 (file)
@@ -38,4 +38,4 @@ int main()
 }
 
 /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
-/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */
index 63b6e6a..524bfaa 100644 (file)
@@ -3470,12 +3470,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 struct slpg_vertex
 {
   slpg_vertex (slp_tree node_)
-    : node (node_), visited (0), perm_out (0), materialize (0) {}
+    : node (node_), perm_out (-1), materialize (0) {}
 
   int get_perm_in () const { return materialize ? materialize : perm_out; }
 
   slp_tree node;
-  unsigned visited : 1;
   /* The permutation on the outgoing lanes (towards SLP parents).  */
   int perm_out;
   /* The permutation that is applied by this node.  perm_out is
@@ -3567,7 +3566,8 @@ vect_slp_perms_eq (const vec<vec<unsigned> > &perms,
                   int perm_a, int perm_b)
 {
   return (perm_a == perm_b
-         || (perms[perm_a].length () == perms[perm_b].length ()
+         || (perm_a != -1 && perm_b != -1
+             && perms[perm_a].length () == perms[perm_b].length ()
              && memcmp (&perms[perm_a][0], &perms[perm_b][0],
                         sizeof (unsigned) * perms[perm_a].length ()) == 0));
 }
@@ -3614,7 +3614,7 @@ vect_optimize_slp (vec_info *vinfo)
       /* Leafs do not change across iterations.  Note leafs also double
         as entries to the reverse graph.  */
       if (!slpg->vertices[idx].succ)
-       vertices[idx].visited = 1;
+       vertices[idx].perm_out = 0;
       /* Loads are the only thing generating permutes.  */
       if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
        continue;
@@ -3668,12 +3668,17 @@ vect_optimize_slp (vec_info *vinfo)
 
   /* Propagate permutes along the graph and compute materialization points.  */
   bool changed;
+  bool do_materialization = false;
   unsigned iteration = 0;
   do
     {
       changed = false;
       ++iteration;
 
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "SLP optimize iteration %d\n", iteration);
+
       for (i = vertices.length (); i > 0 ; --i)
        {
          int idx = ipo[i-1];
@@ -3685,19 +3690,21 @@ vect_optimize_slp (vec_info *vinfo)
              || SLP_TREE_DEF_TYPE (node) == vect_constant_def)
            continue;
 
-         vertices[idx].visited = 1;
-
          /* We still eventually have failed backedge SLP nodes in the
             graph, those are only cancelled when analyzing operations.
             Simply treat them as transparent ops, propagating permutes
             through them.  */
          if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
            {
-             /* We do not handle stores with a permutation.  */
+             /* We do not handle stores with a permutation, so all
+                incoming permutes must have been materialized.  */
              stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
              if (STMT_VINFO_DATA_REF (rep)
                  && DR_IS_WRITE (STMT_VINFO_DATA_REF (rep)))
-               continue;
+               {
+                 vertices[idx].perm_out = 0;
+                 continue;
+               }
              /* We cannot move a permute across an operation that is
                 not independent on lanes.  Note this is an explicit
                 negative list since that's much shorter than the respective
@@ -3710,63 +3717,82 @@ vect_optimize_slp (vec_info *vinfo)
                  case CFN_COMPLEX_MUL:
                  case CFN_COMPLEX_MUL_CONJ:
                  case CFN_VEC_ADDSUB:
+                   vertices[idx].perm_out = 0;
                    continue;
                  default:;
                  }
            }
 
-         int perm = -1;
-         for (graph_edge *succ = slpg->vertices[idx].succ;
-              succ; succ = succ->succ_next)
-           {
-             int succ_idx = succ->dest;
-             /* Handle unvisited nodes optimistically.  */
-             /* ???  But for constants once we want to handle non-bijective
-                permutes we have to verify the permute, when unifying lanes,
-                will not unify different constants.  For example see
-                gcc.dg/vect/bb-slp-14.c for a case that would break.  */
-             if (!vertices[succ_idx].visited)
-               continue;
-             int succ_perm = vertices[succ_idx].perm_out;
-             if (perm == -1)
-               perm = succ_perm;
-             else if (succ_perm == 0)
-               {
-                 perm = 0;
-                 break;
-               }
-             else if (!vect_slp_perms_eq (perms, perm, succ_perm))
-               {
-                 perm = 0;
-                 break;
-               }
-           }
-
-         if (perm == -1)
+         int perm;
+         if (!slpg->vertices[idx].succ)
            /* Pick up pre-computed leaf values.  */
            perm = vertices[idx].perm_out;
-         else if (!vect_slp_perms_eq (perms, perm,
-                                      vertices[idx].get_perm_in ()))
+         else
            {
-             if (iteration > 1)
-               /* Make sure we eventually converge.  */
-               gcc_checking_assert (perm == 0);
-             if (perm == 0)
+             perm = -1;
+             bool all_constant = true;
+             for (graph_edge *succ = slpg->vertices[idx].succ;
+                  succ; succ = succ->succ_next)
                {
-                 vertices[idx].perm_out = 0;
-                 vertices[idx].materialize = 0;
+                 int succ_idx = succ->dest;
+                 slp_tree succ_node = vertices[succ_idx].node;
+                 if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
+                     && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
+                   all_constant = false;
+                 int succ_perm = vertices[succ_idx].perm_out;
+                 /* Handle unvisited (and constant) nodes optimistically.  */
+                 /* ???  But for constants once we want to handle
+                    non-bijective permutes we have to verify the permute,
+                    when unifying lanes, will not unify different constants.
+                    For example see gcc.dg/vect/bb-slp-14.c for a case
+                    that would break.  */
+                 if (succ_perm == -1)
+                   continue;
+                 if (perm == -1)
+                   perm = succ_perm;
+                 else if (succ_perm == 0)
+                   {
+                     perm = 0;
+                     break;
+                   }
+                 else if (!vect_slp_perms_eq (perms, perm, succ_perm))
+                   {
+                     perm = 0;
+                     break;
+                   }
+               }
+             /* We still lack a forward propagation of materializations
+                and thus only allow "any" permutes on constant or external
+                nodes which we handle during materialization by looking
+                at SLP children.  So avoid having internal "any" permutes
+                for now, see gcc.dg/vect/bb-slp-71.c for a testcase that
+                breaks when removing this restriction.  */
+             if (perm == -1 && all_constant)
+               perm = 0;
+
+             if (!vect_slp_perms_eq (perms, perm,
+                                     vertices[idx].get_perm_in ()))
+               {
+                 /* Make sure we eventually converge.  */
+                 gcc_checking_assert (vertices[idx].get_perm_in () == -1
+                                      || perm == 0);
+                 if (perm == 0)
+                   {
+                     vertices[idx].perm_out = 0;
+                     vertices[idx].materialize = 0;
+                   }
+                 if (!vertices[idx].materialize)
+                   vertices[idx].perm_out = perm;
+                 changed = true;
                }
-             if (!vertices[idx].materialize)
-               vertices[idx].perm_out = perm;
-             changed = true;
            }
 
-         if (perm == 0)
+         /* Elide pruning at materialization points in the first
+            iteration phase.  */
+         if (!do_materialization)
            continue;
 
-         /* Elide pruning at materialization points in the first
-            iteration so every node was visited once at least.  */
-         if (iteration == 1)
+         if (perm == 0 || perm == -1)
            continue;
 
          /* Decide on permute materialization.  Look whether there's
@@ -3784,8 +3810,8 @@ vect_optimize_slp (vec_info *vinfo)
            for (graph_edge *pred = slpg->vertices[idx].pred;
                 pred; pred = pred->pred_next)
              {
-               gcc_checking_assert (vertices[pred->src].visited);
                int pred_perm = vertices[pred->src].get_perm_in ();
+               gcc_checking_assert (pred_perm != -1);
                if (!vect_slp_perms_eq (perms, perm, pred_perm))
                  {
                    all_preds_permuted = false;
@@ -3800,8 +3826,16 @@ vect_optimize_slp (vec_info *vinfo)
              vertices[idx].perm_out = 0;
            }
        }
+
+      /* If the initial propagation converged, switch on materialization
+        and re-propagate.  */
+      if (!changed && !do_materialization)
+       {
+         do_materialization = true;
+         changed = true;
+       }
     }
-  while (changed || iteration == 1);
+  while (changed);
 
   /* Materialize.  */
   for (i = 0; i < vertices.length (); ++i)