tree-optimization/101178 - handle VEC_PERM in SLP permute propagation

author Richard Biener <rguenther@suse.de>

Wed, 30 Jun 2021 14:28:50 +0000 (16:28 +0200)

committer Richard Biener <rguenther@suse.de>

Thu, 1 Jul 2021 07:47:58 +0000 (09:47 +0200)
author Richard Biener <rguenther@suse.de>
Wed, 30 Jun 2021 14:28:50 +0000 (16:28 +0200)
committer Richard Biener <rguenther@suse.de>
Thu, 1 Jul 2021 07:47:58 +0000 (09:47 +0200)
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-72.c b/gcc/testsuite/gcc.dg/vect/bb-slp-72.c

new file mode 100644 (file)

index 0000000..5b243fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-72.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+double x[2], y[2], z[2], w[2];
+
+void __attribute__((noipa)) foo ()  /* Cross-lane kernel under test.  */
+{
+  double tem0 = x[1] + y[1];  /* Element 1 of x/y is added ...  */
+  double tem1 = x[0] - y[0];  /* ... element 0 is subtracted.  */
+  double tem2 = z[1] * tem0;  /* Scaled by the matching element of z.  */
+  double tem3 = z[0] * tem1;
+  z[0] = tem2 - w[0];  /* Element-1 product goes to z[0]; w is not swapped.  */
+  z[1] = tem3 + w[1];  /* Element-0 product goes to z[1].  */
+}
+
+int main()  /* Run foo once and abort if a stored result is wrong.  */
+{
+  check_vect ();  /* Not defined here; presumably from tree-vect.h.  */
+
+  x[0] = 1.; x[1] = 2.;
+  y[0] = 7.; y[1] = -5.;
+  z[0] = 2.; z[1] = 3.;
+  w[0] = 9.; w[1] = -5.;
+  foo ();
+  if (z[0] != -18. || z[1] != -17.)  /* 3 * (2 + -5) - 9, 2 * (1 - 7) + -5.  */
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-73.c b/gcc/testsuite/gcc.dg/vect/bb-slp-73.c

new file mode 100644 (file)

index 0000000..d4c8a51
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-73.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+double x[2], y[2], z[2], w[2];
+
+void __attribute__((noipa)) foo ()  /* Cross-lane kernel under test.  */
+{
+  double tem0 = x[1] + y[1];  /* Element 1 of x/y is added ...  */
+  double tem1 = x[0] - y[0];  /* ... element 0 is subtracted.  */
+  double tem2 = z[1] * tem0;  /* Scaled by the matching element of z.  */
+  double tem3 = z[0] * tem1;
+  z[0] = tem2 - w[1];  /* Element-1 product goes to z[0]; w is swapped too.  */
+  z[1] = tem3 + w[0];  /* Element-0 product goes to z[1], using w[0].  */
+}
+
+int main()  /* Run foo once and abort if a stored result is wrong.  */
+{
+  check_vect ();  /* Not defined here; presumably from tree-vect.h.  */
+
+  x[0] = 1.; x[1] = 2.;
+  y[0] = 7.; y[1] = -5.;
+  z[0] = 2.; z[1] = 3.;
+  w[0] = 9.; w[1] = -5.;
+  foo ();
+  if (z[0] != -4. || z[1] != -3.)  /* 3 * (2 + -5) - -5, 2 * (1 - 7) + 9.  */
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-74.c b/gcc/testsuite/gcc.dg/vect/bb-slp-74.c

new file mode 100644 (file)

index 0000000..d3d5a02
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-74.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+double a[2], b[2], c[2];
+
+void __attribute__((noipa)) foo ()  /* Cross-lane kernel under test.  */
+{
+  double tem0 = a[1] + b[1];  /* Element 1 of a/b is added ...  */
+  double tem1 = a[0] - b[0];  /* ... element 0 is subtracted.  */
+  c[0] = 2. * tem0;  /* Element-1 sum goes to c[0]; scale factors differ.  */
+  c[1] = 5. * tem1;  /* Element-0 difference goes to c[1].  */
+}
+
+int main()  /* Run foo once and abort if a stored result is wrong.  */
+{
+  check_vect ();  /* Not defined here; presumably from tree-vect.h.  */
+
+  a[0] = 1.; a[1] = 3.;
+  b[0] = -5.; b[1] = 13.;
+  foo ();
+  if (c[0] != 32. || c[1] != 30.)  /* 2 * (3 + 13), 5 * (1 - -5).  */
+    __builtin_abort ();
+  return 0;
+}
+
+/* We'd like to see at most one VEC_PERM_EXPR, not one for a blend
+   and one for a permute materialized somewhere else.  But addsub
+   pattern recognition can likely get in the way here.  */
+/* { dg-final { scan-tree-dump-times "  \[^ \]\+ = VEC_PERM_EXPR" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index 10195d3629fb147e25d3c103c3c3eb2b01efaf8e..966b281ffaeec718d6fd2dc3fd9cbec3b67688ac 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3470,16 +3470,19 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
  struct slpg_vertex
  {
    slpg_vertex (slp_tree node_)
-    : node (node_), perm_out (-1), materialize (0) {}
+    : node (node_), perm_in (-1), perm_out (-1) {}
  
-  int get_perm_in () const { return materialize ? materialize : perm_out; }
+  int get_perm_materialized () const
+    { return perm_in != perm_out ? perm_in : 0; }
  
    slp_tree node;
-  /* The permutation on the outgoing lanes (towards SLP parents).  */
+  /* The common permutation on the incoming lanes (towards SLP children).  */
+  int perm_in;
+  /* The permutation on the outgoing lanes (towards SLP parents).  When
+     the node is a materialization point for a permute this differs
+     from perm_in (and is then usually zero).  Materialization happens
+     on the input side.  */
    int perm_out;
-  /* The permutation that is applied by this node.  perm_out is
-     relative to this.  */
-  int materialize;
  };
  
  /* Fill the vertices and leafs vector with all nodes in the SLP graph.  */
@@ -3614,7 +3617,11 @@ vect_optimize_slp (vec_info *vinfo)
        /* Leafs do not change across iterations.  Note leafs also double
          as entries to the reverse graph.  */
        if (!slpg->vertices[idx].succ)
-       vertices[idx].perm_out = 0;
+       {
+         vertices[idx].perm_in = 0;
+         vertices[idx].perm_out = 0;
+       }
+
        /* Loads are the only thing generating permutes.  */
        if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
         continue;
@@ -3663,6 +3670,7 @@ vect_optimize_slp (vec_info *vinfo)
        for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
         perm[j] = SLP_TREE_LOAD_PERMUTATION (node)[j] - imin;
        perms.safe_push (perm);
+      vertices[idx].perm_in = perms.length () - 1;
        vertices[idx].perm_out = perms.length () - 1;
      }
  
@@ -3702,8 +3710,11 @@ vect_optimize_slp (vec_info *vinfo)
               if (STMT_VINFO_DATA_REF (rep)
                   && DR_IS_WRITE (STMT_VINFO_DATA_REF (rep)))
                 {
+                 /* ???  We're forcing materialization in place
+                    of the child here, we'd need special handling
+                    in materialization to leave perm_in -1 here.  */
+                 vertices[idx].perm_in = 0;
                   vertices[idx].perm_out = 0;
-                 continue;
                 }
               /* We cannot move a permute across an operation that is
                  not independent on lanes.  Note this is an explicit
@@ -3717,20 +3728,19 @@ vect_optimize_slp (vec_info *vinfo)
                   case CFN_COMPLEX_MUL:
                   case CFN_COMPLEX_MUL_CONJ:
                   case CFN_VEC_ADDSUB:
+                   vertices[idx].perm_in = 0;
                     vertices[idx].perm_out = 0;
-                   continue;
                   default:;
                   }
             }
  
-         int perm;
           if (!slpg->vertices[idx].succ)
             /* Pick up pre-computed leaf values.  */
-           perm = vertices[idx].perm_out;
+           ;
           else
             {
               bool any_succ_perm_out_m1 = false;
-             perm = vertices[idx].get_perm_in ();
+             int perm_in = vertices[idx].perm_in;
               for (graph_edge *succ = slpg->vertices[idx].succ;
                    succ; succ = succ->succ_next)
                 {
@@ -3752,18 +3762,18 @@ vect_optimize_slp (vec_info *vinfo)
                         any_succ_perm_out_m1 = true;
                       continue;
                     }
-                 if (perm == -1)
-                   perm = succ_perm;
+                 if (perm_in == -1)
+                   perm_in = succ_perm;
                   else if (succ_perm == 0
-                          || !vect_slp_perms_eq (perms, perm, succ_perm))
+                          || !vect_slp_perms_eq (perms, perm_in, succ_perm))
                     {
-                     perm = 0;
+                     perm_in = 0;
                       break;
                     }
                 }
  
               /* Adjust any incoming permutes we treated optimistically.  */
-             if (perm != -1 && any_succ_perm_out_m1)
+             if (perm_in != -1 && any_succ_perm_out_m1)
                 {
                   for (graph_edge *succ = slpg->vertices[idx].succ;
                        succ; succ = succ->succ_next)
@@ -3772,24 +3782,36 @@ vect_optimize_slp (vec_info *vinfo)
                       if (vertices[succ->dest].perm_out == -1
                           && SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
                           && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
-                       vertices[succ->dest].perm_out = perm;
+                       {
+                         vertices[succ->dest].perm_out = perm_in;
+                         /* And ensure this propagates.  */
+                         if (vertices[succ->dest].perm_in == -1)
+                           vertices[succ->dest].perm_in = perm_in;
+                       }
                     }
                   changed = true;
                 }
  
-             if (!vect_slp_perms_eq (perms, perm,
-                                     vertices[idx].get_perm_in ()))
+             if (!vect_slp_perms_eq (perms, perm_in,
+                                     vertices[idx].perm_in))
                 {
                   /* Make sure we eventually converge.  */
-                 gcc_checking_assert (vertices[idx].get_perm_in () == -1
-                                      || perm == 0);
-                 if (perm == 0)
-                   {
-                     vertices[idx].perm_out = 0;
-                     vertices[idx].materialize = 0;
-                   }
-                 if (!vertices[idx].materialize)
-                   vertices[idx].perm_out = perm;
+                 gcc_checking_assert (vertices[idx].perm_in == -1
+                                      || perm_in == 0);
+                 vertices[idx].perm_in = perm_in;
+
+                 /* While we can handle VEC_PERM nodes as transparent
+                    pass-through they can be a cheap materialization
+                    point as well.  In addition they can act as source
+                    of a random permutation as well.
+                    The following ensures that former materialization
+                    points that now have zero incoming permutes no
+                    longer appear as such and that former "any" permutes
+                    get pass-through.  We keep VEC_PERM nodes optimistic
+                    as "any" outgoing permute though.  */
+                 if (vertices[idx].perm_out != 0
+                     && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+                   vertices[idx].perm_out = perm_in;
                   changed = true;
                 }
             }
@@ -3799,25 +3821,19 @@ vect_optimize_slp (vec_info *vinfo)
           if (!do_materialization)
             continue;
  
+         int perm = vertices[idx].perm_out;
           if (perm == 0 || perm == -1)
             continue;
  
           /* Decide on permute materialization.  Look whether there's
              a use (pred) edge that is permuted differently than us.
-            In that case mark ourselves so the permutation is applied.
-            For VEC_PERM_EXPRs the permutation doesn't carry along
-            from children to parents so force materialization at the
-            point of the VEC_PERM_EXPR.  In principle VEC_PERM_EXPRs
-            are a source of an arbitrary permutation again, similar
-            to constants/externals - that's something we do not yet
-            optimally handle.  */
-         bool all_preds_permuted = (SLP_TREE_CODE (node) != VEC_PERM_EXPR
-                                    && slpg->vertices[idx].pred != NULL);
+            In that case mark ourselves so the permutation is applied.  */
+         bool all_preds_permuted = slpg->vertices[idx].pred != NULL;
           if (all_preds_permuted)
             for (graph_edge *pred = slpg->vertices[idx].pred;
                  pred; pred = pred->pred_next)
               {
-               int pred_perm = vertices[pred->src].get_perm_in ();
+               int pred_perm = vertices[pred->src].perm_in;
                 gcc_checking_assert (pred_perm != -1);
                 if (!vect_slp_perms_eq (perms, perm, pred_perm))
                   {
@@ -3827,10 +3843,8 @@ vect_optimize_slp (vec_info *vinfo)
               }
           if (!all_preds_permuted)
             {
-             if (!vertices[idx].materialize)
-               changed = true;
-             vertices[idx].materialize = perm;
               vertices[idx].perm_out = 0;
+             changed = true;
             }
         }
  
@@ -3848,46 +3862,43 @@ vect_optimize_slp (vec_info *vinfo)
    /* Materialize.  */
    for (i = 0; i < vertices.length (); ++i)
      {
-      int perm = vertices[i].get_perm_in ();
-      if (perm <= 0)
-       continue;
-
+      int perm_in = vertices[i].perm_in;
        slp_tree node = vertices[i].node;
  
-      /* First permute invariant/external original successors.  */
+      /* First permute invariant/external original successors, we handle
+        those optimistically during propagation and duplicate them if
+        they are used with different permutations.  */
        unsigned j;
        slp_tree child;
-      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-       {
-         if (!child
-             || (SLP_TREE_DEF_TYPE (child) != vect_constant_def
-                 && SLP_TREE_DEF_TYPE (child) != vect_external_def))
-           continue;
+      if (perm_in > 0)
+       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
+         {
+           if (!child
+               || (SLP_TREE_DEF_TYPE (child) != vect_constant_def
+                   && SLP_TREE_DEF_TYPE (child) != vect_external_def))
+             continue;
  
-         /* If the vector is uniform there's nothing to do.  */
-         if (vect_slp_tree_uniform_p (child))
-           continue;
+           /* If the vector is uniform there's nothing to do.  */
+           if (vect_slp_tree_uniform_p (child))
+             continue;
  
-         /* We can end up sharing some externals via two_operator
-            handling.  Be prepared to unshare those.  */
-         if (child->refcnt != 1)
-           {
-             gcc_assert (slpg->vertices[child->vertex].pred->pred_next);
-             SLP_TREE_CHILDREN (node)[j] = child
-               = vect_create_new_slp_node
-                   (SLP_TREE_SCALAR_OPS (child).copy ());
-           }
-         vect_slp_permute (perms[perm],
-                           SLP_TREE_SCALAR_OPS (child), true);
-       }
+           /* We can end up sharing some externals via two_operator
+              handling.  Be prepared to unshare those.  */
+           if (child->refcnt != 1)
+             {
+               gcc_assert (slpg->vertices[child->vertex].pred->pred_next);
+               SLP_TREE_CHILDREN (node)[j] = child
+                 = vect_create_new_slp_node
+                     (SLP_TREE_SCALAR_OPS (child).copy ());
+             }
+           vect_slp_permute (perms[perm_in],
+                             SLP_TREE_SCALAR_OPS (child), true);
+         }
  
-      if (vertices[i].materialize)
+      if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
         {
-         if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
-           /* For loads simply drop the permutation, the load permutation
-              already performs the desired permutation.  */
-           ;
-         else if (SLP_TREE_LANE_PERMUTATION (node).exists ())
+         /* Apply the common permutes to the input vectors.  */
+         if (perm_in > 0)
             {
               /* If the node is already a permute node we can apply
                  the permutation to the lane selection, effectively
@@ -3896,12 +3907,30 @@ vect_optimize_slp (vec_info *vinfo)
                 dump_printf_loc (MSG_NOTE, vect_location,
                                  "simplifying permute node %p\n",
                                  node);
-
               for (unsigned k = 0;
                    k < SLP_TREE_LANE_PERMUTATION (node).length (); ++k)
                 SLP_TREE_LANE_PERMUTATION (node)[k].second
-                 = perms[perm][SLP_TREE_LANE_PERMUTATION (node)[k].second];
+                 = perms[perm_in][SLP_TREE_LANE_PERMUTATION (node)[k].second];
+           }
+         /* Apply the anticipated output permute to the permute and
+            stmt vectors.  */
+         int perm_out = vertices[i].perm_out;
+         if (perm_out > 0)
+           {
+             vect_slp_permute (perms[perm_out],
+                               SLP_TREE_SCALAR_STMTS (node), true);
+             vect_slp_permute (perms[perm_out],
+                               SLP_TREE_LANE_PERMUTATION (node), true);
             }
+       }
+      else if (vertices[i].get_perm_materialized () != 0)
+       {
+         if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+           /* For loads simply drop the permutation, the load permutation
+              already performs the desired permutation.  */
+           ;
+         else if (SLP_TREE_LANE_PERMUTATION (node).exists ())
+           gcc_unreachable ();
           else
             {
               if (dump_enabled_p ())
@@ -3916,7 +3945,7 @@ vect_optimize_slp (vec_info *vinfo)
               SLP_TREE_CHILDREN (node) = vNULL;
               SLP_TREE_SCALAR_STMTS (copy)
                 = SLP_TREE_SCALAR_STMTS (node).copy ();
-             vect_slp_permute (perms[perm],
+             vect_slp_permute (perms[perm_in],
                                 SLP_TREE_SCALAR_STMTS (copy), true);
               gcc_assert (!SLP_TREE_SCALAR_OPS (node).exists ());
               SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node);
@@ -3936,28 +3965,31 @@ vect_optimize_slp (vec_info *vinfo)
               SLP_TREE_LANE_PERMUTATION (node).create (SLP_TREE_LANES (node));
               for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
                 SLP_TREE_LANE_PERMUTATION (node)
-                 .quick_push (std::make_pair (0, perms[perm][j]));
+                 .quick_push (std::make_pair (0, perms[perm_in][j]));
               SLP_TREE_CODE (node) = VEC_PERM_EXPR;
             }
         }
-      else
+      else if (perm_in > 0) /* perm_in == perm_out */
         {
           /* Apply the reverse permutation to our stmts.  */
-         vect_slp_permute (perms[perm],
+         vect_slp_permute (perms[perm_in],
                             SLP_TREE_SCALAR_STMTS (node), true);
-         /* And to the load permutation, which we can simply
+         /* And to the lane/load permutation, which we can simply
              make regular by design.  */
           if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
             {
+             gcc_assert (!SLP_TREE_LANE_PERMUTATION (node).exists ());
               /* ???  When we handle non-bijective permutes the idea
                  is that we can force the load-permutation to be
                  { min, min + 1, min + 2, ... max }.  But then the
                  scalar defs might no longer match the lane content
                  which means wrong-code with live lane vectorization.
                  So we possibly have to have NULL entries for those.  */
-             vect_slp_permute (perms[perm],
+             vect_slp_permute (perms[perm_in],
                                 SLP_TREE_LOAD_PERMUTATION (node), true);
             }
+         else if (SLP_TREE_LANE_PERMUTATION (node).exists ())
+           gcc_unreachable ();
         }
      }
  
@@ -3991,14 +4023,14 @@ vect_optimize_slp (vec_info *vinfo)
             }
           else if (SLP_TREE_LOAD_PERMUTATION (old).exists ()
                    && SLP_TREE_REF_COUNT (old) == 1
-                  && vertices[old->vertex].materialize)
+                  && vertices[old->vertex].get_perm_materialized () != 0)
             {
               /* ???  For loads the situation is more complex since
                  we can't modify the permute in place in case the
                  node is used multiple times.  In fact for loads this
                  should be somehow handled in the propagation engine.  */
               /* Apply the reverse permutation to our stmts.  */
-             int perm = vertices[old->vertex].get_perm_in ();
+             int perm = vertices[old->vertex].get_perm_materialized ();
               vect_slp_permute (perms[perm],
                                 SLP_TREE_SCALAR_STMTS (old), true);
               vect_slp_permute (perms[perm],
author	Richard Biener <rguenther@suse.de>
	Wed, 30 Jun 2021 14:28:50 +0000 (16:28 +0200)
committer	Richard Biener <rguenther@suse.de>
	Thu, 1 Jul 2021 07:47:58 +0000 (09:47 +0200)
gcc/testsuite/gcc.dg/vect/bb-slp-72.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/bb-slp-73.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/bb-slp-74.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-slp.c		patch \| blob \| history