Re-instantiate SLP induction IV CSE
author Richard Biener <rguenther@suse.de>
Wed, 4 Nov 2020 08:41:48 +0000 (09:41 +0100)
committer Richard Biener <rguenther@suse.de>
Wed, 4 Nov 2020 09:59:55 +0000 (10:59 +0100)
This re-instantiates the previously removed CSE, fixing the
FAIL of gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c.
It turns out the previous approach still works.

2020-11-04  Richard Biener  <rguenther@suse.de>

* tree-vect-loop.c (vectorizable_induction): Re-instantiate
previously removed CSE of SLP IVs.

gcc/tree-vect-loop.c

index 41e2e2a..c09aa39 100644 (file)
@@ -7874,8 +7874,16 @@ vectorizable_induction (loop_vec_info loop_vinfo,
       if (nested_in_vect_loop)
        nivs = nvects;
       else
-       nivs = least_common_multiple (group_size,
-                                     const_nunits) / const_nunits;
+       {
+         /* Compute the number of distinct IVs we need.  First reduce
+            group_size if it is a multiple of const_nunits so we get
+            one IV for a group_size of 4 but const_nunits 2.  */
+         unsigned group_sizep = group_size;
+         if (group_sizep % const_nunits == 0)
+           group_sizep = group_sizep / const_nunits;
+         nivs = least_common_multiple (group_sizep,
+                                       const_nunits) / const_nunits;
+       }
       tree stept = TREE_TYPE (step_vectype);
       tree lupdate_mul = NULL_TREE;
       if (!nested_in_vect_loop)
@@ -7975,6 +7983,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 
          SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
        }
+      if (!nested_in_vect_loop)
+       {
+         /* Fill up to the number of vectors we need for the whole group.  */
+         nivs = least_common_multiple (group_size,
+                                       const_nunits) / const_nunits;
+         for (; ivn < nivs; ++ivn)
+           SLP_TREE_VEC_STMTS (slp_node)
+             .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
+       }
 
       /* Re-use IVs when we can.  We are generating further vector
         stmts by adding VF' * stride to the IVs generated above.  */