[VPlan] Adjust mergeReplicateRegions to be in line with mergeBlock (NFC)
authorFlorian Hahn <flo@fhahn.com>
Sun, 1 Jan 2023 19:48:48 +0000 (19:48 +0000)
committerFlorian Hahn <flo@fhahn.com>
Sun, 1 Jan 2023 19:48:49 +0000 (19:48 +0000)
Adjust mergeReplicateRegions to be in line with
mergeBlocksIntoPredecessors added in 36d70a6aea6b by collecting only the
valid candidates first.

Also rename to mergeReplicateRegionsIntoSuccessors and add missing
doc-comment.

This addresses post-commit suggestions by @Ayal.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h

index ac70a9a..aade9df 100644 (file)
@@ -9148,7 +9148,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   bool ShouldSimplify = true;
   while (ShouldSimplify) {
     ShouldSimplify = VPlanTransforms::sinkScalarOperands(*Plan);
-    ShouldSimplify |= VPlanTransforms::mergeReplicateRegions(*Plan);
+    ShouldSimplify |=
+        VPlanTransforms::mergeReplicateRegionsIntoSuccessors(*Plan);
     ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(*Plan);
   }
 
index 09b173f..c684099 100644 (file)
@@ -229,20 +229,17 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
   return nullptr;
 }
 
-bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
+bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
   SetVector<VPRegionBlock *> DeletedRegions;
 
-  // Collect region blocks to process up-front, to avoid iterator invalidation
-  // issues while merging regions.
-  SmallVector<VPRegionBlock *, 8> CandidateRegions(
-      VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first(
-          VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()))));
-
-  // Check if Base is a predicated triangle, followed by an empty block,
-  // followed by another predicate triangle. If that's the case, move the
-  // recipes from the first to the second triangle.
-  for (VPRegionBlock *Region1 : CandidateRegions) {
-    if (DeletedRegions.contains(Region1))
+  // Collect replicate regions followed by an empty block, followed by another
+  // replicate region with matching masks to process front. This is to avoid
+  // iterator invalidation issues while merging regions.
+  SmallVector<VPRegionBlock *, 8> WorkList;
+  for (VPRegionBlock *Region1 :
+       VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first(
+           VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())))) {
+    if (!Region1->isReplicator())
       continue;
     auto *MiddleBasicBlock =
         dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
@@ -251,20 +248,30 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
 
     auto *Region2 =
         dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
-    if (!Region2)
+    if (!Region2 || !Region2->isReplicator())
       continue;
 
     VPValue *Mask1 = getPredicatedMask(Region1);
     VPValue *Mask2 = getPredicatedMask(Region2);
     if (!Mask1 || Mask1 != Mask2)
       continue;
+
+    assert(Mask1 && Mask2 && "both region must have conditions");
+    WorkList.push_back(Region1);
+  }
+
+  // Move recipes from Region1 to its successor region, if both are triangles.
+  for (VPRegionBlock *Region1 : WorkList) {
+    if (DeletedRegions.contains(Region1))
+      continue;
+    auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
+    auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
+
     VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
     VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
     if (!Then1 || !Then2)
       continue;
 
-    assert(Mask1 && Mask2 && "both region must have conditions");
-
     // Note: No fusion-preventing memory dependencies are expected in either
     // region. Such dependencies should be rejected during earlier dependence
     // checks, which guarantee accesses can be re-ordered for vectorization.
index 27eefe1..be0d8e7 100644 (file)
@@ -38,7 +38,10 @@ struct VPlanTransforms {
 
   static bool sinkScalarOperands(VPlan &Plan);
 
-  static bool mergeReplicateRegions(VPlan &Plan);
+  /// Merge replicate regions in their successor region, if a replicate region
+  /// is connected to a successor replicate region with the same predicate by a
+  /// single, empty VPBasicBlock.
+  static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan);
 
   /// Remove redundant VPBasicBlocks by merging them into their predecessor if
   /// the predecessor has a single successor.