From 89718815c6694445fa401929702f3bc28f597141 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 1 Jan 2023 19:48:48 +0000 Subject: [PATCH] [VPlan] Adjust mergeReplicateRegions to be in line with mergeBlock (NFC) Adjust mergeReplicateRegions to be in line with mergeBlocksIntoPredecessors added in 36d70a6aea6b by collecting only the valid candidates first. Also rename to mergeReplicateRegionsIntoSuccessors and add missing doc-comment. This addresses post-commit suggestions by @Ayal. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 37 ++++++++++++++--------- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 5 ++- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ac70a9a..aade9df 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9148,7 +9148,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( bool ShouldSimplify = true; while (ShouldSimplify) { ShouldSimplify = VPlanTransforms::sinkScalarOperands(*Plan); - ShouldSimplify |= VPlanTransforms::mergeReplicateRegions(*Plan); + ShouldSimplify |= + VPlanTransforms::mergeReplicateRegionsIntoSuccessors(*Plan); ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(*Plan); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 09b173f..c684099 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -229,20 +229,17 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { return nullptr; } -bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { +bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { SetVector DeletedRegions; - // Collect region blocks to process up-front, to avoid iterator invalidation - // issues while merging regions. - SmallVector CandidateRegions( - VPBlockUtils::blocksOnly(depth_first( - VPBlockRecursiveTraversalWrapper(Plan.getEntry())))); - - // Check if Base is a predicated triangle, followed by an empty block, - // followed by another predicate triangle. If that's the case, move the - // recipes from the first to the second triangle. - for (VPRegionBlock *Region1 : CandidateRegions) { - if (DeletedRegions.contains(Region1)) + // Collect replicate regions followed by an empty block, followed by another + // replicate region with matching masks to process front. This is to avoid + // iterator invalidation issues while merging regions. + SmallVector WorkList; + for (VPRegionBlock *Region1 : + VPBlockUtils::blocksOnly(depth_first( + VPBlockRecursiveTraversalWrapper(Plan.getEntry())))) { + if (!Region1->isReplicator()) continue; auto *MiddleBasicBlock = dyn_cast_or_null(Region1->getSingleSuccessor()); @@ -251,20 +248,30 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { auto *Region2 = dyn_cast_or_null(MiddleBasicBlock->getSingleSuccessor()); - if (!Region2) + if (!Region2 || !Region2->isReplicator()) continue; VPValue *Mask1 = getPredicatedMask(Region1); VPValue *Mask2 = getPredicatedMask(Region2); if (!Mask1 || Mask1 != Mask2) continue; + + assert(Mask1 && Mask2 && "both region must have conditions"); + WorkList.push_back(Region1); + } + + // Move recipes from Region1 to its successor region, if both are triangles. + for (VPRegionBlock *Region1 : WorkList) { + if (DeletedRegions.contains(Region1)) + continue; + auto *MiddleBasicBlock = cast(Region1->getSingleSuccessor()); + auto *Region2 = cast(MiddleBasicBlock->getSingleSuccessor()); + VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); if (!Then1 || !Then2) continue; - assert(Mask1 && Mask2 && "both region must have conditions"); - // Note: No fusion-preventing memory dependencies are expected in either // region. Such dependencies should be rejected during earlier dependence // checks, which guarantee accesses can be re-ordered for vectorization. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 27eefe1..be0d8e7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -38,7 +38,10 @@ struct VPlanTransforms { static bool sinkScalarOperands(VPlan &Plan); - static bool mergeReplicateRegions(VPlan &Plan); + /// Merge replicate regions in their successor region, if a replicate region + /// is connected to a successor replicate region with the same predicate by a + /// single, empty VPBasicBlock. + static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan); /// Remove redundant VPBasicBlocks by merging them into their predecessor if /// the predecessor has a single successor. -- 2.7.4