From e91e62db14a5e3175e80cb9985d724fbba10588c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 27 Dec 2022 18:08:31 +0000 Subject: [PATCH] [LV] Sink scalar operands and merge regions repeatedly. Merging regions can enable new sinking opportunities (e.g. if users of a scalar value are moved from different VPBBs into the same VPBB). Sinking in turn can also enable new merging opportunities (e.g. if a recipe between two merge-able regions is moved). To enable more sinking opportunities, repeat sinking & merging if regions could be merged. Also fix mergeReplicateRegions to return the correct Changed status. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D139788 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 ++++++++-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +-- ...first-order-recurrence-sink-replicate-region.ll | 22 +++------------------- .../LoopVectorize/vplan-sink-scalars-and-merge.ll | 6 +++--- 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 104d53f0d..0c427f1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9146,8 +9146,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE()); VPlanTransforms::removeDeadRecipes(*Plan); - VPlanTransforms::sinkScalarOperands(*Plan); - VPlanTransforms::mergeReplicateRegions(*Plan); + + bool ShouldSimplify = true; + while (ShouldSimplify) { + ShouldSimplify = VPlanTransforms::sinkScalarOperands(*Plan); + ShouldSimplify |= VPlanTransforms::mergeReplicateRegions(*Plan); + ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(*Plan); + } + VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan); VPlanTransforms::mergeBlocksIntoPredecessors(*Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp 
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 0cb1120..d57b2c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -225,7 +225,6 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { SetVector DeletedRegions; - bool Changed = false; // Collect region blocks to process up-front, to avoid iterator invalidation // issues while merging regions. @@ -304,7 +303,7 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { for (VPRegionBlock *ToDelete : DeletedRegions) delete ToDelete; - return Changed; + return !DeletedRegions.empty(); } bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index e208f4d..a29c619 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -356,24 +356,6 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32* noalia ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next> -; CHECK-NEXT: Successor(s): pred.srem -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem: { -; CHECK-NEXT: pred.srem.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]> -; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> -; CHECK-NEXT: Successor(s): pred.srem.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; 
CHECK-NEXT: Successor(s): loop.1.split -; CHECK-EMPTY: -; CHECK-NEXT: loop.1.split: ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -382,7 +364,8 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32* noalia ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<[[PRED]]> +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, ir<%rem> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> ; CHECK-NEXT: REPLICATE ir<%gep.2> = getelementptr ir<%dst.2>, vp<[[STEPS]]> @@ -390,6 +373,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32* noalia ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem> ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem.div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index be7afff..a98035c 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -621,7 +621,6 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -630,6 +629,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; 
CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> @@ -719,9 +719,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -730,6 +728,8 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> ; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%lv.a>, ir<%lv.a> ; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a> -- 2.7.4