From aa2414729ebbcb2d8f162e9002a3a6aa768b1f9d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 30 Dec 2022 18:24:34 +0000 Subject: [PATCH] [VPlan] Also consider operands of sink candidates in same block. Even if the sink candidate is already in the target block, its operands can be candidates for sinking. Queue them up as well. Also moves the queuing logic to a helper. --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 21 +++++++++++++-------- .../first-order-recurrence-sink-replicate-region.ll | 2 +- .../LoopVectorize/interleave-and-scalarize-only.ll | 2 +- .../LoopVectorize/vplan-sink-scalars-and-merge.ll | 4 ++-- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d57b2c5..89d50fcf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -112,14 +112,18 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { // First, collect the operands of all predicated replicate recipes as seeds // for sinking. 
SetVector> WorkList; + auto QueueOperands = [&WorkList](VPRecipeBase *R) { + for (VPValue *Op : R->operands()) + if (auto *Def = Op->getDefiningRecipe()) + WorkList.insert(std::make_pair(R->getParent(), Def)); + }; + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (auto &Recipe : *VPBB) { auto *RepR = dyn_cast(&Recipe); if (!RepR || !RepR->isPredicated()) continue; - for (VPValue *Op : RepR->operands()) - if (auto *Def = Op->getDefiningRecipe()) - WorkList.insert(std::make_pair(VPBB, Def)); + QueueOperands(RepR); } } @@ -129,8 +133,11 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { VPBasicBlock *SinkTo; VPRecipeBase *SinkCandidate; std::tie(SinkTo, SinkCandidate) = WorkList.pop_back_val(); - if (SinkCandidate->getParent() == SinkTo || - SinkCandidate->mayHaveSideEffects() || + if (SinkCandidate->getParent() == SinkTo) { + QueueOperands(SinkCandidate); + continue; + } + if (SinkCandidate->mayHaveSideEffects() || SinkCandidate->mayReadOrWriteMemory()) continue; if (auto *RepR = dyn_cast(SinkCandidate)) { @@ -184,9 +191,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { } } SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); - for (VPValue *Op : SinkCandidate->operands()) - if (auto *Def = Op->getDefiningRecipe()) - WorkList.insert(std::make_pair(SinkTo, Def)); + QueueOperands(SinkCandidate); Changed = true; } return Changed; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 2ccb573..6251468 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -352,7 +352,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi 
ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next> @@ -365,6 +364,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, ir<%rem> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 3322ef2..86982cc 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -72,7 +72,6 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir + vp<[[CAN_IV]]> * ir ; DBG-NEXT: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir -; DBG-NEXT: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; DBG-NEXT: Successor(s): pred.store ; DBG-EMPTY: ; DBG-NEXT: pred.store: { @@ -81,6 +80,7 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): pred.store.if, pred.store.continue ; DBG-EMPTY: ; DBG-NEXT: pred.store.if: +; DBG-NEXT: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE ir<%l> = load ir<%gep.src> ; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> diff --git 
a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index a98035c..3c552f7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -22,7 +22,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.store @@ -32,6 +31,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK: pred.store.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10> @@ -992,7 +992,6 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> ; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_IV]]> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.store @@ -1003,6 +1002,7 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS 
vp<[[DERIVED_IV]]>, ir<-1> ; CHECK-NEXT: REPLICATE ir<%gep.src> = getelementptr ir<%src>, vp<[[SCALAR_STEPS]]> ; CHECK-NEXT: REPLICATE ir<%l> = load ir<%gep.src> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]> -- 2.7.4