From e898479f2b3d95ccda9e143865eedd615c667e22 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 14 Dec 2022 17:55:30 +0000 Subject: [PATCH] [VPlan] Sink non-uniform recipes for scalar plans. In scalar plans, replicate recipes will only generate a single value per UF, independent of whether they are uniform or not. So don't consider uniformity for plans with scalar VFs only. This allows us to handle a few additional cases in VPlan sinking instead of non-VPlan sinkScalarOperands. Depends on D133762. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D134218 --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 ++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++++- .../Transforms/LoopVectorize/interleave-and-scalarize-only.ll | 8 ++++---- .../Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll | 4 ++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c2c495c..a6a3d5b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2671,6 +2671,8 @@ public: bool hasVF(ElementCount VF) { return VFs.count(VF); } + bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } + const std::string &getName() const { return Name; } void setName(const Twine &newName) { Name = newName.str(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index dac8b4e..986735f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -123,6 +123,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { } } + bool ScalarVFOnly = Plan.hasScalarVFOnly(); // Try to sink each replicate or scalar IV steps recipe in the worklist. 
while (!WorkList.empty()) { VPBasicBlock *SinkTo; @@ -133,7 +134,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { SinkCandidate->mayReadOrWriteMemory()) continue; if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) { - if (RepR->isUniform()) + if (!ScalarVFOnly && RepR->isUniform()) continue; } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate)) continue; @@ -159,6 +160,8 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { continue; if (NeedsDuplicating) { + if (ScalarVFOnly) + continue; Instruction *I = cast<Instruction>( cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue()); auto *Clone = diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index b0b49d8..df41b06 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -76,8 +76,6 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): cond.false ; DBG-EMPTY: ; DBG-NEXT: cond.false: -; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> -; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> ; DBG-NEXT: Successor(s): cond.false.0 ; DBG-EMPTY: ; DBG-NEXT: cond.false.0: @@ -89,7 +87,9 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): pred.store.if, pred.store.continue ; DBG-EMPTY: ; DBG-NEXT: pred.store.if: +; DBG-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE ir<%l> = load ir<%gep.src> +; DBG-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS2]]> ; DBG-NEXT: CLONE store ir<%l>, ir<%gep.dst> ; DBG-NEXT: Successor(s): pred.store.continue ; DBG-EMPTY: @@ -124,9 +124,9 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) { ; CHECK-NEXT: br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue ; CHECK: pred.store.if: ; CHECK-NEXT: [[INDUCTION4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
i32, ptr %dst, i64 [[INDUCTION4]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION4]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION4]] ; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: br label %pred.store.continue ; CHECK: pred.store.continue: @@ -134,9 +134,9 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) { ; CHECK-NEXT: br i1 [[INDUCTION3]], label %pred.store.if4, label %pred.store.continue5 ; CHECK: pred.store.if4: ; CHECK-NEXT: [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION5]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]] ; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP2]], align 4 ; CHECK-NEXT: br label %pred.store.continue5 ; CHECK: pred.store.continue5: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index 08cba06..e918d95 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -20,8 +20,6 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: CLONE ir<%tmp3> = load ir<%tmp2> ; CHECK-NEXT: CLONE store ir<0>, ir<%tmp2> -; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3> -; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4> ; CHECK-NEXT: Successor(s): if.then ; CHECK: if.then: @@ -33,6 +31,8 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK: pred.store.if: +; 
CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3> +; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4> ; CHECK-NEXT: CLONE store ir<%tmp5>, ir<%tmp2> ; CHECK-NEXT: Successor(s): pred.store.continue -- 2.7.4