From 3d706c20f8e69795835d68b7a5ac63966027f1c1 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 4 Oct 2021 17:31:35 +0100 Subject: [PATCH] [NFC][LoopVectorize] Remove setBestPlan in favour of getBestPlanFor I have removed LoopVectorizationPlanner::setBestPlan, since this function is quite aggressive because it deletes all other plans except the one containing the (VF, UF) pair required. The code is currently written to assume that all (VF, UF) pairs will live in the same vplan. This is overly restrictive, since scalable VFs live in different plans to fixed-width VFs. When we add support for vectorising epilogue loops when the main loop uses scalable vectors, the vplan for the main loop will be different to that of the epilogue. Instead I have added a new function called LoopVectorizationPlanner::getBestPlanFor that returns the best vplan for the (VF, UF) pair requested and leaves all the vplans untouched. We then pass this best vplan to LoopVectorizationPlanner::executePlan, which now takes the VF, the UF and a reference to the best VPlan as additional arguments. Differential revision: https://reviews.llvm.org/D111125 --- .../Vectorize/LoopVectorizationPlanner.h | 15 +++---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 51 +++++++++++----------- .../Transforms/LoopVectorize/PowerPC/reg-usage.ll | 10 ++--- 3 files changed, 36 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 870e717..5a2eb80 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -268,12 +268,6 @@ class LoopVectorizationPlanner { /// A builder used to construct the current plan. VPBuilder Builder; - /// The best number of elements of the vector types used in the - /// transformed loop. BestVF = None means that vectorization is - /// disabled. 
- Optional<ElementCount> BestVF = None; - unsigned BestUF = 0; - public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, @@ -295,12 +289,13 @@ public: /// VF and its cost. VectorizationFactor planInVPlanNativePath(ElementCount UserVF); - /// Finalize the best decision and dispose of all other VPlans. - void setBestPlan(ElementCount VF, unsigned UF); + /// Return the best VPlan for \p VF. + VPlan &getBestPlanFor(ElementCount VF) const; /// Generate the IR code for the body of the vectorized loop according to the - /// best selected VPlan. - void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); + /// best selected \p VF, \p UF and VPlan \p BestPlan. + void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, + InnerLoopVectorizer &LB, DominatorTree *DT); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printPlans(raw_ostream &O); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c7dd99b..b591135 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8189,28 +8189,30 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return SelectedVF; } -void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) { - LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF - << '\n'); - BestVF = VF; - BestUF = UF; +VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { + assert(count_if(VPlans, + [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == + 1 && + "Best VF has not a single VPlan."); - erase_if(VPlans, [VF](const VPlanPtr &Plan) { - return !Plan->hasVF(VF); - }); - assert(VPlans.size() == 1 && "Best VF has not a single VPlan."); + for (const VPlanPtr &Plan : VPlans) { + if (Plan->hasVF(VF)) + return *Plan.get(); + } + llvm_unreachable("No plan found!"); } -void 
LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV, +void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, + VPlan &BestVPlan, + InnerLoopVectorizer &ILV, DominatorTree *DT) { + LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF + << '\n'); + // Perform the actual loop transformation. // 1. Create a new empty loop. Unlink the old loop and connect the new one. - assert(BestVF.hasValue() && "Vectorization Factor is missing"); - assert(VPlans.size() == 1 && "Not a single VPlan to execute."); - - VPTransformState State{ - *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()}; + VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); State.TripCount = ILV.getOrCreateTripCount(nullptr); State.CanonicalIV = ILV.Induction; @@ -8226,7 +8228,7 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV, //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - VPlans.front()->execute(&State); + BestVPlan.execute(&State); // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. @@ -10064,7 +10066,7 @@ static bool processLoopInVPlanNativePath( VectorizationFactor::Disabled() == VF) return false; - LVP.setBestPlan(VF.Width, 1); + VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); { GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, @@ -10073,7 +10075,7 @@ static bool processLoopInVPlanNativePath( &CM, BFI, PSI, Checks); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LVP.executePlan(LB, DT); + LVP.executePlan(VF.Width, 1, BestPlan, LB, DT); } // Mark the loop as already vectorized to avoid vectorizing again. 
@@ -10395,7 +10397,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { F->getParent()->getDataLayout()); if (!VF.Width.isScalar() || IC > 1) Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); - LVP.setBestPlan(VF.Width, IC); + VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); using namespace ore; if (!VectorizeLoop) { @@ -10404,7 +10406,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(Unroller, DT); + LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -10427,8 +10429,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF); - LVP.executePlan(MainILV, DT); + LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestPlan, MainILV, DT); ++LoopsVectorized; simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); @@ -10436,13 +10437,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. 
- LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF); EPI.MainLoopVF = EPI.EpilogueVF; EPI.MainLoopUF = EPI.EpilogueUF; EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(EpilogILV, DT); + LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestPlan, EpilogILV, + DT); ++LoopsEpilogueVectorized; if (!MainILV.areSafetyChecksAdded()) @@ -10450,7 +10451,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(LB, DT); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll index 39b91e2..e3285e6 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll @@ -8,9 +8,9 @@ define i32 @foo() { ; CHECK-LABEL: foo -; CHECK-PWR8: Setting best plan to VF=16, UF=4 +; CHECK-PWR8: Executing best plan with VF=16, UF=4 -; CHECK-PWR9: Setting best plan to VF=8, UF=8 +; CHECK-PWR9: Executing best plan with VF=8, UF=8 entry: @@ -46,7 +46,7 @@ define i32 @goo() { ; CHECK-LABEL: goo -; CHECK: Setting best plan to VF=16, UF=4 +; CHECK: Executing best plan with VF=16, UF=4 entry: br label %for.body @@ -79,7 +79,7 @@ for.body: ; preds = %for.body, %entry define i64 @bar(i64* nocapture %a) { ; CHECK-LABEL: bar -; CHECK: Setting best plan to VF=2, UF=12 +; CHECK: Executing best plan with VF=2, UF=12 entry: br label %for.body @@ -107,7 +107,7 @@ for.body: define void @hoo(i32 %n) { ; CHECK-LABEL: hoo -; CHECK: Setting best plan to VF=1, UF=12 +; CHECK: Executing best plan with VF=1, UF=12 entry: br label %for.body -- 2.7.4