From 4ba8720f8844ae740e9424f90487ec308a22f40c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 May 2021 16:18:53 +0100 Subject: [PATCH] [VPlan] Representing backedge def-use feeding reduction phis. This patch updates the code handling reduction recipes to also keep track of the incoming value from the latch in the recipe. This is needed to model the def-use chains completely in VPlan, so that it is possible to replace the incoming value with an arbitrary VPValue. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D99294 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 29 ++++++++++++++++++---- llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 8 ++++++ llvm/lib/Transforms/Vectorize/VPlan.h | 14 ++++++++--- .../Transforms/LoopVectorize/vplan-printing.ll | 2 +- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 19bc3a5..fb0daff 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4321,8 +4321,6 @@ void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, // Reductions do not have to start at zero. They can start with // any loop invariant values. 
- BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); - Value *OrigLoopVal = OrigPhi->getIncomingValueForBlock(OrigLatch); BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); bool IsOrdered = State.VF.isVector() && IsInLoopReductionPhi && @@ -4332,9 +4330,10 @@ void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, if (IsOrdered && Part > 0) break; Value *VecRdxPhi = State.get(PhiR->getVPSingleValue(), Part); - Value *Val = State.get(State.Plan->getVPValue(OrigLoopVal), Part); + Value *Val = State.get(PhiR->getBackedgeValue(), Part); if (IsOrdered) - Val = State.get(State.Plan->getVPValue(OrigLoopVal), UF - 1); + Val = State.get(PhiR->getBackedgeValue(), UF - 1); + cast<PHINode>(VecRdxPhi)->addIncoming(Val, VectorLoopLatch); } @@ -8766,6 +8765,16 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end())); } +void VPRecipeBuilder::fixHeaderPhis() { + BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); + for (VPWidenPHIRecipe *R : PhisToFix) { + auto *PN = cast<PHINode>(R->getUnderlyingValue()); + VPRecipeBase *IncR = + getRecipe(cast<Instruction>(PN->getIncomingValueForBlock(OrigLatch))); + R->addOperand(IncR->getVPSingleValue()); + } +} + VPBasicBlock *VPRecipeBuilder::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, VPlanPtr &Plan) { @@ -8869,7 +8878,15 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, assert(RdxDesc.getRecurrenceStartValue() == Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); VPValue *StartV = Operands[0]; - return toVPRecipeResult(new VPWidenPHIRecipe(Phi, RdxDesc, *StartV)); + + // Record the PHI and the incoming value from the backedge, so we can add + // the incoming value from the backedge after all recipes have been + // created. 
+ auto *PhiRecipe = new VPWidenPHIRecipe(Phi, RdxDesc, *StartV); + PhisToFix.push_back(PhiRecipe); + recordRecipeOf(cast<Instruction>( + Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()))); + return toVPRecipeResult(PhiRecipe); } return toVPRecipeResult(new VPWidenPHIRecipe(Phi)); @@ -9057,6 +9074,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } } + RecipeBuilder.fixHeaderPhis(); + // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks // may also be empty, such as the last one VPBB, reflecting original // basic-blocks with no recipes. diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index a101730..bc1a039 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -56,6 +56,10 @@ class VPRecipeBuilder { // marked by having a nullptr entry in this map. DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; + /// Cross-iteration reduction phis for which we need to add the incoming value + /// from the backedge after all recipes have been created. + SmallVector<VPWidenPHIRecipe *, 4> PhisToFix; + /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. @@ -165,6 +169,10 @@ public: VPBasicBlock *handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, VPlanPtr &Plan); + + /// Add the incoming values from the backedge to reduction cross-iteration + /// phis. + void fixHeaderPhis(); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 3217cab..26ec86e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -993,9 +993,10 @@ public: /// A recipe for handling all phi nodes except for integer and FP inductions. 
/// For reduction PHIs, RdxDesc must point to the corresponding recurrence -/// descriptor and the start value is the first operand of the recipe. -/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are -/// managed in the recipe directly. +/// descriptor, the start value is the first operand of the recipe and the +/// incoming value from the backedge is the second operand. In the VPlan native +/// path, all incoming VPValues & VPBasicBlock pairs are managed in the recipe +/// directly. class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { /// Descriptor for a reduction PHI. RecurrenceDescriptor *RdxDesc = nullptr; @@ -1040,6 +1041,13 @@ public: return getNumOperands() == 0 ? nullptr : getOperand(0); } + /// Returns the incoming value from the loop backedge, if it is a reduction. + VPValue *getBackedgeValue() { + assert(RdxDesc && "second incoming value is only guaranteed to be backedge " + "value for reductions"); + return getOperand(1); + } + /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { addOperand(IncomingV); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index dcee58d..7a93d30 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -79,7 +79,7 @@ define float @print_reduction(i64 %n, float* noalias %y) { ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: for.body: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 -; CHECK-NEXT: WIDEN-PHI %red = phi %red.next, 0.000000e+00 +; CHECK-NEXT: WIDEN-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.fadd (ir<%lv>) -- 2.7.4