From c493e9216bd1700577b209a0e2d290145653e40d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 8 Jan 2021 17:40:20 +0000 Subject: [PATCH] [VPlan] Move reduction start value creation to widenPHIRecipe. This was suggested to prepare for D93975. By moving the start value creation to widenPHIInstruction, we set the stage to manage the start value directly in VPWidenPHIRecipe, which can be used subsequently to set the 'resume' value for reductions during epilogue vectorization. It also moves RdxDesc to the recipe, so we do not have to rely on Legal to look it up later. Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D94175 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 111 +++++++++++++----------- llvm/lib/Transforms/Vectorize/VPlan.h | 13 +++ 2 files changed, 73 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 21d6149..fe86a52 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -518,7 +518,8 @@ public: /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. - void widenPHIInstruction(Instruction *PN, unsigned UF, ElementCount VF); + void widenPHIInstruction(Instruction *PN, RecurrenceDescriptor *RdxDesc, + Value *StartV, unsigned UF, ElementCount VF); /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane @@ -4154,8 +4155,6 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { } void InnerLoopVectorizer::fixReduction(PHINode *Phi) { - Constant *Zero = Builder.getInt32(0); - // Get it's reduction variable descriptor. 
assert(Legal->isReductionVariable(Phi) && "Unable to find the reduction variable"); @@ -4167,46 +4166,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { setDebugLocFromInst(Builder, ReductionStartValue); bool IsInLoopReductionPhi = Cost->isInLoopReduction(Phi); - // We need to generate a reduction vector from the incoming scalar. - // To do so, we need to generate the 'identity' vector and override - // one of the elements with the incoming scalar reduction. We need - // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - // This is the vector-clone of the value that leaves the loop. Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType(); - // Find the reduction identity variable. Zero for addition, or, xor, - // one for multiplication, -1 for And. - Value *Identity; - Value *VectorStart; - if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) { - // MinMax reduction have the start value as their identify. - if (VF.isScalar() || IsInLoopReductionPhi) { - VectorStart = Identity = ReductionStartValue; - } else { - VectorStart = Identity = - Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident"); - } - } else { - // Handle other reduction kinds: - Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity( - RK, VecTy->getScalarType()); - if (VF.isScalar() || IsInLoopReductionPhi) { - Identity = Iden; - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - VectorStart = ReductionStartValue; - } else { - Identity = ConstantVector::getSplat(VF, Iden); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - VectorStart = - Builder.CreateInsertElement(Identity, ReductionStartValue, Zero); - } - } - // Wrap flags are in general invalid after vectorization, clear them. 
clearReductionWrapFlags(RdxDesc); @@ -4220,10 +4182,6 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { for (unsigned Part = 0; Part < UF; ++Part) { Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part); Value *Val = getOrCreateVectorValue(LoopVal, Part); - // Make sure to add the reduction start value only to the - // first unroll part. - Value *StartVal = (Part == 0) ? VectorStart : Identity; - cast(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader); cast(VecRdxPhi) ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); } @@ -4598,7 +4556,9 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, } } -void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, +void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, + RecurrenceDescriptor *RdxDesc, + Value *StartV, unsigned UF, ElementCount VF) { assert(!VF.isScalable() && "scalable vectors not yet supported."); PHINode *P = cast(PN); @@ -4623,20 +4583,60 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. - if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { + if (RdxDesc || Legal->isFirstOrderRecurrence(P)) { + Value *Iden = nullptr; + bool ScalarPHI = + (VF.isScalar()) || Cost->isInLoopReduction(cast(PN)); + Type *VecTy = + ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), VF); + + if (RdxDesc) { + assert(Legal->isReductionVariable(P) && StartV && + "RdxDesc should only be set for reduction variables; in that case " + "a StartV is also required"); + RecurKind RK = RdxDesc->getRecurrenceKind(); + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) { + // MinMax reduction have the start value as their identify. 
+ if (ScalarPHI) { + Iden = StartV; + } else { + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); + StartV = Iden = Builder.CreateVectorSplat(VF, StartV, "minmax.ident"); + } + } else { + Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity( + RK, VecTy->getScalarType()); + Iden = IdenC; + + if (!ScalarPHI) { + Iden = ConstantVector::getSplat(VF, IdenC); + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); + Constant *Zero = Builder.getInt32(0); + StartV = Builder.CreateInsertElement(Iden, StartV, Zero); + } + } + } + for (unsigned Part = 0; Part < UF; ++Part) { // This is phase one of vectorizing PHIs. - bool ScalarPHI = - (VF.isScalar()) || Cost->isInLoopReduction(cast(PN)); - Type *VecTy = - ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), VF); Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); VectorLoopValueMap.setVectorValue(P, Part, EntryPart); + if (StartV) { + // Make sure to add the reduction start value only to the + // first unroll part. + Value *StartVal = (Part == 0) ? StartV : Iden; + cast(EntryPart)->addIncoming(StartVal, LoopVectorPreHeader); + } } return; } + assert(!Legal->isReductionVariable(P) && + "reductions should be handled above"); + setDebugLocFromInst(Builder, P); // This PHINode must be an induction variable. 
@@ -8380,6 +8380,12 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, return tryToBlend(Phi, Plan); if ((Recipe = tryToOptimizeInductionPHI(Phi, *Plan))) return Recipe; + + if (Legal->isReductionVariable(Phi)) { + RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi]; + return new VPWidenPHIRecipe(Phi, RdxDesc); + } + return new VPWidenPHIRecipe(Phi); } @@ -8796,7 +8802,10 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { } void VPWidenPHIRecipe::execute(VPTransformState &State) { - State.ILV->widenPHIInstruction(Phi, State.UF, State.VF); + Value *StartV = nullptr; + if (RdxDesc) + StartV = RdxDesc->getRecurrenceStartValue(); + State.ILV->widenPHIInstruction(Phi, RdxDesc, StartV, State.UF, State.VF); } void VPBlendRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1926c9255..2f01daf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -971,10 +971,23 @@ public: }; /// A recipe for handling all phi nodes except for integer and FP inductions. +/// For reduction PHIs, RdxDesc must point to the corresponding recurrence +/// descriptor. class VPWidenPHIRecipe : public VPRecipeBase { PHINode *Phi; + /// Descriptor for a reduction PHI. + RecurrenceDescriptor *RdxDesc = nullptr; + public: + /// Create a new VPWidenPHIRecipe for the reduction \p Phi described by \p + /// RdxDesc. + VPWidenPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc) + : VPWidenPHIRecipe(Phi) { + this->RdxDesc = &RdxDesc; + } + + /// Create a VPWidenPHIRecipe for \p Phi VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) { new VPValue(Phi, this); } -- 2.7.4