From 922af076c70f2a7d7d997af5bd5ca218ae609e70 Mon Sep 17 00:00:00 2001 From: Matthew Simpson Date: Thu, 1 Sep 2016 18:14:27 +0000 Subject: [PATCH] [LV] Move VectorParts allocation and mapping into PHI widening (NFC) This patch moves the allocation of VectorParts for PHI nodes into the actual PHI widening code. Previously, we allocated these VectorParts in vectorizeBlockInLoop, and passed them by reference to widenPHIInstruction. Upon returning, we would then map the VectorParts in VectorLoopValueMap. This behavior is problematic for the cases where we only want to generate a scalar version of a PHI node. For example, if in the future we only generate a scalar version of an induction variable, we would end up inserting an empty vector entry into the map once we return to vectorizeBlockInLoop. We now no longer need to pass VectorParts to the various PHI widening functions, and we can keep VectorParts allocation as close as possible to the point at which they are actually mapped in VectorLoopValueMap. llvm-svn: 280390 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 67 ++++++++++++++----------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3e57360..15e82dd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -415,8 +415,8 @@ protected: /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. - void widenPHIInstruction(Instruction *PN, VectorParts &Entry, unsigned UF, - unsigned VF, PhiVector *PV); + void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF, + PhiVector *PV); /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. 
@@ -455,16 +455,16 @@ protected: /// Create a vector induction phi node based on an existing scalar one. This /// currently only works for integer induction variables with a constant - /// step. If \p TruncType is non-null, instead of widening the original IV, - /// we widen a version of the IV truncated to \p TruncType. + /// step. \p EntryVal is the value from the original loop that maps to the + /// vector phi node. If \p EntryVal is a truncate instruction, instead of + /// widening the original IV, we widen a version of the IV truncated to \p + /// EntryVal's type. void createVectorIntInductionPHI(const InductionDescriptor &II, - VectorParts &Entry, IntegerType *TruncType); + Instruction *EntryVal); /// Widen an integer induction variable \p IV. If \p Trunc is provided, the - /// induction variable will first be truncated to the corresponding type. The - /// widened values are placed in \p Entry. - void widenIntInduction(PHINode *IV, VectorParts &Entry, - TruncInst *Trunc = nullptr); + /// induction variable will first be truncated to the corresponding type. + void widenIntInduction(PHINode *IV, TruncInst *Trunc = nullptr); /// Returns true if we should generate a scalar version of \p IV. 
bool needsScalarInduction(Instruction *IV) const; @@ -2035,7 +2035,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { } void InnerLoopVectorizer::createVectorIntInductionPHI( - const InductionDescriptor &II, VectorParts &Entry, IntegerType *TruncType) { + const InductionDescriptor &II, Instruction *EntryVal) { Value *Start = II.getStartValue(); ConstantInt *Step = II.getConstIntStepValue(); assert(Step && "Can not widen an IV with a non-constant step"); @@ -2043,7 +2043,8 @@ void InnerLoopVectorizer::createVectorIntInductionPHI( // Construct the initial value of the vector IV in the vector loop preheader auto CurrIP = Builder.saveIP(); Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - if (TruncType) { + if (isa<TruncInst>(EntryVal)) { + auto *TruncType = cast<IntegerType>(EntryVal->getType()); Step = ConstantInt::getSigned(TruncType, Step->getSExtValue()); Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); } @@ -2059,11 +2060,15 @@ void InnerLoopVectorizer::createVectorIntInductionPHI( PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", &*LoopVectorBody->getFirstInsertionPt()); Instruction *LastInduction = VecInd; + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) { Entry[Part] = LastInduction; LastInduction = cast<Instruction>( Builder.CreateAdd(LastInduction, SplatVF, "step.add")); } + VectorLoopValueMap.initVector(EntryVal, Entry); + if (isa<TruncInst>(EntryVal)) + addMetadata(Entry, EntryVal); // Move the last step to the end of the latch block. This ensures consistent // placement of all induction updates. 
@@ -2087,8 +2092,7 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { return any_of(IV->users(), isScalarInst); } -void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, - TruncInst *Trunc) { +void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) { auto II = Legal->getInductionVars()->find(IV); assert(II != Legal->getInductionVars()->end() && "IV is not an induction"); @@ -2096,9 +2100,6 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, auto ID = II->second; assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); - // If a truncate instruction was provided, get the smaller type. - auto *TruncType = Trunc ? cast<IntegerType>(Trunc->getType()) : nullptr; - // The scalar value to broadcast. This will be derived from the canonical // induction variable. Value *ScalarIV = nullptr; @@ -2128,7 +2129,7 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, // loop iteration. if (VF > 1 && IV->getType() == Induction->getType() && Step && !Legal->isScalarAfterVectorization(EntryVal)) { - createVectorIntInductionPHI(ID, Entry, TruncType); + createVectorIntInductionPHI(ID, EntryVal); VectorizedIV = true; } @@ -2138,7 +2139,8 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, // induction variable and constant step. Otherwise, derive these values from // the induction descriptor. if (!VectorizedIV || NeedsScalarIV) { - if (TruncType) { + if (Trunc) { + auto *TruncType = cast<IntegerType>(Trunc->getType()); assert(Step && "Truncation requires constant integer step"); auto StepInt = cast<ConstantInt>(Step)->getSExtValue(); ScalarIV = Builder.CreateCast(Instruction::Trunc, Induction, TruncType); @@ -2163,8 +2165,12 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, // induction variable, and build the necessary step vectors. 
if (!VectorizedIV) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = getStepVector(Broadcasted, VF * Part, Step); + VectorLoopValueMap.initVector(EntryVal, Entry); + if (Trunc) + addMetadata(Entry, Trunc); } // If an induction variable is only used for counting loop iterations or @@ -4371,12 +4377,12 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { return BlockMask; } -void InnerLoopVectorizer::widenPHIInstruction( - Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF, - unsigned VF, PhiVector *PV) { +void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, + unsigned VF, PhiVector *PV) { PHINode *P = cast<PHINode>(PN); // Handle recurrences. if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { + VectorParts Entry(UF); for (unsigned part = 0; part < UF; ++part) { // This is phase one of vectorizing PHIs. Type *VecTy = @@ -4384,6 +4390,7 @@ void InnerLoopVectorizer::widenPHIInstruction( Entry[part] = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); } + VectorLoopValueMap.initVector(P, Entry); PV->push_back(P); return; } @@ -4404,6 +4411,7 @@ void InnerLoopVectorizer::widenPHIInstruction( // SELECT(Mask3, In3, // SELECT(Mask2, In2, // ( ...))) + VectorParts Entry(UF); for (unsigned In = 0; In < NumIncoming; In++) { VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), P->getParent()); @@ -4421,6 +4429,7 @@ void InnerLoopVectorizer::widenPHIInstruction( "predphi"); } } + VectorLoopValueMap.initVector(P, Entry); return; } @@ -4437,7 +4446,7 @@ void InnerLoopVectorizer::widenPHIInstruction( case InductionDescriptor::IK_NoInduction: llvm_unreachable("Unknown induction"); case InductionDescriptor::IK_IntInduction: - return widenIntInduction(P, Entry); + return widenIntInduction(P); case InductionDescriptor::IK_PtrInduction: { // Handle the pointer induction variable case. 
assert(P->getType()->isPointerTy() && "Unexpected type."); @@ -4446,6 +4455,7 @@ void InnerLoopVectorizer::widenPHIInstruction( PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType()); // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. + VectorParts Entry(UF); for (unsigned part = 0; part < UF; ++part) { if (VF == 1) { int EltIndex = part; @@ -4469,6 +4479,7 @@ void InnerLoopVectorizer::widenPHIInstruction( } Entry[part] = VecVal; } + VectorLoopValueMap.initVector(P, Entry); return; } case InductionDescriptor::IK_FpInduction: { @@ -4488,9 +4499,11 @@ void InnerLoopVectorizer::widenPHIInstruction( // After broadcasting the induction variable we need to make the vector // consecutive by adding StepVal*0, StepVal*1, StepVal*2, etc. Value *StepVal = cast<SCEVUnknown>(II.getStep())->getValue(); + VectorParts Entry(UF); for (unsigned part = 0; part < UF; ++part) Entry[part] = getStepVector(Broadcasted, VF * part, StepVal, II.getInductionOpcode()); + VectorLoopValueMap.initVector(P, Entry); return; } } @@ -4524,9 +4537,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { continue; case Instruction::PHI: { // Vectorize PHINodes. - VectorParts Entry(UF); - widenPHIInstruction(&I, Entry, UF, VF, PV); - VectorLoopValueMap.initVector(&I, Entry); + widenPHIInstruction(&I, UF, VF, PV); continue; } // End of PHI. @@ -4648,7 +4659,6 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { case Instruction::BitCast: { auto *CI = dyn_cast<CastInst>(&I); setDebugLocFromInst(Builder, CI); - VectorParts Entry(UF); // Optimize the special case where the source is a constant integer // induction variable. 
Notice that we can only optimize the 'trunc' case @@ -4657,9 +4667,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { auto ID = Legal->getInductionVars()->lookup(OldInduction); if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction && ID.getConstIntStepValue()) { - widenIntInduction(OldInduction, Entry, cast<TruncInst>(CI)); - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); + widenIntInduction(OldInduction, cast<TruncInst>(CI)); break; } @@ -4668,6 +4676,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); const VectorParts &A = getVectorValue(CI->getOperand(0)); + VectorParts Entry(UF); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); VectorLoopValueMap.initVector(&I, Entry); -- 2.7.4