if (OrigLoop->isLoopExiting(Src))
return EdgeMaskCache[Edge] = SrcMask;
- VPValue *EdgeMask = Plan.getVPValueOrAddLiveIn(BI->getCondition());
+ VPValue *EdgeMask = Plan.getOrAddVPValue(BI->getCondition());
assert(EdgeMask && "No Edge Mask found for condition");
if (BI->getSuccessor(0) != Dst)
// 'select i1 SrcMask, i1 EdgeMask, i1 false'.
// The select version does not introduce new UB if SrcMask is false and
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
- VPValue *False = Plan.getVPValueOrAddLiveIn(
+ VPValue *False = Plan.getOrAddVPValue(
ConstantInt::getFalse(BI->getCondition()->getType()));
EdgeMask =
Builder.createSelect(SrcMask, EdgeMask, False, BI->getDebugLoc());
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
/// insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
- VPValue *Start, const InductionDescriptor &IndDesc,
- VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
- VFRange &Range) {
+static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipes(
+ PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start,
+ const InductionDescriptor &IndDesc, LoopVectorizationCostModel &CM,
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, VFRange &Range) {
+ // Returns true if an instruction \p I should be scalarized instead of
+ // vectorized for the chosen vectorization factor.
+ auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
+ return CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF);
+ };
+
+ bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ return ShouldScalarizeInstruction(PhiOrTrunc, VF);
+ },
+ Range);
assert(IndDesc.getStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI,
+ !NeedsScalarIVOnly);
}
assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc,
+ !NeedsScalarIVOnly);
}
VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipes(Phi, Phi, Operands[0], *II, Plan,
+ return createWidenInductionRecipes(Phi, Phi, Operands[0], *II, CM, Plan,
*PSE.getSE(), *OrigLoop, Range);
// Check if this is pointer induction. If so, build the recipe for it.
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
- VPValue *Start = Plan.getVPValueOrAddLiveIn(II.getStartValue());
- return createWidenInductionRecipes(Phi, I, Start, II, Plan, *PSE.getSE(),
- *OrigLoop, Range);
+ VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
+ return createWidenInductionRecipes(Phi, I, Start, II, CM, Plan,
+ *PSE.getSE(), *OrigLoop, Range);
}
return nullptr;
}
if (Legal->isMaskRequired(CI))
Mask = createBlockInMask(CI->getParent(), *Plan);
else
- Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue(
+ Mask = Plan->getOrAddVPValue(ConstantInt::getTrue(
IntegerType::getInt1Ty(Variant->getFunctionType()->getContext())));
VFShape Shape = VFShape::get(*CI, VariantVF, /*HasGlobalPred=*/true);
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands.begin(), Operands.end());
VPValue *Mask = createBlockInMask(I->getParent(), *Plan);
- VPValue *One = Plan->getVPValueOrAddLiveIn(
- ConstantInt::get(I->getType(), 1u, false));
+ VPValue *One =
+ Plan->getOrAddExternalDef(ConstantInt::get(I->getType(), 1u, false));
auto *SafeRHS =
new VPInstruction(Instruction::Select, {Mask, Ops[1], One},
I->getDebugLoc());
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
TailFoldingStyle Style) {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
- auto *StartV = Plan.getVPValueOrAddLiveIn(StartIdx);
+ auto *StartV = Plan.getOrAddVPValue(StartIdx);
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
for (PHINode &ExitPhi : ExitBB->phis()) {
Value *IncomingValue =
ExitPhi.getIncomingValueForBlock(ExitingBB);
- VPValue *V = Plan.getVPValueOrAddLiveIn(IncomingValue);
+ VPValue *V = Plan.getOrAddVPValue(IncomingValue, true);
Plan.addLiveOut(&ExitPhi, V);
}
}
SmallVector<VPValue *, 4> Operands;
auto *Phi = dyn_cast<PHINode>(Instr);
if (Phi && Phi->getParent() == OrigLoop->getHeader()) {
- Operands.push_back(Plan->getVPValueOrAddLiveIn(
+ Operands.push_back(Plan->getOrAddVPValue(
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())));
} else {
auto OpRange = Plan->mapToVPValues(Instr->operands());
IndPhi, *ID, {EPI.MainLoopIterationCountCheck});
}
assert(ResumeV && "Must have a resume value");
- VPValue *StartVal = BestEpiPlan.getVPValueOrAddLiveIn(ResumeV);
+ VPValue *StartVal = BestEpiPlan.getOrAddExternalDef(ResumeV);
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
VPBlockBase::deleteCFG(Entry);
}
- for (VPValue *VPV : VPLiveInsToFree)
+ for (VPValue *VPV : VPValuesToFree)
delete VPV;
if (TripCount)
delete TripCount;
if (BackedgeTakenCount)
delete BackedgeTakenCount;
+ for (auto &P : VPExternalDefs)
+ delete P.second;
}
VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
if (CanonicalIVStartValue) {
- VPValue *VPV = getVPValueOrAddLiveIn(CanonicalIVStartValue);
+ VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
ScalarEvolution &SE) {
if (auto *E = dyn_cast<SCEVConstant>(Expr))
- return Plan.getVPValueOrAddLiveIn(E->getValue());
+ return Plan.getOrAddExternalDef(E->getValue());
if (auto *E = dyn_cast<SCEVUnknown>(Expr))
- return Plan.getVPValueOrAddLiveIn(E->getValue());
+ return Plan.getOrAddExternalDef(E->getValue());
VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
VPExpandSCEVRecipe *Step = new VPExpandSCEVRecipe(Expr, SE);
PHINode *IV;
TruncInst *Trunc;
const InductionDescriptor &IndDesc;
+ bool NeedsVectorIV;
public:
+ /// Constructs the recipe for induction phi \p IV without a truncate (Trunc
+ /// is set to nullptr). \p IV and \p Start are forwarded to the
+ /// VPHeaderPHIRecipe base and \p Step is added as a further operand.
+ /// \p IndDesc is held by reference. \p NeedsVectorIV is stored and reported
+ /// by needsVectorIV().
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc)
+ const InductionDescriptor &IndDesc,
+ bool NeedsVectorIV)
: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
- Trunc(nullptr), IndDesc(IndDesc) {
+ Trunc(nullptr), IndDesc(IndDesc), NeedsVectorIV(NeedsVectorIV) {
addOperand(Step);
}
+ /// Constructs the recipe for induction phi \p IV together with the truncate
+ /// \p Trunc. Here \p Trunc (rather than \p IV) and \p Start are forwarded to
+ /// the VPHeaderPHIRecipe base, and \p Step is added as a further operand.
+ /// \p IndDesc is held by reference. \p NeedsVectorIV is stored and reported
+ /// by needsVectorIV().
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
const InductionDescriptor &IndDesc,
- TruncInst *Trunc)
+ TruncInst *Trunc, bool NeedsVectorIV)
: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
- IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
+ IV(IV), Trunc(Trunc), IndDesc(IndDesc), NeedsVectorIV(NeedsVectorIV) {
addOperand(Step);
}
const Type *getScalarType() const {
return Trunc ? Trunc->getType() : IV->getType();
}
+
+ /// Returns true if a vector phi needs to be created for the induction. The
+ /// value is the NeedsVectorIV flag passed to the constructor.
+ bool needsVectorIV() const { return NeedsVectorIV; }
};
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
/// Holds the name of the VPlan, for printing.
std::string Name;
+ /// Holds all the external definitions created for this VPlan. External
+ /// definitions must be immutable and hold a pointer to their underlying IR.
+ DenseMap<Value *, VPValue *> VPExternalDefs;
+
/// Represents the trip count of the original loop, for folding
/// the tail.
VPValue *TripCount = nullptr;
/// VPlan.
Value2VPValueTy Value2VPValue;
- /// Contains all the external definitions created for this VPlan. External
- /// definitions are VPValues that hold a pointer to their underlying IR.
- SmallVector<VPValue *, 16> VPLiveInsToFree;
+ /// Contains all VPValues that have been allocated by addVPValue directly and
+ /// need to be freed when the plan's destructor is called.
+ SmallVector<VPValue *, 16> VPValuesToFree;
/// Indicates whether it is safe to use the Value2VPValue mapping or if the
/// mapping cannot be used any longer, because it is stale.
void setName(const Twine &newName) { Name = newName.str(); }
+ /// Returns the external definition wrapping \p V. The first request for a
+ /// given \p V allocates a new VPValue and caches it in VPExternalDefs; later
+ /// requests return the cached one.
+ VPValue *getOrAddExternalDef(Value *V) {
+ auto Lookup = VPExternalDefs.insert({V, nullptr});
+ VPValue *&Def = Lookup.first->second;
+ // A successful insertion means \p V had no entry yet, so fill the slot.
+ if (Lookup.second)
+ Def = new VPValue(V);
+ return Def;
+ }
+
+ /// Allocates a new VPValue for \p V, records it in VPValuesToFree and maps
+ /// \p V to it in Value2VPValue. \p V must be non-null and must not have a
+ /// mapping yet; the mapping must still be enabled.
+ void addVPValue(Value *V) {
+ assert(Value2VPValueEnabled &&
+ "IR value to VPValue mapping may be out of date!");
+ assert(V && "Trying to add a null Value to VPlan");
+ assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
+ VPValuesToFree.push_back(new VPValue(V));
+ Value2VPValue[V] = VPValuesToFree.back();
+ }
+
+ /// Maps \p V to the caller-provided \p VPV in Value2VPValue. \p V must be
+ /// non-null and must not have a mapping yet; the mapping must still be
+ /// enabled. Unlike the single-argument overload, \p VPV is not added to
+ /// VPValuesToFree here.
void addVPValue(Value *V, VPValue *VPV) {
- assert((Value2VPValueEnabled || !VPV->getDefiningRecipe()) &&
- "Value2VPValue mapping may be out of date!");
+ assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
Value2VPValue[V] = VPV;
}
/// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
- /// /// checking whether it is safe to query VPValues using IR Values.
+ /// checking whether it is safe to query VPValues using IR Values.
+ /// \p V must be non-null and must already have an entry in Value2VPValue;
+ /// both conditions are asserted.
VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
assert(V && "Trying to get the VPValue of a null Value");
+ // Checked after the null test: isa<> must not be handed a null pointer.
+ assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
+ "Value2VPValue mapping may be out of date!");
assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
- assert((!Value2VPValue[V]->getDefiningRecipe() || Value2VPValueEnabled ||
- OverrideAllowed) &&
- "Value2VPValue mapping may be out of date!");
return Value2VPValue[V];
}
- /// Gets the VPValue for \p V or adds a new live-in (if none exists yet) for
- /// \p V.
- VPValue *getVPValueOrAddLiveIn(Value *V) {
+ /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
+ /// OverrideAllowed can be used to disable checking whether it is safe to
+ /// query VPValues using IR Values; it is forwarded to getVPValue so the
+ /// final lookup honors it as well. Adding a missing value still goes through
+ /// addVPValue and the checks performed there.
+ VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
- assert(V && "Trying to get or add the VPValue of a null Value");
+ assert(V && "Trying to get or add the VPValue of a null Value");
+ // Checked after the null test: isa<> must not be handed a null pointer.
+ assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
+ "Value2VPValue mapping may be out of date!");
- if (!Value2VPValue.count(V)) {
- VPValue *VPV = new VPValue(V);
- VPLiveInsToFree.push_back(VPV);
- addVPValue(V, VPV);
- }
-
+ if (!Value2VPValue.count(V))
+ addVPValue(V);
- return getVPValue(V);
+ return getVPValue(V, OverrideAllowed);
}
iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
mapToVPValues(User::op_range Operands) {
+ // Each operand is translated through getOrAddVPValue, so an operand that
+ // has no VPValue yet gets one created when the range is traversed.
std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
- return getVPValueOrAddLiveIn(Op);
+ return getOrAddVPValue(Op);
};
return map_range(Operands, Fn);
}
// A and B: Create VPValue and add it to the pool of external definitions and
// to the Value->VPValue map.
- VPValue *NewVPVal = Plan.getVPValueOrAddLiveIn(IRVal);
+ VPValue *NewVPVal = Plan.getOrAddExternalDef(IRVal);
IRDef2VPValue[IRVal] = NewVPVal;
return NewVPVal;
}
for (auto &I : *ThePreheaderBB) {
if (I.getType()->isVoidTy())
continue;
- IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I);
+ IRDef2VPValue[&I] = Plan.getOrAddExternalDef(&I);
}
// Create empty VPBB for Loop H so that we can link PH->H.
VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader());
if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
- VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue());
+ VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
- NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
+ NewRecipe =
+ new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
// Create VPWidenMemoryInstructionRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
- *Load,
- Plan->getVPValueOrAddLiveIn(getLoadStorePointerOperand(Inst)),
+ *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
- *Store,
- Plan->getVPValueOrAddLiveIn(getLoadStorePointerOperand(Inst)),
- Plan->getVPValueOrAddLiveIn(Store->getValueOperand()),
- nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
+ *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+ Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
+ false /*Consecutive*/, false /*Reverse*/);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe =
new VPWidenGEPRecipe(GEP, Plan->mapToVPValues(GEP->operands()));
// everything WidenNewIV's users need. That is, WidenOriginalIV will
// generate a vector phi or all users of WidenNewIV demand the first lane
// only.
- if (any_of(WidenOriginalIV->users(),
- [WidenOriginalIV](VPUser *U) {
- return !U->usesScalars(WidenOriginalIV);
- }) ||
+ if (WidenOriginalIV->needsVectorIV() ||
vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
return;
LLVMContext &Ctx = SE.getContext();
- auto *BOC = new VPInstruction(
- VPInstruction::BranchOnCond,
- {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))});
+ auto *BOC =
+ new VPInstruction(VPInstruction::BranchOnCond,
+ {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))});
Term->eraseFromParent();
ExitingVPBB->appendRecipe(BOC);
Plan.setVF(BestVF);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Add an external value to check we do not print the list of external values,
// as this is not required with the new printing.
- Plan->getVPValueOrAddLiveIn(&*F->arg_begin());
+ Plan->addVPValue(&*F->arg_begin());
std::string FullDump;
raw_string_ostream OS(FullDump);
Plan->printDOT(OS);
BinaryOperator::CreateAdd(UndefValue::get(Int32), UndefValue::get(Int32));
AI->setName("a");
SmallVector<VPValue *, 2> Args;
- VPValue *ExtVPV1 = Plan.getVPValueOrAddLiveIn(ConstantInt::get(Int32, 1));
- VPValue *ExtVPV2 = Plan.getVPValueOrAddLiveIn(ConstantInt::get(Int32, 2));
+ VPValue *ExtVPV1 = Plan.getOrAddExternalDef(ConstantInt::get(Int32, 1));
+ VPValue *ExtVPV2 = Plan.getOrAddExternalDef(ConstantInt::get(Int32, 2));
Args.push_back(ExtVPV1);
Args.push_back(ExtVPV2);
VPWidenRecipe *WidenR =