This patch extends the scope of VPlan to also model the pre-header.
The pre-header can be used to place recipes that should be code-gen'd
outside the loop, like SCEV expansion.
Depends on D121623.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D121624
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
- SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI,
- nullptr, Twine(Prefix) + "vector.body");
-
- // Update dominator for loop exit.
+ // Update dominator for loop exit. During skeleton creation, only the vector
+ // pre-header and the middle block are created. The vector loop is entirely
+ // created during VPlan execution.
if (!Cost->requiresScalarEpilogue(VF))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
|/ |
| v
| [ ] \
- | [ ]_| <-- vector loop.
+ | [ ]_| <-- vector loop (created during VPlan execution).
| |
| v
\ -[ ] <--- middle-block.
// Perform the actual loop transformation.
- // 1. Create a new empty loop. Unlink the old loop and connect the new one.
+ // 1. Set up the skeleton for vectorization, including vector pre-header and
+ // middle block. The vector loop is created during VPlan execution.
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
Value *CanonicalIVStartValue;
- std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) =
+ std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
ILV.createVectorizedLoopSkeleton();
ILV.collectPoisonGeneratingRecipes(State);
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
- if (IsVPlanNative)
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
Header->insert(CanonicalIVPHI, Header->begin());
auto *CanonicalIVIncrement =
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
VPBasicBlock *EB = TopRegion->getExitBasicBlock();
- if (IsVPlanNative) {
- EB = cast<VPBasicBlock>(EB->getSinglePredecessor());
+ if (IsVPlanNative)
EB->setCondBit(nullptr);
- }
EB->appendRecipe(CanonicalIVIncrement);
auto *BranchOnCount =
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- // Create initial VPlan skeleton, with separate header and latch blocks.
- VPBasicBlock *HeaderVPBB = new VPBasicBlock();
+ // Create initial VPlan skeleton, starting with a block for the pre-header,
+ // followed by a region for the vector loop. The skeleton vector loop region
+ // contains a header and latch block.
+ VPBasicBlock *Preheader = new VPBasicBlock("vector.ph");
+ auto Plan = std::make_unique<VPlan>(Preheader);
+
+ VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
- auto Plan = std::make_unique<VPlan>(TopRegion);
+ VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
Instruction *DLInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
- VPBB->setName(BB->getName());
+ if (VPBB != HeaderVPBB)
+ VPBB->setName(BB->getName());
Builder.setInsertPoint(VPBB);
// Introduce each ingredient into VPlan.
[this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); },
DeadInstructions, *PSE.getSE());
+ // Update plan to be compatible with the inner loop vectorizer for
+ // code-generation.
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
+ VPBasicBlock *Preheader = LoopRegion->getEntryBasicBlock();
+ VPBasicBlock *Exit = LoopRegion->getExitBasicBlock();
+ VPBlockBase *Latch = Exit->getSinglePredecessor();
+ VPBlockBase *Header = Preheader->getSingleSuccessor();
+
+ // 1. Move preheader block out of main vector loop.
+ Preheader->setParent(LoopRegion->getParent());
+ VPBlockUtils::disconnectBlocks(Preheader, Header);
+ VPBlockUtils::connectBlocks(Preheader, LoopRegion);
+ Plan->setEntry(Preheader);
+
+ // 2. Disconnect backedge and exit block.
+ VPBlockUtils::disconnectBlocks(Latch, Header);
+ VPBlockUtils::disconnectBlocks(Latch, Exit);
+
+ // 3. Update entry and exit of main vector loop region.
+ LoopRegion->setEntry(Header);
+ LoopRegion->setExit(Latch);
+
+ // 4. Remove exit block.
+ delete Exit;
+
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
true, true);
return Plan;
auto &DL = EntryVal->getModule()->getDataLayout();
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
// Generate code for the induction step. Note that induction steps are
// required to be loop-invariant
auto CreateStepValue = [&](const SCEV *Step) -> Value * {
if (SE.isSCEVable(IV->getType())) {
SCEVExpander Exp(SE, DL, "induction");
return Exp.expandCodeFor(Step, Step->getType(),
- State.CFG.VectorPreHeader->getTerminator());
+ VectorPH->getTerminator());
}
return cast<SCEVUnknown>(Step)->getValue();
};
// Construct the initial value of the vector IV in the vector loop preheader
auto CurrIP = Builder.saveIP();
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
if (isa<TruncInst>(EntryVal)) {
assert(Start->getType()->isIntegerTy() &&
"Truncation requires an integer type");
}
LastInduction->setName("vec.ind.next");
- VecInd->addIncoming(SteppedStart, State.CFG.VectorPreHeader);
+ VecInd->addIncoming(SteppedStart, VectorPH);
// Add induction update using an incorrect block temporarily. The phi node
// will be fixed after VPlan execution. Note that at this point the latch
// block cannot be used, as it does not exist yet.
// TODO: Model increment value in VPlan, by turning the recipe into a
// multi-def and a subclass of VPHeaderPHIRecipe.
- VecInd->addIncoming(LastInduction, State.CFG.VectorPreHeader);
+ VecInd->addIncoming(LastInduction, VectorPH);
}
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
Type *ScStValueType = ScalarStartValue->getType();
PHINode *NewPointerPhi =
PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
- NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader);
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
// A pointer induction, performed by using a gep
const DataLayout &DL = NewPointerPhi->getModule()->getDataLayout();
// block cannot be used, as it does not exist yet.
// TODO: Model increment value in VPlan, by turning the recipe into a
// multi-def and a subclass of VPHeaderPHIRecipe.
- NewPointerPhi->addIncoming(InductionGEP, State.CFG.VectorPreHeader);
+ NewPointerPhi->addIncoming(InductionGEP, VectorPH);
// Create UF many actual address geps that use the pointer
// phi as base and a vectorized version of the step value
// set(Def, Extract, Instance);
return Extract;
}
+BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
+ VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
+ return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+}
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
- if (isa<UnreachableInst>(PredBBTerminator)) {
+
+ auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
+ if (isa<UnreachableInst>(PredBBTerminator) ||
+ (TermBr && !TermBr->isConditional())) {
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
- DebugLoc DL = PredBBTerminator->getDebugLoc();
- PredBBTerminator->eraseFromParent();
- auto *Br = BranchInst::Create(NewBB, PredBB);
- Br->setDebugLoc(DL);
+ if (TermBr) {
+ TermBr->setSuccessor(0, NewBB);
+ } else {
+ DebugLoc DL = PredBBTerminator->getDebugLoc();
+ PredBBTerminator->eraseFromParent();
+ auto *Br = BranchInst::Create(NewBB, PredBB);
+ Br->setDebugLoc(DL);
+ }
} else {
- assert(PredVPSuccessors.size() == 2 &&
- "Predecessor ending with branch must have two successors.");
- unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!PredBBTerminator->getSuccessor(idx) &&
- "Trying to reset an existing successor block.");
- PredBBTerminator->setSuccessor(idx, NewBB);
+ if (PredVPSuccessors.size() == 2) {
+ unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
+ assert(!PredBBTerminator->getSuccessor(idx) &&
+ "Trying to reset an existing successor block.");
+ PredBBTerminator->setSuccessor(idx, NewBB);
+ } else {
+ auto *Reg = dyn_cast<VPRegionBlock>(PredVPBB->getParent());
+ assert(Reg && !Reg->isReplicator());
+ assert(this == Reg->getSingleSuccessor());
+ PredBBTerminator->setSuccessor(0, NewBB);
+ PredBBTerminator->setSuccessor(
+ 1, CFG.VPBB2IRBB[Reg->getEntryBasicBlock()]);
+ }
}
}
return NewBB;
VPBlockBase *SingleHPred = nullptr;
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
+ auto IsNonReplicateR = [](VPBlockBase *BB) {
+ auto *R = dyn_cast<VPRegionBlock>(BB);
+ return R && !R->isReplicator();
+ };
+
// 1. Create an IR basic block, or reuse the last one if possible.
// The last IR basic block is reused, as an optimization, in three cases:
- // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
+ // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
// B. when the current VPBB has a single (hierarchical) predecessor which
- // is PrevVPBB and the latter has a single (hierarchical) successor; and
+ // is PrevVPBB and the latter has a single (hierarchical) successor, where
+ // both are in the same non-replicator region; and
// C. when the current VPBB is an entry of a region replica - where PrevVPBB
// is the exit of this region from a previous instance, or the predecessor
// of this region.
if (PrevVPBB && /* A */
!((SingleHPred = getSingleHierarchicalPredecessor()) &&
SingleHPred->getExitBasicBlock() == PrevVPBB &&
- PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
- !(Replica && getPredecessors().empty())) { /* C */
+ PrevVPBB->getSingleHierarchicalSuccessor() &&
+ (SingleHPred->getParent() == getEnclosingLoopRegion() &&
+ !IsNonReplicateR(SingleHPred))) && /* B */
+ !(Replica && getPredecessors().empty())) { /* C */
NewBB = createEmptyBasicBlock(State->CFG);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
// Register NewBB in its loop. In innermost loops it's the same for all BB's.
- State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
+ if (State->CurrentVectorLoop)
+ State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->Builder.SetInsertPoint(Terminator);
State->CFG.PrevBB = NewBB;
- } else {
- // If the current VPBB is re-using the header block from skeleton creation,
- // move it to the new vector loop.
- VPBasicBlock *HeaderVPBB =
- getPlan()->getVectorLoopRegion()->getEntryBasicBlock();
- if (EnableVPlanNativePath)
- HeaderVPBB = cast<VPBasicBlock>(HeaderVPBB->getSingleSuccessor());
- if (this == HeaderVPBB) {
- assert(State->CurrentVectorLoop);
- State->LI->removeBlock(State->CFG.PrevBB);
- State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB,
- *State->LI);
- }
}
// 2. Fill the IR basic block with IR instructions.
return SplitBlock;
}
+VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
+ VPRegionBlock *P = getParent();
+ if (P && P->isReplicator()) {
+ P = P->getParent();
+ assert(!cast<VPRegionBlock>(P)->isReplicator() &&
+ "unexpected nested replicate regions");
+ }
+ return P;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
if (getSuccessors().empty()) {
// Create and register the new vector loop.
Loop *PrevLoop = State->CurrentVectorLoop;
State->CurrentVectorLoop = State->LI->AllocateLoop();
- Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader);
+ BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
// Insert the new loop into the loop nest and register the new basic blocks
// before calling any utilities such as SCEV that require valid LoopInfo.
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
- if (EnableVPlanNativePath) {
- // The inner loop vectorization path does not represent loop preheader
- // and exit blocks as part of the VPlan. In the VPlan-native path, skip
- // vectorizing loop preheader block. In future, we may replace this
- // check with the check for loop preheader.
- if (Block->getNumPredecessors() == 0)
- continue;
-
- // Skip vectorizing loop exit block. In future, we may replace this
- // check with the check for loop exit.
- if (Block->getNumSuccessors() == 0)
- continue;
- }
-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
// Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
- IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator());
+ IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
auto *TCMO = Builder.CreateSub(TripCountV,
ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
}
}
-/// Generate the code inside the body of the vectorized loop. Assumes a single
-/// LoopVectorBody basic-block was created for this. Introduce additional
-/// basic-blocks as needed, and fill them all.
+/// Generate the code inside the preheader and body of the vectorized loop.
+/// Assumes a single pre-header basic-block was created for this. Introduce
+/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
// Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
- BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor();
- State->CFG.PrevBB = VectorHeaderBB;
- State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor();
- State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB);
-
- // Remove the edge between Header and Latch to allow other connections.
- // Temporarily terminate with unreachable until CFG is rewired.
- // Note: this asserts the generated code's assumption that
- // getFirstInsertionPt() can be dereferenced into an Instruction.
- VectorHeaderBB->getTerminator()->eraseFromParent();
- State->Builder.SetInsertPoint(VectorHeaderBB);
- UnreachableInst *Terminator = State->Builder.CreateUnreachable();
- State->Builder.SetInsertPoint(Terminator);
-
- // Generate code in loop body.
+ State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
+ BasicBlock *VectorPreHeader = State->CFG.PrevBB;
+ State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
+
+ // Generate code in the loop pre-header and body.
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- if (Header->empty()) {
- assert(EnableVPlanNativePath);
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
- }
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedge values themselves.
if (isa<VPWidenPHIRecipe>(&R))
}
// We do not attempt to preserve DT for outer loop vectorization currently.
- if (!EnableVPlanNativePath)
+ if (!EnableVPlanNativePath) {
+ BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
+ State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
State->CFG.ExitBB);
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Value *Start = getStartValue()->getLiveInIRValue();
PHINode *EntryPart = PHINode::Create(
Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ EntryPart->addIncoming(Start, VectorPH);
EntryPart->setDebugLoc(DL);
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, EntryPart, Part);
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "cannot be used in per-lane");
- const DataLayout &DL =
- State.CFG.VectorPreHeader->getModule()->getDataLayout();
+ const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
- Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
- State.CFG.VectorPreHeader->getTerminator());
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ Value *Res =
+ Exp.expandCodeFor(Expr, Expr->getType(), VectorPH->getTerminator());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, Res, Part);
? VectorInit->getType()
: VectorType::get(VectorInit->getType(), State.VF);
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
if (State.VF.isVector()) {
auto *IdxTy = Builder.getInt32Ty();
auto *One = ConstantInt::get(IdxTy, 1);
IRBuilder<>::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
VectorInit = Builder.CreateInsertElement(
// Create a phi node for the new recurrence.
PHINode *EntryPart = PHINode::Create(
VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
+ EntryPart->addIncoming(VectorInit, VectorPH);
State.set(this, EntryPart, 0);
}
State.set(this, EntryPart, Part);
}
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VPValue *StartVPV = getStartValue();
Iden = StartV;
} else {
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
StartV = Iden =
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
if (!ScalarPHI) {
Iden = Builder.CreateVectorSplat(State.VF, Iden);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
Constant *Zero = Builder.getInt32(0);
StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
}
// Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (Part == 0) ? StartV : Iden;
- cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
+ cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
}
}
/// vector loop.
BasicBlock *ExitBB = nullptr;
- /// The IR BasicBlock that is the preheader of the vector loop in the output
- /// IR.
- /// FIXME: The vector preheader should also be modeled in VPlan, so any code
- /// that needs to be added to the preheader gets directly generated by
- /// VPlan. There should be no need to manage a pointer to the IR BasicBlock.
- BasicBlock *VectorPreHeader = nullptr;
-
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
SmallVector<VPBasicBlock *, 8> VPBBsToFix;
CFGState() = default;
+
+ /// Returns the BasicBlock* mapped to the pre-header of the loop region
+ /// containing \p R.
+ BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
} CFG;
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
/// SplitAt to the new block. Returns the new block.
VPBasicBlock *splitAt(iterator SplitAt);
+ VPRegionBlock *getEnclosingLoopRegion();
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
/// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
ExitBlock->setParent(this);
}
+ /// Returns the pre-header VPBasicBlock of the loop region.
+ VPBasicBlock *getPreheaderVPBB() {
+ assert(!isReplicator() && "should only get pre-header of loop regions");
+ return getSinglePredecessor()->getExitBasicBlock();
+ }
+
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
bool isReplicator() const { return IsReplicator; }
/// Returns the VPRegionBlock of the vector loop.
VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry());
+ if (auto *R = dyn_cast<VPRegionBlock>(getEntry()))
+ return R;
+ return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
}
const VPRegionBlock *getVectorLoopRegion() const {
- return cast<VPRegionBlock>(getEntry());
+ if (auto *R = dyn_cast<VPRegionBlock>(getEntry()))
+ return R;
+ return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
}
/// Returns the canonical induction recipe of the vector loop.
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK: define void @PR31671(
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float %x, i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT: br i1 [[TMP17]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
; CHECK: .lr.ph.preheader:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH10:%.*]]
-; CHECK: vector.ph10:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH9:%.*]]
+; CHECK: vector.ph9:
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
; CHECK-NEXT: [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT18]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
-; CHECK: vector.body9:
-; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
+; CHECK: vector.body19:
+; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; Verify that -vplan-print-in-dot-format option works.
define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
-; CHECK: subgraph cluster_N0 {
+; CHECK: digraph VPlan {
+; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1"]
+; CHECK-NEXT: node [shape=rect, fontname=Courier, fontsize=30]
+; CHECK-NEXT: edge [fontname=Courier, fontsize=30]
+; CHECK-NEXT: compound=true
+; CHECK-NEXT: N0 [label =
+; CHECK-NEXT: "vector.ph:\l" +
+; CHECK-NEXT: "Successor(s): vector loop\l"
+; CHECK-NEXT: ]
+; CHECK-NEXT: N0 -> N1 [ label="" lhead=cluster_N2]
+; CHECK-NEXT: subgraph cluster_N2 {
; CHECK-NEXT: fontname=Courier
; CHECK-NEXT: label="\<x1\> vector loop"
; CHECK-NEXT: N1 [label =
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION