#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
return None;
}
+// Forward declare GeneratedRTChecks (defined after the llvm namespace below)
+// so the vectorizer classes in namespace llvm can hold a reference to it.
+class GeneratedRTChecks;
+
namespace llvm {
/// InnerLoopVectorizer vectorizes loops which contain only one basic
OptimizationRemarkEmitter *ORE, ElementCount VecWidth,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI)
+ ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
- PSI(PSI) {
+ PSI(PSI), RTChecks(RTChecks) {
// Query this against the original loop and save it here because the profile
// of the original loop header may change as the transformation happens.
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
/// Emit a bypass check to see if all of the SCEV assumptions we've
- /// had to make are correct.
- void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
+ /// had to make are correct. Returns the block containing the checks or
+ /// nullptr if no checks have been added.
+ BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass);
/// Emit bypass checks to check any memory assumptions we may have made.
- void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+ /// Returns the block containing the checks or nullptr if no checks have been
+ /// added.
+ BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
// Whether this loop should be optimized for size based on profile guided size
// optimizations.
bool OptForSizeBasedOnProfile;
+
+  /// Structure to hold information about generated runtime checks, responsible
+  /// for cleaning up the checks if vectorization turns out to be unprofitable.
+ GeneratedRTChecks &RTChecks;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI)
+ ProfileSummaryInfo *PSI, GeneratedRTChecks &Check)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
ElementCount::getFixed(1), UnrollFactor, LVL, CM,
- BFI, PSI) {}
+ BFI, PSI, Check) {}
private:
Value *getBroadcastInstrs(Value *V) override;
const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ GeneratedRTChecks &Checks)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI),
+ EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI,
+ Checks),
EPI(EPI) {}
// Override this function to handle the more complex control flow around the
const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ GeneratedRTChecks &Check)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, LVL, CM, BFI, PSI) {}
+ EPI, LVL, CM, BFI, PSI, Check) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (ie the first pass of vplan execution).
BasicBlock *createEpilogueVectorizedLoopSkeleton() final override;
// their epilogues.
class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
public:
- EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
- LoopInfo *LI, DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE,
- EpilogueLoopVectorizationInfo &EPI,
- LoopVectorizationLegality *LVL,
- llvm::LoopVectorizationCostModel *CM,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ EpilogueVectorizerEpilogueLoop(
+ Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
+ LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ GeneratedRTChecks &Checks)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, LVL, CM, BFI, PSI) {}
+ EPI, LVL, CM, BFI, PSI, Checks) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *epilogue loop* strategy (ie the second pass of vplan execution).
BasicBlock *createEpilogueVectorizedLoopSkeleton() final override;
/// Profitable vector factors.
SmallVector<VectorizationFactor, 8> ProfitableVFs;
};
-
} // end namespace llvm
+/// Helper struct to manage generating runtime checks for vectorization.
+///
+/// The runtime checks are created up-front in temporary blocks to allow better
+/// estimating the cost and un-linked from the existing IR. After deciding to
+/// vectorize, the checks are moved back. If deciding not to vectorize, the
+/// temporary blocks are completely removed.
+class GeneratedRTChecks {
+  /// Basic block which contains the generated SCEV checks, if any.
+  BasicBlock *SCEVCheckBlock = nullptr;
+
+  /// The value representing the result of the generated SCEV checks. If it is
+  /// nullptr, either no SCEV checks have been generated or they have been used.
+  Value *SCEVCheckCond = nullptr;
+
+  /// Basic block which contains the generated memory runtime checks, if any.
+  BasicBlock *MemCheckBlock = nullptr;
+
+  /// The value representing the result of the generated memory runtime checks.
+  /// If it is nullptr, either no memory runtime checks have been generated or
+  /// they have been used.
+  Instruction *MemRuntimeCheckCond = nullptr;
+
+  DominatorTree *DT;
+  LoopInfo *LI;
+
+  SCEVExpander SCEVExp;
+  SCEVExpander MemCheckExp;
+
+public:
+  // NOTE(review): MemCheckExp reuses the "scev.check" name prefix for the IR
+  // it expands — confirm this is intentional; a distinct "mem.check" tag may
+  // make the generated IR easier to attribute.
+  GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
+                    const DataLayout &DL)
+      : DT(DT), LI(LI), SCEVExp(SE, DL, "scev.check"),
+        MemCheckExp(SE, DL, "scev.check") {}
+
+  /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
+  /// accurately estimate the cost of the runtime checks. The blocks are
+  /// un-linked from the IR and are added back during vector code generation.
+  /// If there is no vector code generation, the check blocks are removed
+  /// completely.
+  void Create(Loop *L, const LoopAccessInfo &LAI,
+              const SCEVUnionPredicate &UnionPred) {
+
+    BasicBlock *LoopHeader = L->getHeader();
+    BasicBlock *Preheader = L->getLoopPreheader();
+
+    // Use SplitBlock to create blocks for SCEV & memory runtime checks to
+    // ensure the blocks are properly added to LoopInfo & DominatorTree. Those
+    // may be used by SCEVExpander. The blocks will be un-linked from their
+    // predecessors and removed from LI & DT at the end of the function.
+    if (!UnionPred.isAlwaysTrue()) {
+      SCEVCheckBlock = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI,
+                                  nullptr, "vector.scevcheck");
+
+      SCEVCheckCond = SCEVExp.expandCodeForPredicate(
+          &UnionPred, SCEVCheckBlock->getTerminator());
+    }
+
+    const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
+    if (RtPtrChecking.Need) {
+      // Chain the memory-check block after the SCEV-check block, if one was
+      // created, so both are split off the same path.
+      auto *Pred = SCEVCheckBlock ? SCEVCheckBlock : Preheader;
+      MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr,
+                                 "vector.memcheck");
+
+      std::tie(std::ignore, MemRuntimeCheckCond) =
+          addRuntimeChecks(MemCheckBlock->getTerminator(), L,
+                           RtPtrChecking.getChecks(), MemCheckExp);
+      assert(MemRuntimeCheckCond &&
+             "no RT checks generated although RtPtrChecking "
+             "claimed checks are required");
+    }
+
+    if (!MemCheckBlock && !SCEVCheckBlock)
+      return;
+
+    // Unhook the temporary block with the checks, update various places
+    // accordingly.
+    if (SCEVCheckBlock)
+      SCEVCheckBlock->replaceAllUsesWith(Preheader);
+    if (MemCheckBlock)
+      MemCheckBlock->replaceAllUsesWith(Preheader);
+
+    // Move each check block's branch back into the preheader (restoring its
+    // original terminator) and terminate the detached check block with an
+    // 'unreachable' so it remains well-formed IR while unlinked.
+    if (SCEVCheckBlock) {
+      SCEVCheckBlock->getTerminator()->moveBefore(Preheader->getTerminator());
+      new UnreachableInst(Preheader->getContext(), SCEVCheckBlock);
+      Preheader->getTerminator()->eraseFromParent();
+    }
+    if (MemCheckBlock) {
+      MemCheckBlock->getTerminator()->moveBefore(Preheader->getTerminator());
+      new UnreachableInst(Preheader->getContext(), MemCheckBlock);
+      Preheader->getTerminator()->eraseFromParent();
+    }
+
+    DT->changeImmediateDominator(LoopHeader, Preheader);
+    if (MemCheckBlock) {
+      DT->eraseNode(MemCheckBlock);
+      LI->removeBlock(MemCheckBlock);
+    }
+    if (SCEVCheckBlock) {
+      DT->eraseNode(SCEVCheckBlock);
+      LI->removeBlock(SCEVCheckBlock);
+    }
+  }
+
+  /// Remove the created SCEV & memory runtime check blocks & instructions, if
+  /// unused.
+  ~GeneratedRTChecks() {
+    SCEVExpanderCleaner SCEVCleaner(SCEVExp, *DT);
+    SCEVExpanderCleaner MemCheckCleaner(MemCheckExp, *DT);
+    // A null cond means the checks were either never generated or have been
+    // emitted into the function (see emitSCEVChecks/emitMemRuntimeChecks); in
+    // both cases the expanded values are live and must not be cleaned up.
+    if (!SCEVCheckCond)
+      SCEVCleaner.markResultUsed();
+
+    if (!MemRuntimeCheckCond)
+      MemCheckCleaner.markResultUsed();
+
+    if (MemRuntimeCheckCond) {
+      auto &SE = *MemCheckExp.getSE();
+      // Memory runtime check generation creates compares that use expanded
+      // values. Remove them before running the SCEVExpanderCleaners.
+      for (auto &I : make_early_inc_range(reverse(*MemCheckBlock))) {
+        if (MemCheckExp.isInsertedInstruction(&I))
+          continue;
+        SE.forgetValue(&I);
+        SE.eraseValueFromMap(&I);
+        I.eraseFromParent();
+      }
+    }
+    MemCheckCleaner.cleanup();
+    SCEVCleaner.cleanup();
+
+    // Non-null conds at this point mean the checks were never emitted; drop
+    // the detached blocks entirely.
+    if (SCEVCheckCond)
+      SCEVCheckBlock->eraseFromParent();
+    if (MemRuntimeCheckCond)
+      MemCheckBlock->eraseFromParent();
+  }
+
+  /// Adds the generated SCEVCheckBlock before \p LoopVectorPreHeader and
+  /// adjusts the branches to branch to the vector preheader or \p Bypass,
+  /// depending on the generated condition.
+  /// NOTE(review): \p L and \p LoopExitBlock are currently unused here —
+  /// confirm whether they can be dropped from the signature.
+  BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass,
+                             BasicBlock *LoopVectorPreHeader,
+                             BasicBlock *LoopExitBlock) {
+    if (!SCEVCheckCond)
+      return nullptr;
+    // A constant-false check can never trigger the bypass; skip emission.
+    if (auto *C = dyn_cast<ConstantInt>(SCEVCheckCond))
+      if (C->isZero())
+        return nullptr;
+
+    auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
+
+    // NOTE(review): this branch becomes the block terminator and is erased by
+    // the getTerminator()->eraseFromParent() call below, leaving the
+    // placeholder 'unreachable' for ReplaceInstWithInst — confirm the extra
+    // BranchInst is intentional.
+    BranchInst::Create(LoopVectorPreHeader, SCEVCheckBlock);
+    // Create new preheader for vector loop.
+    if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
+      PL->addBasicBlockToLoop(SCEVCheckBlock, *LI);
+
+    SCEVCheckBlock->getTerminator()->eraseFromParent();
+    SCEVCheckBlock->moveBefore(LoopVectorPreHeader);
+    Pred->getTerminator()->replaceSuccessorWith(LoopVectorPreHeader,
+                                                SCEVCheckBlock);
+
+    DT->addNewBlock(SCEVCheckBlock, Pred);
+    DT->changeImmediateDominator(LoopVectorPreHeader, SCEVCheckBlock);
+
+    ReplaceInstWithInst(
+        SCEVCheckBlock->getTerminator(),
+        BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheckCond));
+    // Mark the check as used, to prevent it from being removed during cleanup.
+    SCEVCheckCond = nullptr;
+    return SCEVCheckBlock;
+  }
+
+  /// Adds the generated MemCheckBlock before \p LoopVectorPreHeader and adjusts
+  /// the branches to branch to the vector preheader or \p Bypass, depending on
+  /// the generated condition.
+  BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass,
+                                   BasicBlock *LoopVectorPreHeader) {
+    // Check if we generated code that checks in runtime if arrays overlap.
+    if (!MemRuntimeCheckCond)
+      return nullptr;
+
+    auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
+    Pred->getTerminator()->replaceSuccessorWith(LoopVectorPreHeader,
+                                                MemCheckBlock);
+
+    DT->addNewBlock(MemCheckBlock, Pred);
+    DT->changeImmediateDominator(LoopVectorPreHeader, MemCheckBlock);
+    MemCheckBlock->moveBefore(LoopVectorPreHeader);
+
+    if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
+      PL->addBasicBlockToLoop(MemCheckBlock, *LI);
+
+    ReplaceInstWithInst(
+        MemCheckBlock->getTerminator(),
+        BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond));
+    // Reuse the predecessor branch's debug location for the new terminator.
+    MemCheckBlock->getTerminator()->setDebugLoc(
+        Pred->getTerminator()->getDebugLoc());
+
+    // Mark the check as used, to prevent it from being removed during cleanup.
+    MemRuntimeCheckCond = nullptr;
+    return MemCheckBlock;
+  }
+};
+
// Return true if \p OuterLp is an outer loop annotated with hints for explicit
// vectorization. The loop needs to be annotated with #pragma omp simd
// simdlen(#) or #pragma clang vectorize(enable) vectorize_width(#). If the
LoopBypassBlocks.push_back(TCCheckBlock);
}
-void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
- // Reuse existing vector loop preheader for SCEV checks.
- // Note that new preheader block is generated for vector loop.
- BasicBlock *const SCEVCheckBlock = LoopVectorPreHeader;
-
- // Generate the code to check that the SCEV assumptions that we made.
- // We want the new basic block to start at the first instruction in a
- // sequence of instructions that form a check.
- SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
- "scev.check");
- Value *SCEVCheck = Exp.expandCodeForPredicate(
- &PSE.getUnionPredicate(), SCEVCheckBlock->getTerminator());
-
- if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
- if (C->isZero())
- return;
+BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
+
+ BasicBlock *const SCEVCheckBlock =
+ RTChecks.emitSCEVChecks(L, Bypass, LoopVectorPreHeader, LoopExitBlock);
+ if (!SCEVCheckBlock)
+ return nullptr;
assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
(OptForSizeBasedOnProfile &&
Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) &&
"Cannot SCEV check stride or overflow when optimizing for size");
- SCEVCheckBlock->setName("vector.scevcheck");
- // Create new preheader for vector loop.
- LoopVectorPreHeader =
- SplitBlock(SCEVCheckBlock, SCEVCheckBlock->getTerminator(), DT, LI,
- nullptr, "vector.ph");
// Update dominator only if this is first RT check.
if (LoopBypassBlocks.empty()) {
DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock);
}
- ReplaceInstWithInst(
- SCEVCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheck));
LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;
+ return SCEVCheckBlock;
}
-void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
+BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
+ BasicBlock *Bypass) {
// VPlan-native path does not do any analysis for runtime checks currently.
if (EnableVPlanNativePath)
- return;
+ return nullptr;
- // Reuse existing vector loop preheader for runtime memory checks.
- // Note that new preheader block is generated for vector loop.
- BasicBlock *const MemCheckBlock = L->getLoopPreheader();
+ BasicBlock *const MemCheckBlock =
+ RTChecks.emitMemRuntimeChecks(L, Bypass, LoopVectorPreHeader);
- // Generate the code that checks in runtime if arrays overlap. We put the
- // checks into a separate block to make the more common case of few elements
- // faster.
- auto *LAI = Legal->getLAI();
- const auto &RtPtrChecking = *LAI->getRuntimePointerChecking();
- if (!RtPtrChecking.Need)
- return;
+  // Check if we generated code that checks at runtime if arrays overlap. The
+  // checks live in a separate block, created up-front by GeneratedRTChecks, to
+  // make the more common case of few elements faster.
+ if (!MemCheckBlock)
+ return nullptr;
if (MemCheckBlock->getParent()->hasOptSize() || OptForSizeBasedOnProfile) {
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
});
}
- MemCheckBlock->setName("vector.memcheck");
- // Create new preheader for vector loop.
- LoopVectorPreHeader =
- SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr,
- "vector.ph");
-
- auto *CondBranch = cast<BranchInst>(
- Builder.CreateCondBr(Builder.getTrue(), Bypass, LoopVectorPreHeader));
- ReplaceInstWithInst(MemCheckBlock->getTerminator(), CondBranch);
LoopBypassBlocks.push_back(MemCheckBlock);
- AddedSafetyChecks = true;
-
- // Update dominator only if this is first RT check.
- if (LoopBypassBlocks.empty()) {
- DT->changeImmediateDominator(Bypass, MemCheckBlock);
- DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
- }
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
- SCEVExpander Exp(*PSE.getSE(), MemCheckBlock->getModule()->getDataLayout(),
- "induction");
- std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks(
- MemCheckBlock->getTerminator(), OrigLoop, RtPtrChecking.getChecks(), Exp);
- assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
- "claimed checks are required");
- CondBranch->setCondition(MemRuntimeCheck);
+ AddedSafetyChecks = true;
// We currently don't use LoopVersioning for the actual loop cloning but we
// still use it to add the noalias metadata.
Legal->getLAI()->getRuntimePointerChecking()->getChecks(), OrigLoop, LI,
DT, PSE.getSE());
LVer->prepareNoAliasMetadata();
+ return MemCheckBlock;
}
Value *InnerLoopVectorizer::emitTransformedIndex(
// Generate the code to check any assumptions that we've made for SCEV
// expressions.
- BasicBlock *SavedPreHeader = LoopVectorPreHeader;
- emitSCEVChecks(Lp, LoopScalarPreHeader);
-
- // If a safety check was generated save it.
- if (SavedPreHeader != LoopVectorPreHeader)
- EPI.SCEVSafetyCheck = SavedPreHeader;
+ EPI.SCEVSafetyCheck = emitSCEVChecks(Lp, LoopScalarPreHeader);
// Generate the code that checks at runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
- SavedPreHeader = LoopVectorPreHeader;
- emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
-
- // If a safety check was generated save/overwite it.
- if (SavedPreHeader != LoopVectorPreHeader)
- EPI.MemSafetyCheck = SavedPreHeader;
+ EPI.MemSafetyCheck = emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
// Generate the iteration count check for the main loop, *after* the check
// for the epilogue loop, so that the path-length is shorter for the case
LVP.setBestPlan(VF.Width, 1);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
- &CM, BFI, PSI);
- LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
- << L->getHeader()->getParent()->getName() << "\"\n");
- LVP.executePlan(LB, DT);
+ {
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ F->getParent()->getDataLayout());
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
+ &CM, BFI, PSI, Checks);
+ LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
+ << L->getHeader()->getParent()->getName() << "\"\n");
+ LVP.executePlan(LB, DT);
+ }
// Mark the loop as already vectorized to avoid vectorizing again.
Hints.setAlreadyVectorized();
-
assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()));
return true;
}
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
}
- LVP.setBestPlan(VF.Width, IC);
-
- using namespace ore;
bool DisableRuntimeUnroll = false;
MDNode *OrigLoopID = L->getLoopID();
+ {
+    // Optimistically generate runtime checks. Drop them if they turn out not
+    // to be profitable. Limit the scope of Checks so the cleanup happens
+    // immediately after vector code generation is done.
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ F->getParent()->getDataLayout());
+ if (!VF.Width.isScalar() || IC > 1)
+ Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
+ LVP.setBestPlan(VF.Width, IC);
+
+ using namespace ore;
+ if (!VectorizeLoop) {
+ assert(IC > 1 && "interleave count should not be 1 or 0");
+ // If we decided that it is not legal to vectorize the loop, then
+ // interleave it.
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
+ &CM, BFI, PSI, Checks);
+ LVP.executePlan(Unroller, DT);
- if (!VectorizeLoop) {
- assert(IC > 1 && "interleave count should not be 1 or 0");
- // If we decided that it is not legal to vectorize the loop, then
- // interleave it.
- InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM,
- BFI, PSI);
- LVP.executePlan(Unroller, DT);
-
- ORE->emit([&]() {
- return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
- L->getHeader())
- << "interleaved loop (interleaved count: "
- << NV("InterleaveCount", IC) << ")";
- });
- } else {
- // If we decided that it is *legal* to vectorize the loop, then do it.
-
- // Consider vectorizing the epilogue too if it's profitable.
- VectorizationFactor EpilogueVF =
- CM.selectEpilogueVectorizationFactor(VF.Width, LVP);
- if (EpilogueVF.Width.isVector()) {
-
- // The first pass vectorizes the main loop and creates a scalar epilogue
- // to be vectorized by executing the plan (potentially with a different
- // factor) again shortly afterwards.
- EpilogueLoopVectorizationInfo EPI(VF.Width.getKnownMinValue(), IC,
- EpilogueVF.Width.getKnownMinValue(), 1);
- EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI,
- &LVL, &CM, BFI, PSI);
-
- LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
- LVP.executePlan(MainILV, DT);
- ++LoopsVectorized;
-
- simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
- formLCSSARecursively(*L, *DT, LI, SE);
-
- // Second pass vectorizes the epilogue and adjusts the control flow
- // edges from the first pass.
- LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
- EPI.MainLoopVF = EPI.EpilogueVF;
- EPI.MainLoopUF = EPI.EpilogueUF;
- EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
- ORE, EPI, &LVL, &CM, BFI, PSI);
- LVP.executePlan(EpilogILV, DT);
- ++LoopsEpilogueVectorized;
-
- if (!MainILV.areSafetyChecksAdded())
- DisableRuntimeUnroll = true;
+ ORE->emit([&]() {
+ return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
+ L->getHeader())
+ << "interleaved loop (interleaved count: "
+ << NV("InterleaveCount", IC) << ")";
+ });
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
- &LVL, &CM, BFI, PSI);
- LVP.executePlan(LB, DT);
- ++LoopsVectorized;
-
- // Add metadata to disable runtime unrolling a scalar loop when there are
- // no runtime checks about strides and memory. A scalar loop that is
- // rarely used is not worth unrolling.
- if (!LB.areSafetyChecksAdded())
- DisableRuntimeUnroll = true;
- }
+ // If we decided that it is *legal* to vectorize the loop, then do it.
+
+ // Consider vectorizing the epilogue too if it's profitable.
+ VectorizationFactor EpilogueVF =
+ CM.selectEpilogueVectorizationFactor(VF.Width, LVP);
+ if (EpilogueVF.Width.isVector()) {
+
+ // The first pass vectorizes the main loop and creates a scalar epilogue
+ // to be vectorized by executing the plan (potentially with a different
+ // factor) again shortly afterwards.
+ EpilogueLoopVectorizationInfo EPI(VF.Width.getKnownMinValue(), IC,
+ EpilogueVF.Width.getKnownMinValue(),
+ 1);
+ EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
+ EPI, &LVL, &CM, BFI, PSI, Checks);
+
+ LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
+ LVP.executePlan(MainILV, DT);
+ ++LoopsVectorized;
- // Report the vectorization decision.
- ORE->emit([&]() {
- return OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(),
- L->getHeader())
- << "vectorized loop (vectorization width: "
- << NV("VectorizationFactor", VF.Width)
- << ", interleaved count: " << NV("InterleaveCount", IC) << ")";
- });
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
+ formLCSSARecursively(*L, *DT, LI, SE);
+
+ // Second pass vectorizes the epilogue and adjusts the control flow
+ // edges from the first pass.
+ LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
+ EPI.MainLoopVF = EPI.EpilogueVF;
+ EPI.MainLoopUF = EPI.EpilogueUF;
+ EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
+ ORE, EPI, &LVL, &CM, BFI, PSI,
+ Checks);
+ LVP.executePlan(EpilogILV, DT);
+ ++LoopsEpilogueVectorized;
+
+ if (!MainILV.areSafetyChecksAdded())
+ DisableRuntimeUnroll = true;
+ } else {
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
+ &LVL, &CM, BFI, PSI, Checks);
+ LVP.executePlan(LB, DT);
+ ++LoopsVectorized;
+
+ // Add metadata to disable runtime unrolling a scalar loop when there
+ // are no runtime checks about strides and memory. A scalar loop that is
+ // rarely used is not worth unrolling.
+ if (!LB.areSafetyChecksAdded())
+ DisableRuntimeUnroll = true;
+ }
+ // Report the vectorization decision.
+ ORE->emit([&]() {
+ return OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(),
+ L->getHeader())
+ << "vectorized loop (vectorization width: "
+ << NV("VectorizationFactor", VF.Width)
+ << ", interleaved count: " << NV("InterleaveCount", IC) << ")";
+ });
+ }
if (ORE->allowExtraAnalysis(LV_NAME))
checkMixedPrecision(L, ORE);