"invariance in loop using invariant start (default = 8)"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo);
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool FreeInLoop);
+ OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
namespace {
struct LoopInvariantCodeMotion {
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- ScalarEvolution *SE, MemorySSA *MSSA,
+ TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);
DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
}
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (EnableMSSALoopDependency)
AU.addRequired<MemorySSAWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
"cached at a higher level");
LoopInvariantCodeMotion LICM;
- if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
- AR.MSSA, ORE, true))
+ if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE,
+ true))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
-bool LoopInvariantCodeMotion::runOnLoop(
- Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
- MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
+bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
+ LoopInfo *LI, DominatorTree *DT,
+ TargetLibraryInfo *TLI,
+ ScalarEvolution *SE, MemorySSA *MSSA,
+ OptimizationRemarkEmitter *ORE,
+ bool DeleteAST) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST, &SafetyInfo, ORE);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, Loop *CurLoop,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- bool FreeInLoop = false;
- if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
+ if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
- if (!FreeInLoop) {
- ++II;
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) {
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
Changed = true;
}
}
return true;
}
-/// Return true if the instruction is free in the loop.
-static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const TargetTransformInfo *TTI) {
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- if (TTI->getUserCost(&I) != TargetTransformInfo::TCC_Free)
- return false;
- // For a GEP, we cannot simply use getUserCost because currently it
- // optimistically assume that a GEP will fold into addressing mode
- // regardless of its users.
- const BasicBlock *BB = I.getParent();
- for (const User *U : I.users()) {
- const Instruction *UI = cast<Instruction>(U);
- if (CurLoop->contains(UI) &&
- (BB != UI->getParent() ||
- (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
- return false;
- }
- return true;
- } else
- return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
-}
-
/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
-/// We also return true if the instruction could be folded away in lowering.
-/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo) {
const auto &BlockColors = SafetyInfo->BlockColors;
- bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
return false;
}
- if (CurLoop->contains(UI)) {
- if (IsFree) {
- FreeInLoop = true;
- continue;
- }
+ if (CurLoop->contains(UI))
return false;
- }
}
return true;
}
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
+ OptimizationRemarkEmitter *ORE) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumSunk;
+ Changed = true;
// Iterate over users to be ready for actual sinking. Replace users via
// unrechable blocks with undef and make all user PHIs trivially replcable.
Use &U = UI.getUse();
++UI;
- if (VisitedUsers.count(User) || CurLoop->contains(User))
+ if (VisitedUsers.count(User))
continue;
if (!DT->isReachableFromEntry(User->getParent())) {
U = UndefValue::get(I.getType());
- Changed = true;
continue;
}
BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {
U = UndefValue::get(I.getType());
- Changed = true;
continue;
}
continue;
if (!canSplitPredecessors(PN))
- return Changed;
+ return false;
// Split predecessors of the PHI so that we can make users trivially
// replacable.
UE = I.user_end();
}
- if (VisitedUsers.empty())
- return Changed;
-
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
- SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
- for (auto *UI : Users) {
- auto *User = cast<Instruction>(UI);
-
- if (CurLoop->contains(User))
- continue;
-
- PHINode *PN = cast<PHINode>(User);
+ while (!I.use_empty()) {
+ Value::user_iterator UI = I.user_begin();
+ PHINode *PN = cast<PHINode>(*UI);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
// The PHI must be trivially replacable.
SafetyInfo, CurLoop);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
- Changed = true;
}
return Changed;
}
+++ /dev/null
-; RUN: opt < %s -licm -S | FileCheck %s
-target triple = "aarch64--linux-gnueabi"
-
-; CHECK-LABEL:@test1
-; CHECK-LABEL:loopexit1:
-; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
-; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1
-define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) {
-entry:
- %cmp0 = icmp slt i32 0, %j
- br i1 %cmp0, label %for.body.lr.ph, label %return
-
-for.body.lr.ph:
- br label %for.body
-
-for.body:
- %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
- %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
-
- %i0.ext = sext i32 %i0 to i64
- %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
- %l0 = load i8*, i8** %arrayidx0, align 8
- %cmp1 = icmp ugt i8* %l0, %Q
- br i1 %cmp1, label %loopexit0, label %if.end
-
-if.end: ; preds = %for.body
- %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1
- %l1 = load i8*, i8** %arrayidx1, align 8
- %cmp4 = icmp ugt i8* %l1, %Q
- %i.add = add nsw i32 %i0, 2
- br i1 %cmp4, label %loopexit1, label %for.body
-
-loopexit0:
- %p1 = phi i8** [%arrayidx0, %for.body]
- br label %return
-
-loopexit1:
- %p2 = phi i8** [%arrayidx1, %if.end]
- br label %return
-
-return:
- %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
- ret i8** %retval.0
-}
-
-; CHECK-LABEL: @test2
-; CHECK-LABEL: loopexit2:
-; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ]
-; CHECK: getelementptr inbounds i8*, i8** %[[PHI]]
-define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) {
-
-entry:
- br label %for.body
-
-for.cond:
- %i.addr.0 = phi i32 [ %add, %if.end ]
- %P.addr.0 = phi i8** [ %add.ptr, %if.end ]
- %cmp = icmp slt i32 %i.addr.0, %j
- br i1 %cmp, label %for.body, label %loopexit0
-
-for.body:
- %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ]
- %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ]
-
- %idx.ext = sext i32 %i.addr to i64
- %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext
- %l0 = load i8*, i8** %add.ptr, align 8
-
- %cmp1 = icmp ugt i8* %l0, %Q
- br i1 %cmp1, label %loopexit1, label %if.end
-
-if.end:
- %add.i = add i32 %i.addr, 1
- %idx2.ext = sext i32 %add.i to i64
- %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext
- %l1 = load i8*, i8** %arrayidx2, align 8
- %cmp2 = icmp ugt i8* %l1, %Q
- %add = add nsw i32 %add.i, 1
- br i1 %cmp2, label %loopexit2, label %for.cond
-
-loopexit0:
- %p0 = phi i8** [ null, %for.cond ]
- br label %return
-
-loopexit1:
- %p1 = phi i8** [ %add.ptr, %for.body ]
- br label %return
-
-loopexit2:
- %p2 = phi i8** [ %arrayidx2, %if.end ]
- br label %return
-
-return:
- %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ]
- ret i8** %retval.0
-}
-
-
-; CHECK-LABEL: @test3
-; CHECK-LABEL: loopexit1:
-; CHECK: %[[ADD:.*]] = phi i64 [ %add, %if.end ]
-; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ]
-; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32
-; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]]
-; CHECK: call void @dummy(i32 %[[TRUNC]])
-define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) {
-entry:
- %cmp0 = icmp slt i64 0, %j
- br i1 %cmp0, label %for.body.lr.ph, label %return
-
-for.body.lr.ph:
- br label %for.body
-
-for.body:
- %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
- %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
-
- %i0.ext = sext i32 %i0 to i64
- %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
- %l0 = load i8*, i8** %arrayidx0, align 8
- %cmp1 = icmp ugt i8* %l0, %Q
- br i1 %cmp1, label %loopexit0, label %if.end
-
-if.end: ; preds = %for.body
- %add = add i64 %i0.ext, 1
- %trunc = trunc i64 %add to i32
- %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc
- %l1 = load i8*, i8** %arrayidx1, align 8
- %cmp4 = icmp ugt i8* %l1, %Q
- %i.add = add nsw i32 %i0, 2
- br i1 %cmp4, label %loopexit1, label %for.body
-
-loopexit0:
- %p1 = phi i8** [%arrayidx0, %for.body]
- br label %return
-
-loopexit1:
- %p2 = phi i8** [%arrayidx1, %if.end]
- call void @dummy(i32 %trunc)
- br label %return
-
-return:
- %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
- ret i8** %retval.0
-}
-
-declare void @dummy(i32)