/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p CurLoop is the loop to sink instructions out of. \p OutermostLoop is
+/// optional (nullptr by default) and is used only when this function is called
+/// by sinkRegionForLoopNest.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *,
- TargetTransformInfo *, Loop *, AliasSetTracker *,
+ TargetTransformInfo *, Loop *CurLoop, AliasSetTracker *,
MemorySSAUpdater *, ICFLoopSafetyInfo *,
- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *,
+ Loop *OutermostLoop = nullptr);
+
+/// Call sinkRegion on the specified loop and on the loops contained within
+/// it, in order from innermost to outermost. The specified loop is passed to
+/// every sinkRegion call as the outermost loop of the nest. Returns true if
+/// any of those calls reported a change.
+bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
+ DominatorTree *, BlockFrequencyInfo *,
+ TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ AliasSetTracker *, MemorySSAUpdater *,
+ ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop);
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
// instructions, we perform another pass to hoist them out of the loop.
if (L->hasDedicatedExits())
Changed |=
- sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
+ LoopNestMode
+ ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
+ BFI, TLI, TTI, L, CurAST.get(), MSSAU.get(),
+ &SafetyInfo, *Flags.get(), ORE)
+ : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI,
+ L, CurAST.get(), MSSAU.get(), &SafetyInfo,
+ *Flags.get(), ORE);
Flags->setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
// operands of the instruction are loop invariant.
//
bool FreeInLoop = false;
+ bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
- isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
+ isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+ SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
return Changed;
}
+/// Run sinkRegion over \p CurLoop and the loops appended for it by
+/// appendLoopsToWorklist, popping them off a priority worklist one at a time.
+/// \p CurLoop is forwarded to every call as the OutermostLoop argument, which
+/// switches sinkRegion into its loop-nest mode. \p N is not read here: each
+/// call looks up the dominator tree node of the header of the loop being
+/// processed. Returns true if any sinkRegion call returned true.
+bool llvm::sinkRegionForLoopNest(
+    DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+    Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+    ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags,
+    OptimizationRemarkEmitter *ORE) {
+  // Seed the worklist with the root of the nest first, then let the helper
+  // append the loops that belong to it.
+  SmallPriorityWorklist<Loop *, 4> PendingLoops;
+  PendingLoops.insert(CurLoop);
+  appendLoopsToWorklist(*CurLoop, PendingLoops);
+
+  bool AnySunk = false;
+  while (!PendingLoops.empty()) {
+    Loop *Candidate = PendingLoops.pop_back_val();
+    DomTreeNode *HeaderNode = DT->getNode(Candidate->getHeader());
+    // The call must happen unconditionally; only its result is accumulated.
+    if (sinkRegion(HeaderNode, AA, LI, DT, BFI, TLI, TTI, Candidate, CurAST,
+                   MSSAU, SafetyInfo, Flags, ORE, CurLoop))
+      AnySunk = true;
+  }
+  return AnySunk;
+}
+
namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop) {
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
if (!BlockColors.empty() &&
BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
return false;
+
+ if (LoopNestMode) {
+   // In loop-nest mode, follow a chain of single-operand, single-user PHIs
+   // starting at UI. The walk stops at the first instruction that is not
+   // such a PHI, or at the first such PHI that lies outside CurLoop.
+   while (isa<PHINode>(UI) && UI->hasOneUser() &&
+          UI->getNumOperands() == 1) {
+     if (!CurLoop->contains(UI))
+       break;
+     // Advance from the PHI just examined. Stepping from the original user
+     // U instead would yield the same instruction on every iteration and
+     // never terminate once that instruction is itself a qualifying PHI
+     // inside CurLoop.
+     UI = cast<Instruction>(UI->user_back());
+   }
+ }
}
if (CurLoop->contains(UI)) {
--- /dev/null
+; RUN: opt -passes='loop(licm)' -S %s | FileCheck %s --check-prefixes CHECK,LICM
+; RUN: opt -passes='loop(lnicm)' -S %s | FileCheck %s --check-prefixes CHECK,LNICM
+
+; This test represents the following function:
+;
+; double sin(double);
+; int abs(int);
+; double test(double x, int y[10]) {
+; double t = 0; int s = 0;
+; for (int i = 0; i < 10; i++) {
+; for (int j = 0; j < 10; j++) {
+; t = sin(x);
+; s = abs(i);
+; }
+; y[i] = s;
+; }
+; return t;
+; }
+;
+; Only the call to sin should be sunk out of the whole loop nest.
+; LICM also sinks the call to abs out of the j-loop, but LNICM leaves it in
+; place so that the loop nest stays as close to a perfect loop nest as
+; possible (in this test case, y[i] = s; still keeps the nest from being
+; perfect).
+
+; IR for the C function shown above. The outer i-loop has header for.body and
+; latch for.end; the inner j-loop is the single block for.body3, which holds
+; both calls in the input.
+define dso_local double @test(double %x, i32* noalias %y) {
+entry:
+ br label %for.body
+
+for.body:
+ %i.02 = phi i32 [ 0, %entry ], [ %inc6, %for.end ]
+ br label %for.body3
+
+; CHECK: for.body3:
+; LNICM: call i32 @abs(i32 %i.02)
+; LICM-NOT: call i32 @abs(i32 %i.02)
+for.body3:
+ %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
+ %call = call double @sin(double %x)
+ %call4 = call i32 @abs(i32 %i.02)
+ %inc = add nsw i32 %j.01, 1
+ %cmp2 = icmp slt i32 %inc, 10
+ br i1 %cmp2, label %for.body3, label %for.end
+
+; CHECK: for.end:
+; LICM: call i32 @abs(i32 %i.02)
+; LNICM-NOT: call i32 @abs(i32 %i.02)
+for.end:
+; Single-incoming phis that carry the two call results out of the inner loop.
+ %s.1.lcssa = phi i32 [ %call4, %for.body3 ]
+ %t.1.lcssa = phi double [ %call, %for.body3 ]
+ %idxprom = sext i32 %i.02 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
+ store i32 %s.1.lcssa, i32* %arrayidx, align 4
+ %inc6 = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc6, 10
+ br i1 %cmp, label %for.body, label %for.end7
+
+; CHECK: for.end7:
+; CHECK: call double @sin(double %x)
+for.end7:
+; Exit block of the outer loop: the returned value originates from the sin
+; call, forwarded through the phi in for.end.
+ %t.0.lcssa = phi double [ %t.1.lcssa, %for.end ]
+ ret double %t.0.lcssa
+}
+
+declare dso_local double @sin(double) #0
+
+declare dso_local i32 @abs(i32) #0
+
+attributes #0 = { nounwind readnone willreturn }