From: maekawatoshiki Date: Thu, 12 Aug 2021 15:53:03 +0000 (+0900) Subject: [LICM] Support sinking in LNICM X-Git-Tag: upstream/15.0.7~34062 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dd3eea65662a675b78ac2319fd675a22345f5ca3;p=platform%2Fupstream%2Fllvm.git [LICM] Support sinking in LNICM Currently, LNICM pass does not support sinking instructions out of loop nest. This patch enables LNICM to sink down as many instructions to the exit block of outermost loop as possible. Reviewed By: Whitney Differential Revision: https://reviews.llvm.org/D107219 --- diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 247b911..9428e2d 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -147,11 +147,23 @@ protected: /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. +/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when +/// this function is called by \p sinkRegionForLoopNest. bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, - TargetTransformInfo *, Loop *, AliasSetTracker *, + TargetTransformInfo *, Loop *CurLoop, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, + Loop *OutermostLoop = nullptr); + +/// Call sinkRegion on loops contained within the specified loop +/// in order from innermost to outermost. +bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, + DominatorTree *, BlockFrequencyInfo *, + TargetLibraryInfo *, TargetTransformInfo *, Loop *, + AliasSetTracker *, MemorySSAUpdater *, + ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 30058df..33c5abf 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -151,7 +151,8 @@ cl::opt llvm::SetLicmMssaNoAccForPromotionCap( static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop); + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, @@ -436,8 +437,13 @@ bool LoopInvariantCodeMotion::runOnLoop( // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) Changed |= - sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); + LoopNestMode + ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT, + BFI, TLI, TTI, L, CurAST.get(), MSSAU.get(), + &SafetyInfo, *Flags.get(), ORE) + : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, + L, CurAST.get(), MSSAU.get(), &SafetyInfo, + *Flags.get(), ORE); Flags->setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, @@ -555,7 +561,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -598,8 +604,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // operands of the instruction are loop invariant. // bool FreeInLoop = false; + bool LoopNestMode = OutermostLoop != nullptr; if (!I.mayHaveSideEffects() && - isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && + isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, + SafetyInfo, TTI, FreeInLoop, LoopNestMode) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, ORE)) { if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { @@ -618,6 +626,26 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, return Changed; } +bool llvm::sinkRegionForLoopNest( + DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, + BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, + OptimizationRemarkEmitter *ORE) { + + bool Changed = false; + SmallPriorityWorklist Worklist; + Worklist.insert(CurLoop); + appendLoopsToWorklist(*CurLoop, Worklist); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + Changed |= + sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, + CurAST, MSSAU, SafetyInfo, Flags, ORE, CurLoop); + } + return Changed; +} + namespace { // This is a helper class for hoistRegion to make it able to hoist control flow // in order to be able to hoist phis. The way this works is that we initially @@ -1443,7 +1471,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop) { + TargetTransformInfo *TTI, bool &FreeInLoop, + bool LoopNestMode) { const auto &BlockColors = SafetyInfo->getBlockColors(); bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { @@ -1460,6 +1489,15 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; + + if (LoopNestMode) { + while (isa(UI) && UI->hasOneUser() && + UI->getNumOperands() == 1) { + if (!CurLoop->contains(UI)) + break; + UI = cast(U->user_back()); + } + } } if (CurLoop->contains(UI)) { diff --git a/llvm/test/Transforms/LICM/lnicm-sink.ll b/llvm/test/Transforms/LICM/lnicm-sink.ll new file mode 100644 index 0000000..0dc47e5 --- /dev/null +++ b/llvm/test/Transforms/LICM/lnicm-sink.ll @@ -0,0 +1,68 @@ +; RUN: opt -passes='loop(licm)' -S %s | FileCheck %s --check-prefixes CHECK,LICM +; RUN: opt -passes='loop(lnicm)' -S %s | FileCheck %s --check-prefixes CHECK,LNICM + +; This test represents the following function: +; +; double sin(double); +; int abs(int); +; double test(double x, int y[10]) { +; double t = 0; int s = 0; +; for (int i = 0; i < 10; i++) { +; for (int j = 0; j < 10; j++) { +; t = sin(x); +; s = abs(i); +; } +; y[i] = s; +; } +; return t; +; } +; +; We only want to sink the call of sin out of the loop nest. +; LICM also sinks the call of abs out of j-loop, but LNICM doesn't do so +; to try to make a perfect loop nest. (though y[i] = s; still prevents the +; loop nest from being a perfect loop nest in this test case) + +define dso_local double @test(double %x, i32* noalias %y) { +entry: + br label %for.body + +for.body: + %i.02 = phi i32 [ 0, %entry ], [ %inc6, %for.end ] + br label %for.body3 + +; CHECK: for.body3: +; LNICM: call i32 @abs(i32 %i.02) +; LICM-NOT: call i32 @abs(i32 %i.02) +for.body3: + %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] + %call = call double @sin(double %x) + %call4 = call i32 @abs(i32 %i.02) + %inc = add nsw i32 %j.01, 1 + %cmp2 = icmp slt i32 %inc, 10 + br i1 %cmp2, label %for.body3, label %for.end + +; CHECK: for.end: +; LICM: call i32 @abs(i32 %i.02) +; LNICM-NOT: call i32 @abs(i32 %i.02) +for.end: + %s.1.lcssa = phi i32 [ %call4, %for.body3 ] + %t.1.lcssa = phi double [ %call, %for.body3 ] + %idxprom = sext i32 %i.02 to i64 + %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom + store i32 %s.1.lcssa, i32* %arrayidx, align 4 + %inc6 = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc6, 10 + br i1 %cmp, label %for.body, label %for.end7 + +; CHECK: for.end7: +; CHECK: call double @sin(double %x) +for.end7: + %t.0.lcssa = phi double [ %t.1.lcssa, %for.end ] + ret double %t.0.lcssa +} + +declare dso_local double @sin(double) #0 + +declare dso_local i32 @abs(i32) #0 + +attributes #0 = { nounwind readnone willreturn }