#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
return CurrentDepth;
}
+const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From,
+ const BasicBlock *End) {
+ assert(From && "Expecting valid From");
+ assert(End && "Expecting valid End");
+
+ if (From == End || !From->getSingleSuccessor())
+ return *From;
+
+ auto IsEmpty = [](const BasicBlock *BB) {
+ return (BB->getInstList().size() == 1);
+ };
+
+ // Visited is used to avoid running into an infinite loop.
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ const BasicBlock *BB = From->getSingleSuccessor();
+ const BasicBlock *PredBB = BB;
+ while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) {
+ Visited.insert(BB);
+ PredBB = BB;
+ BB = BB->getSingleSuccessor();
+ }
+
+ return (BB == End) ? *End : *PredBB;
+}
+
static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE) {
// The inner loop must be the only outer loop's child.
// Ensure the only branch that may exist between the loops is the inner loop
// guard.
if (OuterLoopHeader != InnerLoopPreHeader) {
- const BranchInst *BI =
- dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
-
- if (!BI || BI != InnerLoop.getLoopGuardBranch())
- return false;
-
- bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit);
-
- // The successors of the inner loop guard should be the inner loop
- // preheader and the outer loop latch.
- for (const BasicBlock *Succ : BI->successors()) {
- if (Succ == InnerLoopPreHeader)
- continue;
- if (Succ == OuterLoopLatch)
- continue;
-
- // If `InnerLoopExit` contains LCSSA Phi instructions, additional block
- // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The
- // loops are still considered perfectly nested if the extra block only
- // contains Phi instructions from InnerLoopExit and OuterLoopHeader.
- if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) &&
- Succ->getSingleSuccessor() == OuterLoopLatch) {
- // Points to the extra block so that we can reference it later in the
- // final check. We can also conclude that the inner loop is
- // guarded and there exists LCSSA Phi node in the exit block later if we
- // see a non-null `ExtraPhiBlock`.
- ExtraPhiBlock = Succ;
- continue;
- }
+ const BasicBlock &SingleSucc =
+ LoopNest::skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader);
- DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "Inner loop guard successor " << Succ->getName()
- << " doesn't lead to inner loop preheader or "
- "outer loop latch.\n";
- });
- return false;
+ // no conditional branch present
+ if (&SingleSucc != InnerLoopPreHeader) {
+ const BranchInst *BI = dyn_cast<BranchInst>(SingleSucc.getTerminator());
+
+ if (!BI || BI != InnerLoop.getLoopGuardBranch())
+ return false;
+
+ bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit);
+
+ // The successors of the inner loop guard should be the inner loop
+ // preheader or the outer loop latch possibly through empty blocks.
+ for (const BasicBlock *Succ : BI->successors()) {
+ const BasicBlock *PotentialInnerPreHeader = Succ;
+ const BasicBlock *PotentialOuterLatch = Succ;
+
+ // Ensure the inner loop guard successor is empty before skipping
+ // blocks.
+ if (Succ->getInstList().size() == 1) {
+ PotentialInnerPreHeader =
+ &LoopNest::skipEmptyBlockUntil(Succ, InnerLoopPreHeader);
+ PotentialOuterLatch =
+ &LoopNest::skipEmptyBlockUntil(Succ, OuterLoopLatch);
+ }
+
+ if (PotentialInnerPreHeader == InnerLoopPreHeader)
+ continue;
+ if (PotentialOuterLatch == OuterLoopLatch)
+ continue;
+
+ // If `InnerLoopExit` contains LCSSA Phi instructions, additional block
+ // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The
+ // loops are still considered perfectly nested if the extra block only
+ // contains Phi instructions from InnerLoopExit and OuterLoopHeader.
+ if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) &&
+ Succ->getSingleSuccessor() == OuterLoopLatch) {
+ // Points to the extra block so that we can reference it later in the
+ // final check. We can also conclude that the inner loop is
+ // guarded and there exists LCSSA Phi node in the exit block later if
+ // we see a non-null `ExtraPhiBlock`.
+ ExtraPhiBlock = Succ;
+ continue;
+ }
+
+ DEBUG_WITH_TYPE(VerboseDebug, {
+ dbgs() << "Inner loop guard successor " << Succ->getName()
+ << " doesn't lead to inner loop preheader or "
+ "outer loop latch.\n";
+ });
+ return false;
+ }
}
}
- // Ensure the inner loop exit block leads to the outer loop latch.
- const BasicBlock *SuccInner = InnerLoopExit->getSingleSuccessor();
- if (!SuccInner ||
- (SuccInner != OuterLoopLatch && SuccInner != ExtraPhiBlock)) {
+ // Ensure the inner loop exit block lead to the outer loop latch possibly
+ // through empty blocks.
+ const BasicBlock &SuccInner =
+ LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch);
+ if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) {
DEBUG_WITH_TYPE(
VerboseDebug,
dbgs() << "Inner loop exit block " << *InnerLoopExit
ret void
}
+define void @perf_nest_2D_3(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) {
+; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_3_loop_j, Loops: ( perf_nest_2D_3_loop_j )
+; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_3_loop_i, Loops: ( perf_nest_2D_3_loop_i perf_nest_2D_3_loop_j )
+entry:
+ br label %perf_nest_2D_3_loop_i
+
+perf_nest_2D_3_loop_i:
+ %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ]
+ %cmp21 = icmp slt i64 0, %ny
+ br label %singleSucc
+
+singleSucc:
+ br i1 %cmp21, label %preheader.j, label %for.end
+
+preheader.j:
+ br label %perf_nest_2D_3_loop_j
+
+perf_nest_2D_3_loop_j:
+ %j = phi i64 [ 0, %preheader.j ], [ %inc, %inc_j ]
+ %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j
+ %0 = load i32*, i32** %arrayidx, align 8
+ %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j
+ %1 = load i32, i32* %arrayidx6, align 4
+ %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j
+ %2 = load i32*, i32** %arrayidx8, align 8
+ %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i
+ store i32 %1, i32* %arrayidx11, align 4
+ br label %inc_j
+
+inc_j:
+ %inc = add nsw i64 %j, 1
+ %cmp2 = icmp slt i64 %inc, %ny
+ br i1 %cmp2, label %perf_nest_2D_3_loop_j, label %for.exit
+
+for.exit:
+ br label %for.end
+
+for.end:
+ br label %inc_i
+
+inc_i:
+ %inc13 = add nsw i64 %i, 1
+ %cmp = icmp slt i64 %inc13, %nx
+ br i1 %cmp, label %perf_nest_2D_3_loop_i, label %perf_nest_2D_3_loop_i_end
+
+perf_nest_2D_3_loop_i_end:
+ ret void
+}
+
; Test a perfect 3-dim loop nest of the form:
; for (i=0; i<nx; ++i)
; for (j=0; j<ny; ++j)