bool UpperBound;
/// Allow peeling off loop iterations.
bool AllowPeeling;
+ /// Allow peeling off iterations of loops that contain nested loops (loop
+ /// nests). When false, only innermost loops are considered for peeling.
+ bool AllowLoopNestsPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
namespace llvm {
/// already reversed loops in LI.
/// FIXME: Consider changing the order in LoopInfo.
void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
+
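+/// Recursively create a new Loop for L and for each of its subloops.
+/// VM is expected to already map every block of L to its clone; those clones
+/// are the blocks added to the new loops. The new loop is added as a child of
+/// PL, or as a top-level loop of LI if PL is null. If LPM is non-null, each
+/// new loop is also added to it. Returns the new loop created for L.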
+Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+static cl::opt<bool> UnrollAllowLoopNestsPeeling(
+ "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
+ cl::desc("Allows loop nests to be peeled."));
+
static cl::opt<bool> UnrollUnrollRemainder(
"unroll-remainder", cl::Hidden,
cl::desc("Allow the loop remainder to be unrolled."));
UP.Force = false;
UP.UpperBound = false;
UP.AllowPeeling = true;
+ UP.AllowLoopNestsPeeling = false;
UP.UnrollAndJam = false;
UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+ UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;
return true;
}
-/// Recursively clone the specified loop and all of its children,
-/// mapping the blocks with the specified map.
-static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI,
- LPPassManager *LPM) {
- Loop &New = *LI->AllocateLoop();
- if (PL)
- PL->addChildLoop(&New);
- else
- LI->addTopLevelLoop(&New);
- LPM->addLoop(New);
-
- // Add all of the blocks in L to the new loop.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- if (LI->getLoopFor(*I) == L)
- New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
-
- // Add all of the subloops to the new loop.
- for (Loop *I : *L)
- cloneLoop(I, &New, VM, LI, LPM);
-
- return &New;
-}
-
/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
/// and remove (but not erase!) it from the function.
if (!canPeel(L))
return;
- // Only try to peel innermost loops.
- if (!L->empty())
+ // Only try to peel innermost loops by default.
+ // The constraint can be relaxed by the target in TTI.getUnrollingPreferences
+ // or by the flag -unroll-allow-loop-nests-peeling.
+ if (!UP.AllowLoopNestsPeeling && !L->empty())
return;
// If the user provided a peel count, use that.
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
NewBlocks.push_back(NewBB);
- if (ParentLoop)
+ // If an original block belongs directly to the loop L (not to one of its
+ // subloops), its copy belongs to ParentLoop (if any) after peeling. Copies
+ // of blocks in nested loops are handled by the cloneLoop() calls below.
+ if (ParentLoop && LI->getLoopFor(*BB) == L)
ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
}
}
+ // Recursively create the new Loop objects for nested loops, if any,
+ // to preserve LoopInfo.
+ for (Loop *ChildLoop : *L) {
+ cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
+ }
+
// Hook-up the control flow for the newly inserted blocks.
// The new header is hooked up directly to the "top", which is either
// the original loop preheader (for the first iteration) or the previous
SmallPriorityWorklist<Loop *, 4> &Worklist) {
appendReversedLoopsToWorklist(LI, Worklist);
}
+
+Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop &New = *LI->AllocateLoop();
+ if (PL)
+ PL->addChildLoop(&New);
+ else
+ LI->addTopLevelLoop(&New);
+
+ if (LPM) // LPM is optional; a null LPM skips the registration below.
+ LPM->addLoop(New);
+
+ // Add the VM-mapped clones of the blocks that belong directly to L.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+
+ // Recursively clone each subloop of L as a child of the new loop.
+ for (Loop *I : *L)
+ cloneLoop(I, &New, VM, LI, LPM);
+
+ return &New;
+}
ret void
}
-; In this case we cannot peel the inner loop, because the condition involves
-; the outer induction variable.
-define void @test5(i32 %k) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: for.body.lr.ph:
-; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
-; CHECK: outer.header:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
-; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: call void @f1()
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: if.else:
-; CHECK-NEXT: call void @f2()
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: for.inc:
-; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
-; CHECK: outer.inc:
-; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
-; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
-; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]]
-; CHECK: for.end:
-; CHECK-NEXT: ret void
-;
-for.body.lr.ph:
- br label %outer.header
-
-outer.header:
- %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
- br label %for.body
-
-for.body:
- %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
- %cmp1 = icmp ult i32 %j, 2
- br i1 %cmp1, label %if.then, label %if.else
-
-if.then:
- call void @f1()
- br label %for.inc
-
-if.else:
- call void @f2()
- br label %for.inc
-
-for.inc:
- %inc = add nsw i32 %i.05, 1
- %cmp = icmp slt i32 %inc, %k
- br i1 %cmp, label %for.body, label %outer.inc
-
-outer.inc:
- %j.inc = add nsw i32 %j, 1
- %outer.cmp = icmp slt i32 %j.inc, %k
- br i1 %outer.cmp, label %outer.header, label %for.end
-
-
-for.end:
- ret void
-}
-
; In this test, the condition involves 2 AddRecs. Without evaluating both
; AddRecs, we cannot prove that the condition becomes known in the loop body
; after peeling.
-define void @test6(i32 %k) {
-; CHECK-LABEL: @test6(
+define void @test5(i32 %k) {
+; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
ret void
}
-define void @test7(i32 %k) {
-; CHECK-LABEL: @test7(
+define void @test6(i32 %k) {
+; CHECK-LABEL: @test6(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:
ret void
}
-define void @test8(i32 %k) {
-; CHECK-LABEL: @test8(
+define void @test7(i32 %k) {
+; CHECK-LABEL: @test7(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:
; Comparison with non-monotonic predicate due to possible wrapping, loop
; body cannot be simplified.
-define void @test9(i32 %k) {
-; CHECK-LABEL: @test9(
+define void @test8(i32 %k) {
+; CHECK-LABEL: @test8(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
}
; CHECK-NOT: llvm.loop.unroll.disable
-define void @test_10__peel_first_iter_via_slt_pred(i32 %len) {
-; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred(
+define void @test_9__peel_first_iter_via_slt_pred(i32 %len) {
+; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
-define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) {
-; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred(
+define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) {
+; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; NOTE: here we should only peel the first iteration,
; i.e. all calls to sink() must stay in loop.
-define void @test12__peel_first_iter_via_eq_pred(i32 %len) {
-; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred(
+define void @test11__peel_first_iter_via_eq_pred(i32 %len) {
+; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; NOTE: here we should only peel the first iteration,
; i.e. all calls to sink() must stay in loop.
-define void @test13__peel_first_iter_via_ne_pred(i32 %len) {
-; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred(
+define void @test12__peel_first_iter_via_ne_pred(i32 %len) {
+; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
}
; No peeling is profitable here.
-define void @test14__ivar_mod2_is_1(i32 %len) {
-; CHECK-LABEL: @test14__ivar_mod2_is_1(
+define void @test13__ivar_mod2_is_1(i32 %len) {
+; CHECK-LABEL: @test13__ivar_mod2_is_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
}
; No peeling is profitable here.
-define void @test15__ivar_mod2_is_0(i32 %len) {
-; CHECK-LABEL: @test15__ivar_mod2_is_0(
+define void @test14__ivar_mod2_is_0(i32 %len) {
+; CHECK-LABEL: @test14__ivar_mod2_is_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
-; Similar to @test7, we need to peel one extra iteration, and we can't do that
+; Similar to @test6, we need to peel one extra iteration, and we can't do that
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
-define void @test16(i32 %k) {
-; CHECK-LABEL: @test16(
+define void @test15(i32 %k) {
+; CHECK-LABEL: @test15(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
ret void
}
-; Similar to @test8, we need to peel one extra iteration, and we can't do that
+; Similar to @test7, we need to peel one extra iteration, and we can't do that
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
-define void @test17(i32 %k) {
-; CHECK-LABEL: @test17(
+define void @test16(i32 %k) {
+; CHECK-LABEL: @test16(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix PEELED
+
+declare void @f1()
+declare void @f2()
+
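+; In this case we cannot peel the inner loop, because the condition involves
+; the outer induction variable, so by default nothing is peeled (CHECK lines).
+; With the flag -unroll-allow-loop-nests-peeling the outer loop is peeled
+; instead, together with its nested inner loop (PEELED lines).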
+define void @test1(i32 %k) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: for.body.lr.ph:
+; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
+; CHECK: outer.header:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: call void @f1()
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: if.else:
+; CHECK-NEXT: call void @f2()
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
+; CHECK: outer.inc:
+; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
+; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
+; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]], !llvm.loop !{{.*}}
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+; PEELED-LABEL: @test1(
+; PEELED-NEXT: for.body.lr.ph:
+; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_BEGIN:%.*]]
+; PEELED: outer.header.peel.begin:
+; PEELED-NEXT: br label [[OUTER_HEADER_PEEL:%.*]]
+; PEELED: outer.header.peel:
+; PEELED-NEXT: br label [[FOR_BODY_PEEL:%.*]]
+; PEELED: for.body.peel:
+; PEELED-NEXT: [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ]
+; PEELED-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2
+; PEELED-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
+; PEELED: if.else.peel:
+; PEELED-NEXT: call void @f2()
+; PEELED-NEXT: br label [[FOR_INC_PEEL]]
+; PEELED: if.then.peel:
+; PEELED-NEXT: call void @f1()
+; PEELED-NEXT: br label [[FOR_INC_PEEL]]
+; PEELED: for.inc.peel:
+; PEELED-NEXT: [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1
+; PEELED-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
+; PEELED-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]]
+; PEELED: outer.inc.peel:
+; PEELED-NEXT: [[J_INC_PEEL:%.*]] = add nsw i32 0, 1
+; PEELED-NEXT: [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]]
+; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; PEELED-NOT: , !llvm.loop !{{[0-9]*}}
+; PEELED: outer.header.peel.next:
+; PEELED-NEXT: br label [[OUTER_HEADER_PEEL2:%.*]]
+; PEELED: outer.header.peel2:
+; PEELED-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
+; PEELED: for.body.peel3:
+; PEELED-NEXT: [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ]
+; PEELED-NEXT: [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2
+; PEELED-NEXT: br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]]
+; PEELED: if.else.peel6:
+; PEELED-NEXT: call void @f2()
+; PEELED-NEXT: br label [[FOR_INC_PEEL8]]
+; PEELED: if.then.peel7:
+; PEELED-NEXT: call void @f1()
+; PEELED-NEXT: br label [[FOR_INC_PEEL8]]
+; PEELED: for.inc.peel8:
+; PEELED-NEXT: [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1
+; PEELED-NEXT: [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]]
+; PEELED-NEXT: br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]]
+; PEELED: outer.inc.peel11:
+; PEELED-NEXT: [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1
+; PEELED-NEXT: [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]]
+; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; PEELED-NOT: , !llvm.loop !{{[0-9]*}}
+; PEELED: outer.header.peel.next1:
+; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_NEXT14:%.*]]
+; PEELED: outer.header.peel.next14:
+; PEELED-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
+; PEELED: for.body.lr.ph.peel.newph:
+; PEELED-NEXT: br label [[OUTER_HEADER:%.*]]
+; PEELED: outer.header:
+; PEELED-NEXT: [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
+; PEELED-NEXT: br label [[FOR_BODY:%.*]]
+; PEELED: for.body:
+; PEELED-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; PEELED-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; PEELED: if.then:
+; PEELED-NEXT: call void @f1()
+; PEELED-NEXT: br label [[FOR_INC]]
+; PEELED: if.else:
+; PEELED-NEXT: call void @f2()
+; PEELED-NEXT: br label [[FOR_INC]]
+; PEELED: for.inc:
+; PEELED-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
+; PEELED-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
+; PEELED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
+; PEELED: outer.inc:
+; PEELED-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
+; PEELED-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
+; PEELED-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
+; PEELED: for.end.loopexit:
+; PEELED-NEXT: br label [[FOR_END]]
+; PEELED: for.end:
+; PEELED-NEXT: ret void
+;
+for.body.lr.ph:
+ br label %outer.header
+
+outer.header:
+ %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
+ br label %for.body
+
+for.body:
+ %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
+ %cmp1 = icmp ult i32 %j, 2
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+ call void @f1()
+ br label %for.inc
+
+if.else:
+ call void @f2()
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.05, 1
+ %cmp = icmp slt i32 %inc, %k
+ br i1 %cmp, label %for.body, label %outer.inc
+
+outer.inc:
+ %j.inc = add nsw i32 %j, 1
+ %outer.cmp = icmp slt i32 %j.inc, %k
+ br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !0
+
+for.end:
+ ret void
+}
+
+!0 = distinct !{!0}