std::pair<bool, Optional<unsigned>>
haveIdenticalTripCounts(const FusionCandidate &FC0,
const FusionCandidate &FC1) const {
-
const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
if (isa<SCEVCouldNotCompute>(TripCount0)) {
UncomputableTripCount++;
return Fused;
}
+ // Determines whether the instruction \p I may be moved to the end of the
+ // preheader of \p FC0.
+ //
+ // \p SafeToHoist lists the instructions already known to be safe to hoist;
+ // \p NotHoisting lists the instructions encountered so far that cannot be
+ // hoisted. Returns true when hoisting \p I is considered safe.
+ // TODO: Move functionality into CodeMoverUtils
+ bool canHoistInst(Instruction &I,
+                   const SmallVector<Instruction *, 4> &SafeToHoist,
+                   const SmallVector<Instruction *, 4> &NotHoisting,
+                   const FusionCandidate &FC0) const {
+   const BasicBlock *HoistTarget = FC0.Preheader->getSingleSuccessor();
+   assert(HoistTarget && "Expected single successor for loop preheader.");
+
+   // Every instruction operand has to be available at the hoist point.
+   for (Use &Operand : I.operands()) {
+     auto *DefInst = dyn_cast<Instruction>(Operand);
+     if (!DefInst)
+       continue;
+     // An operand we already decided to hoist does not dominate FC0 *yet*,
+     // but it will once it has been moved.
+     if (is_contained(SafeToHoist, DefInst))
+       continue;
+     if (!DT.dominates(DefInst, HoistTarget))
+       return false;
+   }
+
+   // Instructions that never touch memory need no dependence checks.
+   if (!I.mayReadOrWriteMemory())
+     return true;
+
+   LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");
+
+   // A flow, anti or output dependence on an instruction that is not being
+   // hoisted blocks hoisting \p I.
+   for (Instruction *StayingInst : NotHoisting) {
+     auto Dep = DI.depends(&I, StayingInst, true);
+     if (Dep && (Dep->isFlow() || Dep->isAnti() || Dep->isOutput())) {
+       LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "
+                            "preheader that is not being hoisted.\n");
+       return false;
+     }
+   }
+
+   // An anti dependence from a read recorded in FC0 blocks hoisting.
+   for (Instruction *Load : FC0.MemReads) {
+     auto Dep = DI.depends(Load, &I, true);
+     if (Dep && Dep->isAnti()) {
+       LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");
+       return false;
+     }
+   }
+
+   // A flow or output dependence from a write recorded in FC0 blocks hoisting.
+   for (Instruction *Store : FC0.MemWrites) {
+     auto Dep = DI.depends(Store, &I, true);
+     if (Dep && (Dep->isFlow() || Dep->isOutput())) {
+       LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");
+       return false;
+     }
+   }
+
+   return true;
+ }
+
+ // Determines whether the instruction \p I may be moved to the top of the
+ // exit block of \p FC1. Returns true when sinking \p I is considered safe.
+ // TODO: Move functionality into CodeMoverUtils
+ bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {
+   // A user located inside FC1's loop (for instance a phi in FC1) would come
+   // before the sunk definition, so the instruction can be neither hoisted
+   // nor sunk and the loops should not fuse.
+   for (User *Consumer : I.users()) {
+     auto *ConsumerInst{dyn_cast<Instruction>(Consumer)};
+     if (ConsumerInst && FC1.L->contains(ConsumerInst))
+       return false;
+   }
+
+   // Instructions that never touch memory need no dependence checks.
+   if (!I.mayReadOrWriteMemory())
+     return true;
+
+   // A flow dependence onto a read recorded in FC1 blocks sinking.
+   for (Instruction *Load : FC1.MemReads) {
+     auto Dep = DI.depends(&I, Load, true);
+     if (Dep && Dep->isFlow()) {
+       LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");
+       return false;
+     }
+   }
+
+   // An output or anti dependence onto a write recorded in FC1 blocks sinking.
+   for (Instruction *Store : FC1.MemWrites) {
+     auto Dep = DI.depends(&I, Store, true);
+     if (Dep && (Dep->isOutput() || Dep->isAnti())) {
+       LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");
+       return false;
+     }
+   }
+
+   return true;
+ }
+
/// Collect instructions in the \p FC1 Preheader that can be hoisted
/// to the \p FC0 Preheader or sunk into the \p FC1 Body
bool collectMovablePreheaderInsts(
SmallVector<Instruction *, 4> &SafeToHoist,
SmallVector<Instruction *, 4> &SafeToSink) const {
BasicBlock *FC1Preheader = FC1.Preheader;
+ // Save the instructions that are not being hoisted, so we know not to hoist
+ // mem insts that they dominate.
+ SmallVector<Instruction *, 4> NotHoisting;
+
for (Instruction &I : *FC1Preheader) {
// Can't move a branch
if (&I == FC1Preheader->getTerminator())
// TODO: The case of mayReadFromMemory we can handle but requires
// additional work with a dependence analysis so for now we give
// up on memory reads.
- if (I.mayHaveSideEffects() || I.mayReadFromMemory()) {
- LLVM_DEBUG(dbgs() << "Inst: " << I << " may have side-effects.\n");
+ if (I.mayThrow() || !I.willReturn()) {
+ LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");
return false;
}
LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
- // First check if can be hoisted
- // If the operands of this instruction dominate the FC0 Preheader
- // target block, then it is safe to move them to the end of the FC0
- const BasicBlock *FC0PreheaderTarget =
- FC0.Preheader->getSingleSuccessor();
- assert(FC0PreheaderTarget &&
- "Expected single successor for loop preheader.");
- bool CanHoistInst = true;
- for (Use &Op : I.operands()) {
- if (auto *OpInst = dyn_cast<Instruction>(Op)) {
- bool OpHoisted = is_contained(SafeToHoist, OpInst);
- // Check if we have already decided to hoist this operand. In this
- // case, it does not dominate FC0 *yet*, but will after we hoist it.
- if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
- CanHoistInst = false;
- break;
- }
- }
+ if (I.isAtomic() || I.isVolatile()) {
+ LLVM_DEBUG(
+ dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");
+ return false;
}
- if (CanHoistInst) {
+
+ if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {
SafeToHoist.push_back(&I);
LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
} else {
LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
+ NotHoisting.push_back(&I);
- for (User *U : I.users()) {
- if (auto *UI{dyn_cast<Instruction>(U)}) {
- // Cannot sink if user in loop
- // If FC1 has phi users of this value, we cannot sink it into FC1.
- if (FC1.L->contains(UI)) {
- // Cannot hoist or sink this instruction. No hoisting/sinking
- // should take place, loops should not fuse
- LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
- return false;
- }
- }
+ if (canSinkInst(I, FC1)) {
+ SafeToSink.push_back(&I);
+ LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
+ return false;
}
- SafeToSink.push_back(&I);
- LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
}
}
LLVM_DEBUG(
const FusionCandidate &FC1,
SmallVector<Instruction *, 4> &HoistInsts,
SmallVector<Instruction *, 4> &SinkInsts) const {
-
// All preheader instructions except the branch must be hoisted or sunk
assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
"Attempting to sink and hoist preheader instructions, but not all "
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Safe to hoist.
+
+@A = common global [100 x i32] zeroinitializer, align 16
+; Two counted loops (body1, body2) separated by the preheader pre2, which
+; holds a single load from %ptr. Neither loop body accesses memory. The
+; expected output has the load placed in pre1 right after the alloca, and
+; both induction sequences merged into body1.
+define void @hoist_preheader(i32 %N) {
+; CHECK-LABEL: @hoist_preheader(
+; CHECK-NEXT: pre1:
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT: br label [[BODY1:%.*]]
+; CHECK: body1:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+; Preheader of the second loop; the load below is the instruction expected
+; to be hoisted.
+pre2:
+ %b = load i32, i32 * %ptr
+ br label %body2
+
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Safe to hoist.
+
+@A = common global [100 x i32] zeroinitializer, align 16
+; Two counted loops (body1, body2) separated by the preheader pre2, which
+; holds a single store to %ptr. Neither loop body accesses memory. The
+; expected output has the store placed in pre1 right after the alloca, and
+; both induction sequences merged into body1.
+define void @hoist_preheader(i32 %N) {
+; CHECK-LABEL: @hoist_preheader(
+; CHECK-NEXT: pre1:
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4
+; CHECK-NEXT: br label [[BODY1:%.*]]
+; CHECK: body1:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+; Preheader of the second loop; the store below is the instruction expected
+; to be hoisted.
+pre2:
+ store i32 3, i32* %ptr
+ br label %body2
+
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+; Two counted loops separated by the preheader pre2, which holds a seq_cst
+; atomic store to %ptr. The expectations below require the atomic store to
+; remain in pre2 and to appear in neither body1 nor body2, i.e. the
+; instruction is not moved.
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: %ptr = alloca i32
+; CHECK-NEXT: br label %body1
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: store atomic i32 3, i32* %ptr seq_cst, align 4
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: store atomic i32 3, i32* %ptr seq_cst, align 4
+pre2:
+ store atomic i32 3, i32* %ptr seq_cst, align 4
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: store atomic i32 3, i32* %ptr seq_cst, align 4
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+@A = common global [100 x i32] zeroinitializer, align 16
+; Two counted loops that each store to %ptr, separated by the preheader pre2,
+; which loads %stay from %ptr. The expectations below require %stay to remain
+; in pre2 and to appear in none of body1, body2 or exit.
+; NOTE(review): presumably the store in body1 prevents hoisting the load and
+; the store in body2 prevents sinking it -- confirm against the pass.
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: %ptr = alloca i32, align 4
+; CHECK-NEXT: br label %body1
+pre1:
+ %ptr = alloca i32, align 4
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: %stay =
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ store i32 3, i32* %ptr
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: %stay = load i32, i32* %ptr
+pre2:
+ %stay = load i32, i32* %ptr
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ store i32 3, i32* %ptr
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+ ret void
+}
; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
-; CHECK: have side-effects.
; CHECK: Could not hoist/sink all instructions
@A = common global [100 x i32] zeroinitializer, align 16
; CHECK-NEXT: %ptr = alloca i32, align 4
; CHECK-NEXT: br label %body1
pre1:
- %ptr = alloca i32
+ %ptr = alloca i32, align 4
br label %body1
; CHECK:body1:
-; CHECK-NOT: %stay =
+; CHECK-NOT: store i32 3, i32* %ptr
body1: ; preds = %pre1, %body1
%i = phi i32 [%i_next, %body1], [0, %pre1]
%i_next = add i32 1, %i
%cond = icmp ne i32 %i, %N
+ %load1 = load i32, i32* %ptr
br i1 %cond, label %body1, label %pre2
; CHECK:pre2:
br label %body2
; CHECK: body2:
-; CHECK-NOT: %stay =
+; CHECK-NOT: store i32 3, i32* %ptr
body2: ; preds = %pre2, %body2
%i2 = phi i32 [%i_next2, %body2], [0, %pre2]
%i_next2 = add i32 1, %i2
%cond2 = icmp ne i32 %i2, %N
+ %load2 = load i32, i32* %ptr
br i1 %cond2, label %body2, label %exit
; CHECK: exit:
-; CHECK-NOT: %stay =
+; CHECK-NOT: store i32 3, i32* %ptr
exit:
ret void
}
; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
-; CHECK: may have side-effects
; CHECK: Could not hoist/sink all instructions
declare void @unknown_func()
br label %body1
; CHECK:body1:
-; CHECK-NOT: %stay =
+; CHECK-NOT: call void @unknown_func()
body1: ; preds = %pre1, %body1
%i = phi i32 [%i_next, %body1], [0, %pre1]
%i_next = add i32 1, %i
br label %body2
; CHECK: body2:
-; CHECK-NOT: %stay =
+; CHECK-NOT: call void @unknown_func()
body2: ; preds = %pre2, %body2
%i2 = phi i32 [%i_next2, %body2], [0, %pre2]
%i_next2 = add i32 1, %i2
br i1 %cond2, label %body2, label %exit
; CHECK: exit:
-; CHECK-NOT: %stay =
exit:
ret void
}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+declare void @unknown_func()
+
+; Two counted loops separated by the preheader pre2, which holds a volatile
+; store to %ptr. The expectations below require the volatile store to remain
+; in pre2 and to appear in neither body1 nor body2, i.e. the instruction is
+; not moved.
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: %ptr = alloca i32
+; CHECK-NEXT: br label %body1
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: store volatile i32 3, i32* %ptr
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: store volatile i32 3, i32* %ptr
+pre2:
+ store volatile i32 3, i32* %ptr
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: store volatile i32 3, i32* %ptr
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+exit:
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Safe to sink.
+
+@A = common global [100 x i32] zeroinitializer, align 16
+; The first loop (body1) stores to %ptr and the preheader of the second loop
+; (pre2) loads from %ptr; the second loop does not access memory. The expected
+; output keeps the store inside the merged loop body and places the load in
+; the exit block, after the loop.
+define void @sink_preheader(i32 %N) {
+; CHECK-LABEL: @sink_preheader(
+; CHECK-NEXT: pre1:
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label [[BODY1:%.*]]
+; CHECK: body1:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4
+; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ store i32 3, i32* %ptr
+ br i1 %cond, label %body1, label %pre2
+
+; Preheader of the second loop; the load below is the instruction expected
+; to be sunk.
+pre2:
+ %b = load i32, i32 * %ptr
+ br label %body2
+
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+@A = common global [100 x i32] zeroinitializer, align 16
+; The first loop (body1) loads from %ptr and the preheader of the second loop
+; (pre2) stores to %ptr; the second loop does not access memory. The expected
+; output keeps the load inside the merged loop body and places the store in
+; the exit block, after the loop.
+define void @sink_preheader(i32 %N) {
+; CHECK-LABEL: @sink_preheader(
+; CHECK-NEXT: pre1:
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label [[BODY1:%.*]]
+; CHECK: body1:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ %b = load i32, i32 * %ptr
+ br i1 %cond, label %body1, label %pre2
+
+; Preheader of the second loop; the store below is the instruction expected
+; to be sunk.
+pre2:
+ store i32 3, i32* %ptr
+ br label %body2
+
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+exit:
+ ret void
+}