return LoopUnrollResult::Unmodified;
}
- // Find trip count and trip multiple if count is not available
+ // Find the smallest exact trip count for any exit. This is an upper bound
+ // on the loop trip count, but an exit at an earlier iteration is still
+ // possible. An unroll by the smallest exact trip count guarantees that all
+ // branches relating to at least one exit can be eliminated. This is unlike
+ // the max trip count, which only guarantees that the backedge can be broken.
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- // If there are multiple exiting blocks but one of them is the latch, use the
- // latch for the trip count estimation. Otherwise insist on a single exiting
- // block for the trip count estimation.
- BasicBlock *ExitingBlock = L->getLoopLatch();
- if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
- ExitingBlock = L->getExitingBlock();
- if (ExitingBlock) {
- TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *ExitingBlock : ExitingBlocks)
+ if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
+ if (!TripCount || TC < TripCount)
+ TripCount = TripMultiple = TC;
+
+ if (!TripCount) {
+ // If no exact trip count is known, determine the trip multiple of either
+ // the loop latch or the single exiting block.
+ // TODO: Relax for multiple exits.
+ BasicBlock *ExitingBlock = L->getLoopLatch();
+ if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
+ ExitingBlock = L->getExitingBlock();
+ if (ExitingBlock)
+ TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
}
// If the loop contains a convergent operation, the prelude we'd add
; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 2
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]]
+; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 [[IV]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
-; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]]
+; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ]
+; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ true, [[LOOP_2:%.*]] ], [ false, [[LATCH_2:%.*]] ]
; CHECK-NEXT: ret i1 [[EXIT_VAL]]
+; CHECK: loop.1:
+; CHECK-NEXT: br label [[LATCH_1]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
+; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
+; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
+; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
+; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
+; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2]], label [[EXIT]]
+; CHECK: loop.2:
+; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LATCH_2]]
+; CHECK: latch.2:
+; CHECK-NEXT: br label [[EXIT]]
;
start:
%a1 = alloca [2 x i64], align 8
ret void
}
-; TODO: We should fully unroll this by 10, leave the unrolled latch
+; Fully unroll this loop by 10, but leave the unrolled latch
; tests since we don't know if %N < 10, and break the backedge.
define void @test2(i64 %N) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: call void @bar()
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i64 [[IV]], 10
-; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: call void @bar()
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT]]
+; CHECK-NEXT: br i1 true, label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
+; CHECK: loop.1:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_1:%.*]]
+; CHECK: latch.1:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ule i64 1, [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
+; CHECK: loop.2:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_2:%.*]]
+; CHECK: latch.2:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ule i64 2, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3:%.*]], label [[EXIT]]
+; CHECK: loop.3:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_3:%.*]]
+; CHECK: latch.3:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ule i64 3, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_3]], label [[LOOP_4:%.*]], label [[EXIT]]
+; CHECK: loop.4:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_4:%.*]]
+; CHECK: latch.4:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_4:%.*]] = icmp ule i64 4, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_4]], label [[LOOP_5:%.*]], label [[EXIT]]
+; CHECK: loop.5:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_5:%.*]]
+; CHECK: latch.5:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_5:%.*]] = icmp ule i64 5, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_5]], label [[LOOP_6:%.*]], label [[EXIT]]
+; CHECK: loop.6:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_6:%.*]]
+; CHECK: latch.6:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_6:%.*]] = icmp ule i64 6, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_6]], label [[LOOP_7:%.*]], label [[EXIT]]
+; CHECK: loop.7:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_7:%.*]]
+; CHECK: latch.7:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_7:%.*]] = icmp ule i64 7, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_7]], label [[LOOP_8:%.*]], label [[EXIT]]
+; CHECK: loop.8:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_8:%.*]]
+; CHECK: latch.8:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_8:%.*]] = icmp ule i64 8, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_8]], label [[LOOP_9:%.*]], label [[EXIT]]
+; CHECK: loop.9:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_9:%.*]]
+; CHECK: latch.9:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_9:%.*]] = icmp ule i64 9, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_9]], label [[LOOP_10:%.*]], label [[EXIT]]
+; CHECK: loop.10:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[LATCH_10:%.*]]
+; CHECK: latch.10:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[CMP2_10:%.*]] = icmp ule i64 10, [[N]]
+; CHECK-NEXT: br i1 [[CMP2_10]], label [[LOOP_11:%.*]], label [[EXIT]]
+; CHECK: loop.11:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br i1 false, label [[LATCH_11:%.*]], label [[EXIT]]
+; CHECK: latch.11:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[EXIT]]
;
entry:
br label %loop
; CHECK-NEXT: start:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 24
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT_4:%.*]], [[LATCH_4:%.*]] ]
+; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A1:%.*]], i64 [[IV]]
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A2:%.*]], i64 [[IV]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
-; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]]
+; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ]
+; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ false, [[LATCH_2:%.*]] ], [ false, [[LATCH_3:%.*]] ], [ true, [[LOOP_4:%.*]] ], [ false, [[LATCH_4]] ]
; CHECK-NEXT: ret i1 [[EXIT_VAL]]
+; CHECK: loop.1:
+; CHECK-NEXT: br label [[LATCH_1]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 1
+; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
+; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
+; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
+; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
+; CHECK: loop.2:
+; CHECK-NEXT: br label [[LATCH_2]]
+; CHECK: latch.2:
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 1
+; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[LOAD1_2:%.*]] = load i64, i64* [[GEP1_2]], align 8
+; CHECK-NEXT: [[LOAD2_2:%.*]] = load i64, i64* [[GEP2_2]], align 8
+; CHECK-NEXT: [[EXITCOND2_2:%.*]] = icmp eq i64 [[LOAD1_2]], [[LOAD2_2]]
+; CHECK-NEXT: br i1 [[EXITCOND2_2]], label [[LOOP_3:%.*]], label [[EXIT]]
+; CHECK: loop.3:
+; CHECK-NEXT: br label [[LATCH_3]]
+; CHECK: latch.3:
+; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_NEXT_2]], 1
+; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[LOAD1_3:%.*]] = load i64, i64* [[GEP1_3]], align 8
+; CHECK-NEXT: [[LOAD2_3:%.*]] = load i64, i64* [[GEP2_3]], align 8
+; CHECK-NEXT: [[EXITCOND2_3:%.*]] = icmp eq i64 [[LOAD1_3]], [[LOAD2_3]]
+; CHECK-NEXT: br i1 [[EXITCOND2_3]], label [[LOOP_4]], label [[EXIT]]
+; CHECK: loop.4:
+; CHECK-NEXT: [[EXITCOND_4:%.*]] = icmp eq i64 [[IV_NEXT_3]], 24
+; CHECK-NEXT: br i1 [[EXITCOND_4]], label [[EXIT]], label [[LATCH_4]]
+; CHECK: latch.4:
+; CHECK-NEXT: [[IV_NEXT_4]] = add nuw nsw i64 [[IV_NEXT_3]], 1
+; CHECK-NEXT: [[GEP1_4:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT: [[GEP2_4:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT: [[LOAD1_4:%.*]] = load i64, i64* [[GEP1_4]], align 8
+; CHECK-NEXT: [[LOAD2_4:%.*]] = load i64, i64* [[GEP2_4]], align 8
+; CHECK-NEXT: [[EXITCOND2_4:%.*]] = icmp eq i64 [[LOAD1_4]], [[LOAD2_4]]
+; CHECK-NEXT: br i1 [[EXITCOND2_4]], label [[LOOP]], label [[EXIT]]
;
start:
br label %loop
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N2:%.*]] = add i32 [[S:%.*]], 123
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[S]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], [[S]]
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP1]], 7
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]]
-; CHECK: loop.prol.preheader:
-; CHECK-NEXT: br label [[LOOP_PROL:%.*]]
-; CHECK: loop.prol:
-; CHECK-NEXT: [[I_PROL:%.*]] = phi i32 [ [[S]], [[LOOP_PROL_PREHEADER]] ], [ [[I_INC_PROL:%.*]], [[LATCH_PROL:%.*]] ]
-; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ]
-; CHECK-NEXT: [[C1_PROL:%.*]] = icmp eq i32 [[I_PROL]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_PROL]], label [[EXIT1_LOOPEXIT1:%.*]], label [[LATCH_PROL]]
-; CHECK: latch.prol:
-; CHECK-NEXT: [[C2_PROL:%.*]] = icmp eq i32 [[I_PROL]], [[N]]
-; CHECK-NEXT: [[I_INC_PROL]] = add i32 [[I_PROL]], 1
-; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1
-; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: loop.prol.loopexit.unr-lcssa:
-; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[I_INC_PROL]], [[LATCH_PROL]] ]
-; CHECK-NEXT: br label [[LOOP_PROL_LOOPEXIT]]
-; CHECK: loop.prol.loopexit:
-; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[I_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
-; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT2:%.*]], label [[ENTRY_NEW:%.*]]
-; CHECK: entry.new:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_UNR]], [[ENTRY_NEW]] ], [ [[I_INC_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[I]], [[N2]]
-; CHECK-NEXT: br i1 [[C1]], label [[EXIT1_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; CHECK-NEXT: br i1 [[C1]], label [[EXIT1:%.*]], label [[LATCH]]
; CHECK: latch:
-; CHECK-NEXT: [[I_INC:%.*]] = add i32 [[I]], 1
-; CHECK-NEXT: [[C1_1:%.*]] = icmp eq i32 [[I_INC]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_1]], label [[EXIT1_LOOPEXIT]], label [[LATCH_1:%.*]]
-; CHECK: exit1.loopexit:
-; CHECK-NEXT: br label [[EXIT1:%.*]]
-; CHECK: exit1.loopexit1:
-; CHECK-NEXT: br label [[EXIT1]]
+; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[I]], [[N:%.*]]
+; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
+; CHECK-NEXT: br i1 [[C2]], label [[EXIT2:%.*]], label [[LOOP]]
; CHECK: exit1:
; CHECK-NEXT: ret void
-; CHECK: exit2.unr-lcssa:
-; CHECK-NEXT: br label [[EXIT2]]
; CHECK: exit2:
; CHECK-NEXT: ret void
-; CHECK: latch.1:
-; CHECK-NEXT: [[I_INC_1:%.*]] = add i32 [[I_INC]], 1
-; CHECK-NEXT: [[C1_2:%.*]] = icmp eq i32 [[I_INC_1]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_2]], label [[EXIT1_LOOPEXIT]], label [[LATCH_2:%.*]]
-; CHECK: latch.2:
-; CHECK-NEXT: [[I_INC_2:%.*]] = add i32 [[I_INC_1]], 1
-; CHECK-NEXT: [[C1_3:%.*]] = icmp eq i32 [[I_INC_2]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_3]], label [[EXIT1_LOOPEXIT]], label [[LATCH_3:%.*]]
-; CHECK: latch.3:
-; CHECK-NEXT: [[I_INC_3:%.*]] = add i32 [[I_INC_2]], 1
-; CHECK-NEXT: [[C1_4:%.*]] = icmp eq i32 [[I_INC_3]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_4]], label [[EXIT1_LOOPEXIT]], label [[LATCH_4:%.*]]
-; CHECK: latch.4:
-; CHECK-NEXT: [[I_INC_4:%.*]] = add i32 [[I_INC_3]], 1
-; CHECK-NEXT: [[C1_5:%.*]] = icmp eq i32 [[I_INC_4]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_5]], label [[EXIT1_LOOPEXIT]], label [[LATCH_5:%.*]]
-; CHECK: latch.5:
-; CHECK-NEXT: [[I_INC_5:%.*]] = add i32 [[I_INC_4]], 1
-; CHECK-NEXT: [[C1_6:%.*]] = icmp eq i32 [[I_INC_5]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_6]], label [[EXIT1_LOOPEXIT]], label [[LATCH_6:%.*]]
-; CHECK: latch.6:
-; CHECK-NEXT: [[I_INC_6:%.*]] = add i32 [[I_INC_5]], 1
-; CHECK-NEXT: [[C1_7:%.*]] = icmp eq i32 [[I_INC_6]], [[N2]]
-; CHECK-NEXT: br i1 [[C1_7]], label [[EXIT1_LOOPEXIT]], label [[LATCH_7]]
-; CHECK: latch.7:
-; CHECK-NEXT: [[C2_7:%.*]] = icmp eq i32 [[I_INC_6]], [[N]]
-; CHECK-NEXT: [[I_INC_7]] = add i32 [[I_INC_6]], 1
-; CHECK-NEXT: br i1 [[C2_7]], label [[EXIT2_UNR_LCSSA:%.*]], label [[LOOP]]
;
entry:
%n2 = add i32 %s, 123
; SCEV unrolling properly handles loops with multiple exits. In this
; case, the computed trip count based on a canonical IV is *not* for a
-; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
-; unrolling does not.
+; latch block.
define i64 @earlyLoopTest(i64* %base) nounwind {
; CHECK-LABEL: @earlyLoopTest(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TAIL:%.*]] ]
-; CHECK-NEXT: [[S:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[S_NEXT:%.*]], [[TAIL]] ]
-; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[IV]]
-; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 4
-; CHECK-NEXT: [[S_NEXT]] = add i64 [[S]], [[VAL]]
-; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[INC]], 4
-; CHECK-NEXT: br i1 [[CMP]], label [[TAIL]], label [[EXIT1:%.*]]
+; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[BASE:%.*]], align 4
+; CHECK-NEXT: br label [[TAIL:%.*]]
; CHECK: tail:
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0
-; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT2:%.*]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]]
; CHECK: exit1:
-; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S]], [[LOOP]] ]
+; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S_NEXT_2:%.*]], [[LOOP_3:%.*]] ]
; CHECK-NEXT: ret i64 [[S_LCSSA]]
; CHECK: exit2:
-; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[S_NEXT]], [[TAIL]] ]
+; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1:%.*]], [[TAIL_1:%.*]] ], [ [[S_NEXT_2]], [[TAIL_2:%.*]] ], [ [[S_NEXT_3:%.*]], [[TAIL_3:%.*]] ]
; CHECK-NEXT: ret i64 [[S_NEXT_LCSSA1]]
+; CHECK: loop.1:
+; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1
+; CHECK-NEXT: [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4
+; CHECK-NEXT: [[S_NEXT_1]] = add i64 [[VAL]], [[VAL_1]]
+; CHECK-NEXT: br label [[TAIL_1]]
+; CHECK: tail.1:
+; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0
+; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT2]]
+; CHECK: loop.2:
+; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2
+; CHECK-NEXT: [[VAL_2:%.*]] = load i64, i64* [[ADR_2]], align 4
+; CHECK-NEXT: [[S_NEXT_2]] = add i64 [[S_NEXT_1]], [[VAL_2]]
+; CHECK-NEXT: br label [[TAIL_2]]
+; CHECK: tail.2:
+; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0
+; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3]], label [[EXIT2]]
+; CHECK: loop.3:
+; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i64, i64* [[BASE]], i64 3
+; CHECK-NEXT: [[VAL_3:%.*]] = load i64, i64* [[ADR_3]], align 4
+; CHECK-NEXT: [[S_NEXT_3]] = add i64 [[S_NEXT_2]], [[VAL_3]]
+; CHECK-NEXT: br i1 false, label [[TAIL_3]], label [[EXIT1:%.*]]
+; CHECK: tail.3:
+; CHECK-NEXT: br label [[EXIT2]]
;
entry:
br label %loop
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[L1:%.*]]
; CHECK: l1:
-; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L2:%.*]] ]
-; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[IV1]], 1
-; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4
-; CHECK-NEXT: br i1 false, label [[L2]], label [[EXIT1:%.*]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4
+; CHECK-NEXT: br i1 false, label [[L2:%.*]], label [[EXIT1:%.*]]
; CHECK: l2:
-; CHECK-NEXT: br i1 true, label [[L1]], label [[EXIT2:%.*]]
+; CHECK-NEXT: ret i32 [[VAL]]
; CHECK: exit1:
; CHECK-NEXT: ret i32 1
-; CHECK: exit2:
-; CHECK-NEXT: [[VAL_LCSSA1:%.*]] = phi i32 [ [[VAL]], [[L2]] ]
-; CHECK-NEXT: ret i32 [[VAL_LCSSA1]]
;
entry:
br label %l1
}
-; SCEV should not unroll a multi-exit loops unless the latch block has
-; a known trip count, regardless of the early exit trip counts. The
-; LoopUnroll utility uses this assumption to optimize the latch
-; block's branch.
+; SCEV can unroll a multi-exit loop even if the latch block has no
+; known trip count, but an early exit has a known trip count. In this
+; case we must be careful not to optimize the latch branch away.
define i32 @multiExitIncomplete(i32* %base) nounwind {
; CHECK-LABEL: @multiExitIncomplete(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[L1:%.*]]
; CHECK: l1:
-; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L3:%.*]] ]
-; CHECK-NEXT: [[INC1]] = add nuw i32 [[IV1]], 1
-; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV1]], 5
-; CHECK-NEXT: br i1 [[CMP1]], label [[L2:%.*]], label [[EXIT1:%.*]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4
+; CHECK-NEXT: br label [[L2:%.*]]
; CHECK: l2:
-; CHECK-NEXT: br i1 true, label [[L3]], label [[EXIT2:%.*]]
+; CHECK-NEXT: br label [[L3:%.*]]
; CHECK: l3:
; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0
-; CHECK-NEXT: br i1 [[CMP3]], label [[L1]], label [[EXIT3:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]]
; CHECK: exit1:
; CHECK-NEXT: ret i32 1
; CHECK: exit2:
; CHECK-NEXT: ret i32 2
; CHECK: exit3:
; CHECK-NEXT: ret i32 3
+; CHECK: l1.1:
+; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1
+; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4
+; CHECK-NEXT: br label [[L2_1:%.*]]
+; CHECK: l2.1:
+; CHECK-NEXT: br label [[L3_1:%.*]]
+; CHECK: l3.1:
+; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne i32 [[VAL_1]], 0
+; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]]
+; CHECK: l1.2:
+; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i32, i32* [[BASE]], i32 2
+; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4
+; CHECK-NEXT: br label [[L2_2:%.*]]
+; CHECK: l2.2:
+; CHECK-NEXT: br label [[L3_2:%.*]]
+; CHECK: l3.2:
+; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0
+; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]]
+; CHECK: l1.3:
+; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i32, i32* [[BASE]], i32 3
+; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* [[ADR_3]], align 4
+; CHECK-NEXT: br label [[L2_3:%.*]]
+; CHECK: l2.3:
+; CHECK-NEXT: br label [[L3_3:%.*]]
+; CHECK: l3.3:
+; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0
+; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]]
+; CHECK: l1.4:
+; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr i32, i32* [[BASE]], i32 4
+; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4
+; CHECK-NEXT: br label [[L2_4:%.*]]
+; CHECK: l2.4:
+; CHECK-NEXT: br label [[L3_4:%.*]]
+; CHECK: l3.4:
+; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0
+; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]]
+; CHECK: l1.5:
+; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]]
+; CHECK: l2.5:
+; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]]
+; CHECK: l3.5:
+; CHECK-NEXT: br label [[EXIT3]]
;
entry:
br label %l1
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[B_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_COND:%.*]] ]
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B_03]], 0
-; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[B_03]], 8
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND]], label [[RETURN:%.*]]
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: br i1 false, label [[RETURN]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]]
; CHECK: return:
-; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
+; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1:%.*]] ]
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ]
; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4
; CHECK-NEXT: ret void
+; CHECK: for.body.1:
+; CHECK-NEXT: br i1 false, label [[FOR_COND_1]], label [[RETURN]]
+; CHECK: for.cond.1:
+; CHECK-NEXT: br label [[RETURN]]
;
entry:
br label %for.body
; CHECK: for.cond.i:
; CHECK-NEXT: br label [[FOR_COND_I]]
; CHECK: Proc2.exit:
-; CHECK-NEXT: br label [[FOR_COND31]]
+; CHECK-NEXT: unreachable
; CHECK: for.end94:
; CHECK-NEXT: ret void
;
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
-; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 123, [[ENTRY:%.*]] ], [ [[RES_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC9:%.*]], [[LATCH]] ]
-; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[I_0]]
-; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[PTR]], align 2
-; CHECK-NEXT: [[RES_NEXT]] = add i16 [[RES]], [[LV]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_0]], 3
-; CHECK-NEXT: br i1 [[CMP]], label [[EXITING_1:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 2
+; CHECK-NEXT: [[RES_NEXT:%.*]] = add i16 123, [[LV]]
+; CHECK-NEXT: br label [[EXITING_1:%.*]]
; CHECK: exiting.1:
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i16 [[LV]], [[X:%.*]]
-; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[EXITING_2:%.*]]
+; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT:%.*]], label [[EXITING_2:%.*]]
; CHECK: exiting.2:
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i16 [[LV]], [[Y:%.*]]
-; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH]]
+; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH:%.*]]
; CHECK: latch:
-; CHECK-NEXT: [[INC9]] = add i64 [[I_0]], 1
-; CHECK-NEXT: br label [[HEADER]]
+; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1
+; CHECK-NEXT: [[LV_1:%.*]] = load i16, i16* [[PTR_1]], align 2
+; CHECK-NEXT: [[RES_NEXT_1:%.*]] = add i16 [[RES_NEXT]], [[LV_1]]
+; CHECK-NEXT: br label [[EXITING_1_1:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ [[RES_NEXT]], [[HEADER]] ], [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ]
+; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1:%.*]] ], [ 0, [[EXITING_1_2:%.*]] ], [ 1, [[EXITING_2_2:%.*]] ], [ [[RES_NEXT_3:%.*]], [[LATCH_2:%.*]] ], [ 0, [[EXITING_1_3:%.*]] ], [ 1, [[EXITING_2_3:%.*]] ]
; CHECK-NEXT: ret i16 [[RES_LCSSA]]
+; CHECK: exiting.1.1:
+; CHECK-NEXT: [[EC_1_1:%.*]] = icmp eq i16 [[LV_1]], [[X]]
+; CHECK-NEXT: br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1]]
+; CHECK: exiting.2.1:
+; CHECK-NEXT: [[EC_2_1:%.*]] = icmp eq i16 [[LV_1]], [[Y]]
+; CHECK-NEXT: br i1 [[EC_2_1]], label [[EXIT]], label [[LATCH_1:%.*]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2
+; CHECK-NEXT: [[LV_2:%.*]] = load i16, i16* [[PTR_2]], align 2
+; CHECK-NEXT: [[RES_NEXT_2:%.*]] = add i16 [[RES_NEXT_1]], [[LV_2]]
+; CHECK-NEXT: br label [[EXITING_1_2]]
+; CHECK: exiting.1.2:
+; CHECK-NEXT: [[EC_1_2:%.*]] = icmp eq i16 [[LV_2]], [[X]]
+; CHECK-NEXT: br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2]]
+; CHECK: exiting.2.2:
+; CHECK-NEXT: [[EC_2_2:%.*]] = icmp eq i16 [[LV_2]], [[Y]]
+; CHECK-NEXT: br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2]]
+; CHECK: latch.2:
+; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3
+; CHECK-NEXT: [[LV_3:%.*]] = load i16, i16* [[PTR_3]], align 2
+; CHECK-NEXT: [[RES_NEXT_3]] = add i16 [[RES_NEXT_2]], [[LV_3]]
+; CHECK-NEXT: br i1 false, label [[EXITING_1_3]], label [[EXIT]]
+; CHECK: exiting.1.3:
+; CHECK-NEXT: [[EC_1_3:%.*]] = icmp eq i16 [[LV_3]], [[X]]
+; CHECK-NEXT: br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3]]
+; CHECK: exiting.2.3:
+; CHECK-NEXT: [[EC_2_3:%.*]] = icmp eq i16 [[LV_3]], [[Y]]
+; CHECK-NEXT: br i1 [[EC_2_3]], label [[EXIT]], label [[LATCH_3:%.*]]
+; CHECK: latch.3:
+; CHECK-NEXT: unreachable
;
entry:
br label %header