/// check is redundant and can be constant-folded away. The induction
/// variable is not required to be the canonical {0,+,1} induction variable.
Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
- const SCEVAddRecExpr *IndVar) const;
+ const SCEVAddRecExpr *IndVar,
+ bool IsLatchSigned) const;
/// Parse out a set of inductive range checks from \p BI and append them to \p
/// Checks.
/// range, returns None.
Optional<InductiveRangeCheck::Range>
InductiveRangeCheck::computeSafeIterationSpace(
- ScalarEvolution &SE, const SCEVAddRecExpr *IndVar) const {
+ ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
+ bool IsLatchSigned) const {
// IndVar is of the form "A + B * I" (where "I" is the canonical induction
// variable, that may or may not exist as a real llvm::Value in the loop) and
// this inductive range check is a range check on the "C + D * I" ("C" is
//
// 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1)
//
- // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions
- // and subtractions are twos-complement wrapping and comparisons are signed.
- //
- // Proof:
- //
- // If there exists IndVar such that -M <= IndVar < (L - M) then it follows
- // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows
- // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have
- // overflown.
- //
- // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t.
- // Hence 0 <= (IndVar + M) < L
-
- // [^1]: Note that the solution does _not_ apply if L < 0; consider values M =
- // 127, IndVar = 126 and L = -2 in an i8 world.
+ // Here L stands for upper limit of the safe iteration space.
+ // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid
+ // overflows when calculating (0 - M) and (L - M) we, depending on type of
+ // IV's iteration space, limit the calculations by borders of the iteration
+ // space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0.
+ // If we figured out that "anything greater than (-M) is safe", we strengthen
+ // this to "everything greater than 0 is safe", assuming that values between
+ // -M and 0 just do not exist in unsigned iteration space, and we don't want
+ // to deal with overflown values.
if (!IndVar->isAffine())
return None;
return None;
assert(!D->getValue()->isZero() && "Recurrence with zero step?");
+ unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ // Subtract Y from X so that the result does not cross the border of the IV
+ // iteration space. Mathematically, it is equivalent to:
+ //
+ // ClampedSubstract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX). [1]
+ //
+ // In [1], 'X - Y' is a mathematical subtraction (the result is not bounded
+ // to any bit width). But after we take min/max, the result is guaranteed
+ // to be within [INT_MIN, INT_MAX].
+ //
+ // In [1], INT_MAX and INT_MIN are either the signed or the unsigned max/min
+ // values, depending on the type of latch condition that defines the IV
+ // iteration space.
+ auto ClampedSubstract = [&](const SCEV *X, const SCEV *Y) {
+ assert(SE.isKnownNonNegative(X) &&
+ "We can only substract from values in [0; SINT_MAX]!");
+ if (IsLatchSigned) {
+ // X is a number from signed range, Y is interpreted as signed.
+ // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
+ // thing we should care about is that we didn't cross SINT_MAX.
+ // So, if Y is positive, we subtract Y safely.
+ // Rule 1: Y > 0 ---> Y.
+ // If 0 <= -Y <= (SINT_MAX - X), we subtract Y safely.
+ // Rule 2: Y >=s (X - SINT_MAX) ---> Y.
+ // If 0 <= (SINT_MAX - X) < -Y, we can only subtract (X - SINT_MAX).
+ // Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX).
+ // It gives us smax(Y, X - SINT_MAX) to subtract in all cases.
+ const SCEV *XMinusSIntMax = SE.getMinusSCEV(X, SIntMax);
+ return SE.getMinusSCEV(X, SE.getSMaxExpr(Y, XMinusSIntMax));
+ } else
+ // X is a number from unsigned range, Y is interpreted as signed.
+ // Even if Y is SINT_MIN, (X - Y) cannot exceed UINT_MAX. So the only
+ // thing we should care about is that we didn't cross zero.
+ // So, if Y is negative, we subtract Y safely.
+ // Rule 1: Y <s 0 ---> Y.
+ // If 0 <= Y <= X, we subtract Y safely.
+ // Rule 2: Y <=s X ---> Y.
+ // If 0 <= X < Y, we should stop at 0 and can only subtract X.
+ // Rule 3: Y >s X ---> X.
+ // It gives us smin(X, Y) to subtract in all cases.
+ return SE.getMinusSCEV(X, SE.getSMinExpr(X, Y));
+ };
const SCEV *M = SE.getMinusSCEV(C, A);
- const SCEV *Begin = SE.getNegativeSCEV(M);
- const SCEV *UpperLimit = nullptr;
+ const SCEV *Zero = SE.getZero(M->getType());
+ const SCEV *Begin = ClampedSubstract(Zero, M);
+ const SCEV *L = nullptr;
// We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
// We can potentially do much better here.
- if (const SCEV *L = getEnd())
- UpperLimit = L;
+ if (const SCEV *EndLimit = getEnd())
+ L = EndLimit;
else {
assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
- unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
- UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ L = SIntMax;
}
-
- const SCEV *End = SE.getMinusSCEV(UpperLimit, M);
+ const SCEV *End = ClampedSubstract(L, M);
return InductiveRangeCheck::Range(Begin, End);
}
Instruction *ExprInsertPt = Preheader->getTerminator();
SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
- auto RangeIsNonNegative = [&](InductiveRangeCheck::Range &R) {
- return SE.isKnownNonNegative(R.getBegin()) &&
- SE.isKnownNonNegative(R.getEnd());
- };
// Basing on the type of latch predicate, we interpret the IV iteration range
// as signed or unsigned range. We use different min/max functions (signed or
// unsigned) when intersecting this range with safe iteration ranges implied
IRBuilder<> B(ExprInsertPt);
for (InductiveRangeCheck &IRC : RangeChecks) {
- auto Result = IRC.computeSafeIterationSpace(SE, IndVar);
+ auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
+ LS.IsSignedPredicate);
if (Result.hasValue()) {
- // Intersecting a signed and an unsigned ranges may produce incorrect
- // results because we can use neither signed nor unsigned min/max for
- // reliably correct intersection if a range contains negative values
- // which are either actually negative or big positive. Intersection is
- // safe in two following cases:
- // 1. Both ranges are signed/unsigned, then we use signed/unsigned min/max
- // respectively for their intersection;
- // 2. IRC safe iteration space only contains values from [0, SINT_MAX].
- // The interpretation of these values is unambiguous.
- // We take the type of IV iteration range as a reference (we will
- // intersect it with the resulting range of all IRC's later in
- // calculateSubRanges). Only ranges of IRC of the same type are considered
- // for removal unless we prove that its range doesn't contain ambiguous
- // values.
- if (IRC.isSigned() != LS.IsSignedPredicate &&
- !RangeIsNonNegative(Result.getValue()))
- continue;
auto MaybeSafeIterRange =
IntersectRange(SE, SafeIterRange, Result.getValue());
if (MaybeSafeIterRange.hasValue()) {
--- /dev/null
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; Make sure we can eliminate range check with signed latch, unsigned IRC and
+; positive offset. The safe iteration space is:
+; No preloop,
+; %exit.mainloop.at = smax (0, -1 - smax(12 - %len, -102)).
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 0
+; %len = 50
+; %exit.mainloop.at = 50 - 13 = 37.
+; %len = 100
+; %exit.mainloop.at = 100 - 13 = 87.
+; %len = 150
+; %exit.mainloop.at = 101.
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_01(
+; CHECK-NOT: preloop
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
+; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: loop
+; CHECK: br i1 true, label %in.bounds
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = add i32 %idx, 13
+ %abc = icmp ult i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Make sure we can eliminate range check with signed latch, unsigned IRC and
+; negative offset. The safe iteration space is:
+; %exit.preloop.at = 13
+; %exit.mainloop.at = smax(-1 - smax(smax(%len - SINT_MAX, -13) - 1 - %len, -102), 0)
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 23
+; %len = 50
+; %exit.mainloop.at = 63
+; %len = 100
+; %exit.mainloop.at = 101
+; %len = 150
+; %exit.mainloop.at = 101
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_02(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
+; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
+; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
+; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
+; CHECK-NEXT: br i1 true, label %loop.preloop.preheader
+; CHECK: loop.preloop:
+; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ]
+; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, 1
+; CHECK-NEXT: %idx.offset.preloop = sub i32 %idx.preloop, 13
+; CHECK-NEXT: %abc.preloop = icmp ult i32 %idx.offset.preloop, %len
+; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.next.preloop, 101
+; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13
+; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = sub i32 %idx, 13
+ %abc = icmp ult i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Make sure we can eliminate range check with unsigned latch, signed IRC and
+; positive offset. The safe iteration space is:
+; No preloop,
+; %exit.mainloop.at = -1 - umax(-2 - %len - smax(-1 - %len, -14), -102)
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 0
+; %len = 50
+; %exit.mainloop.at = 37
+; %len = 100
+; %exit.mainloop.at = 87
+; %len = 150
+; %exit.mainloop.at = 101
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_03(
+; CHECK-NOT: preloop
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len
+; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
+; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
+; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
+; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
+; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = add i32 %idx, 13
+ %abc = icmp slt i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ult i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Make sure we can eliminate range check with unsigned latch, signed IRC and
+; negative offset. The safe iteration space is:
+; %exit.preloop.at = 13
+; %exit.mainloop.at = -1 - umax(-14 - %len, -102)
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 23
+; %len = 50
+; %exit.mainloop.at = 63
+; %len = 100
+; %exit.mainloop.at = 101
+; %len = 150
+; %exit.mainloop.at = 101
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_04(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
+; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT: br i1 true, label %loop.preloop.preheader
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp ult i32 %idx.next.preloop, 101
+; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13
+; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = sub i32 %idx, 13
+ %abc = icmp slt i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ult i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Signed latch, signed RC, positive offset. Same as test_01.
+define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_05(
+; CHECK-NOT: preloop
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
+; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: loop
+; CHECK: br i1 true, label %in.bounds
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = add i32 %idx, 13
+ %abc = icmp slt i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Signed latch, signed RC, negative offset. Same as test_02.
+define void @test_06(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_06(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
+; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
+; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
+; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
+; CHECK-NEXT: br i1 true, label %loop.preloop.preheader
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.next.preloop, 101
+; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13
+; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = sub i32 %idx, 13
+ %abc = icmp slt i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Unsigned latch, Unsigned RC, positive offset. Same as test_03.
+define void @test_07(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_07(
+; CHECK-NOT: preloop
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len
+; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
+; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
+; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
+; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
+; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
+; CHECK: loop
+; CHECK: br i1 true, label %in.bounds
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = add i32 %idx, 13
+ %abc = icmp ult i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ult i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Unsigned latch, Unsigned RC, negative offset. Same as test_04.
+define void @test_08(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_08(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len
+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
+; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT: br i1 true, label %loop.preloop.preheader
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp ult i32 %idx.next.preloop, 101
+; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13
+; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK: postloop:
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.offset = sub i32 %idx, 13
+ %abc = icmp ult i32 %idx.offset, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ult i32 %idx.next, 101
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+!0 = !{i32 0, i32 2147483647}