}
}
- if (!isa<SCEVUnknown>(LHS)) {
+ if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
std::swap(LHS, RHS);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
+ // Check for a condition of the form (-C1 + X < C2). InstCombine will
+ // create this form when combining two checks of the form (X u< C2 + C1) and
+ // (X u>= C1). On a match, the rewrite recorded for X is clamped to the
+ // unsigned bounds implied by the condition and true is returned. Otherwise
+ // RewriteMap is left untouched and false is returned.
+ auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
+ // LHS must be a two-operand add: (constant + unknown).
+ auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
+ if (!AddExpr || AddExpr->getNumOperands() != 2)
+ return false;
+
+ // C1 is the constant addend of LHS, LHSUnknown is X, and C2 is the
+ // constant the sum is compared against.
+ auto *C1 = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
+ auto *LHSUnknown = dyn_cast<SCEVUnknown>(AddExpr->getOperand(1));
+ auto *C2 = dyn_cast<SCEVConstant>(RHS);
+ if (!C1 || !C2 || !LHSUnknown)
+ return false;
+
+ // Values of (C1 + X) that satisfy the predicate against C2, shifted back by
+ // C1 so that the resulting range describes X itself.
+ auto ExactRegion =
+ ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
+ .sub(C1->getAPInt());
+
+ // Bail out, unless we have a non-wrapping, monotonic range.
+ if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
+ return false;
+ auto I = RewriteMap.find(LHSUnknown->getValue());
+ // Clamp X (or the rewrite already recorded for X), not the whole add
+ // expression LHS: ExactRegion bounds X, so clamping (C1 + X) would record
+ // an expression that is not equal to X under the guard.
+ const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
+ RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
+ getConstant(ExactRegion.getUnsignedMin()),
+ getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
+ return true;
+ };
+ if (MatchRangeCheckIdiom())
+ return;
+
// For now, limit to conditions that provide information about unknown
// expressions. RHS also cannot contain add recurrences.
auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);
; CHECK-NEXT: %N.off = add i32 %N, -1
; CHECK-NEXT: --> (-1 + %N) U: full-set S: full-set
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,7) S: [0,7) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,-2147483648) S: [1,-2147483648) Exits: %N LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,8) S: [1,8) Exits: %N LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned
; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %N)
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
+; CHECK-NEXT: Loop %loop: max backedge-taken count is 6
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %N)
; CHECK-NEXT: Predicates:
; CHECK: Loop %loop: Trip multiple is 1
ret void
}
+; The function below uses a single condition to ensure %N >= 2 && %N < 22.
+; InstCombine transforms such checks with 2 conditions to a single check as in
+; the test function.
+define void @optimized_range_check_unsigned2(i16* %pred, i32 %N) {
+; CHECK-LABEL: 'optimized_range_check_unsigned2'
+; CHECK-NEXT: Classifying expressions for: @optimized_range_check_unsigned2
+; CHECK-NEXT: %N.off = add i32 %N, -2
+; CHECK-NEXT: --> (-2 + %N) U: full-set S: full-set
+; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,21) S: [0,21) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
+; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,22) S: [1,22) Exits: %N LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned2
+; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %N)
+; CHECK-NEXT: Loop %loop: max backedge-taken count is 20
+; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %N)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %loop: Trip multiple is 1
+;
+entry:
+ ; Combined range check: (%N - 2) u< 20 holds exactly when 2 <= %N <= 21, so
+ ; the loop is only entered for those values of %N.
+ %N.off = add i32 %N, -2
+ %cmp = icmp ult i32 %N.off, 20
+ br i1 %cmp, label %loop, label %exit
+
+loop:
+ ; Stores 0 to %pred[%iv] and leaves the loop once %iv.next equals %N.
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
+ store i16 0, i16* %gep, align 2
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %N
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
; Similar to @optimized_range_check_unsigned, but the initial compare checks
; against unsigned max (-1), which breaks the range check idiom.
define void @not_optimized_range_check_unsigned1(i16* %pred, i32 %N) {