[IRCE] Smart range intersection

author Max Kazantsev <max.kazantsev@azul.com>

Mon, 20 Nov 2017 06:07:57 +0000 (06:07 +0000)

committer Max Kazantsev <max.kazantsev@azul.com>

Mon, 20 Nov 2017 06:07:57 +0000 (06:07 +0000)
author Max Kazantsev <max.kazantsev@azul.com>
Mon, 20 Nov 2017 06:07:57 +0000 (06:07 +0000)
committer Max Kazantsev <max.kazantsev@azul.com>
Mon, 20 Nov 2017 06:07:57 +0000 (06:07 +0000)
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

index 9f514e609c4688996fd32d31ef9f875953f64941..6e02e38a28f8c9dbeed08d27e10f52a65519e2a4 100644 (file)
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -228,7 +228,8 @@ public:
    /// check is redundant and can be constant-folded away.  The induction
    /// variable is not required to be the canonical {0,+,1} induction variable.
    Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
-                                            const SCEVAddRecExpr *IndVar) const;
+                                            const SCEVAddRecExpr *IndVar,
+                                            bool IsLatchSigned) const;
  
    /// Parse out a set of inductive range checks from \p BI and append them to \p
    /// Checks.
@@ -1599,7 +1600,8 @@ bool LoopConstrainer::run() {
  /// range, returns None.
  Optional<InductiveRangeCheck::Range>
  InductiveRangeCheck::computeSafeIterationSpace(
-    ScalarEvolution &SE, const SCEVAddRecExpr *IndVar) const {
+    ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
+    bool IsLatchSigned) const {
    // IndVar is of the form "A + B * I" (where "I" is the canonical induction
    // variable, that may or may not exist as a real llvm::Value in the loop) and
    // this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1610,21 +1612,15 @@ InductiveRangeCheck::computeSafeIterationSpace(
    //
    //   0 <= M + 1 * IndVar < L given L >= 0  (i.e. N == 1)
    //
-  // The inequality is satisfied by -M <= IndVar < (L - M) [^1].  All additions
-  // and subtractions are twos-complement wrapping and comparisons are signed.
-  //
-  // Proof:
-  //
-  //   If there exists IndVar such that -M <= IndVar < (L - M) then it follows
-  //   that -M <= (-M + L) [== Eq. 1].  Since L >= 0, if (-M + L) sign-overflows
-  //   then (-M + L) < (-M).  Hence by [Eq. 1], (-M + L) could not have
-  //   overflown.
-  //
-  //   This means IndVar = t + (-M) for t in [0, L).  Hence (IndVar + M) = t.
-  //   Hence 0 <= (IndVar + M) < L
-
-  // [^1]: Note that the solution does _not_ apply if L < 0; consider values M =
-  // 127, IndVar = 126 and L = -2 in an i8 world.
+  // Here L stands for upper limit of the safe iteration space.
+  // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid
+  // overflows when calculating (0 - M) and (L - M) we, depending on type of
+  // IV's iteration space, limit the calculations by borders of the iteration
+  // space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0.
+  // If we figured out that "anything greater than (-M) is safe", we strengthen
+  // this to "everything greater than 0 is safe", assuming that values between
+  // -M and 0 just do not exist in unsigned iteration space, and we don't want
+  // to deal with overflown values.
  
    if (!IndVar->isAffine())
      return None;
@@ -1641,22 +1637,64 @@ InductiveRangeCheck::computeSafeIterationSpace(
      return None;
  
    assert(!D->getValue()->isZero() && "Recurrence with zero step?");
+  unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+  const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
  
+  // Substract Y from X so that it does not go through border of the IV
+  // iteration space. Mathematically, it is equivalent to:
+  //
+  //    ClampedSubstract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX).        [1]
+  //
+  // In [1], 'X - Y' is a mathematical substraction (result is not bounded to
+  // any width of bit grid). But after we take min/max, the result is
+  // guaranteed to be within [INT_MIN, INT_MAX].
+  //
+  // In [1], INT_MAX and INT_MIN are respectively signed and unsigned max/min
+  // values, depending on type of latch condition that defines IV iteration
+  // space.
+  auto ClampedSubstract = [&](const SCEV *X, const SCEV *Y) {
+    assert(SE.isKnownNonNegative(X) &&
+           "We can only substract from values in [0; SINT_MAX]!");
+    if (IsLatchSigned) {
+      // X is a number from signed range, Y is interpreted as signed.
+      // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
+      // thing we should care about is that we didn't cross SINT_MAX.
+      // So, if Y is positive, we substract Y safely.
+      //   Rule 1: Y > 0 ---> Y.
+      // If 0 <= -Y <= (SINT_MAX - X), we substract Y safely.
+      //   Rule 2: Y >=s (X - SINT_MAX) ---> Y.
+      // If 0 <= (SINT_MAX - X) < -Y, we can only substract (X - SINT_MAX).
+      //   Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX).
+      // It gives us smax(Y, X - SINT_MAX) to substract in all cases.
+      const SCEV *XMinusSIntMax = SE.getMinusSCEV(X, SIntMax);
+      return SE.getMinusSCEV(X, SE.getSMaxExpr(Y, XMinusSIntMax));
+    } else
+      // X is a number from unsigned range, Y is interpreted as signed.
+      // Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only
+      // thing we should care about is that we didn't cross zero.
+      // So, if Y is negative, we substract Y safely.
+      //   Rule 1: Y <s 0 ---> Y.
+      // If 0 <= Y <= X, we substract Y safely.
+      //   Rule 2: Y <=s X ---> Y.
+      // If 0 <= X < Y, we should stop at 0 and can only substract X.
+      //   Rule 3: Y >s X ---> X.
+      // It gives us smin(X, Y) to substract in all cases.
+      return SE.getMinusSCEV(X, SE.getSMinExpr(X, Y));
+  };
    const SCEV *M = SE.getMinusSCEV(C, A);
-  const SCEV *Begin = SE.getNegativeSCEV(M);
-  const SCEV *UpperLimit = nullptr;
+  const SCEV *Zero = SE.getZero(M->getType());
+  const SCEV *Begin = ClampedSubstract(Zero, M);
+  const SCEV *L = nullptr;
  
    // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
    // We can potentially do much better here.
-  if (const SCEV *L = getEnd())
-    UpperLimit = L;
+  if (const SCEV *EndLimit = getEnd())
+    L = EndLimit;
    else {
      assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
-    unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
-    UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+    L = SIntMax;
    }
-
-  const SCEV *End = SE.getMinusSCEV(UpperLimit, M);
+  const SCEV *End = ClampedSubstract(L, M);
    return InductiveRangeCheck::Range(Begin, End);
  }
  
@@ -1776,10 +1814,6 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
    Instruction *ExprInsertPt = Preheader->getTerminator();
  
    SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
-  auto RangeIsNonNegative = [&](InductiveRangeCheck::Range &R) {
-    return SE.isKnownNonNegative(R.getBegin()) &&
-           SE.isKnownNonNegative(R.getEnd());
-  };
    // Basing on the type of latch predicate, we interpret the IV iteration range
    // as signed or unsigned range. We use different min/max functions (signed or
    // unsigned) when intersecting this range with safe iteration ranges implied
@@ -1789,25 +1823,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
  
    IRBuilder<> B(ExprInsertPt);
    for (InductiveRangeCheck &IRC : RangeChecks) {
-    auto Result = IRC.computeSafeIterationSpace(SE, IndVar);
+    auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
+                                                LS.IsSignedPredicate);
      if (Result.hasValue()) {
-      // Intersecting a signed and an unsigned ranges may produce incorrect
-      // results because we can use neither signed nor unsigned min/max for
-      // reliably correct intersection if a range contains negative values
-      // which are either actually negative or big positive. Intersection is
-      // safe in two following cases:
-      // 1. Both ranges are signed/unsigned, then we use signed/unsigned min/max
-      //    respectively for their intersection;
-      // 2. IRC safe iteration space only contains values from [0, SINT_MAX].
-      //    The interpretation of these values is unambiguous.
-      // We take the type of IV iteration range as a reference (we will
-      // intersect it with the resulting range of all IRC's later in
-      // calculateSubRanges). Only ranges of IRC of the same type are considered
-      // for removal unless we prove that its range doesn't contain ambiguous
-      // values.
-      if (IRC.isSigned() != LS.IsSignedPredicate &&
-          !RangeIsNonNegative(Result.getValue()))
-        continue;
        auto MaybeSafeIterRange =
            IntersectRange(SE, SafeIterRange, Result.getValue());
        if (MaybeSafeIterRange.hasValue()) {
diff --git a/llvm/test/Transforms/IRCE/clamp.ll b/llvm/test/Transforms/IRCE/clamp.ll

index 5215ba9d9e8ba5b788987cad6eb6865bd83febb6..dfced7cf0ec3980db8ed0bcefb64a11638a36654 100644 (file)
--- a/llvm/test/Transforms/IRCE/clamp.ll
+++ b/llvm/test/Transforms/IRCE/clamp.ll
@@ -4,7 +4,7 @@
  ; calculation of post-loop exit condition.
  
  ; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing: %loop<header><exiting>,%in_bounds<exiting>,%not_zero<latch><exiting>
-; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in_bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_02: constrained Loop
  
  define void @test_01() {
  
@@ -69,42 +69,9 @@ in_bounds:                                       ; preds = %loop
  
  define void @test_02() {
  
-; CHECK-LABEL: test_02
-; CHECK-NOT:   br i1 false, label %loop.preloop.preheader, label %preloop.pseudo.exit
-; CHECK-NOT:   postloop
-; CHECK:       entry:
-; CHECK-NEXT:    br i1 true, label %loop.preloop.preheader, label %preloop.pseudo.exit
-; CHECK:       mainloop:
-; CHECK-NEXT:    br label %loop
-; CHECK:       loop:
-; CHECK-NEXT:    %iv1 = phi i64 [ %iv1.preloop.copy, %mainloop ], [ %iv1.next, %in_bounds ]
-; CHECK-NEXT:    %iv2 = phi i64 [ %iv2.preloop.copy, %mainloop ], [ %iv2.next, %in_bounds ]
-; CHECK-NEXT:    %iv2.offset = add i64 %iv2, 1
-; CHECK-NEXT:    %rc = icmp ult i64 %iv2.offset, 400
-; CHECK-NEXT:    br i1 true, label %in_bounds, label %bci_321.loopexit1
-; CHECK:       in_bounds:
-; CHECK-NEXT:    %iv1.next = add nuw nsw i64 %iv1, 2
-; CHECK-NEXT:    %iv2.next = add nuw nsw i64 %iv2, 2
-; CHECK-NEXT:    %cond = icmp ugt i64 %iv1, 204
-; CHECK-NEXT:    br i1 %cond, label %bci_321.loopexit1, label %loop
-; CHECK:       loop.preloop:
-; CHECK-NEXT:    %iv1.preloop = phi i64 [ %iv1.next.preloop, %in_bounds.preloop ], [ 3, %loop.preloop.preheader ]
-; CHECK-NEXT:    %iv2.preloop = phi i64 [ %iv2.next.preloop, %in_bounds.preloop ], [ 4294967295, %loop.preloop.preheader ]
-; CHECK-NEXT:    %iv2.offset.preloop = add i64 %iv2.preloop, 1
-; CHECK-NEXT:    %rc.preloop = icmp ult i64 %iv2.offset.preloop, 400
-; CHECK-NEXT:    br i1 %rc.preloop, label %in_bounds.preloop, label %bci_321.loopexit
-; CHECK:       in_bounds.preloop:
-; CHECK-NEXT:    %iv1.next.preloop = add nuw nsw i64 %iv1.preloop, 2
-; CHECK-NEXT:    %iv2.next.preloop = add nuw nsw i64 %iv2.preloop, 2
-; CHECK-NEXT:    %cond.preloop = icmp ugt i64 %iv1.preloop, 204
-; CHECK-NEXT:    [[C0:%[^ ]+]] = icmp ult i64 %iv1.preloop, 205
-; CHECK-NEXT:    [[C1:%[^ ]+]] = xor i1 [[C0]], true
-; CHECK-NEXT:    br i1 [[C1]], label %preloop.exit.selector, label %loop.preloop
-; CHECK:       preloop.pseudo.exit:
-; CHECK-NEXT:    %iv1.preloop.copy = phi i64 [ 3, %entry ], [ %iv1.next.preloop.lcssa, %preloop.exit.selector ]
-; CHECK-NEXT:    %iv2.preloop.copy = phi i64 [ 4294967295, %entry ], [ %iv2.next.preloop.lcssa, %preloop.exit.selector ]
-; CHECK-NEXT:    %indvar.end = phi i64 [ 1, %entry ], [ %iv1.preloop.lcssa, %preloop.exit.selector ]
-; CHECK-NEXT:    br label %mainloop
+; Now IRCE is smart enough to understand that the safe range here is empty.
+; Previously it executed the entire loop in safe preloop and never actually
+; entered the main loop.
  
  entry:
    br label %loop
diff --git a/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll b/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll

index a1b9b0f521a5af3a5bb8ab39905d31e1a93c5539..9dd3b6f151dfb0f42ae5ae63c096d858d02a0177 100644 (file)
--- a/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll
+++ b/llvm/test/Transforms/IRCE/range_intersect_miscompile.ll
@@ -2,8 +2,8 @@
  
  ; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing:
  ; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing:
-; CHECK-NOT: irce: in function test_03: constrained Loop
-; CHECK-NOT: irce: in function test_04: constrained Loop
+; CHECK-LABEL: irce: in function test_03: constrained Loop at depth 1 containing:
+; CHECK-LABEL: irce: in function test_04: constrained Loop at depth 1 containing:
  ; CHECK-LABEL: irce: in function test_05: constrained Loop at depth 1 containing:
  
  ; This test used to demonstrate a miscompile: the outer loop's IV iterates in
@@ -120,11 +120,18 @@ deopt:                                          ; preds = %range_check_block
  }
  
  ; Range check is made against 0, so the safe iteration range is empty. IRCE
-; should not apply.
+; should not apply to the inner loop. The condition %tmp2 can be eliminated.
  
  define void @test_03() {
  
  ; CHECK-LABEL: test_03
+; CHECK-NOT:   preloop
+; CHECK-NOT:   postloop
+; CHECK:         %tmp2 = icmp sgt i32 %iv.prev, -1
+; CHECK-NEXT:    br i1 true, label %loop_header.split.us, label %exit
+; CHECK:       range_check_block:
+; CHECK-NEXT:    %range_check = icmp slt i32 %iv, 0
+; CHECK-NEXT:    br i1 %range_check, label %loop_latch, label %deopt
  
  entry:
    br label %loop_header
@@ -162,10 +169,18 @@ deopt:                                          ; preds = %range_check_block
  
  ; We do not know whether %n is positive or negative, so we prohibit IRCE in
  ; order to avoid incorrect intersection of signed and unsigned ranges.
+; The condition %tmp2 can be eliminated.
  
  define void @test_04(i32* %p) {
  
  ; CHECK-LABEL: test_04
+; CHECK-NOT:   preloop
+; CHECK-NOT:   postloop
+; CHECK:         %tmp2 = icmp sgt i32 %iv.prev, -1
+; CHECK-NEXT:    br i1 true, label %loop_header.split.us, label %exit
+; CHECK:       range_check_block:
+; CHECK-NEXT:    %range_check = icmp slt i32 %iv, %n
+; CHECK-NEXT:    br i1 %range_check, label %loop_latch, label %deopt
  
  entry:
    %n = load i32, i32* %p
diff --git a/llvm/test/Transforms/IRCE/ranges_of_different_types.ll b/llvm/test/Transforms/IRCE/ranges_of_different_types.ll

new file mode 100644 (file)

index 0000000..c38ef24
--- /dev/null
+++ b/llvm/test/Transforms/IRCE/ranges_of_different_types.ll
@@ -0,0 +1,427 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; Make sure we can eliminate range check with signed latch, unsigned IRC and
+; positive offset. The safe iteration space is:
+; No preloop,
+; %exit.mainloop.at = smax (0, -1 - smax(12 - %len, -102)).
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 0
+; %len = 50
+; %exit.mainloop.at = 50 - 13 = 37.
+; %len = 100
+; %exit.mainloop.at = 100 - 13 = 87.
+; %len = 150
+; %exit.mainloop.at = 101.
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_01(
+; CHECK-NOT:     preloop
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
+; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
+; CHECK:         loop
+; CHECK:           br i1 true, label %in.bounds
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = add i32 %idx, 13
+  %abc = icmp ult i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Make sure we can eliminate range check with signed latch, unsigned IRC and
+; negative offset. The safe iteration space is:
+; %exit.preloop.at = 13
+; %exit.mainloop.at = smax(-1 - smax(smax(%len - SINT_MAX, -13) - 1 - %len, -102), 0)
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 0
+; %len = 50
+; %exit.mainloop.at = 63
+; %len = 100
+; %exit.mainloop.at = 101
+; %len = 150
+; %exit.mainloop.at = 101
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_02(
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
+; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
+; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+; CHECK:         loop.preloop:
+; CHECK-NEXT:      %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ]
+; CHECK-NEXT:      %idx.next.preloop = add i32 %idx.preloop, 1
+; CHECK-NEXT:      %idx.offset.preloop = sub i32 %idx.preloop, 13
+; CHECK-NEXT:      %abc.preloop = icmp ult i32 %idx.offset.preloop, %len
+; CHECK-NEXT:      br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
+; CHECK:         in.bounds.preloop:
+; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT:      store i32 0, i32* %addr.preloop
+; CHECK-NEXT:      %next.preloop = icmp slt i32 %idx.next.preloop, 101
+; CHECK-NEXT:      [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13
+; CHECK-NEXT:      br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = sub i32 %idx, 13
+  %abc = icmp ult i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Make sure we can eliminate range check with unsigned latch, signed IRC and
+; positive offset. The safe iteration space is:
+; No preloop,
+; %exit.mainloop.at = -1 - umax(-2 - %len - smax(-1 - %len, -14), -102)
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 0
+; %len = 50
+; %exit.mainloop.at = 37
+; %len = 100
+; %exit.mainloop.at = 87
+; %len = 150
+; %exit.mainloop.at = 101
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_03(
+; CHECK-NOT:     preloop
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
+; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
+; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
+; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
+; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
+; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = add i32 %idx, 13
+  %abc = icmp slt i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Make sure we can eliminate range check with unsigned latch, signed IRC and
+; positive offset. The safe iteration space is:
+; %exit.preloop.at = 13
+; %exit.mainloop.at = -1 - umax(-14 - %len, -102)
+; Formula verification:
+; %len = 10
+; %exit.mainloop.at = 23
+; %len = 50
+; %exit.mainloop.at = 63
+; %len = 100
+; %exit.mainloop.at = 101
+; %len = 150
+; %exit.mainloop.at = 101
+; %len = SINT_MAX
+; %exit.mainloop.at = 101
+
+define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_04(
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
+; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+; CHECK:         in.bounds.preloop:
+; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT:      store i32 0, i32* %addr.preloop
+; CHECK-NEXT:      %next.preloop = icmp ult i32 %idx.next.preloop, 101
+; CHECK-NEXT:      [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13
+; CHECK-NEXT:      br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = sub i32 %idx, 13
+  %abc = icmp slt i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Signed latch, signed RC, positive offset. Same as test_01.
+define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_05(
+; CHECK-NOT:     preloop
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
+; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
+; CHECK:         loop
+; CHECK:           br i1 true, label %in.bounds
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = add i32 %idx, 13
+  %abc = icmp slt i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Signed latch, signed RC, negative offset. Same as test_02.
+define void @test_06(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_06(
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
+; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
+; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+; CHECK:         in.bounds.preloop:
+; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT:      store i32 0, i32* %addr.preloop
+; CHECK-NEXT:      %next.preloop = icmp slt i32 %idx.next.preloop, 101
+; CHECK-NEXT:      [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13
+; CHECK-NEXT:      br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = sub i32 %idx, 13
+  %abc = icmp slt i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Unsigned latch, Unsigned RC, negative offset. Same as test_03.
+define void @test_07(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_07(
+; CHECK-NOT:     preloop
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
+; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
+; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
+; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
+; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
+; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
+; CHECK:         loop
+; CHECK:           br i1 true, label %in.bounds
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = add i32 %idx, 13
+  %abc = icmp ult i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+; Unsigned latch, Unsigned RC, negative offset. Same as test_04.
+define void @test_08(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK-LABEL: test_08(
+; CHECK:         entry:
+; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
+; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
+; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+; CHECK:         in.bounds.preloop:
+; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT:      store i32 0, i32* %addr.preloop
+; CHECK-NEXT:      %next.preloop = icmp ult i32 %idx.next.preloop, 101
+; CHECK-NEXT:      [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13
+; CHECK-NEXT:      br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector
+; CHECK:         postloop:
+
+entry:
+  %len = load i32, i32* %a_len_ptr, !range !0
+  br label %loop
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.offset = sub i32 %idx, 13
+  %abc = icmp ult i32 %idx.offset, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, 101
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+  ret void
+
+exit:
+  ret void
+}
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/llvm/test/Transforms/IRCE/single-access-with-preloop.ll b/llvm/test/Transforms/IRCE/single-access-with-preloop.ll

index 5293efcc4ba626d36f0e1754fe4ac577f0d8215d..4b93122b6e729d0c2534b54f530400e043c21df7 100644 (file)
--- a/llvm/test/Transforms/IRCE/single-access-with-preloop.ll
+++ b/llvm/test/Transforms/IRCE/single-access-with-preloop.ll
@@ -30,7 +30,10 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
  
  ; CHECK-LABEL: @single_access_with_preloop(
  ; CHECK: loop.preheader:
-; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1
+; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647
+; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647
+; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
+; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1
  ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
  ; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
  ; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]]
@@ -39,11 +42,17 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
  ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
  
  
-; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 %offset, -1
+; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647
+; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]]
+; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]]
+; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1
+; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
  ; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len
  ; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]]
  ; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]]
-; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 %offset, -2147483648
+; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0
+; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0
+; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648
  ; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]]
  ; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]]
  ; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]]
@@ -57,7 +66,7 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
  ; CHECK: %abc.high = icmp slt i32 %array.idx, %len
  ; CHECK: %abc.low = icmp sge i32 %array.idx, 0
  ; CHECK: %abc = and i1 true, true
-; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit8
+; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11
  
  ; CHECK: in.bounds:
  ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
author	Max Kazantsev <max.kazantsev@azul.com>
	Mon, 20 Nov 2017 06:07:57 +0000 (06:07 +0000)
committer	Max Kazantsev <max.kazantsev@azul.com>
	Mon, 20 Nov 2017 06:07:57 +0000 (06:07 +0000)
llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp		patch \| blob \| history
llvm/test/Transforms/IRCE/clamp.ll		patch \| blob \| history
llvm/test/Transforms/IRCE/range_intersect_miscompile.ll		patch \| blob \| history
llvm/test/Transforms/IRCE/ranges_of_different_types.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/IRCE/single-access-with-preloop.ll		patch \| blob \| history