From 2f6ae28152154e30aeb0c05b4a195f2dd7d2719b Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 4 Aug 2017 07:01:04 +0000 Subject: [PATCH] [IRCE] Handle loops with step different from 1/-1 This patch generalizes IRCE to handle IV steps that are not equal to 1 or -1. Differential Revision: https://reviews.llvm.org/D35539 llvm-svn: 310032 --- .../Scalar/InductiveRangeCheckElimination.cpp | 167 +++++--- llvm/test/Transforms/IRCE/stride_more_than_1.ll | 468 +++++++++++++++++++++ 2 files changed, 570 insertions(+), 65 deletions(-) create mode 100644 llvm/test/Transforms/IRCE/stride_more_than_1.ll diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index ec6d17d..6c136a6 100644 --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -457,6 +457,7 @@ struct LoopStructure { Value *IndVarNext; Value *IndVarStart; + Value *IndVarStep; Value *LoopExitAt; bool IndVarIncreasing; bool IsSignedPredicate; @@ -464,8 +465,8 @@ struct LoopStructure { LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr), - IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false), - IsSignedPredicate(true) {} + IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr), + IndVarIncreasing(false), IsSignedPredicate(true) {} template LoopStructure map(M Map) const { LoopStructure Result; @@ -477,6 +478,7 @@ struct LoopStructure { Result.LatchBrExitIdx = LatchBrExitIdx; Result.IndVarNext = Map(IndVarNext); Result.IndVarStart = Map(IndVarStart); + Result.IndVarStep = Map(IndVarStep); Result.LoopExitAt = Map(LoopExitAt); Result.IndVarIncreasing = IndVarIncreasing; Result.IsSignedPredicate = IsSignedPredicate; @@ -660,6 +662,21 @@ static bool CanBeMax(ScalarEvolution &SE, const SCEV *S, bool Signed) { SE.getUnsignedRange(S).contains(Max); } +static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, + bool Signed) { + // S1 < INT_MAX - S2 ===> S1 + S2 < INT_MAX. + assert(SE.isKnownNonNegative(S2) && + "We expected the 2nd arg to be non-negative!"); + const SCEV *Max = SE.getConstant( + Signed ? APInt::getSignedMaxValue( + cast(S1->getType())->getBitWidth()) + : APInt::getMaxValue( + cast(S1->getType())->getBitWidth())); + const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2); + return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S1, CapForS1); +} + static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) { APInt Min = Signed ? APInt::getSignedMinValue(cast(S->getType())->getBitWidth()) : @@ -668,6 +685,21 @@ static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) { SE.getUnsignedRange(S).contains(Min); } +static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, + bool Signed) { + // S1 > INT_MIN - S2 ===> S1 + S2 > INT_MIN. + assert(SE.isKnownNonPositive(S2) && + "We expected the 2nd arg to be non-positive!"); + const SCEV *Max = SE.getConstant( + Signed ? APInt::getSignedMinValue( + cast(S1->getType())->getBitWidth()) + : APInt::getMinValue( + cast(S1->getType())->getBitWidth())); + const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2); + return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, + S1, CapForS1); +} + Optional LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI, @@ -772,7 +804,11 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; }; - auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) { + // Here we check whether the suggested AddRec is an induction variable that + // can be handled (i.e. with known constant step), and if yes, calculate its + // step and identify whether it is increasing or decreasing. + auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing, + ConstantInt *&StepCI) { if (!AR->isAffine()) return false; @@ -784,12 +820,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, if (const SCEVConstant *StepExpr = dyn_cast(AR->getStepRecurrence(SE))) { - ConstantInt *StepCI = StepExpr->getValue(); + StepCI = StepExpr->getValue(); assert(!StepCI->isZero() && "Zero step?"); - if (StepCI->isOne() || StepCI->isMinusOne()) { - IsIncreasing = StepCI->isOne(); - return true; - } + IsIncreasing = !StepCI->isNegative(); + return true; } return false; @@ -801,7 +835,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, const SCEVAddRecExpr *IndVarNext = cast(LeftSCEV); bool IsIncreasing = false; bool IsSignedPredicate = true; - if (!IsInductionVar(IndVarNext, IsIncreasing)) { + ConstantInt *StepCI; + if (!IsInductionVar(IndVarNext, IsIncreasing, StepCI)) { FailureReason = "LHS in icmp not induction variable"; return None; } @@ -809,34 +844,37 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, const SCEV *StartNext = IndVarNext->getStart(); const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE)); const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); + const SCEV *Step = SE.getSCEV(StepCI); ConstantInt *One = ConstantInt::get(IndVarTy, 1); if (IsIncreasing) { bool DecreasedRightValueByOne = false; - // Try to turn eq/ne predicates to those we can work with. - if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) - // while (++i != len) { while (++i < len) { - // ... ---> ... - // } } - // If both parts are known non-negative, it is profitable to use unsigned - // comparison in increasing loop. This allows us to make the comparison - // check against "RightSCEV + 1" more optimistic. - if (SE.isKnownNonNegative(IndVarStart) && - SE.isKnownNonNegative(RightSCEV)) - Pred = ICmpInst::ICMP_ULT; - else - Pred = ICmpInst::ICMP_SLT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { - // while (true) { while (true) { - // if (++i == len) ---> if (++i > len - 1) - // break; break; - // ... ... - // } } - // TODO: Insert ICMP_UGT if both are non-negative? - Pred = ICmpInst::ICMP_SGT; - RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); - DecreasedRightValueByOne = true; + if (StepCI->isOne()) { + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (++i != len) { while (++i < len) { + // ... ---> ... + // } } + // If both parts are known non-negative, it is profitable to use + // unsigned comparison in increasing loop. This allows us to make the + // comparison check against "RightSCEV + 1" more optimistic. + if (SE.isKnownNonNegative(IndVarStart) && + SE.isKnownNonNegative(RightSCEV)) + Pred = ICmpInst::ICMP_ULT; + else + Pred = ICmpInst::ICMP_SLT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { + // while (true) { while (true) { + // if (++i == len) ---> if (++i > len - 1) + // break; break; + // ... ... + // } } + // TODO: Insert ICMP_UGT if both are non-negative? + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } } bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); @@ -857,7 +895,9 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, IsSignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; if (LatchBrExitIdx == 0) { - if (CanBeMax(SE, RightSCEV, IsSignedPredicate)) { + const SCEV *StepMinusOne = SE.getMinusSCEV(Step, + SE.getOne(Step->getType())); + if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) { // TODO: this restriction is easily removable -- we just have to // remember that the icmp was an slt and not an sle. FailureReason = "limit may overflow when coercing le to lt"; @@ -866,7 +906,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, if (!SE.isLoopEntryGuardedByCond( &L, BoundPred, IndVarStart, - SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())))) { + SE.getAddExpr(RightSCEV, Step))) { FailureReason = "Induction variable start not bounded by upper limit"; return None; } @@ -887,26 +927,28 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, } } else { bool IncreasedRightValueByOne = false; - // Try to turn eq/ne predicates to those we can work with. - if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) - // while (--i != len) { while (--i > len) { - // ... ---> ... - // } } - // We intentionally don't turn the predicate into UGT even if we know that - // both operands are non-negative, because it will only pessimize our - // check against "RightSCEV - 1". - Pred = ICmpInst::ICMP_SGT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) { - // while (true) { while (true) { - // if (--i == len) ---> if (--i < len + 1) - // break; break; - // ... ... - // } } - // TODO: Insert ICMP_ULT if both are non-negative? - Pred = ICmpInst::ICMP_SLT; - RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); - IncreasedRightValueByOne = true; + if (StepCI->isMinusOne()) { + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (--i != len) { while (--i > len) { + // ... ---> ... + // } } + // We intentionally don't turn the predicate into UGT even if we know + // that both operands are non-negative, because it will only pessimize + // our check against "RightSCEV - 1". + Pred = ICmpInst::ICMP_SGT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) { + // while (true) { while (true) { + // if (--i == len) ---> if (--i < len + 1) + // break; break; + // ... ... + // } } + // TODO: Insert ICMP_ULT if both are non-negative? + Pred = ICmpInst::ICMP_SLT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } } bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); @@ -928,7 +970,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, IsSignedPredicate ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; if (LatchBrExitIdx == 0) { - if (CanBeMin(SE, RightSCEV, IsSignedPredicate)) { + const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); + if (SumCanReachMin(SE, RightSCEV, StepPlusOne, IsSignedPredicate)) { // TODO: this restriction is easily removable -- we just have to // remember that the icmp was an sgt and not an sge. FailureReason = "limit may overflow when coercing ge to gt"; @@ -979,6 +1022,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, Result.LatchExit = LatchExit; Result.LatchBrExitIdx = LatchBrExitIdx; Result.IndVarStart = IndVarStartV; + Result.IndVarStep = StepCI; Result.IndVarNext = LeftValue; Result.IndVarIncreasing = IsIncreasing; Result.LoopExitAt = RightValue; @@ -1529,8 +1573,6 @@ InductiveRangeCheck::computeSafeIterationSpace( // this inductive range check is a range check on the "C + D * I" ("C" is // getOffset() and "D" is getScale()). We rewrite the value being range // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA". - // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code - // can be generalized as needed. // // The actual inequalities we solve are of the form // @@ -1567,11 +1609,9 @@ InductiveRangeCheck::computeSafeIterationSpace( return None; ConstantInt *ConstD = D->getValue(); - if (!(ConstD->isMinusOne() || ConstD->isOne())) - return None; + assert(!ConstD->isZero() && "Recurrence with zero step?"); const SCEV *M = SE.getMinusSCEV(C, A); - const SCEV *Begin = SE.getNegativeSCEV(M); const SCEV *UpperLimit = nullptr; @@ -1659,11 +1699,8 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } LoopStructure LS = MaybeLoopStructure.getValue(); - bool Increasing = LS.IndVarIncreasing; - const SCEV *MinusOne = - SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true); const SCEVAddRecExpr *IndVar = - cast(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne)); + cast(SE.getMinusSCEV(SE.getSCEV(LS.IndVarNext), SE.getSCEV(LS.IndVarStep))); Optional SafeIterRange; Instruction *ExprInsertPt = Preheader->getTerminator(); diff --git a/llvm/test/Transforms/IRCE/stride_more_than_1.ll b/llvm/test/Transforms/IRCE/stride_more_than_1.ll new file mode 100644 index 0000000..0918aeb --- /dev/null +++ b/llvm/test/Transforms/IRCE/stride_more_than_1.ll @@ -0,0 +1,468 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_06: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_07: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_08: constrained Loop at depth 1 containing: %loop
,%in.bounds + +; IV = 0; IV s -1; IV -= 7; 0 <= Len <= 50. IRCE is allowed. +define void @test_05(i32* %arr, i32* %a_len_ptr) { + +; CHECK: @test_05( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr +; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp sgt i32 100, %exit.preloop.at +; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label %loop.preloop +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ] +; CHECK-NEXT: %idx.next = add i32 %idx, -7 +; CHECK-NEXT: %abc = icmp slt i32 %idx, %len +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp sgt i32 %idx.next, -1 +; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit +; CHECK: loop.preloop: +; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 100, %loop.preloop.preheader ] +; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7 +; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len +; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp sgt i32 %idx.next.preloop, -1 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp sgt i32 %idx.next.preloop, %exit.preloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector +; CHECK: preloop.exit.selector: +; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp sgt i32 %idx.next.preloop.lcssa, -1 +; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 100, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i32 [ 100, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ] +; CHECK-NEXT: br label %mainloop + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -7 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp sgt i32 %idx.next, -1 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; IV = MAX_INT - 7; IV >u 6; IV -= 7; 10 <= Len <= 50. IRCE is allowed. +define void @test_06(i32* %arr, i32* %a_len_ptr) { + +; CHECK: @test_06( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr +; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483640, %exit.preloop.at +; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label %loop.preloop +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ] +; CHECK-NEXT: %idx.next = add i32 %idx, -7 +; CHECK-NEXT: %abc = icmp slt i32 %idx, %len +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp ugt i32 %idx.next, 6 +; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit +; CHECK: loop.preloop: +; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483640, %loop.preloop.preheader ] +; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7 +; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len +; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp ugt i32 %idx.next.preloop, 6 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 %idx.next.preloop, %exit.preloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector +; CHECK: preloop.exit.selector: +; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ugt i32 %idx.next.preloop.lcssa, 6 +; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 2147483640, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i32 [ 2147483640, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ] +; CHECK-NEXT: br label %mainloop + +entry: + %len = load i32, i32* %a_len_ptr, !range !3 + br label %loop + +loop: + %idx = phi i32 [ 2147483640, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -7 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 6 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; IV = MAX_INT - 7; IV >u 5; IV -= 7; 10 <= Len <= 50. IRCE is not allowed, +; because we can cross the 0 border. +define void @test_07(i32* %arr, i32* %a_len_ptr) { + +; CHECK: @test_07( + +entry: + %len = load i32, i32* %a_len_ptr, !range !3 + br label %loop + +loop: + %idx = phi i32 [ 2147483640, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -7 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 5 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; IV = MAX_INT; IV >u 6; IV -= 7; 10 <= Len <= 50. IRCE is allowed. +define void @test_08(i32* %arr, i32* %a_len_ptr) { + +; CHECK: @test_08( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr +; CHECK-NEXT: %exit.preloop.at = add i32 %len, -1 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 2147483647, %exit.preloop.at +; CHECK-NEXT: br i1 [[COND1]], label %loop.preloop.preheader, label %preloop.pseudo.exit +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label %loop.preloop +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ] +; CHECK-NEXT: %idx.next = add i32 %idx, -7 +; CHECK-NEXT: %abc = icmp slt i32 %idx, %len +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp ugt i32 %idx.next, 6 +; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit +; CHECK: loop.preloop: +; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483647, %loop.preloop.preheader ] +; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -7 +; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len +; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp ugt i32 %idx.next.preloop, 6 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 %idx.next.preloop, %exit.preloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop, label %preloop.exit.selector +; CHECK: preloop.exit.selector: +; CHECK-NEXT: %idx.next.preloop.lcssa = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ugt i32 %idx.next.preloop.lcssa, 6 +; CHECK-NEXT: br i1 [[COND3]], label %preloop.pseudo.exit, label %exit +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: %idx.preloop.copy = phi i32 [ 2147483647, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i32 [ 2147483647, %entry ], [ %idx.next.preloop.lcssa, %preloop.exit.selector ] +; CHECK-NEXT: br label %mainloop + +entry: + %len = load i32, i32* %a_len_ptr, !range !3 + br label %loop + +loop: + %idx = phi i32 [ 2147483647, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -7 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 6 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +!0 = !{i32 0, i32 50} +!1 = !{i32 0, i32 2147483640} +!2 = !{i32 0, i32 2147483641} +!3 = !{i32 10, i32 50} -- 2.7.4