From: Philip Reames Date: Sat, 1 Jun 2019 00:31:58 +0000 (+0000) Subject: [LoopPred] Handle a subset of NE comparison based latches X-Git-Tag: llvmorg-9.0.0-rc1~4067 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=099eca832e7ef22c7229bc707789bc680ea228bd;p=platform%2Fupstream%2Fllvm.git [LoopPred] Handle a subset of NE comparison based latches At the moment, LoopPredication completely bails out if it sees a latch of the form: %cmp = icmp ne %iv, %N br i1 %cmp, label %loop, label %exit OR %cmp = icmp ne %iv.next, %NPlus1 br i1 %cmp, label %loop, label %exit This is unfortunate since this is exactly the form that LFTR likes to produce. So, go ahead and recognize simple cases where we can. For pre-increment loops, we leverage the fact that LFTR likes canonical counters (i.e. those starting at zero) and a (presumed) range fact on RHS to discharge the check trivially. For post-increment forms, the key insight is in remembering that LFTR had to insert an (N+1) for the RHS. CVP can hopefully prove that add to be nsw/nuw (if there's an appropriate range on N to start with). This leaves us with both the post-inc IV and the RHS involving an nsw/nuw add, and SCEV can discharge that with no problem. This does still need to be extended to handle non-one steps, or other harder patterns of variable (but range restricted) starting values. That'll come later. 
Differential Revision: https://reviews.llvm.org/D62748 llvm-svn: 362282 --- diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 15e5b6433af2..1503b5000c14 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -230,23 +230,23 @@ static cl::opt PredicateWidenableBranchGuards( cl::init(true)); namespace { -class LoopPredication { - /// Represents an induction variable check: - /// icmp Pred, , - struct LoopICmp { - ICmpInst::Predicate Pred; - const SCEVAddRecExpr *IV; - const SCEV *Limit; - LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, - const SCEV *Limit) - : Pred(Pred), IV(IV), Limit(Limit) {} - LoopICmp() {} - void dump() { - dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV - << ", Limit = " << *Limit << "\n"; - } - }; +/// Represents an induction variable check: +/// icmp Pred, , +struct LoopICmp { + ICmpInst::Predicate Pred; + const SCEVAddRecExpr *IV; + const SCEV *Limit; + LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, + const SCEV *Limit) + : Pred(Pred), IV(IV), Limit(Limit) {} + LoopICmp() {} + void dump() { + dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV + << ", Limit = " << *Limit << "\n"; + } +}; +class LoopPredication { AliasAnalysis *AA; ScalarEvolution *SE; BranchProbabilityInfo *BPI; @@ -382,7 +382,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } -Optional +Optional LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS, Value *RHS) { const SCEV *LHSS = SE->getSCEV(LHS); @@ -428,7 +428,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, return Builder.CreateICmp(Pred, LHSV, RHSV); } -Optional +Optional LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) { auto *LatchType = LatchCheck.IV->getType(); @@ -518,7 +518,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) 
{ } Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( - LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, + LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander, Instruction *Guard) { auto *Ty = RangeCheck.IV->getType(); // Generate the widened condition for the forward loop: @@ -567,7 +567,7 @@ Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( } Optional LoopPredication::widenICmpRangeCheckDecrementingLoop( - LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, + LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander, Instruction *Guard) { auto *Ty = RangeCheck.IV->getType(); const SCEV *GuardStart = RangeCheck.IV->getStart(); @@ -614,6 +614,17 @@ Optional LoopPredication::widenICmpRangeCheckDecrementingLoop( return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } +static void normalizePredicate(ScalarEvolution *SE, Loop *L, + LoopICmp& RC) { + // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form. + // Normalize back to the ULT/SLT form for ease of handling. + if (RC.Pred == ICmpInst::ICMP_NE && + RC.IV->getStepRecurrence(*SE)->isOne() && + SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit)) + RC.Pred = ICmpInst::ICMP_ULT; +} + + /// If ICI can be widened to a loop invariant condition emits the loop /// invariant condition in the loop preheader and return it, otherwise /// returns None. 
@@ -798,7 +809,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions( return true; } -Optional LoopPredication::parseLoopLatchICmp() { +Optional LoopPredication::parseLoopLatchICmp() { using namespace PatternMatch; BasicBlock *LoopLatch = L->getLoopLatch(); @@ -852,6 +863,7 @@ Optional LoopPredication::parseLoopLatchICmp() { } }; + normalizePredicate(SE, L, *Result); if (IsUnsupportedPredicate(Step, Result->Pred)) { LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred << ")!\n"); diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll index e71a8b113a83..6f294b5b67ee 100644 --- a/llvm/test/Transforms/LoopPredication/basic.ll +++ b/llvm/test/Transforms/LoopPredication/basic.ll @@ -1603,11 +1603,13 @@ define i32 @ne_latch_zext(i32* %array, i32 %length, i16 %n16) { ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[N:%.*]] = zext i16 [[N16:%.*]] to i32 ; CHECK-NEXT: [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[NPLUS1]], [[LENGTH:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[NPLUS1]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] @@ -1637,11 +1639,14 @@ define i32 @ne_latch_zext_preinc(i32* %array, i32 %length, i16 %n16) { ; CHECK-LABEL: @ne_latch_zext_preinc( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[N:%.*]] = zext i16 [[N16:%.*]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] @@ -1715,11 +1720,14 @@ define i32 @ne_latch_dom_check_preinc(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]