From 6cdca906c79fb4e0eae940f11d585c1b08358104 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 7 Sep 2021 16:58:18 -0700 Subject: [PATCH] [SCEV] Use no-self-wrap flags infered from exit structure to compute trip count The basic problem being solved is that we largely give up when encountering a trip count involving an IV which is not an addrec. We will fall back to the brute force constant eval, but that doesn't have the information about the fact that we can't cycle back through the same set of values. There's a high level design question of whether this is the right place to handle this, and if not, where that place is. The major alternative here would be to return a conservative upper bound, and then rely on two invocations of indvars to add the facts to the narrow IV, and then reconstruct SCEV. (I have not implemented the alternative and am not 100% sure this would work out.) That's arguably more in line with existing code, but I find this substantially easier to reason about. During review, no one expressed a strong opinion, so we went with this one. Differential Revision: D108651 --- llvm/lib/Analysis/ScalarEvolution.cpp | 83 +++++++++++++++------- .../ScalarEvolution/trip-count-implied-addrec.ll | 4 +- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c4fc253..50901f3 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -11581,6 +11581,62 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; + auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) { + // Can we prove this loop *must* be UB if overflow of IV occurs? + // Reasoning goes as follows: + // * Suppose the IV did self wrap. + // * If Stride evenly divides the iteration space, then once wrap + // occurs, the loop must revisit the same values. + // * We know that RHS is invariant, and that none of those values + // caused this exit to be taken previously. Thus, this exit is + // dynamically dead. + // * If this is the sole exit, then a dead exit implies the loop + // must be infinite if there are no abnormal exits. + // * If the loop were infinite, then it must either not be mustprogress + // or have side effects. Otherwise, it must be UB. + // * It can't (by assumption), be UB so we have contradicted our + // premise and can conclude the IV did not in fact self-wrap. + if (!isLoopInvariant(RHS, L)) + return false; + + auto *StrideC = dyn_cast(AR->getStepRecurrence(*this)); + if (!StrideC || !StrideC->getAPInt().isPowerOf2()) + return false; + + if (!ControlsExit || !loopHasNoAbnormalExits(L)) + return false; + + return loopIsFiniteByAssumption(L); + }; + + if (!IV) { + if (auto *ZExt = dyn_cast(LHS)) { + const SCEVAddRecExpr *AR = dyn_cast(ZExt->getOperand()); + if (AR && AR->getLoop() == L && AR->isAffine()) { + auto Flags = AR->getNoWrapFlags(); + if (!hasFlags(Flags, SCEV::FlagNW) && canAssumeNoSelfWrap(AR)) { + Flags = setFlags(Flags, SCEV::FlagNW); + + SmallVector Operands{AR->operands()}; + Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); + + setNoWrapFlags(const_cast(AR), Flags); + } + if (AR->hasNoUnsignedWrap()) { + // Emulate what getZeroExtendExpr would have done during construction + // if we'd been able to infer the fact just above at that time. + const SCEV *Step = AR->getStepRecurrence(*this); + Type *Ty = ZExt->getType(); + auto *S = getAddRecExpr( + getExtendAddRecStart(AR, Ty, this, 0), + getZeroExtendExpr(Step, Ty, 0), L, AR->getNoWrapFlags()); + IV = dyn_cast(S); + } + } + } + } + + if (!IV && AllowPredicates) { // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the @@ -11694,37 +11750,12 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, } } else if (!Stride->isOne() && !NoWrap) { auto isUBOnWrap = [&]() { - // Can we prove this loop *must* be UB if overflow of IV occurs? - // Reasoning goes as follows: - // * Suppose the IV did self wrap. - // * If Stride evenly divides the iteration space, then once wrap - // occurs, the loop must revisit the same values. - // * We know that RHS is invariant, and that none of those values - // caused this exit to be taken previously. Thus, this exit is - // dynamically dead. - // * If this is the sole exit, then a dead exit implies the loop - // must be infinite if there are no abnormal exits. - // * If the loop were infinite, then it must either not be mustprogress - // or have side effects. Otherwise, it must be UB. - // * It can't (by assumption), be UB so we have contradicted our - // premise and can conclude the IV did not in fact self-wrap. // From no-self-wrap, we need to then prove no-(un)signed-wrap. This // follows trivially from the fact that every (un)signed-wrapped, but // not self-wrapped value must be LT than the last value before // (un)signed wrap. Since we know that last value didn't exit, nor // will any smaller one. - - if (!isLoopInvariant(RHS, L)) - return false; - - auto *StrideC = dyn_cast(Stride); - if (!StrideC || !StrideC->getAPInt().isPowerOf2()) - return false; - - if (!ControlsExit || !loopHasNoAbnormalExits(L)) - return false; - - return loopIsFiniteByAssumption(L); + return canAssumeNoSelfWrap(IV); }; // Avoid proven overflow cases: this will ensure that the backedge taken diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll index ab4a435..f75d78e 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll @@ -9,8 +9,8 @@ @G = external global i8 ; CHECK-LABEL: Determining loop execution counts for: @nw_implies_nuw -; CHECK: Loop %for.body: Unpredictable backedge-taken count -; CHECK: Loop %for.body: Unpredictable max backedge-taken count +; CHECK: Loop %for.body: backedge-taken count is %n +; CHECK: Loop %for.body: max backedge-taken count is -1 define void @nw_implies_nuw(i16 %n) mustprogress { entry: br label %for.body -- 2.7.4