}
};
- /// SCEVWrapPredicate - This class represents an assumption
- /// made on an AddRec expression. Given an affine AddRec expression
- /// {a,+,b}, we assume that it has the nssw or nusw flags (defined
- /// below).
+ /// SCEVWrapPredicate - This class represents an assumption made on an AddRec
+ /// expression. Given an affine AddRec expression {a,+,b}, we assume that it
+ /// has the nssw or nusw flags (defined below) in the first X iterations of
+ /// the loop, where X is a SCEV expression returned by
+ /// getPredicatedBackedgeTakenCount).
+ ///
+ /// Note that this does not imply that X is equal to the backedge taken
+ /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a
+ /// predicated backedge taken count of X, we only guarantee that {0,+,1} has
+ /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we
+ /// have more than X iterations.
class SCEVWrapPredicate final : public SCEVPredicate {
public:
/// Similar to SCEV::NoWrapFlags, but with slightly different semantics
const SCEV *Exact;
const SCEV *Max;
+ /// A predicate union guard for this ExitLimit. The result is only
+ /// valid if this predicate evaluates to 'true' at run-time.
+ SCEVUnionPredicate Pred;
+
/*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {}
- ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {
+ ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P)
+ : Exact(E), Max(M), Pred(P) {
assert((isa<SCEVCouldNotCompute>(Exact) ||
!isa<SCEVCouldNotCompute>(Max)) &&
"Exact is not allowed to be less precise than Max");
return !isa<SCEVCouldNotCompute>(Exact) ||
!isa<SCEVCouldNotCompute>(Max);
}
+
+ /// Test whether this ExitLimit contains all information.
+ bool hasFullInfo() const { return !isa<SCEVCouldNotCompute>(Exact); }
};
+ /// Forward declaration of ExitNotTakenExtras
+ struct ExitNotTakenExtras;
+
/// Information about the number of times a particular loop exit may be
/// reached before exiting the loop.
struct ExitNotTakenInfo {
AssertingVH<BasicBlock> ExitingBlock;
const SCEV *ExactNotTaken;
- PointerIntPair<ExitNotTakenInfo*, 1> NextExit;
+
+ PointerIntPair<ExitNotTakenExtras *, 1> ExtraInfo;
ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {}
+ ExitNotTakenInfo(BasicBlock *ExitBlock, const SCEV *Expr,
+ ExitNotTakenExtras *Ptr)
+ : ExitingBlock(ExitBlock), ExactNotTaken(Expr) {
+ ExtraInfo.setPointer(Ptr);
+ }
/// Return true if all loop exits are computable.
- bool isCompleteList() const {
- return NextExit.getInt() == 0;
+ bool isCompleteList() const { return ExtraInfo.getInt() == 0; }
+
+ /// Sets the incomplete property, indicating that one of the loop exits
+ /// doesn't have a corresponding ExitNotTakenInfo entry.
+ void setIncomplete() { ExtraInfo.setInt(1); }
+
+ /// Returns a pointer to the predicate associated with this information,
+ /// or nullptr if this doesn't exist (meaning always true).
+ SCEVUnionPredicate *getPred() const {
+ if (auto *Info = ExtraInfo.getPointer())
+ return &Info->Pred;
+
+ return nullptr;
}
- void setIncomplete() { NextExit.setInt(1); }
+ /// Return true if the SCEV predicate associated with this information
+ /// is always true.
+ bool hasAlwaysTruePred() const {
+ return !getPred() || getPred()->isAlwaysTrue();
+ }
- /// Return a pointer to the next exit's not-taken info.
- ExitNotTakenInfo *getNextExit() const {
- return NextExit.getPointer();
+ /// Defines a simple forward iterator for ExitNotTakenInfo.
+ class ExitNotTakenInfoIterator
+ : public std::iterator<std::forward_iterator_tag, ExitNotTakenInfo> {
+ const ExitNotTakenInfo *Start;
+ unsigned Position;
+
+ public:
+ ExitNotTakenInfoIterator(const ExitNotTakenInfo *Start,
+ unsigned Position)
+ : Start(Start), Position(Position) {}
+
+ const ExitNotTakenInfo &operator*() const {
+ if (Position == 0)
+ return *Start;
+
+ return Start->ExtraInfo.getPointer()->Exits[Position - 1];
+ }
+
+ const ExitNotTakenInfo *operator->() const {
+ if (Position == 0)
+ return Start;
+
+ return &Start->ExtraInfo.getPointer()->Exits[Position - 1];
+ }
+
+ bool operator==(const ExitNotTakenInfoIterator &RHS) const {
+ return Start == RHS.Start && Position == RHS.Position;
+ }
+
+ bool operator!=(const ExitNotTakenInfoIterator &RHS) const {
+ return Start != RHS.Start || Position != RHS.Position;
+ }
+
+ ExitNotTakenInfoIterator &operator++() { // Preincrement
+ if (!Start)
+ return *this;
+
+ unsigned Elements =
+ Start->ExtraInfo.getPointer()
+ ? Start->ExtraInfo.getPointer()->Exits.size() + 1
+ : 1;
+
+ ++Position;
+
+ // We've run out of elements.
+ if (Position == Elements) {
+ Start = nullptr;
+ Position = 0;
+ }
+
+ return *this;
+ }
+ ExitNotTakenInfoIterator operator++(int) { // Postincrement
+ ExitNotTakenInfoIterator Tmp = *this;
+ ++*this;
+ return Tmp;
+ }
+ };
+
+ /// Iterators
+ ExitNotTakenInfoIterator begin() const {
+ return ExitNotTakenInfoIterator(this, 0);
+ }
+ ExitNotTakenInfoIterator end() const {
+ return ExitNotTakenInfoIterator(nullptr, 0);
}
+ };
- void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); }
+ /// Describes the extra information that a ExitNotTakenInfo can have.
+ struct ExitNotTakenExtras {
+ /// The predicate associated with the ExitNotTakenInfo struct.
+ SCEVUnionPredicate Pred;
+
+ /// The extra exits in the loop. Only the ExitNotTakenExtras structure
+ /// pointed to by the first ExitNotTakenInfo struct (associated with the
+ /// first loop exit) will populate this vector to prevent having
+ /// redundant information.
+ SmallVector<ExitNotTakenInfo, 4> Exits;
+ };
+
+ /// A struct containing the information attached to a backedge.
+ struct EdgeInfo {
+ EdgeInfo(BasicBlock *Block, const SCEV *Taken, SCEVUnionPredicate &P) :
+ ExitBlock(Block), Taken(Taken), Pred(std::move(P)) {}
+
+ /// The exit basic block.
+ BasicBlock *ExitBlock;
+
+ /// The (exact) number of time we take the edge back.
+ const SCEV *Taken;
+
+ /// The SCEV predicated associated with Taken. If Pred doesn't evaluate
+ /// to true, the information in Taken is not valid (or equivalent with
+ /// a CouldNotCompute.
+ SCEVUnionPredicate Pred;
};
/// Information about the backedge-taken count of a loop. This currently
ExitNotTakenInfo ExitNotTaken;
/// An expression indicating the least maximum backedge-taken count of the
- /// loop that is known, or a SCEVCouldNotCompute.
+ /// loop that is known, or a SCEVCouldNotCompute. This expression is only
+ /// valid if the predicates associated with all loop exits are true.
const SCEV *Max;
public:
BackedgeTakenInfo() : Max(nullptr) {}
/// Initialize BackedgeTakenInfo from a list of exact exit counts.
- BackedgeTakenInfo(
- SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
- bool Complete, const SCEV *MaxCount);
+ BackedgeTakenInfo(SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete,
+ const SCEV *MaxCount);
/// Test whether this BackedgeTakenInfo contains any computed information,
/// or whether it's all SCEVCouldNotCompute values.
return ExitNotTaken.ExitingBlock || !isa<SCEVCouldNotCompute>(Max);
}
+ /// Test whether this BackedgeTakenInfo contains complete information.
+ bool hasFullInfo() const { return ExitNotTaken.isCompleteList(); }
+
/// Return an expression indicating the exact backedge-taken count of the
- /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the
+ /// loop if it is known or SCEVCouldNotCompute otherwise. This is the
/// number of times the loop header can be guaranteed to execute, minus
/// one.
- const SCEV *getExact(ScalarEvolution *SE) const;
+ ///
+ /// If the SCEV predicate associated with the answer can be different
+ /// from AlwaysTrue, we must add a (non null) Predicates argument.
+ /// The SCEV predicate associated with the answer will be added to
+ /// Predicates. A run-time check needs to be emitted for the SCEV
+ /// predicate in order for the answer to be valid.
+ ///
+ /// Note that we should always know if we need to pass a predicate
+ /// argument or not from the way the ExitCounts vector was computed.
+ /// If we allowed SCEV predicates to be generated when populating this
+ /// vector, this information can contain them and therefore a
+ /// SCEVPredicate argument should be added to getExact.
+ const SCEV *getExact(ScalarEvolution *SE,
+ SCEVUnionPredicate *Predicates = nullptr) const;
/// Return the number of times this loop exit may fall through to the back
/// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
/// Cache the backedge-taken count of the loops for this function as they
/// are computed.
- DenseMap<const Loop*, BackedgeTakenInfo> BackedgeTakenCounts;
+ DenseMap<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts;
+
+ /// Cache the predicated backedge-taken count of the loops for this
+ /// function as they are computed.
+ DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
/// This map contains entries for all of the PHI instructions that we
/// attempt to compute constant evolutions for. This allows us to avoid
void forgetSymbolicName(Instruction *I, const SCEV *SymName);
/// Return the BackedgeTakenInfo for the given loop, lazily computing new
- /// values if the loop hasn't been analyzed yet.
+ /// values if the loop hasn't been analyzed yet. The returned result is
+ /// guaranteed not to be predicated.
const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
+ /// Similar to getBackedgeTakenInfo, but will add predicates as required
+ /// with the purpose of returning complete information.
+ const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L);
+
/// Compute the number of times the specified loop will iterate.
- BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L);
+ /// If AllowPredicates is set, we will create new SCEV predicates as
+ /// necessary in order to return an exact answer.
+ BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L,
+ bool AllowPredicates = false);
/// Compute the number of times the backedge of the specified loop will
- /// execute if it exits via the specified block.
- ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock);
+ /// execute if it exits via the specified block. If AllowPredicates is set,
+ /// this call will try to use a minimal set of SCEV predicates in order to
+ /// return an exact answer.
+ ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+ bool AllowPredicates = false);
/// Compute the number of times the backedge of the specified loop will
/// execute if its exit condition were a conditional branch of ExitCond,
- /// TBB, and FBB.
+ /// TBB, and FBB. If AllowPredicates is set, this call will try to use a
+ /// minimal set of SCEV predicates in order to return an exact answer.
ExitLimit computeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool IsSubExpr);
+ bool IsSubExpr,
+ bool AllowPredicates = false);
/// Compute the number of times the backedge of the specified loop will
/// execute if its exit condition were a conditional branch of the ICmpInst
- /// ExitCond, TBB, and FBB.
+ /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try
+ /// to use a minimal set of SCEV predicates in order to return an exact
+ /// answer.
ExitLimit computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool IsSubExpr);
+ bool IsSubExpr,
+ bool AllowPredicates = false);
/// Compute the number of times the backedge of the specified loop will
/// execute if its exit condition were a switch with a single exiting case
/// Return the number of times an exit condition comparing the specified
/// value to zero will execute. If not computable, return CouldNotCompute.
- ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr);
+ /// If AllowPredicates is set, this call will try to use a minimal set of
+ /// SCEV predicates in order to return an exact answer.
+ ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr,
+ bool AllowPredicates = false);
/// Return the number of times an exit condition checking the specified
/// value for nonzero will execute. If not computable, return
/// Return the number of times an exit condition containing the specified
/// less-than comparison will execute. If not computable, return
/// CouldNotCompute. isSigned specifies whether the less-than is signed.
- ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned, bool IsSubExpr);
+ /// If AllowPredicates is set, this call will try to use a minimal set of
+ /// SCEV predicates in order to return an exact answer.
+ ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ bool isSigned, bool IsSubExpr,
+ bool AllowPredicates = false);
+
ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned, bool IsSubExpr);
+ const Loop *L, bool isSigned, bool IsSubExpr,
+ bool AllowPredicates = false);
/// Return a predecessor of BB (which may not be an immediate predecessor)
/// which has exactly one successor from which BB is reachable, or null if
///
const SCEV *getBackedgeTakenCount(const Loop *L);
+ /// Similar to getBackedgeTakenCount, except it will add a set of
+ /// SCEV predicates to Predicates that are required to be true in order for
+ /// the answer to be correct. Predicates can be checked with run-time
+ /// checks and can be used to perform loop versioning.
+ const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
+ SCEVUnionPredicate &Predicates);
+
/// Similar to getBackedgeTakenCount, except return the least SCEV value
/// that is known never to be less than the actual backedge taken count.
const SCEV *getMaxBackedgeTakenCount(const Loop *L);
/// by ScalarEvolution is guaranteed to be preserved, even when adding new
/// predicates.
const SCEV *getSCEV(Value *V);
+ /// Get the (predicated) backedge count for the analyzed loop.
+ const SCEV *getBackedgeTakenCount();
/// \brief Adds a new predicate.
void addPredicate(const SCEVPredicate &Pred);
/// \brief Attempts to produce an AddRecExpr for V by adding additional
/// figure out if the predicate has changed from the last rewrite of the
/// SCEV. If so, we need to perform a new rewrite.
unsigned Generation;
+ /// The backedge taken count.
+ const SCEV *BackedgeCount;
};
}
else {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
- const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+ const SCEV *Ex = PSE.getBackedgeTakenCount();
ScStart = AR->getStart();
ScEnd = AR->evaluateAtIteration(Ex, *SE);
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ const SCEV *ExitCount = PSE.getBackedgeTakenCount();
if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
emitAnalysis(LoopAccessReport()
<< "could not determine number of loop iterations");
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
}
+const SCEV *
+ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
+ SCEVUnionPredicate &Preds) {
+ return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds);
+}
+
/// getBackedgeTakenCount - If the specified loop has a predictable
/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
/// object. The backedge-taken count is the number of times the loop header
}
const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
+ auto &BTI = getBackedgeTakenInfo(L);
+ if (BTI.hasFullInfo())
+ return BTI;
+
+ auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
+
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BackedgeTakenInfo Result =
+ computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
+
+ return PredicatedBackedgeTakenCounts.find(L)->second = Result;
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
/// compute a trip count, or if the loop is deleted.
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
- DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
- BackedgeTakenCounts.find(L);
- if (BTCPos != BackedgeTakenCounts.end()) {
- BTCPos->second.clear();
- BackedgeTakenCounts.erase(BTCPos);
- }
+ auto RemoveLoopFromBackedgeMap =
+ [L](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+ auto BTCPos = Map.find(L);
+ if (BTCPos != Map.end()) {
+ BTCPos->second.clear();
+ Map.erase(BTCPos);
+ }
+ };
+
+ RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
+ RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
/// is the caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ScalarEvolution::BackedgeTakenInfo::getExact(
+ ScalarEvolution *SE, SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
const SCEV *BECount = nullptr;
- for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
- ENT != nullptr; ENT = ENT->getNextExit()) {
-
- assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+ for (auto &ENT : ExitNotTaken) {
+ assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
if (!BECount)
- BECount = ENT->ExactNotTaken;
- else if (BECount != ENT->ExactNotTaken)
+ BECount = ENT.ExactNotTaken;
+ else if (BECount != ENT.ExactNotTaken)
return SE->getCouldNotCompute();
+ if (Preds && ENT.getPred())
+ Preds->add(ENT.getPred());
+
+ assert((Preds || ENT.hasAlwaysTruePred()) &&
+ "Predicate should be always true!");
}
+
assert(BECount && "Invalid not taken count for loop exit");
return BECount;
}
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
- for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
- ENT != nullptr; ENT = ENT->getNextExit()) {
+ for (auto &ENT : ExitNotTaken)
+ if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred())
+ return ENT.ExactNotTaken;
- if (ENT->ExitingBlock == ExitingBlock)
- return ENT->ExactNotTaken;
- }
return SE->getCouldNotCompute();
}
/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+ for (auto &ENT : ExitNotTaken)
+ if (!ENT.hasAlwaysTruePred())
+ return SE->getCouldNotCompute();
+
return Max ? Max : SE->getCouldNotCompute();
}
if (!ExitNotTaken.ExitingBlock)
return false;
- for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
- ENT != nullptr; ENT = ENT->getNextExit()) {
-
- if (ENT->ExactNotTaken != SE->getCouldNotCompute()
- && SE->hasOperand(ENT->ExactNotTaken, S)) {
+ for (auto &ENT : ExitNotTaken)
+ if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
+ SE->hasOperand(ENT.ExactNotTaken, S))
return true;
- }
- }
+
return false;
}
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
- bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+ SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount)
+ : Max(MaxCount) {
if (!Complete)
ExitNotTaken.setIncomplete();
unsigned NumExits = ExitCounts.size();
if (NumExits == 0) return;
- ExitNotTaken.ExitingBlock = ExitCounts[0].first;
- ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
- if (NumExits == 1) return;
+ ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock;
+ ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken;
+
+ // Determine the number of ExitNotTakenExtras structures that we need.
+ unsigned ExtraInfoSize = 0;
+ if (NumExits > 1)
+ ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()),
+ ExitCounts.end(), [](EdgeInfo &Entry) {
+ return !Entry.Pred.isAlwaysTrue();
+ });
+ else if (!ExitCounts[0].Pred.isAlwaysTrue())
+ ExtraInfoSize = 1;
+
+ ExitNotTakenExtras *ENT = nullptr;
+
+ // Allocate the ExitNotTakenExtras structures and initialize the first
+ // element (ExitNotTaken).
+ if (ExtraInfoSize > 0) {
+ ENT = new ExitNotTakenExtras[ExtraInfoSize];
+ ExitNotTaken.ExtraInfo.setPointer(&ENT[0]);
+ *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred);
+ }
+
+ if (NumExits == 1)
+ return;
+
+ auto &Exits = ExitNotTaken.ExtraInfo.getPointer()->Exits;
// Handle the rare case of multiple computable exits.
- ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+ for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) {
+ ExitNotTakenExtras *Ptr = nullptr;
+ if (!ExitCounts[i].Pred.isAlwaysTrue()) {
+ Ptr = &ENT[PredPos++];
+ Ptr->Pred = std::move(ExitCounts[i].Pred);
+ }
- ExitNotTakenInfo *PrevENT = &ExitNotTaken;
- for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
- PrevENT->setNextExit(ENT);
- ENT->ExitingBlock = ExitCounts[i].first;
- ENT->ExactNotTaken = ExitCounts[i].second;
+ Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr);
}
}
void ScalarEvolution::BackedgeTakenInfo::clear() {
ExitNotTaken.ExitingBlock = nullptr;
ExitNotTaken.ExactNotTaken = nullptr;
- delete[] ExitNotTaken.getNextExit();
+ delete[] ExitNotTaken.ExtraInfo.getPointer();
}
/// computeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
+ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
+ bool AllowPredicates) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+ SmallVector<EdgeInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
+ // Do a union of all the predicates here.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitingBlocks[i];
- ExitLimit EL = computeExitLimit(L, ExitBB);
+ ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
+
+ assert((AllowPredicates || EL.Pred.isAlwaysTrue()) &&
+ "Predicated exit limit when predicates are not allowed!");
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
- ExitCounts.push_back({ExitBB, EL.Exact});
+ ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred));
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
}
ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+ bool AllowPredicates) {
// Okay, we've chosen an exiting block. See what condition causes us to exit
// at this block and remember the exit block and whether all other targets
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
// Proceed to the next level to examine the exit condition expression.
- return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
- BI->getSuccessor(1),
- /*ControlsExit=*/IsOnlyExit);
+ return computeExitLimitFromCond(
+ L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1),
+ /*ControlsExit=*/IsOnlyExit, AllowPredicates);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool ControlsExit) {
+ bool ControlsExit,
+ bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- ControlsExit && !EitherMayExit);
+ ControlsExit && !EitherMayExit,
+ AllowPredicates);
ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- ControlsExit && !EitherMayExit);
+ ControlsExit && !EitherMayExit,
+ AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
BECount = EL0.Exact;
}
+ SCEVUnionPredicate NP;
+ NP.add(&EL0.Pred);
+ NP.add(&EL1.Pred);
// There are cases (e.g. PR26207) where computeExitLimitFromCond is able
// to be more aggressive when computing BECount than when computing
// MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount, NP);
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- ControlsExit && !EitherMayExit);
+ ControlsExit && !EitherMayExit,
+ AllowPredicates);
ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- ControlsExit && !EitherMayExit);
+ ControlsExit && !EitherMayExit,
+ AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
BECount = EL0.Exact;
}
- return ExitLimit(BECount, MaxBECount);
+ SCEVUnionPredicate NP;
+ NP.add(&EL0.Pred);
+ NP.add(&EL1.Pred);
+ return ExitLimit(BECount, MaxBECount, NP);
}
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
- if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
+ ExitLimit EL =
+ computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+ if (EL.hasFullInfo() || !AllowPredicates)
+ return EL;
+
+ // Try again, but use SCEV predicates this time.
+ return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit,
+ /*AllowPredicates=*/true);
+ }
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool ControlsExit) {
+ bool ControlsExit,
+ bool AllowPredicates) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
+ AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
bool IsSigned = Cond == ICmpInst::ICMP_SLT;
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
+ AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
bool IsSigned = Cond == ICmpInst::ICMP_SGT;
- ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
+ ExitLimit EL =
+ HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
+ AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
- return ExitLimit(getCouldNotCompute(), UpperBound);
+ SCEVUnionPredicate P;
+ return ExitLimit(getCouldNotCompute(), UpperBound, P);
}
return getCouldNotCompute();
/// effectively V != 0. We know and take advantage of the fact that this
/// expression only being used in a comparison by zero context.
ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
+ bool AllowPredicates) {
+ SCEVUnionPredicate P;
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
}
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec && AllowPredicates)
+ // Try to make this an AddRec using runtime tests, in the first X
+ // iterations of this loop, where X is the SCEV expression found by the
+ // algorithm below.
+ AddRec = convertSCEVToAddRecWithPredicates(V, L, P);
+
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
// should not accept a root of 2.
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
- return R1; // We found a quadratic root!
+ return ExitLimit(R1, R1, P); // We found a quadratic root!
}
}
return getCouldNotCompute();
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount);
+ return ExitLimit(Distance, MaxBECount, P);
}
// As a special case, handle the instance where Step is a positive power of
auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
auto *WideTy = Distance->getType();
- return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+ const SCEV *Limit =
+ getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+ return ExitLimit(Limit, Limit, P);
}
}
if (ControlsExit && AddRec->hasNoSelfWrap()) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact);
+ return ExitLimit(Exact, Exact, P);
}
// Then, try to solve the above equation provided that Start is constant.
- if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
- return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),
- *this);
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
+ const SCEV *E = SolveLinEquationWithOverflow(
+ StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
+ return ExitLimit(E, E, P);
+ }
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool ControlsExit) {
+ bool ControlsExit, bool AllowPredicates) {
+ SCEVUnionPredicate P;
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!IV && AllowPredicates)
+ // Try to make this an AddRec using runtime tests, in the first X
+ // iterations of this loop, where X is the SCEV expression found by the
+ // algorithm below.
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount, P);
}
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool ControlsExit) {
+ bool ControlsExit, bool AllowPredicates) {
+ SCEVUnionPredicate P;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!IV && AllowPredicates)
+ // Try to make this an AddRec using runtime tests, in the first X
+ // iterations of this loop, where X is the SCEV expression found by the
+ // algorithm below.
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount, P);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
ValueExprMap(std::move(Arg.ValueExprMap)),
WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+ PredicatedBackedgeTakenCounts(
+ std::move(Arg.PredicatedBackedgeTakenCounts)),
ConstantEvolutionLoopExitValue(
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
// that a loop had multiple computable exits.
for (auto &BTCI : BackedgeTakenCounts)
BTCI.second.clear();
+ for (auto &BTCI : PredicatedBackedgeTakenCounts)
+ BTCI.second.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
OS << "Unpredictable max backedge-taken count. ";
}
+ OS << "\n"
+ "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+
+ SCEVUnionPredicate Pred;
+ auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
+ if (!isa<SCEVCouldNotCompute>(PBT)) {
+ OS << "Predicated backedge-taken count is " << *PBT << "\n";
+ OS << " Predicates:\n";
+ Pred.print(OS, 4);
+ } else {
+ OS << "Unpredictable predicated backedge-taken count. ";
+ }
OS << "\n";
}
ExprValueMap.erase(S);
HasRecMap.erase(S);
- for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
- BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
- BackedgeTakenInfo &BEInfo = I->second;
- if (BEInfo.hasOperand(S, this)) {
- BEInfo.clear();
- BackedgeTakenCounts.erase(I++);
- }
- else
- ++I;
- }
+ auto RemoveSCEVFromBackedgeMap =
+ [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+ for (auto I = Map.begin(), E = Map.end(); I != E;) {
+ BackedgeTakenInfo &BEInfo = I->second;
+ if (BEInfo.hasOperand(S, this)) {
+ BEInfo.clear();
+ Map.erase(I++);
+ } else
+ ++I;
+ }
+ };
+
+ RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
+ RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
typedef DenseMap<const Loop *, std::string> VerifyMap;
PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
- : SE(SE), L(L), Generation(0) {}
+ : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {}
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *Expr = SE.getSCEV(V);
return NewSCEV;
}
+const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
+ if (!BackedgeCount) {
+ SCEVUnionPredicate BackedgePred;
+ BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
+ addPredicate(BackedgePred);
+ }
+ return BackedgeCount;
+}
+
void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
if (Preds.implies(&Pred))
return;
return New;
}
-PredicatedScalarEvolution::
-PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) :
- RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
- Generation(Init.Generation) {
+PredicatedScalarEvolution::PredicatedScalarEvolution(
+ const PredicatedScalarEvolution &Init)
+ : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
+ Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I)
FlagsMap.insert(*I);
}
assert(AR->isAffine() && "Cannot generate RT check for "
"non-affine expression");
- const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop());
+ SCEVUnionPredicate Pred;
+ const SCEV *ExitCount =
+ SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
// Find the loop boundaries.
ScalarEvolution *SE = PSE.getSE();
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
+ const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
"Invalid loop count");
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ const SCEV *ExitCount = PSE.getBackedgeTakenCount();
if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
emitAnalysis(VectorizationReport()
<< "could not determine number of loop iterations");
--- /dev/null
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@A = weak global [1000 x i32] zeroinitializer, align 32
+
+; The resulting predicate is i16 {0,+,1} <nssw>, meanining
+; that the resulting backedge expression will be valid for:
+; (1 + (-1 smax %M)) <= MAX_INT16
+;
+; At the limit condition for M (MAX_INT16 - 1) we have in the
+; last iteration:
+; i0 <- MAX_INT16
+; i0.ext <- MAX_INT16
+;
+; and therefore no wrapping happend for i0 or i0.ext
+; throughout the execution of the loop. The resulting predicated
+; backedge taken count is correct.
+
+; CHECK: Classifying expressions for: @test1
+; CHECK: %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT: --> (sext i16 {0,+,1}<%bb3> to i32)
+; CHECK: Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M))
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%bb3> Added Flags: <nssw>
+define void @test1(i32 %N, i32 %M) {
+entry:
+ br label %bb3
+
+bb: ; preds = %bb3
+ %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; <i32*> [#uses=1]
+ store i32 123, i32* %tmp
+ %tmp2 = add i16 %i.0, 1 ; <i32> [#uses=1]
+ br label %bb3
+
+bb3: ; preds = %bb, %entry
+ %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ] ; <i32> [#uses=3]
+ %i.0.ext = sext i16 %i.0 to i32
+ %tmp3 = icmp sle i32 %i.0.ext, %M ; <i1> [#uses=1]
+ br i1 %tmp3, label %bb, label %bb5
+
+bb5: ; preds = %bb3
+ br label %return
+
+return: ; preds = %bb5
+ ret void
+}
+
+; The predicated backedge taken count is:
+; (2 + (zext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32)))
+; smax (-1 + (-1 * %M)))
+; )
+
+; -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32))
+; The predicated backedge taken count is 0.
+; From the IR, this is correct since we will bail out at the
+; first iteration.
+
+
+; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32))
+; or: %M < 1 + (sext i16 %Start to i32)
+;
+; The predicated backedge taken count is 1 + (zext i16 %Start to i32) - %M
+;
+; If %M >= MIN_INT + 1, this predicated backedge taken count would be correct (even
+; without predicates). However, for %M < MIN_INT this would be an infinite loop.
+; In these cases, the {%Start,+,-1} <nusw> predicate would be false, as the
+; final value of the expression {%Start,+,-1} expression (%M - 1) would not be
+; representable as an i16.
+
+; There is also a limit case here where the value of %M is MIN_INT. In this case
+; we still have an infinite loop, since icmp sge %x, MIN_INT will always return
+; true.
+
+; CHECK: Classifying expressions for: @test2
+
+; CHECK: %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32)
+; CHECK: Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M))))
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: <nssw>
+
+define void @test2(i32 %N, i32 %M, i16 %Start) {
+entry:
+ br label %bb3
+
+bb: ; preds = %bb3
+ %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; <i32*> [#uses=1]
+ store i32 123, i32* %tmp
+ %tmp2 = sub i16 %i.0, 1 ; <i32> [#uses=1]
+ br label %bb3
+
+bb3: ; preds = %bb, %entry
+ %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ] ; <i32> [#uses=3]
+ %i.0.ext = sext i16 %i.0 to i32
+ %tmp3 = icmp sge i32 %i.0.ext, %M ; <i1> [#uses=1]
+ br i1 %tmp3, label %bb, label %bb5
+
+bb5: ; preds = %bb3
+ br label %return
+
+return: ; preds = %bb5
+ ret void
+}
+
--- /dev/null
+; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+; The following tests contain loops for which SCEV cannot determine the backedge
+; taken count. This is because the backedge taken condition is produced by an
+; icmp with one of the sides being a loop varying non-AddRec expression.
+; However, there is a possibility to normalize this to an AddRec expression
+; using SCEV predicates. This allows us to compute a 'guarded' backedge count.
+; The Loop Vectorizer is able to version to loop in order to use this guarded
+; backedge count and vectorize more loops.
+
+
+; CHECK-LABEL: test_sge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sge(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+ %indvars.next = add i16 %indvars.iv, 1
+ %indvars.ext = zext i16 %indvars.iv to i32
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp sge i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_uge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_uge(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N, i32 %Offset) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+ %indvars.next = add i16 %indvars.iv, 1
+
+ %indvars.ext = sext i16 %indvars.iv to i32
+ %indvars.access = add i32 %Offset, %indvars.ext
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = add i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp uge i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_ule
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_ule(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N,
+ i16 %M) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+ %indvars.next = sub i16 %indvars.iv, 1
+ %indvars.ext = zext i16 %indvars.iv to i32
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp ule i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_sle
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sle(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N,
+ i16 %M) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+ %indvars.next = sub i16 %indvars.iv, 1
+ %indvars.ext = sext i16 %indvars.iv to i32
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp sle i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
%0 = trunc i64 %indvars.iv to i32, !dbg !16
+ %ld = load i32, i32* %arrayidx, align 4
store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
- %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+ %cmp3 = icmp sle i32 %ld, %Length, !dbg !22
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, %Length, !dbg !12