From: Silviu Baranga Date: Wed, 6 Apr 2016 13:18:26 +0000 (+0000) Subject: [SCEV] Introduce a guarded backedge taken count and use it in LAA and LV X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=72b4a4a3308312af1b51d8cebc59b090a28e2ebc;p=platform%2Fupstream%2Fllvm.git [SCEV] Introduce a guarded backedge taken count and use it in LAA and LV Summary: When the backedge taken codition is computed from an icmp, SCEV can deduce the backedge taken count only if one of the sides of the icmp is an AddRecExpr. However, due to sign/zero extensions, we sometimes end up with something that is not an AddRecExpr. However, we can use SCEV predicates to produce a 'guarded' expression. This change adds a method to SCEV to get this expression, and the SCEV predicate associated with it. In HowManyGreaterThans and HowManyLessThans we will now add a SCEV predicate associated with the guarded backedge taken count when the analyzed SCEV expression is not an AddRecExpr. Note that we only do this as an alternative to returning a 'CouldNotCompute'. We use new feature in Loop Access Analysis and LoopVectorize to analyze and transform more loops. Reviewers: anemet, mzolotukhin, hfinkel, sanjoy Subscribers: flyingforyou, mcrosier, atrick, mssimpso, sanjoy, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D17201 llvm-svn: 265535 --- diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 3a84e77..f102f2e 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -270,10 +270,17 @@ namespace llvm { } }; - /// SCEVWrapPredicate - This class represents an assumption - /// made on an AddRec expression. Given an affine AddRec expression - /// {a,+,b}, we assume that it has the nssw or nusw flags (defined - /// below). + /// SCEVWrapPredicate - This class represents an assumption made on an AddRec + /// expression. Given an affine AddRec expression {a,+,b}, we assume that it + /// has the nssw or nusw flags (defined below) in the first X iterations of + /// the loop, where X is a SCEV expression returned by + /// getPredicatedBackedgeTakenCount). + /// + /// Note that this does not imply that X is equal to the backedge taken + /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a + /// predicated backedge taken count of X, we only guarantee that {0,+,1} has + /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we + /// have more than X iterations. class SCEVWrapPredicate final : public SCEVPredicate { public: /// Similar to SCEV::NoWrapFlags, but with slightly different semantics @@ -520,9 +527,14 @@ namespace llvm { const SCEV *Exact; const SCEV *Max; + /// A predicate union guard for this ExitLimit. The result is only + /// valid if this predicate evaluates to 'true' at run-time. + SCEVUnionPredicate Pred; + /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {} - ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) { + ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P) + : Exact(E), Max(M), Pred(P) { assert((isa(Exact) || !isa(Max)) && "Exact is not allowed to be less precise than Max"); @@ -534,30 +546,146 @@ namespace llvm { return !isa(Exact) || !isa(Max); } + + /// Test whether this ExitLimit contains all information. + bool hasFullInfo() const { return !isa(Exact); } }; + /// Forward declaration of ExitNotTakenExtras + struct ExitNotTakenExtras; + /// Information about the number of times a particular loop exit may be /// reached before exiting the loop. struct ExitNotTakenInfo { AssertingVH ExitingBlock; const SCEV *ExactNotTaken; - PointerIntPair NextExit; + + PointerIntPair ExtraInfo; ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} + ExitNotTakenInfo(BasicBlock *ExitBlock, const SCEV *Expr, + ExitNotTakenExtras *Ptr) + : ExitingBlock(ExitBlock), ExactNotTaken(Expr) { + ExtraInfo.setPointer(Ptr); + } /// Return true if all loop exits are computable. - bool isCompleteList() const { - return NextExit.getInt() == 0; + bool isCompleteList() const { return ExtraInfo.getInt() == 0; } + + /// Sets the incomplete property, indicating that one of the loop exits + /// doesn't have a corresponding ExitNotTakenInfo entry. + void setIncomplete() { ExtraInfo.setInt(1); } + + /// Returns a pointer to the predicate associated with this information, + /// or nullptr if this doesn't exist (meaning always true). + SCEVUnionPredicate *getPred() const { + if (auto *Info = ExtraInfo.getPointer()) + return &Info->Pred; + + return nullptr; } - void setIncomplete() { NextExit.setInt(1); } + /// Return true if the SCEV predicate associated with this information + /// is always true. + bool hasAlwaysTruePred() const { + return !getPred() || getPred()->isAlwaysTrue(); + } - /// Return a pointer to the next exit's not-taken info. - ExitNotTakenInfo *getNextExit() const { - return NextExit.getPointer(); + /// Defines a simple forward iterator for ExitNotTakenInfo. + class ExitNotTakenInfoIterator + : public std::iterator { + const ExitNotTakenInfo *Start; + unsigned Position; + + public: + ExitNotTakenInfoIterator(const ExitNotTakenInfo *Start, + unsigned Position) + : Start(Start), Position(Position) {} + + const ExitNotTakenInfo &operator*() const { + if (Position == 0) + return *Start; + + return Start->ExtraInfo.getPointer()->Exits[Position - 1]; + } + + const ExitNotTakenInfo *operator->() const { + if (Position == 0) + return Start; + + return &Start->ExtraInfo.getPointer()->Exits[Position - 1]; + } + + bool operator==(const ExitNotTakenInfoIterator &RHS) const { + return Start == RHS.Start && Position == RHS.Position; + } + + bool operator!=(const ExitNotTakenInfoIterator &RHS) const { + return Start != RHS.Start || Position != RHS.Position; + } + + ExitNotTakenInfoIterator &operator++() { // Preincrement + if (!Start) + return *this; + + unsigned Elements = + Start->ExtraInfo.getPointer() + ? Start->ExtraInfo.getPointer()->Exits.size() + 1 + : 1; + + ++Position; + + // We've run out of elements. + if (Position == Elements) { + Start = nullptr; + Position = 0; + } + + return *this; + } + ExitNotTakenInfoIterator operator++(int) { // Postincrement + ExitNotTakenInfoIterator Tmp = *this; + ++*this; + return Tmp; + } + }; + + /// Iterators + ExitNotTakenInfoIterator begin() const { + return ExitNotTakenInfoIterator(this, 0); + } + ExitNotTakenInfoIterator end() const { + return ExitNotTakenInfoIterator(nullptr, 0); } + }; - void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); } + /// Describes the extra information that a ExitNotTakenInfo can have. + struct ExitNotTakenExtras { + /// The predicate associated with the ExitNotTakenInfo struct. + SCEVUnionPredicate Pred; + + /// The extra exits in the loop. Only the ExitNotTakenExtras structure + /// pointed to by the first ExitNotTakenInfo struct (associated with the + /// first loop exit) will populate this vector to prevent having + /// redundant information. + SmallVector Exits; + }; + + /// A struct containing the information attached to a backedge. + struct EdgeInfo { + EdgeInfo(BasicBlock *Block, const SCEV *Taken, SCEVUnionPredicate &P) : + ExitBlock(Block), Taken(Taken), Pred(std::move(P)) {} + + /// The exit basic block. + BasicBlock *ExitBlock; + + /// The (exact) number of time we take the edge back. + const SCEV *Taken; + + /// The SCEV predicated associated with Taken. If Pred doesn't evaluate + /// to true, the information in Taken is not valid (or equivalent with + /// a CouldNotCompute. + SCEVUnionPredicate Pred; }; /// Information about the backedge-taken count of a loop. This currently @@ -569,16 +697,16 @@ namespace llvm { ExitNotTakenInfo ExitNotTaken; /// An expression indicating the least maximum backedge-taken count of the - /// loop that is known, or a SCEVCouldNotCompute. + /// loop that is known, or a SCEVCouldNotCompute. This expression is only + /// valid if the predicates associated with all loop exits are true. const SCEV *Max; public: BackedgeTakenInfo() : Max(nullptr) {} /// Initialize BackedgeTakenInfo from a list of exact exit counts. - BackedgeTakenInfo( - SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount); + BackedgeTakenInfo(SmallVectorImpl &ExitCounts, bool Complete, + const SCEV *MaxCount); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. @@ -586,11 +714,27 @@ namespace llvm { return ExitNotTaken.ExitingBlock || !isa(Max); } + /// Test whether this BackedgeTakenInfo contains complete information. + bool hasFullInfo() const { return ExitNotTaken.isCompleteList(); } + /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the + /// loop if it is known or SCEVCouldNotCompute otherwise. This is the /// number of times the loop header can be guaranteed to execute, minus /// one. - const SCEV *getExact(ScalarEvolution *SE) const; + /// + /// If the SCEV predicate associated with the answer can be different + /// from AlwaysTrue, we must add a (non null) Predicates argument. + /// The SCEV predicate associated with the answer will be added to + /// Predicates. A run-time check needs to be emitted for the SCEV + /// predicate in order for the answer to be valid. + /// + /// Note that we should always know if we need to pass a predicate + /// argument or not from the way the ExitCounts vector was computed. + /// If we allowed SCEV predicates to be generated when populating this + /// vector, this information can contain them and therefore a + /// SCEVPredicate argument should be added to getExact. + const SCEV *getExact(ScalarEvolution *SE, + SCEVUnionPredicate *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via @@ -611,7 +755,11 @@ namespace llvm { /// Cache the backedge-taken count of the loops for this function as they /// are computed. - DenseMap BackedgeTakenCounts; + DenseMap BackedgeTakenCounts; + + /// Cache the predicated backedge-taken count of the loops for this + /// function as they are computed. + DenseMap PredicatedBackedgeTakenCounts; /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid @@ -713,33 +861,49 @@ namespace llvm { void forgetSymbolicName(Instruction *I, const SCEV *SymName); /// Return the BackedgeTakenInfo for the given loop, lazily computing new - /// values if the loop hasn't been analyzed yet. + /// values if the loop hasn't been analyzed yet. The returned result is + /// guaranteed not to be predicated. const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); + /// Similar to getBackedgeTakenInfo, but will add predicates as required + /// with the purpose of returning complete information. + const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); + /// Compute the number of times the specified loop will iterate. - BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L); + /// If AllowPredicates is set, we will create new SCEV predicates as + /// necessary in order to return an exact answer. + BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will - /// execute if it exits via the specified block. - ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock); + /// execute if it exits via the specified block. If AllowPredicates is set, + /// this call will try to use a minimal set of SCEV predicates in order to + /// return an exact answer. + ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond, - /// TBB, and FBB. + /// TBB, and FBB. If AllowPredicates is set, this call will try to use a + /// minimal set of SCEV predicates in order to return an exact answer. ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst - /// ExitCond, TBB, and FBB. + /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try + /// to use a minimal set of SCEV predicates in order to return an exact + /// answer. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr); + bool IsSubExpr, + bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case @@ -777,7 +941,10 @@ namespace llvm { /// Return the number of times an exit condition comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. - ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); + /// If AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, + bool AllowPredicates = false); /// Return the number of times an exit condition checking the specified /// value for nonzero will execute. If not computable, return @@ -787,10 +954,15 @@ namespace llvm { /// Return the number of times an exit condition containing the specified /// less-than comparison will execute. If not computable, return /// CouldNotCompute. isSigned specifies whether the less-than is signed. - ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + /// If AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, + bool isSigned, bool IsSubExpr, + bool AllowPredicates = false); + ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, bool IsSubExpr); + const Loop *L, bool isSigned, bool IsSubExpr, + bool AllowPredicates = false); /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if @@ -1168,6 +1340,13 @@ namespace llvm { /// const SCEV *getBackedgeTakenCount(const Loop *L); + /// Similar to getBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Predicates); + /// Similar to getBackedgeTakenCount, except return the least SCEV value /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); @@ -1493,6 +1672,8 @@ namespace llvm { /// by ScalarEvolution is guaranteed to be preserved, even when adding new /// predicates. const SCEV *getSCEV(Value *V); + /// Get the (predicated) backedge count for the analyzed loop. + const SCEV *getBackedgeTakenCount(); /// \brief Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); /// \brief Attempts to produce an AddRecExpr for V by adding additional @@ -1536,6 +1717,8 @@ namespace llvm { /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. unsigned Generation; + /// The backedge taken count. + const SCEV *BackedgeCount; }; } diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index c67c581..d1eac46 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -140,7 +140,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, else { const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); - const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + const SCEV *Ex = PSE.getBackedgeTakenCount(); ScStart = AR->getStart(); ScEnd = AR->evaluateAtIteration(Ex, *SE); @@ -1460,7 +1460,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(LoopAccessReport() << "could not determine number of loop iterations"); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 402a1b7..36b4348 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5223,6 +5223,12 @@ const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } +const SCEV * +ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Preds) { + return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds); +} + /// getBackedgeTakenCount - If the specified loop has a predictable /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute /// object. The backedge-taken count is the number of times the loop header @@ -5258,6 +5264,23 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl &Worklist) { } const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { + auto &BTI = getBackedgeTakenInfo(L); + if (BTI.hasFullInfo()) + return BTI; + + auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); + + if (!Pair.second) + return Pair.first->second; + + BackedgeTakenInfo Result = + computeBackedgeTakenCount(L, /*AllowPredicates=*/true); + + return PredicatedBackedgeTakenCounts.find(L)->second = Result; +} + +const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and @@ -5337,12 +5360,17 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { /// compute a trip count, or if the loop is deleted. void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. - DenseMap::iterator BTCPos = - BackedgeTakenCounts.find(L); - if (BTCPos != BackedgeTakenCounts.end()) { - BTCPos->second.clear(); - BackedgeTakenCounts.erase(BTCPos); - } + auto RemoveLoopFromBackedgeMap = + [L](DenseMap &Map) { + auto BTCPos = Map.find(L); + if (BTCPos != Map.end()) { + BTCPos->second.clear(); + Map.erase(BTCPos); + } + }; + + RemoveLoopFromBackedgeMap(BackedgeTakenCounts); + RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); // Drop information about expressions based on loop-header PHIs. SmallVector Worklist; @@ -5411,7 +5439,8 @@ void ScalarEvolution::forgetValue(Value *V) { /// is the caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { +ScalarEvolution::BackedgeTakenInfo::getExact( + ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); @@ -5420,16 +5449,20 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); const SCEV *BECount = nullptr; - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { - - assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + for (auto &ENT : ExitNotTaken) { + assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); if (!BECount) - BECount = ENT->ExactNotTaken; - else if (BECount != ENT->ExactNotTaken) + BECount = ENT.ExactNotTaken; + else if (BECount != ENT.ExactNotTaken) return SE->getCouldNotCompute(); + if (Preds && ENT.getPred()) + Preds->add(ENT.getPred()); + + assert((Preds || ENT.hasAlwaysTruePred()) && + "Predicate should be always true!"); } + assert(BECount && "Invalid not taken count for loop exit"); return BECount; } @@ -5438,18 +5471,20 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { + for (auto &ENT : ExitNotTaken) + if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred()) + return ENT.ExactNotTaken; - if (ENT->ExitingBlock == ExitingBlock) - return ENT->ExactNotTaken; - } return SE->getCouldNotCompute(); } /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + for (auto &ENT : ExitNotTaken) + if (!ENT.hasAlwaysTruePred()) + return SE->getCouldNotCompute(); + return Max ? Max : SE->getCouldNotCompute(); } @@ -5461,22 +5496,19 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, if (!ExitNotTaken.ExitingBlock) return false; - for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != nullptr; ENT = ENT->getNextExit()) { - - if (ENT->ExactNotTaken != SE->getCouldNotCompute() - && SE->hasOperand(ENT->ExactNotTaken, S)) { + for (auto &ENT : ExitNotTaken) + if (ENT.ExactNotTaken != SE->getCouldNotCompute() && + SE->hasOperand(ENT.ExactNotTaken, S)) return true; - } - } + return false; } /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( - SmallVectorImpl< std::pair > &ExitCounts, - bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + SmallVectorImpl &ExitCounts, bool Complete, const SCEV *MaxCount) + : Max(MaxCount) { if (!Complete) ExitNotTaken.setIncomplete(); @@ -5484,18 +5516,43 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( unsigned NumExits = ExitCounts.size(); if (NumExits == 0) return; - ExitNotTaken.ExitingBlock = ExitCounts[0].first; - ExitNotTaken.ExactNotTaken = ExitCounts[0].second; - if (NumExits == 1) return; + ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock; + ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken; + + // Determine the number of ExitNotTakenExtras structures that we need. + unsigned ExtraInfoSize = 0; + if (NumExits > 1) + ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()), + ExitCounts.end(), [](EdgeInfo &Entry) { + return !Entry.Pred.isAlwaysTrue(); + }); + else if (!ExitCounts[0].Pred.isAlwaysTrue()) + ExtraInfoSize = 1; + + ExitNotTakenExtras *ENT = nullptr; + + // Allocate the ExitNotTakenExtras structures and initialize the first + // element (ExitNotTaken). + if (ExtraInfoSize > 0) { + ENT = new ExitNotTakenExtras[ExtraInfoSize]; + ExitNotTaken.ExtraInfo.setPointer(&ENT[0]); + *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred); + } + + if (NumExits == 1) + return; + + auto &Exits = ExitNotTaken.ExtraInfo.getPointer()->Exits; // Handle the rare case of multiple computable exits. - ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; + for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) { + ExitNotTakenExtras *Ptr = nullptr; + if (!ExitCounts[i].Pred.isAlwaysTrue()) { + Ptr = &ENT[PredPos++]; + Ptr->Pred = std::move(ExitCounts[i].Pred); + } - ExitNotTakenInfo *PrevENT = &ExitNotTaken; - for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) { - PrevENT->setNextExit(ENT); - ENT->ExitingBlock = ExitCounts[i].first; - ENT->ExactNotTaken = ExitCounts[i].second; + Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr); } } @@ -5503,17 +5560,18 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( void ScalarEvolution::BackedgeTakenInfo::clear() { ExitNotTaken.ExitingBlock = nullptr; ExitNotTaken.ExactNotTaken = nullptr; - delete[] ExitNotTaken.getNextExit(); + delete[] ExitNotTaken.ExtraInfo.getPointer(); } /// computeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L, + bool AllowPredicates) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - SmallVector, 4> ExitCounts; + SmallVector ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; @@ -5521,9 +5579,13 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. + // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; - ExitLimit EL = computeExitLimit(L, ExitBB); + ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); + + assert((AllowPredicates || EL.Pred.isAlwaysTrue()) && + "Predicated exit limit when predicates are not allowed!"); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. @@ -5532,7 +5594,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.push_back({ExitBB, EL.Exact}); + ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred)); // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: @@ -5566,7 +5628,8 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { } ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, + bool AllowPredicates) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets @@ -5631,9 +5694,9 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. - return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), - BI->getSuccessor(1), - /*ControlsExit=*/IsOnlyExit); + return computeExitLimitFromCond( + L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), + /*ControlsExit=*/IsOnlyExit, AllowPredicates); } if (SwitchInst *SI = dyn_cast(Term)) @@ -5656,16 +5719,19 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -5692,6 +5758,9 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, BECount = EL0.Exact; } + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact @@ -5700,15 +5769,17 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, !isa(BECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, NP); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit); + ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -5735,14 +5806,25 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, BECount = EL0.Exact; } - return ExitLimit(BECount, MaxBECount); + SCEVUnionPredicate NP; + NP.add(&EL0.Pred); + NP.add(&EL1.Pred); + return ExitLimit(BECount, MaxBECount, NP); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. - if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) { + ExitLimit EL = + computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + if (EL.hasFullInfo() || !AllowPredicates) + return EL; + + // Try again, but use SCEV predicates this time. + return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit, + /*AllowPredicates=*/true); + } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5766,7 +5848,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool ControlsExit) { + bool ControlsExit, + bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -5823,7 +5906,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -5836,14 +5920,17 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Cond == ICmpInst::ICMP_SLT; - ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Cond == ICmpInst::ICMP_SGT; - ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit); + ExitLimit EL = + HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -6105,7 +6192,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); - return ExitLimit(getCouldNotCompute(), UpperBound); + SCEVUnionPredicate P; + return ExitLimit(getCouldNotCompute(), UpperBound, P); } return getCouldNotCompute(); @@ -6882,7 +6970,9 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, + bool AllowPredicates) { + SCEVUnionPredicate P; // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -6891,6 +6981,12 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { } const SCEVAddRecExpr *AddRec = dyn_cast(V); + if (!AddRec && AllowPredicates) + // Try to make this an AddRec using runtime tests, in the first X + // iterations of this loop, where X is the SCEV expression found by the + // algorithm below. + AddRec = convertSCEVToAddRecWithPredicates(V, L, P); + if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); @@ -6915,7 +7011,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // should not accept a root of 2. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) - return R1; // We found a quadratic root! + return ExitLimit(R1, R1, P); // We found a quadratic root! } } return getCouldNotCompute(); @@ -6972,7 +7068,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount); + return ExitLimit(Distance, MaxBECount, P); } // As a special case, handle the instance where Step is a positive power of @@ -7025,7 +7121,9 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); auto *WideTy = Distance->getType(); - return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + const SCEV *Limit = + getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + return ExitLimit(Limit, Limit, P); } } @@ -7037,13 +7135,15 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { if (ControlsExit && AddRec->hasNoSelfWrap()) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact); + return ExitLimit(Exact, Exact, P); } // Then, try to solve the above equation provided that Start is constant. - if (const SCEVConstant *StartC = dyn_cast(Start)) - return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), - *this); + if (const SCEVConstant *StartC = dyn_cast(Start)) { + const SCEV *E = SolveLinEquationWithOverflow( + StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); + return ExitLimit(E, E, P); + } return getCouldNotCompute(); } @@ -8486,12 +8586,18 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool AllowPredicates) { + SCEVUnionPredicate P; // We handle only IV < Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if (!IV && AllowPredicates) + // Try to make this an AddRec using runtime tests, in the first X + // iterations of this loop, where X is the SCEV expression found by the + // algorithm below. + IV = convertSCEVToAddRecWithPredicates(LHS, L, P); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8560,18 +8666,24 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } ScalarEvolution::ExitLimit ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool ControlsExit) { + bool ControlsExit, bool AllowPredicates) { + SCEVUnionPredicate P; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); + if (!IV && AllowPredicates) + // Try to make this an AddRec using runtime tests, in the first X + // iterations of this loop, where X is the SCEV expression found by the + // algorithm below. + IV = convertSCEVToAddRecWithPredicates(LHS, L, P); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8642,7 +8754,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount, P); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -9346,6 +9458,8 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) ValueExprMap(std::move(Arg.ValueExprMap)), WalkingBEDominatingConds(false), ProvingSplitPredicate(false), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), + PredicatedBackedgeTakenCounts( + std::move(Arg.PredicatedBackedgeTakenCounts)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), @@ -9378,6 +9492,8 @@ ScalarEvolution::~ScalarEvolution() { // that a loop had multiple computable exits. for (auto &BTCI : BackedgeTakenCounts) BTCI.second.clear(); + for (auto &BTCI : PredicatedBackedgeTakenCounts) + BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); @@ -9420,6 +9536,20 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, OS << "Unpredictable max backedge-taken count. "; } + OS << "\n" + "Loop "; + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": "; + + SCEVUnionPredicate Pred; + auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); + if (!isa(PBT)) { + OS << "Predicated backedge-taken count is " << *PBT << "\n"; + OS << " Predicates:\n"; + Pred.print(OS, 4); + } else { + OS << "Unpredictable predicated backedge-taken count. "; + } OS << "\n"; } @@ -9704,16 +9834,20 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { ExprValueMap.erase(S); HasRecMap.erase(S); - for (DenseMap::iterator I = - BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { - BackedgeTakenInfo &BEInfo = I->second; - if (BEInfo.hasOperand(S, this)) { - BEInfo.clear(); - BackedgeTakenCounts.erase(I++); - } - else - ++I; - } + auto RemoveSCEVFromBackedgeMap = + [S, this](DenseMap &Map) { + for (auto I = Map.begin(), E = Map.end(); I != E;) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + Map.erase(I++); + } else + ++I; + } + }; + + RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); + RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } typedef DenseMap VerifyMap; @@ -10128,7 +10262,7 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) { PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) - : SE(SE), L(L), Generation(0) {} + : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {} const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); @@ -10149,6 +10283,15 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { return NewSCEV; } +const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { + if (!BackedgeCount) { + SCEVUnionPredicate BackedgePred; + BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); + addPredicate(BackedgePred); + } + return BackedgeCount; +} + void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds.implies(&Pred)) return; @@ -10214,10 +10357,10 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { return New; } -PredicatedScalarEvolution:: -PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) : - RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), - Generation(Init.Generation) { +PredicatedScalarEvolution::PredicatedScalarEvolution( + const PredicatedScalarEvolution &Init) + : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), + Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I) FlagsMap.insert(*I); } diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 4db3c7f..d9d2a8a 100644 --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2004,7 +2004,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, assert(AR->isAffine() && "Cannot generate RT check for " "non-affine expression"); - const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop()); + SCEVUnionPredicate Pred; + const SCEV *ExitCount = + SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 201e9e9..61d9ace 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2778,7 +2778,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. ScalarEvolution *SE = PSE.getSE(); - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); + const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -4425,7 +4425,7 @@ bool LoopVectorizationLegality::canVectorize() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + const SCEV *ExitCount = PSE.getBackedgeTakenCount(); if (ExitCount == PSE.getSE()->getCouldNotCompute()) { emitAnalysis(VectorizationReport() << "could not determine number of loop iterations"); diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll new file mode 100644 index 0000000..2db0a8b --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A = weak global [1000 x i32] zeroinitializer, align 32 + +; The resulting predicate is i16 {0,+,1} , meanining +; that the resulting backedge expression will be valid for: +; (1 + (-1 smax %M)) <= MAX_INT16 +; +; At the limit condition for M (MAX_INT16 - 1) we have in the +; last iteration: +; i0 <- MAX_INT16 +; i0.ext <- MAX_INT16 +; +; and therefore no wrapping happend for i0 or i0.ext +; throughout the execution of the loop. The resulting predicated +; backedge taken count is correct. + +; CHECK: Classifying expressions for: @test1 +; CHECK: %i.0.ext = sext i16 %i.0 to i32 +; CHECK-NEXT: --> (sext i16 {0,+,1}<%bb3> to i32) +; CHECK: Loop %bb3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {0,+,1}<%bb3> Added Flags: +define void @test1(i32 %N, i32 %M) { +entry: + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = add i16 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %i.0.ext = sext i16 %i.0 to i32 + %tmp3 = icmp sle i32 %i.0.ext, %M ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + +; The predicated backedge taken count is: +; (2 + (zext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) +; smax (-1 + (-1 * %M))) +; ) + +; -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32)) +; The predicated backedge taken count is 0. +; From the IR, this is correct since we will bail out at the +; first iteration. + + +; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32)) +; or: %M < 1 + (sext i16 %Start to i32) +; +; The predicated backedge taken count is 1 + (zext i16 %Start to i32) - %M +; +; If %M >= MIN_INT + 1, this predicated backedge taken count would be correct (even +; without predicates). However, for %M < MIN_INT this would be an infinite loop. +; In these cases, the {%Start,+,-1} predicate would be false, as the +; final value of the expression {%Start,+,-1} expression (%M - 1) would not be +; representable as an i16. + +; There is also a limit case here where the value of %M is MIN_INT. In this case +; we still have an infinite loop, since icmp sge %x, MIN_INT will always return +; true. + +; CHECK: Classifying expressions for: @test2 + +; CHECK: %i.0.ext = sext i16 %i.0 to i32 +; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) +; CHECK: Loop %bb3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: + +define void @test2(i32 %N, i32 %M, i16 %Start) { +entry: + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = sub i16 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %i.0.ext = sext i16 %i.0 to i32 + %tmp3 = icmp sge i32 %i.0.ext, %M ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll new file mode 100644 index 0000000..aba47f6 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -0,0 +1,166 @@ +; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s + +; The following tests contain loops for which SCEV cannot determine the backedge +; taken count. This is because the backedge taken condition is produced by an +; icmp with one of the sides being a loop varying non-AddRec expression. +; However, there is a possibility to normalize this to an AddRec expression +; using SCEV predicates. This allows us to compute a 'guarded' backedge count. +; The Loop Vectorizer is able to version to loop in order to use this guarded +; backedge count and vectorize more loops. + + +; CHECK-LABEL: test_sge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_uge +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_uge(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, i32 %Offset) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] + %indvars.next = add i16 %indvars.iv, 1 + + %indvars.ext = sext i16 %indvars.iv to i32 + %indvars.access = add i32 %Offset, %indvars.ext + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = add i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp uge i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_ule +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_ule(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = zext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp ule i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: test_sle +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +define void @test_sle(i32* noalias %A, + i32* noalias %B, + i32* noalias %C, i32 %N, + i16 %M) { +entry: + %cmp13 = icmp eq i32 %N, 0 + br i1 %cmp13, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] + %indvars.next = sub i16 %indvars.iv, 1 + %indvars.ext = sext i16 %indvars.iv to i32 + + %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext + %1 = load i32, i32* %arrayidx3, align 4 + + %mul4 = mul i32 %1, %0 + + %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext + store i32 %mul4, i32* %arrayidx7, align 4 + + %exitcond = icmp sle i32 %indvars.ext, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll index 42ec3b3..740ff36 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -54,8 +54,9 @@ for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16 %0 = trunc i64 %indvars.iv to i32, !dbg !16 + %ld = load i32, i32* %arrayidx, align 4 store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18 - %cmp3 = icmp sle i32 %0, %Length, !dbg !22 + %cmp3 = icmp sle i32 %ld, %Length, !dbg !22 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12 %1 = trunc i64 %indvars.iv.next to i32 %cmp = icmp slt i32 %1, %Length, !dbg !12