[SCEV] Introduce a guarded backedge taken count and use it in LAA and LV

author Silviu Baranga <silviu.baranga@arm.com>

Wed, 6 Apr 2016 13:18:26 +0000 (13:18 +0000)

committer Silviu Baranga <silviu.baranga@arm.com>

Wed, 6 Apr 2016 13:18:26 +0000 (13:18 +0000)
author Silviu Baranga <silviu.baranga@arm.com>
Wed, 6 Apr 2016 13:18:26 +0000 (13:18 +0000)
committer Silviu Baranga <silviu.baranga@arm.com>
Wed, 6 Apr 2016 13:18:26 +0000 (13:18 +0000)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h

index 3a84e77..f102f2e 100644 (file)
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -270,10 +270,17 @@ namespace llvm {
      }
    };
  
-  /// SCEVWrapPredicate - This class represents an assumption
-  /// made on an AddRec expression. Given an affine AddRec expression
-  /// {a,+,b}, we assume that it has the nssw or nusw flags (defined
-  /// below).
+  /// SCEVWrapPredicate - This class represents an assumption made on an AddRec
+  /// expression. Given an affine AddRec expression {a,+,b}, we assume that it
+  /// has the nssw or nusw flags (defined below) in the first X iterations of
+  /// the loop, where X is a SCEV expression returned by
+  /// getPredicatedBackedgeTakenCount.
+  ///
+  /// Note that this does not imply that X is equal to the backedge taken
+  /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a
+  /// predicated backedge taken count of X, we only guarantee that {0,+,1} has
+  /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we
+  /// have more than X iterations.
    class SCEVWrapPredicate final : public SCEVPredicate {
    public:
      /// Similar to SCEV::NoWrapFlags, but with slightly different semantics
@@ -520,9 +527,14 @@ namespace llvm {
        const SCEV *Exact;
        const SCEV *Max;
  
+      /// A predicate union guard for this ExitLimit. The result is only
+      /// valid if this predicate evaluates to 'true' at run-time.
+      SCEVUnionPredicate Pred;
+
        /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {}
  
-      ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {
+      ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P)
+          : Exact(E), Max(M), Pred(P) {
          assert((isa<SCEVCouldNotCompute>(Exact) ||
                  !isa<SCEVCouldNotCompute>(Max)) &&
                 "Exact is not allowed to be less precise than Max");
@@ -534,30 +546,146 @@ namespace llvm {
          return !isa<SCEVCouldNotCompute>(Exact) ||
            !isa<SCEVCouldNotCompute>(Max);
        }
+
+      /// Test whether this ExitLimit contains all information.
+      bool hasFullInfo() const { return !isa<SCEVCouldNotCompute>(Exact); }
      };
  
+    /// Forward declaration of ExitNotTakenExtras
+    struct ExitNotTakenExtras;
+
      /// Information about the number of times a particular loop exit may be
      /// reached before exiting the loop.
      struct ExitNotTakenInfo {
        AssertingVH<BasicBlock> ExitingBlock;
        const SCEV *ExactNotTaken;
-      PointerIntPair<ExitNotTakenInfo*, 1> NextExit;
+
+      PointerIntPair<ExitNotTakenExtras *, 1> ExtraInfo;
  
        ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {}
+      ExitNotTakenInfo(BasicBlock *ExitBlock, const SCEV *Expr,
+                       ExitNotTakenExtras *Ptr)
+          : ExitingBlock(ExitBlock), ExactNotTaken(Expr) {
+        ExtraInfo.setPointer(Ptr);
+      }
  
        /// Return true if all loop exits are computable.
-      bool isCompleteList() const {
-        return NextExit.getInt() == 0;
+      bool isCompleteList() const { return ExtraInfo.getInt() == 0; }
+
+      /// Sets the incomplete property, indicating that one of the loop exits
+      /// doesn't have a corresponding ExitNotTakenInfo entry.
+      void setIncomplete() { ExtraInfo.setInt(1); }
+
+      /// Returns a pointer to the predicate associated with this information,
+      /// or nullptr if this doesn't exist (meaning always true).
+      SCEVUnionPredicate *getPred() const {
+        if (auto *Info = ExtraInfo.getPointer())
+          return &Info->Pred;
+
+        return nullptr;
        }
  
-      void setIncomplete() { NextExit.setInt(1); }
+      /// Return true if the SCEV predicate associated with this information
+      /// is always true.
+      bool hasAlwaysTruePred() const {
+        return !getPred() || getPred()->isAlwaysTrue();
+      }
  
-      /// Return a pointer to the next exit's not-taken info.
-      ExitNotTakenInfo *getNextExit() const {
-        return NextExit.getPointer();
+      /// Defines a simple forward iterator for ExitNotTakenInfo.
+      class ExitNotTakenInfoIterator
+          : public std::iterator<std::forward_iterator_tag, ExitNotTakenInfo> {
+        const ExitNotTakenInfo *Start;
+        unsigned Position;
+
+      public:
+        ExitNotTakenInfoIterator(const ExitNotTakenInfo *Start,
+                                 unsigned Position)
+            : Start(Start), Position(Position) {}
+
+        const ExitNotTakenInfo &operator*() const {
+          if (Position == 0)
+            return *Start;
+
+          return Start->ExtraInfo.getPointer()->Exits[Position - 1];
+        }
+
+        const ExitNotTakenInfo *operator->() const {
+          if (Position == 0)
+            return Start;
+
+          return &Start->ExtraInfo.getPointer()->Exits[Position - 1];
+        }
+
+        bool operator==(const ExitNotTakenInfoIterator &RHS) const {
+          return Start == RHS.Start && Position == RHS.Position;
+        }
+
+        bool operator!=(const ExitNotTakenInfoIterator &RHS) const {
+          return Start != RHS.Start || Position != RHS.Position;
+        }
+
+        ExitNotTakenInfoIterator &operator++() { // Preincrement
+          if (!Start)
+            return *this;
+
+          unsigned Elements =
+              Start->ExtraInfo.getPointer()
+                  ? Start->ExtraInfo.getPointer()->Exits.size() + 1
+                  : 1;
+
+          ++Position;
+
+          // We've run out of elements.
+          if (Position == Elements) {
+            Start = nullptr;
+            Position = 0;
+          }
+
+          return *this;
+        }
+        ExitNotTakenInfoIterator operator++(int) { // Postincrement
+          ExitNotTakenInfoIterator Tmp = *this;
+          ++*this;
+          return Tmp;
+        }
+      };
+
+      /// Iterators
+      ExitNotTakenInfoIterator begin() const {
+        return ExitNotTakenInfoIterator(this, 0);
+      }
+      ExitNotTakenInfoIterator end() const {
+        return ExitNotTakenInfoIterator(nullptr, 0);
        }
+    };
  
-      void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); }
+    /// Describes the extra information that an ExitNotTakenInfo can have.
+    struct ExitNotTakenExtras {
+      /// The predicate associated with the ExitNotTakenInfo struct.
+      SCEVUnionPredicate Pred;
+
+      /// The extra exits in the loop. Only the ExitNotTakenExtras structure
+      /// pointed to by the first ExitNotTakenInfo struct (associated with the
+      /// first loop exit) will populate this vector to prevent having
+      /// redundant information.
+      SmallVector<ExitNotTakenInfo, 4> Exits;
+    };
+
+    /// A struct containing the information attached to a backedge.
+    struct EdgeInfo {
+      EdgeInfo(BasicBlock *Block, const SCEV *Taken, SCEVUnionPredicate &P) :
+          ExitBlock(Block), Taken(Taken), Pred(std::move(P)) {}
+
+      /// The exiting basic block of the loop.
+      BasicBlock *ExitBlock;
+
+      /// The (exact) number of times we take the edge back.
+      const SCEV *Taken;
+
+      /// The SCEV predicate associated with Taken. If Pred doesn't evaluate
+      /// to true, the information in Taken is not valid (or is equivalent to
+      /// a CouldNotCompute).
+      SCEVUnionPredicate Pred;
      };
  
      /// Information about the backedge-taken count of a loop. This currently
@@ -569,16 +697,16 @@ namespace llvm {
        ExitNotTakenInfo ExitNotTaken;
  
        /// An expression indicating the least maximum backedge-taken count of the
-      /// loop that is known, or a SCEVCouldNotCompute.
+      /// loop that is known, or a SCEVCouldNotCompute. This expression is only
+      /// valid if the predicates associated with all loop exits are true.
        const SCEV *Max;
  
      public:
        BackedgeTakenInfo() : Max(nullptr) {}
  
        /// Initialize BackedgeTakenInfo from a list of exact exit counts.
-      BackedgeTakenInfo(
-        SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
-        bool Complete, const SCEV *MaxCount);
+      BackedgeTakenInfo(SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete,
+                        const SCEV *MaxCount);
  
        /// Test whether this BackedgeTakenInfo contains any computed information,
        /// or whether it's all SCEVCouldNotCompute values.
@@ -586,11 +714,27 @@ namespace llvm {
          return ExitNotTaken.ExitingBlock || !isa<SCEVCouldNotCompute>(Max);
        }
  
+      /// Test whether this BackedgeTakenInfo contains complete information.
+      bool hasFullInfo() const { return ExitNotTaken.isCompleteList(); }
+
        /// Return an expression indicating the exact backedge-taken count of the
-      /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the
+      /// loop if it is known or SCEVCouldNotCompute otherwise. This is the
        /// number of times the loop header can be guaranteed to execute, minus
        /// one.
-      const SCEV *getExact(ScalarEvolution *SE) const;
+      ///
+      /// If the SCEV predicate associated with the answer can be different
+      /// from AlwaysTrue, we must add a (non null) Predicates argument.
+      /// The SCEV predicate associated with the answer will be added to
+      /// Predicates. A run-time check needs to be emitted for the SCEV
+      /// predicate in order for the answer to be valid.
+      ///
+      /// Note that we should always know if we need to pass a predicate
+      /// argument or not from the way the ExitCounts vector was computed.
+      /// If we allowed SCEV predicates to be generated when populating this
+      /// vector, this information can contain them and therefore a
+      /// SCEVPredicate argument should be added to getExact.
+      const SCEV *getExact(ScalarEvolution *SE,
+                           SCEVUnionPredicate *Predicates = nullptr) const;
  
        /// Return the number of times this loop exit may fall through to the back
        /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
@@ -611,7 +755,11 @@ namespace llvm {
  
      /// Cache the backedge-taken count of the loops for this function as they
      /// are computed.
-    DenseMap<const Loop*, BackedgeTakenInfo> BackedgeTakenCounts;
+    DenseMap<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts;
+
+    /// Cache the predicated backedge-taken count of the loops for this
+    /// function as they are computed.
+    DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
  
      /// This map contains entries for all of the PHI instructions that we
      /// attempt to compute constant evolutions for.  This allows us to avoid
@@ -713,33 +861,49 @@ namespace llvm {
      void forgetSymbolicName(Instruction *I, const SCEV *SymName);
  
      /// Return the BackedgeTakenInfo for the given loop, lazily computing new
-    /// values if the loop hasn't been analyzed yet.
+    /// values if the loop hasn't been analyzed yet. The returned result is
+    /// guaranteed not to be predicated.
      const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
  
+    /// Similar to getBackedgeTakenInfo, but will add predicates as required
+    /// with the purpose of returning complete information.
+    const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L);
+
      /// Compute the number of times the specified loop will iterate.
-    BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L);
+    /// If AllowPredicates is set, we will create new SCEV predicates as
+    /// necessary in order to return an exact answer.
+    BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L,
+                                                bool AllowPredicates = false);
  
      /// Compute the number of times the backedge of the specified loop will
-    /// execute if it exits via the specified block.
-    ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock);
+    /// execute if it exits via the specified block. If AllowPredicates is set,
+    /// this call will try to use a minimal set of SCEV predicates in order to
+    /// return an exact answer.
+    ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+                               bool AllowPredicates = false);
  
      /// Compute the number of times the backedge of the specified loop will
      /// execute if its exit condition were a conditional branch of ExitCond,
-    /// TBB, and FBB.
+    /// TBB, and FBB. If AllowPredicates is set, this call will try to use a
+    /// minimal set of SCEV predicates in order to return an exact answer.
      ExitLimit computeExitLimitFromCond(const Loop *L,
                                         Value *ExitCond,
                                         BasicBlock *TBB,
                                         BasicBlock *FBB,
-                                       bool IsSubExpr);
+                                       bool IsSubExpr,
+                                       bool AllowPredicates = false);
  
      /// Compute the number of times the backedge of the specified loop will
      /// execute if its exit condition were a conditional branch of the ICmpInst
-    /// ExitCond, TBB, and FBB.
+    /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try
+    /// to use a minimal set of SCEV predicates in order to return an exact
+    /// answer.
      ExitLimit computeExitLimitFromICmp(const Loop *L,
                                         ICmpInst *ExitCond,
                                         BasicBlock *TBB,
                                         BasicBlock *FBB,
-                                       bool IsSubExpr);
+                                       bool IsSubExpr,
+                                       bool AllowPredicates = false);
  
      /// Compute the number of times the backedge of the specified loop will
      /// execute if its exit condition were a switch with a single exiting case
@@ -777,7 +941,10 @@ namespace llvm {
  
      /// Return the number of times an exit condition comparing the specified
      /// value to zero will execute.  If not computable, return CouldNotCompute.
-    ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr);
+    /// If AllowPredicates is set, this call will try to use a minimal set of
+    /// SCEV predicates in order to return an exact answer.
+    ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr,
+                           bool AllowPredicates = false);
  
      /// Return the number of times an exit condition checking the specified
      /// value for nonzero will execute.  If not computable, return
@@ -787,10 +954,15 @@ namespace llvm {
      /// Return the number of times an exit condition containing the specified
      /// less-than comparison will execute.  If not computable, return
      /// CouldNotCompute. isSigned specifies whether the less-than is signed.
-    ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                               const Loop *L, bool isSigned, bool IsSubExpr);
+    /// If AllowPredicates is set, this call will try to use a minimal set of
+    /// SCEV predicates in order to return an exact answer.
+    ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
+                               bool isSigned, bool IsSubExpr,
+                               bool AllowPredicates = false);
+
      ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
-                                  const Loop *L, bool isSigned, bool IsSubExpr);
+                                  const Loop *L, bool isSigned, bool IsSubExpr,
+                                  bool AllowPredicates = false);
  
      /// Return a predecessor of BB (which may not be an immediate predecessor)
      /// which has exactly one successor from which BB is reachable, or null if
@@ -1168,6 +1340,13 @@ namespace llvm {
      ///
      const SCEV *getBackedgeTakenCount(const Loop *L);
  
+    /// Similar to getBackedgeTakenCount, except it will add a set of
+    /// SCEV predicates to Predicates that are required to be true in order for
+    /// the answer to be correct. Predicates can be checked with run-time
+    /// checks and can be used to perform loop versioning.
+    const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
+                                                SCEVUnionPredicate &Predicates);
+
      /// Similar to getBackedgeTakenCount, except return the least SCEV value
      /// that is known never to be less than the actual backedge taken count.
      const SCEV *getMaxBackedgeTakenCount(const Loop *L);
@@ -1493,6 +1672,8 @@ namespace llvm {
      /// by ScalarEvolution is guaranteed to be preserved, even when adding new
      /// predicates.
      const SCEV *getSCEV(Value *V);
+    /// Get the (predicated) backedge count for the analyzed loop.
+    const SCEV *getBackedgeTakenCount();
      /// \brief Adds a new predicate.
      void addPredicate(const SCEVPredicate &Pred);
      /// \brief Attempts to produce an AddRecExpr for V by adding additional
@@ -1536,6 +1717,8 @@ namespace llvm {
      /// figure out if the predicate has changed from the last rewrite of the
      /// SCEV. If so, we need to perform a new rewrite.
      unsigned Generation;
+    /// The backedge taken count.
+    const SCEV *BackedgeCount;
    };
  }
  
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp

index c67c581..d1eac46 100644 (file)
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -140,7 +140,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
    else {
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
      assert(AR && "Invalid addrec expression");
-    const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+    const SCEV *Ex = PSE.getBackedgeTakenCount();
  
      ScStart = AR->getStart();
      ScEnd = AR->evaluateAtIteration(Ex, *SE);
@@ -1460,7 +1460,7 @@ bool LoopAccessInfo::canAnalyzeLoop() {
    }
  
    // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+  const SCEV *ExitCount = PSE.getBackedgeTakenCount();
    if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
      emitAnalysis(LoopAccessReport()
                   << "could not determine number of loop iterations");
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp

index 402a1b7..36b4348 100644 (file)
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5223,6 +5223,12 @@ const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
    return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
  }
  
+const SCEV *
+ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
+                                                 SCEVUnionPredicate &Preds) {
+  return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds);
+}
+
  /// getBackedgeTakenCount - If the specified loop has a predictable
  /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
  /// object. The backedge-taken count is the number of times the loop header
@@ -5258,6 +5264,23 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
  }
  
  const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
+  auto &BTI = getBackedgeTakenInfo(L);
+  if (BTI.hasFullInfo())
+    return BTI;
+
+  auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
+
+  if (!Pair.second)
+    return Pair.first->second;
+
+  BackedgeTakenInfo Result =
+      computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
+
+  return PredicatedBackedgeTakenCounts.find(L)->second = Result;
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
  ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
    // Initially insert an invalid entry for this loop. If the insertion
    // succeeds, proceed to actually compute a backedge-taken count and
@@ -5337,12 +5360,17 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  /// compute a trip count, or if the loop is deleted.
  void ScalarEvolution::forgetLoop(const Loop *L) {
    // Drop any stored trip count value.
-  DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
-    BackedgeTakenCounts.find(L);
-  if (BTCPos != BackedgeTakenCounts.end()) {
-    BTCPos->second.clear();
-    BackedgeTakenCounts.erase(BTCPos);
-  }
+  auto RemoveLoopFromBackedgeMap =
+      [L](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+        auto BTCPos = Map.find(L);
+        if (BTCPos != Map.end()) {
+          BTCPos->second.clear();
+          Map.erase(BTCPos);
+        }
+      };
+
+  RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
+  RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
  
    // Drop information about expressions based on loop-header PHIs.
    SmallVector<Instruction *, 16> Worklist;
@@ -5411,7 +5439,8 @@ void ScalarEvolution::forgetValue(Value *V) {
  /// is the caller's responsibility to specify the relevant loop exit using
  /// getExact(ExitingBlock, SE).
  const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ScalarEvolution::BackedgeTakenInfo::getExact(
+    ScalarEvolution *SE, SCEVUnionPredicate *Preds) const {
    // If any exits were not computable, the loop is not computable.
    if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
  
@@ -5420,16 +5449,20 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
    assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
  
    const SCEV *BECount = nullptr;
-  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != nullptr; ENT = ENT->getNextExit()) {
-
-    assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+  for (auto &ENT : ExitNotTaken) {
+    assert(ENT.ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
  
      if (!BECount)
-      BECount = ENT->ExactNotTaken;
-    else if (BECount != ENT->ExactNotTaken)
+      BECount = ENT.ExactNotTaken;
+    else if (BECount != ENT.ExactNotTaken)
        return SE->getCouldNotCompute();
+    if (Preds && ENT.getPred())
+      Preds->add(ENT.getPred());
+
+    assert((Preds || ENT.hasAlwaysTruePred()) &&
+           "Predicate should be always true!");
    }
+
    assert(BECount && "Invalid not taken count for loop exit");
    return BECount;
  }
@@ -5438,18 +5471,20 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
  const SCEV *
  ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
                                               ScalarEvolution *SE) const {
-  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != nullptr; ENT = ENT->getNextExit()) {
+  for (auto &ENT : ExitNotTaken)
+    if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred())
+      return ENT.ExactNotTaken;
  
-    if (ENT->ExitingBlock == ExitingBlock)
-      return ENT->ExactNotTaken;
-  }
    return SE->getCouldNotCompute();
  }
  
  /// getMax - Get the max backedge taken count for the loop.
  const SCEV *
  ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+  for (auto &ENT : ExitNotTaken)
+    if (!ENT.hasAlwaysTruePred())
+      return SE->getCouldNotCompute();
+
    return Max ? Max : SE->getCouldNotCompute();
  }
  
@@ -5461,22 +5496,19 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
    if (!ExitNotTaken.ExitingBlock)
      return false;
  
-  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != nullptr; ENT = ENT->getNextExit()) {
-
-    if (ENT->ExactNotTaken != SE->getCouldNotCompute()
-        && SE->hasOperand(ENT->ExactNotTaken, S)) {
+  for (auto &ENT : ExitNotTaken)
+    if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
+        SE->hasOperand(ENT.ExactNotTaken, S))
        return true;
-    }
-  }
+
    return false;
  }
  
  /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
  /// computable exit into a persistent ExitNotTakenInfo array.
  ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
-  SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
-  bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+    SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount)
+    : Max(MaxCount) {
  
    if (!Complete)
      ExitNotTaken.setIncomplete();
@@ -5484,18 +5516,43 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
    unsigned NumExits = ExitCounts.size();
    if (NumExits == 0) return;
  
-  ExitNotTaken.ExitingBlock = ExitCounts[0].first;
-  ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
-  if (NumExits == 1) return;
+  ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock;
+  ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken;
+
+  // Determine the number of ExitNotTakenExtras structures that we need.
+  unsigned ExtraInfoSize = 0;
+  if (NumExits > 1)
+    ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()),
+                                      ExitCounts.end(), [](EdgeInfo &Entry) {
+                                        return !Entry.Pred.isAlwaysTrue();
+                                      });
+  else if (!ExitCounts[0].Pred.isAlwaysTrue())
+    ExtraInfoSize = 1;
+
+  ExitNotTakenExtras *ENT = nullptr;
+
+  // Allocate the ExitNotTakenExtras structures and initialize the first
+  // element (ExitNotTaken).
+  if (ExtraInfoSize > 0) {
+    ENT = new ExitNotTakenExtras[ExtraInfoSize];
+    ExitNotTaken.ExtraInfo.setPointer(&ENT[0]);
+    *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred);
+  }
+
+  if (NumExits == 1)
+    return;
+
+  auto &Exits = ExitNotTaken.ExtraInfo.getPointer()->Exits;
  
    // Handle the rare case of multiple computable exits.
-  ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+  for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) {
+    ExitNotTakenExtras *Ptr = nullptr;
+    if (!ExitCounts[i].Pred.isAlwaysTrue()) {
+      Ptr = &ENT[PredPos++];
+      Ptr->Pred = std::move(ExitCounts[i].Pred);
+    }
  
-  ExitNotTakenInfo *PrevENT = &ExitNotTaken;
-  for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
-    PrevENT->setNextExit(ENT);
-    ENT->ExitingBlock = ExitCounts[i].first;
-    ENT->ExactNotTaken = ExitCounts[i].second;
+    Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr);
    }
  }
  
@@ -5503,17 +5560,18 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
  void ScalarEvolution::BackedgeTakenInfo::clear() {
    ExitNotTaken.ExitingBlock = nullptr;
    ExitNotTaken.ExactNotTaken = nullptr;
-  delete[] ExitNotTaken.getNextExit();
+  delete[] ExitNotTaken.ExtraInfo.getPointer();
  }
  
  /// computeBackedgeTakenCount - Compute the number of times the backedge
  /// of the specified loop will execute.
  ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
+ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
+                                           bool AllowPredicates) {
    SmallVector<BasicBlock *, 8> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
  
-  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+  SmallVector<EdgeInfo, 4> ExitCounts;
    bool CouldComputeBECount = true;
    BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
    const SCEV *MustExitMaxBECount = nullptr;
@@ -5521,9 +5579,13 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
  
    // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
    // and compute maxBECount.
+  // Do a union of all the predicates here.
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBB = ExitingBlocks[i];
-    ExitLimit EL = computeExitLimit(L, ExitBB);
+    ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
+
+    assert((AllowPredicates || EL.Pred.isAlwaysTrue()) &&
+           "Predicated exit limit when predicates are not allowed!");
  
      // 1. For each exit that can be computed, add an entry to ExitCounts.
      // CouldComputeBECount is true only if all exits can be computed.
@@ -5532,7 +5594,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
        // we won't be able to compute an exact value for the loop.
        CouldComputeBECount = false;
      else
-      ExitCounts.push_back({ExitBB, EL.Exact});
+      ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred));
  
      // 2. Derive the loop's MaxBECount from each exit's max number of
      // non-exiting iterations. Partition the loop exits into two kinds:
@@ -5566,7 +5628,8 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
  }
  
  ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+                                  bool AllowPredicates) {
  
    // Okay, we've chosen an exiting block.  See what condition causes us to exit
    // at this block and remember the exit block and whether all other targets
@@ -5631,9 +5694,9 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
    if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
      assert(BI->isConditional() && "If unconditional, it can't be in loop!");
      // Proceed to the next level to examine the exit condition expression.
-    return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
-                                    BI->getSuccessor(1),
-                                    /*ControlsExit=*/IsOnlyExit);
+    return computeExitLimitFromCond(
+        L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1),
+        /*ControlsExit=*/IsOnlyExit, AllowPredicates);
    }
  
    if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
@@ -5656,16 +5719,19 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
                                            Value *ExitCond,
                                            BasicBlock *TBB,
                                            BasicBlock *FBB,
-                                          bool ControlsExit) {
+                                          bool ControlsExit,
+                                          bool AllowPredicates) {
    // Check if the controlling expression for this loop is an And or Or.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
      if (BO->getOpcode() == Instruction::And) {
        // Recurse on the operands of the and.
        bool EitherMayExit = L->contains(TBB);
        ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               AllowPredicates);
        ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               AllowPredicates);
        const SCEV *BECount = getCouldNotCompute();
        const SCEV *MaxBECount = getCouldNotCompute();
        if (EitherMayExit) {
@@ -5692,6 +5758,9 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
            BECount = EL0.Exact;
        }
  
+      SCEVUnionPredicate NP;
+      NP.add(&EL0.Pred);
+      NP.add(&EL1.Pred);
        // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
        // to be more aggressive when computing BECount than when computing
        // MaxBECount.  In these cases it is possible for EL0.Exact and EL1.Exact
@@ -5700,15 +5769,17 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
            !isa<SCEVCouldNotCompute>(BECount))
          MaxBECount = BECount;
  
-      return ExitLimit(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount, NP);
      }
      if (BO->getOpcode() == Instruction::Or) {
        // Recurse on the operands of the or.
        bool EitherMayExit = L->contains(FBB);
        ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               AllowPredicates);
        ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               AllowPredicates);
        const SCEV *BECount = getCouldNotCompute();
        const SCEV *MaxBECount = getCouldNotCompute();
        if (EitherMayExit) {
@@ -5735,14 +5806,25 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
            BECount = EL0.Exact;
        }
  
-      return ExitLimit(BECount, MaxBECount);
+      SCEVUnionPredicate NP;
+      NP.add(&EL0.Pred);
+      NP.add(&EL1.Pred);
+      return ExitLimit(BECount, MaxBECount, NP);
      }
    }
  
    // With an icmp, it may be feasible to compute an exact backedge-taken count.
    // Proceed to the next level to examine the icmp.
-  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
-    return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
+    ExitLimit EL =
+        computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+    if (EL.hasFullInfo() || !AllowPredicates)
+      return EL;
+
+    // Try again, but use SCEV predicates this time.
+    return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit,
+                                    /*AllowPredicates=*/true);
+  }
  
    // Check for a constant condition. These are normally stripped out by
    // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -5766,7 +5848,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
                                            ICmpInst *ExitCond,
                                            BasicBlock *TBB,
                                            BasicBlock *FBB,
-                                          bool ControlsExit) {
+                                          bool ControlsExit,
+                                          bool AllowPredicates) {
  
    // If the condition was exit on true, convert the condition to exit on false
    ICmpInst::Predicate Cond;
@@ -5823,7 +5906,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
    switch (Cond) {
    case ICmpInst::ICMP_NE: {                     // while (X != Y)
      // Convert to: while (X-Y != 0)
-    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
+                                AllowPredicates);
      if (EL.hasAnyInfo()) return EL;
      break;
    }
@@ -5836,14 +5920,17 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_ULT: {                    // while (X < Y)
      bool IsSigned = Cond == ICmpInst::ICMP_SLT;
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
+                                    AllowPredicates);
      if (EL.hasAnyInfo()) return EL;
      break;
    }
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_UGT: {                    // while (X > Y)
      bool IsSigned = Cond == ICmpInst::ICMP_SGT;
-    ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
+    ExitLimit EL =
+        HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
+                            AllowPredicates);
      if (EL.hasAnyInfo()) return EL;
      break;
    }
@@ -6105,7 +6192,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
      unsigned BitWidth = getTypeSizeInBits(RHS->getType());
      const SCEV *UpperBound =
          getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
-    return ExitLimit(getCouldNotCompute(), UpperBound);
+    SCEVUnionPredicate P;
+    return ExitLimit(getCouldNotCompute(), UpperBound, P);
    }
  
    return getCouldNotCompute();
@@ -6882,7 +6970,9 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  /// effectively V != 0.  We know and take advantage of the fact that this
  /// expression only being used in a comparison by zero context.
  ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
+                              bool AllowPredicates) {
+  SCEVUnionPredicate P;
    // If the value is a constant
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
      // If the value is already zero, the branch will execute zero times.
@@ -6891,6 +6981,12 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
    }
  
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+  if (!AddRec && AllowPredicates)
+    // Try to make this an AddRec using runtime tests, in the first X
+    // iterations of this loop, where X is the SCEV expression found by the
+    // algorithm below.
+    AddRec = convertSCEVToAddRecWithPredicates(V, L, P);
+
    if (!AddRec || AddRec->getLoop() != L)
      return getCouldNotCompute();
  
@@ -6915,7 +7011,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
          // should not accept a root of 2.
          const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
          if (Val->isZero())
-          return R1;  // We found a quadratic root!
+          return ExitLimit(R1, R1, P); // We found a quadratic root!
        }
      }
      return getCouldNotCompute();
@@ -6972,7 +7068,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
      else
        MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
                                           : -CR.getUnsignedMin());
-    return ExitLimit(Distance, MaxBECount);
+    return ExitLimit(Distance, MaxBECount, P);
    }
  
    // As a special case, handle the instance where Step is a positive power of
@@ -7025,7 +7121,9 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
        auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
        auto *WideTy = Distance->getType();
  
-      return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+      const SCEV *Limit =
+          getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+      return ExitLimit(Limit, Limit, P);
      }
    }
  
@@ -7037,13 +7135,15 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
    if (ControlsExit && AddRec->hasNoSelfWrap()) {
      const SCEV *Exact =
          getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-    return ExitLimit(Exact, Exact);
+    return ExitLimit(Exact, Exact, P);
    }
  
    // Then, try to solve the above equation provided that Start is constant.
-  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
-    return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),
-                                        *this);
+  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
+    const SCEV *E = SolveLinEquationWithOverflow(
+        StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
+    return ExitLimit(E, E, P);
+  }
    return getCouldNotCompute();
  }
  
@@ -8486,12 +8586,18 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
  ScalarEvolution::ExitLimit
  ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                    const Loop *L, bool IsSigned,
-                                  bool ControlsExit) {
+                                  bool ControlsExit, bool AllowPredicates) {
+  SCEVUnionPredicate P;
    // We handle only IV < Invariant
    if (!isLoopInvariant(RHS, L))
      return getCouldNotCompute();
  
    const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!IV && AllowPredicates)
+    // Try to make this an AddRec using runtime tests, in the first X
+    // iterations of this loop, where X is the SCEV expression found by the
+    // algorithm below.
+    IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
  
    // Avoid weird loops
    if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8560,18 +8666,24 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
    if (isa<SCEVCouldNotCompute>(MaxBECount))
      MaxBECount = BECount;
  
-  return ExitLimit(BECount, MaxBECount);
+  return ExitLimit(BECount, MaxBECount, P);
  }
  
  ScalarEvolution::ExitLimit
  ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
                                       const Loop *L, bool IsSigned,
-                                     bool ControlsExit) {
+                                     bool ControlsExit, bool AllowPredicates) {
+  SCEVUnionPredicate P;
    // We handle only IV > Invariant
    if (!isLoopInvariant(RHS, L))
      return getCouldNotCompute();
  
    const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!IV && AllowPredicates)
+    // Try to make this an AddRec using runtime tests, in the first X
+    // iterations of this loop, where X is the SCEV expression found by the
+    // algorithm below.
+    IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
  
    // Avoid weird loops
    if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8642,7 +8754,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
    if (isa<SCEVCouldNotCompute>(MaxBECount))
      MaxBECount = BECount;
  
-  return ExitLimit(BECount, MaxBECount);
+  return ExitLimit(BECount, MaxBECount, P);
  }
  
  /// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -9346,6 +9458,8 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
        ValueExprMap(std::move(Arg.ValueExprMap)),
        WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
        BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+      PredicatedBackedgeTakenCounts(
+          std::move(Arg.PredicatedBackedgeTakenCounts)),
        ConstantEvolutionLoopExitValue(
            std::move(Arg.ConstantEvolutionLoopExitValue)),
        ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
@@ -9378,6 +9492,8 @@ ScalarEvolution::~ScalarEvolution() {
    // that a loop had multiple computable exits.
    for (auto &BTCI : BackedgeTakenCounts)
      BTCI.second.clear();
+  for (auto &BTCI : PredicatedBackedgeTakenCounts)
+    BTCI.second.clear();
  
    assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
    assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
@@ -9420,6 +9536,20 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
      OS << "Unpredictable max backedge-taken count. ";
    }
  
+  OS << "\n"
+        "Loop ";
+  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+  OS << ": ";
+
+  SCEVUnionPredicate Pred;
+  auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
+  if (!isa<SCEVCouldNotCompute>(PBT)) {
+    OS << "Predicated backedge-taken count is " << *PBT << "\n";
+    OS << " Predicates:\n";
+    Pred.print(OS, 4);
+  } else {
+    OS << "Unpredictable predicated backedge-taken count. ";
+  }
    OS << "\n";
  }
  
@@ -9704,16 +9834,20 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
    ExprValueMap.erase(S);
    HasRecMap.erase(S);
  
-  for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
-         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
-    BackedgeTakenInfo &BEInfo = I->second;
-    if (BEInfo.hasOperand(S, this)) {
-      BEInfo.clear();
-      BackedgeTakenCounts.erase(I++);
-    }
-    else
-      ++I;
-  }
+  auto RemoveSCEVFromBackedgeMap =
+      [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+        for (auto I = Map.begin(), E = Map.end(); I != E;) {
+          BackedgeTakenInfo &BEInfo = I->second;
+          if (BEInfo.hasOperand(S, this)) {
+            BEInfo.clear();
+            Map.erase(I++);
+          } else
+            ++I;
+        }
+      };
+
+  RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
+  RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
  }
  
  typedef DenseMap<const Loop *, std::string> VerifyMap;
@@ -10128,7 +10262,7 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) {
  
  PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
                                                       Loop &L)
-    : SE(SE), L(L), Generation(0) {}
+    : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {}
  
  const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
    const SCEV *Expr = SE.getSCEV(V);
@@ -10149,6 +10283,15 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
    return NewSCEV;
  }
  
+const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
+  if (!BackedgeCount) {
+    SCEVUnionPredicate BackedgePred;
+    BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
+    addPredicate(BackedgePred);
+  }
+  return BackedgeCount;
+}
+
  void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
    if (Preds.implies(&Pred))
      return;
@@ -10214,10 +10357,10 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
    return New;
  }
  
-PredicatedScalarEvolution::
-PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) :
-  RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
-  Generation(Init.Generation) {
+PredicatedScalarEvolution::PredicatedScalarEvolution(
+    const PredicatedScalarEvolution &Init)
+    : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
+      Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
    for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I)
      FlagsMap.insert(*I);
  }
diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp

index 4db3c7f..d9d2a8a 100644 (file)
--- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2004,7 +2004,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
    assert(AR->isAffine() && "Cannot generate RT check for "
                             "non-affine expression");
  
-  const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop());
+  SCEVUnionPredicate Pred;
+  const SCEV *ExitCount =
+      SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
    const SCEV *Step = AR->getStepRecurrence(SE);
    const SCEV *Start = AR->getStart();
  
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 201e9e9..61d9ace 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2778,7 +2778,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
    IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
    // Find the loop boundaries.
    ScalarEvolution *SE = PSE.getSE();
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
+  const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
    assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
           "Invalid loop count");
  
@@ -4425,7 +4425,7 @@ bool LoopVectorizationLegality::canVectorize() {
    }
  
    // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+  const SCEV *ExitCount = PSE.getBackedgeTakenCount();
    if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
      emitAnalysis(VectorizationReport()
                   << "could not determine number of loop iterations");
diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll

new file mode 100644 (file)

index 0000000..2db0a8b
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@A = weak global [1000 x i32] zeroinitializer, align 32
+
+; The resulting predicate is i16 {0,+,1} <nssw>, meaning
+; that the resulting backedge expression will be valid for:
+;   (1 + (-1 smax %M)) <= MAX_INT16
+;
+; At the limit condition for M (MAX_INT16 - 1) we have in the
+; last iteration:
+;    i0 <- MAX_INT16
+;    i0.ext <- MAX_INT16
+;
+; and therefore no wrapping happened for i0 or i0.ext
+; throughout the execution of the loop. The resulting predicated
+; backedge taken count is correct.
+
+; CHECK: Classifying expressions for: @test1
+; CHECK: %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT:  -->  (sext i16 {0,+,1}<%bb3> to i32)
+; CHECK:      Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M))
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT:    {0,+,1}<%bb3> Added Flags: <nssw>
+define void @test1(i32 %N, i32 %M) {
+entry:
+        br label %bb3
+
+bb:             ; preds = %bb3
+        %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0          ; <i32*> [#uses=1]
+        store i32 123, i32* %tmp
+        %tmp2 = add i16 %i.0, 1         ; <i32> [#uses=1]
+        br label %bb3
+
+bb3:            ; preds = %bb, %entry
+        %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ]            ; <i32> [#uses=3]
+        %i.0.ext = sext i16 %i.0 to i32
+        %tmp3 = icmp sle i32 %i.0.ext, %M          ; <i1> [#uses=1]
+        br i1 %tmp3, label %bb, label %bb5
+
+bb5:            ; preds = %bb3
+        br label %return
+
+return:         ; preds = %bb5
+        ret void
+}
+
+; The predicated backedge taken count is:
+;    (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32)))
+;                                     smax (-1 + (-1 * %M)))
+;    )
+
+; * -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32)))
+; The predicated backedge taken count is 0.
+; From the IR, this is correct since we will bail out at the
+; first iteration.
+
+
+; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32)))
+; or: %M < 1 + (sext i16 %Start to i32)
+;
+; The predicated backedge taken count is 1 + (sext i16 %Start to i32) - %M
+;
+; If %M >= MIN_INT16 + 1, this predicated backedge taken count would be correct (even
+; without predicates). However, for %M < MIN_INT16 this would be an infinite loop.
+; In these cases, the {%Start,+,-1} <nssw> predicate would be false, as the
+; final value of the {%Start,+,-1} expression (%M - 1) would not be
+; representable as an i16.
+
+; There is also a limit case here where the value of %M is MIN_INT16. In this case
+; we still have an infinite loop, since icmp sge %x, MIN_INT16 will always return
+; true (%x being a sign-extended i16).
+
+; CHECK: Classifying expressions for: @test2
+
+; CHECK:      %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT:    -->  (sext i16 {%Start,+,-1}<%bb3> to i32)
+; CHECK:       Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M))))
+; CHECK-NEXT:  Predicates:
+; CHECK-NEXT:    {%Start,+,-1}<%bb3> Added Flags: <nssw>
+
+define void @test2(i32 %N, i32 %M, i16 %Start) {
+entry:
+        br label %bb3
+
+bb:             ; preds = %bb3
+        %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0          ; <i32*> [#uses=1]
+        store i32 123, i32* %tmp
+        %tmp2 = sub i16 %i.0, 1         ; <i32> [#uses=1]
+        br label %bb3
+
+bb3:            ; preds = %bb, %entry
+        %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ]            ; <i32> [#uses=3]
+        %i.0.ext = sext i16 %i.0 to i32
+        %tmp3 = icmp sge i32 %i.0.ext, %M          ; <i1> [#uses=1]
+        br i1 %tmp3, label %bb, label %bb5
+
+bb5:            ; preds = %bb3
+        br label %return
+
+return:         ; preds = %bb5
+        ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll

new file mode 100644 (file)

index 0000000..aba47f6
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
@@ -0,0 +1,166 @@
+; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+; The following tests contain loops for which SCEV cannot determine the backedge
+; taken count. This is because the backedge taken condition is produced by an
+; icmp with one of the sides being a loop varying non-AddRec expression.
+; However, there is a possibility to normalize this to an AddRec expression
+; using SCEV predicates. This allows us to compute a 'guarded' backedge count.
+; The Loop Vectorizer is able to version the loop in order to use this guarded
+; backedge count and vectorize more loops.
+
+
+; CHECK-LABEL: test_sge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sge(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+  %indvars.next = add i16 %indvars.iv, 1
+  %indvars.ext = zext i16 %indvars.iv to i32
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp sge i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_uge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_uge(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N, i32 %Offset) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+  %indvars.next = add i16 %indvars.iv, 1
+
+  %indvars.ext = sext i16 %indvars.iv to i32
+  %indvars.access = add i32 %Offset, %indvars.ext
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = add i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp uge i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_ule
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_ule(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N,
+                      i16 %M) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+  %indvars.next = sub i16 %indvars.iv, 1
+  %indvars.ext = zext i16 %indvars.iv to i32
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp ule i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sle
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sle(i32* noalias %A,
+                   i32* noalias %B,
+                   i32* noalias %C, i32 %N,
+                   i16 %M) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+  %indvars.next = sub i16 %indvars.iv, 1
+  %indvars.ext = sext i16 %indvars.iv to i32
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp sle i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll

index 42ec3b3..740ff36 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -54,8 +54,9 @@ for.body:                                         ; preds = %entry, %for.body
    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
    %0 = trunc i64 %indvars.iv to i32, !dbg !16
+  %ld = load i32, i32* %arrayidx, align 4
    store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
-  %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+  %cmp3 = icmp sle i32 %ld, %Length, !dbg !22
    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
    %1 = trunc i64 %indvars.iv.next to i32
    %cmp = icmp slt i32 %1, %Length, !dbg !12
author	Silviu Baranga <silviu.baranga@arm.com>
	Wed, 6 Apr 2016 13:18:26 +0000 (13:18 +0000)
committer	Silviu Baranga <silviu.baranga@arm.com>
	Wed, 6 Apr 2016 13:18:26 +0000 (13:18 +0000)
llvm/include/llvm/Analysis/ScalarEvolution.h		patch \| blob \| history
llvm/lib/Analysis/LoopAccessAnalysis.cpp		patch \| blob \| history
llvm/lib/Analysis/ScalarEvolution.cpp		patch \| blob \| history
llvm/lib/Analysis/ScalarEvolutionExpander.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll		patch \| blob \| history