Revert "[NFC] factor out unrolling decision logic"

author Geoffrey Martin-Noble <gcmn@google.com>
Wed, 18 Aug 2021 18:36:25 +0000 (11:36 -0700)

committer Geoffrey Martin-Noble <gcmn@google.com>
Wed, 18 Aug 2021 18:38:48 +0000 (11:38 -0700)

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a5a1302..34dad2a 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -65,7 +65,6 @@
  #include <cassert>
  #include <cstdint>
  #include <limits>
-#include <optional>
  #include <string>
  #include <tuple>
  #include <utility>
@@ -320,16 +319,6 @@ struct EstimatedUnrollCost {
    unsigned RolledDynamicCost;
  };
  
-struct PragmaInfo {
-  PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
-      : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
-        PragmaEnableUnroll(PEU) {}
-  const bool UserUnrollCount;
-  const bool PragmaFullUnroll;
-  const unsigned PragmaCount;
-  const bool PragmaEnableUnroll;
-};
-
  } // end anonymous namespace
  
  /// Figure out if the loop is worth full unrolling.
@@ -758,132 +747,13 @@ public:
  
    // Returns loop size estimation for unrolled loop, given the unrolling
    // configuration specified by UP.
-  uint64_t
-  getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
-                      const unsigned CountOverwrite = 0) const {
+  uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
      assert(LoopSize >= UP.BEInsns &&
             "LoopSize should not be less than BEInsns!");
-    if (CountOverwrite)
-      return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
-             UP.BEInsns;
-    else
-      return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
-             UP.BEInsns;
+    return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
    }
  };
  
-static Optional<unsigned>
-shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
-                   const unsigned TripMultiple, const unsigned TripCount,
-                   const UnrollCostEstimator UCE,
-                   const TargetTransformInfo::UnrollingPreferences &UP) {
-
-  // Using unroll pragma
-  // 1st priority is unroll count set by "unroll-count" option.
-
-  if (PInfo.UserUnrollCount) {
-    if (UP.AllowRemainder &&
-        UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
-      return (unsigned)UnrollCount;
-  }
-
-  // 2nd priority is unroll count set by pragma.
-  if (PInfo.PragmaCount > 0) {
-    if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
-        UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
-      return PInfo.PragmaCount;
-  }
-
-  if (PInfo.PragmaFullUnroll && TripCount != 0) {
-    if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
-      return TripCount;
-  }
-  // if didn't return until here, should continue to other priorties
-  return None;
-}
-
-static Optional<unsigned> shouldFullUnroll(
-    Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
-    ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
-    const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
-    const TargetTransformInfo::UnrollingPreferences &UP) {
-
-  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
-    // When computing the unrolled size, note that BEInsns are not replicated
-    // like the rest of the loop body.
-    if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
-      return FullUnrollTripCount;
-
-    } else {
-      // The loop isn't that small, but we still can fully unroll it if that
-      // helps to remove a significant number of instructions.
-      // To check that, run additional analysis on the loop.
-      if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
-              L, FullUnrollTripCount, DT, SE, EphValues, TTI,
-              UP.Threshold * UP.MaxPercentThresholdBoost / 100,
-              UP.MaxIterationsCountToAnalyze)) {
-        unsigned Boost =
-            getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
-        if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
-          return FullUnrollTripCount;
-        }
-      }
-    }
-  }
-  return None;
-}
-
-static Optional<unsigned>
-shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
-                    const UnrollCostEstimator UCE,
-                    const TargetTransformInfo::UnrollingPreferences &UP) {
-
-  unsigned count = UP.Count;
-  if (TripCount) {
-    if (!UP.Partial) {
-      LLVM_DEBUG(dbgs() << "  will not try to unroll partially because "
-                        << "-unroll-allow-partial not given\n");
-      count = 0;
-      return count;
-    }
-    if (count == 0)
-      count = TripCount;
-    if (UP.PartialThreshold != NoThreshold) {
-      // Reduce unroll count to be modulo of TripCount for partial unrolling.
-      if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
-        count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
-                (LoopSize - UP.BEInsns);
-      if (count > UP.MaxCount)
-        count = UP.MaxCount;
-      while (count != 0 && TripCount % count != 0)
-        count--;
-      if (UP.AllowRemainder && count <= 1) {
-        // If there is no Count that is modulo of TripCount, set Count to
-        // largest power-of-two factor that satisfies the threshold limit.
-        // As we'll create fixup loop, do the type of unrolling only if
-        // remainder loop is allowed.
-        count = UP.DefaultUnrollRuntimeCount;
-        while (count != 0 &&
-               UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
-          count >>= 1;
-      }
-      if (count < 2) {
-        count = 0;
-      }
-    } else {
-      count = TripCount;
-    }
-    if (count > UP.MaxCount)
-      count = UP.MaxCount;
-
-    LLVM_DEBUG(dbgs() << "  partially unrolling with count: " << count << "\n");
-
-    return count;
-  }
-
-  // if didn't return until here, should continue to other priorties
-  return None;
-}
  // Returns true if unroll count was set explicitly.
  // Calculates unroll count and writes it to UP.Count.
  // Unless IgnoreUser is true, will also use metadata and command-line options
@@ -901,18 +771,7 @@ bool llvm::computeUnrollCount(
      TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
  
    UnrollCostEstimator UCE(*L, LoopSize);
-  Optional<unsigned> UnrollFactor;
-
-  const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
-  const bool PragmaFullUnroll = hasUnrollFullPragma(L);
-  const unsigned PragmaCount = unrollCountPragmaValue(L);
-  const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
-
-  const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
-                              PragmaEnableUnroll || UserUnrollCount;
  
-  PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
-                   PragmaEnableUnroll);
    // Use an explicit peel count that has been specified for testing. In this
    // case it's not permitted to also specify an explicit unroll count.
    if (PP.PeelCount) {
@@ -924,29 +783,47 @@ bool llvm::computeUnrollCount(
      UP.Runtime = false;
      return true;
    }
+
    // Check for explicit Count.
    // 1st priority is unroll count set by "unroll-count" option.
-  // 2nd priority is unroll count set by pragma.
-  UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
+  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+  if (UserUnrollCount) {
+    UP.Count = UnrollCount;
+    UP.AllowExpensiveTripCount = true;
+    UP.Force = true;
+    if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+      return true;
+  }
  
-  if (UnrollFactor) {
-    UP.Count = *UnrollFactor;
+  // 2nd priority is unroll count set by pragma.
+  unsigned PragmaCount = unrollCountPragmaValue(L);
+  if (PragmaCount > 0) {
+    UP.Count = PragmaCount;
+    UP.Runtime = true;
+    UP.AllowExpensiveTripCount = true;
+    UP.Force = true;
+    if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
+        UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+      return true;
+  }
+  bool PragmaFullUnroll = hasUnrollFullPragma(L);
+  if (PragmaFullUnroll && TripCount != 0) {
+    UP.Count = TripCount;
+    if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+      return false;
+  }
  
-    if (UserUnrollCount || (PragmaCount > 0)) {
-      UP.AllowExpensiveTripCount = true;
-      UP.Force = true;
-    }
-    UP.Runtime |= (PragmaCount > 0);
-    return ExplicitUnroll;
-  } else {
-    if (ExplicitUnroll && TripCount != 0) {
-      // If the loop has an unrolling pragma, we want to be more aggressive with
-      // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
-      // value which is larger than the default limits.
-      UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
-      UP.PartialThreshold =
-          std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
-    }
+  bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+                        PragmaEnableUnroll || UserUnrollCount;
+
+  if (ExplicitUnroll && TripCount != 0) {
+    // If the loop has an unrolling pragma, we want to be more aggressive with
+    // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+    // value which is larger than the default limits.
+    UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+    UP.PartialThreshold =
+        std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
    }
  
    // 3rd priority is full unroll count.
@@ -976,20 +853,28 @@ bool llvm::computeUnrollCount(
    unsigned FullUnrollTripCount =
        ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
    UP.Count = FullUnrollTripCount;
-
-  UnrollFactor =
-      shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
-
-  // if shouldFullUnroll can do the unrolling, some side parameteres should be
-  // set
-  if (UnrollFactor) {
-    UP.Count = *UnrollFactor;
-    UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
-    TripCount = FullUnrollTripCount;
-    TripMultiple = UP.UpperBound ? 1 : TripMultiple;
-    return ExplicitUnroll;
-  } else {
-    UP.Count = FullUnrollTripCount;
+  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
+    // When computing the unrolled size, note that BEInsns are not replicated
+    // like the rest of the loop body.
+    if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+      UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+      return ExplicitUnroll;
+    } else {
+      // The loop isn't that small, but we still can fully unroll it if that
+      // helps to remove a significant number of instructions.
+      // To check that, run additional analysis on the loop.
+      if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+              L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+              UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+              UP.MaxIterationsCountToAnalyze)) {
+        unsigned Boost =
+            getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+        if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+          UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+          return ExplicitUnroll;
+        }
+      }
+    }
    }
  
    // 4th priority is loop peeling.
@@ -1000,31 +885,39 @@ bool llvm::computeUnrollCount(
      return ExplicitUnroll;
    }
  
-  // Before starting partial unrolling, set up.partial to true,
-  // if user explicitly asked  for unrolling
-  if (TripCount)
-    UP.Partial |= ExplicitUnroll;
-
    // 5th priority is partial unrolling.
    // Try partial unroll only when TripCount could be statically calculated.
-  UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
-
-  if (UnrollFactor) {
-    UP.Count = *UnrollFactor;
-
-    if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
-        UP.Count != TripCount)
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE,
-                                        "FullUnrollAsDirectedTooLarge",
-                                        L->getStartLoc(), L->getHeader())
-               << "Unable to fully unroll loop as directed by unroll pragma "
-                  "because "
-                  "unrolled size is too large.";
-      });
-
+  if (TripCount) {
+    UP.Partial |= ExplicitUnroll;
+    if (!UP.Partial) {
+      LLVM_DEBUG(dbgs() << "  will not try to unroll partially because "
+                        << "-unroll-allow-partial not given\n");
+      UP.Count = 0;
+      return false;
+    }
+    if (UP.Count == 0)
+      UP.Count = TripCount;
      if (UP.PartialThreshold != NoThreshold) {
-      if (UP.Count == 0) {
+      // Reduce unroll count to be modulo of TripCount for partial unrolling.
+      if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+        UP.Count =
+            (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+            (LoopSize - UP.BEInsns);
+      if (UP.Count > UP.MaxCount)
+        UP.Count = UP.MaxCount;
+      while (UP.Count != 0 && TripCount % UP.Count != 0)
+        UP.Count--;
+      if (UP.AllowRemainder && UP.Count <= 1) {
+        // If there is no Count that is modulo of TripCount, set Count to
+        // largest power-of-two factor that satisfies the threshold limit.
+        // As we'll create fixup loop, do the type of unrolling only if
+        // remainder loop is allowed.
+        UP.Count = UP.DefaultUnrollRuntimeCount;
+        while (UP.Count != 0 &&
+               UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+          UP.Count >>= 1;
+      }
+      if (UP.Count < 2) {
          if (PragmaEnableUnroll)
            ORE->emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE,
@@ -1034,8 +927,25 @@ bool llvm::computeUnrollCount(
                        "pragma "
                        "because unrolled size is too large.";
            });
+        UP.Count = 0;
        }
+    } else {
+      UP.Count = TripCount;
      }
+    if (UP.Count > UP.MaxCount)
+      UP.Count = UP.MaxCount;
+    if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+        UP.Count != TripCount)
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE,
+                                        "FullUnrollAsDirectedTooLarge",
+                                        L->getStartLoc(), L->getHeader())
+               << "Unable to fully unroll loop as directed by unroll pragma "
+                  "because "
+                  "unrolled size is too large.";
+      });
+    LLVM_DEBUG(dbgs() << "  partially unrolling with count: " << UP.Count
+                      << "\n");
      return ExplicitUnroll;
    }
    assert(TripCount == 0 &&
@@ -1072,6 +982,8 @@ bool llvm::computeUnrollCount(
          UP.AllowExpensiveTripCount = true;
      }
    }
+
+  // Reduce count based on the type of unrolling and the threshold values.
    UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
    if (!UP.Runtime) {
      LLVM_DEBUG(
@@ -1106,7 +1018,7 @@ bool llvm::computeUnrollCount(
  
      using namespace ore;
  
-    if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
+    if (PragmaCount > 0 && !UP.AllowRemainder)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE,
                                          "DifferentUnrollCountFromDirected",
author	Geoffrey Martin-Noble <gcmn@google.com>
	Wed, 18 Aug 2021 18:36:25 +0000 (11:36 -0700)
committer	Geoffrey Martin-Noble <gcmn@google.com>
	Wed, 18 Aug 2021 18:38:48 +0000 (11:38 -0700)