Loop Unroll: add options and tweak to make Partial unrolling more useful
authorFiona Glaser <escha@apple.com>
Wed, 6 Apr 2016 16:57:25 +0000 (16:57 +0000)
committerFiona Glaser <escha@apple.com>
Wed, 6 Apr 2016 16:57:25 +0000 (16:57 +0000)
1. Add FullUnrollMaxCount option that works like MaxCount, but also limits
   the unroll count for fully unrolled loops. So if a loop has an iteration
   count over this, it won't fully unroll.
2. Add CLI options for MaxCount and the new option, so they can be tested
   (plus a test).
3. Make partial unrolling obey MaxCount.

An example use-case (the out of tree one this is originally designed for) is
a target’s TTI can analyze a loop and decide on a max unroll count separate
from the size threshold, e.g. based on register pressure, then constrain
LoopUnroll to not exceed that, regardless of the size of the unrolled loop.

llvm-svn: 265562

llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

index 57d0cf4..13e0729 100644 (file)
@@ -260,6 +260,10 @@ public:
     // (set to UINT_MAX to disable). This does not apply in cases where the
     // loop is being fully unrolled.
     unsigned MaxCount;
+    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
+    /// applies even if full unrolling is selected. This allows a target to fall
+    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
+    unsigned FullUnrollMaxCount;
     /// Allow partial unrolling (unrolling of loops to expand the size of the
     /// loop body, not only to eliminate small constant-trip-count loops).
     bool Partial;
index e5a9513..01a8ad8 100644 (file)
@@ -65,6 +65,15 @@ UnrollCount("unroll-count", cl::Hidden,
   cl::desc("Use this unroll count for all loops including those with "
            "unroll_count pragma values, for testing purposes"));
 
+static cl::opt<unsigned>
+UnrollMaxCount("unroll-max-count", cl::Hidden,
+  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
+           "testing purposes"));
+
+static cl::opt<unsigned>
+UnrollFullMaxCount("unroll-full-max-count", cl::Hidden,
+  cl::desc("Set the max unroll count for full unrolling, for testing purposes"));
+
 static cl::opt<bool>
 UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
   cl::desc("Allows loops to be partially unrolled until "
@@ -107,6 +116,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
   UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
   UP.Count = 0;
   UP.MaxCount = UINT_MAX;
+  UP.FullUnrollMaxCount = UINT_MAX;
   UP.Partial = false;
   UP.Runtime = false;
   UP.AllowExpensiveTripCount = false;
@@ -138,6 +148,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
     UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
   if (UnrollCount.getNumOccurrences() > 0)
     UP.Count = UnrollCount;
+  if (UnrollMaxCount.getNumOccurrences() > 0)
+    UP.MaxCount = UnrollMaxCount;
+  if (UnrollFullMaxCount.getNumOccurrences() > 0)
+    UP.FullUnrollMaxCount = UnrollFullMaxCount;
   if (UnrollAllowPartial.getNumOccurrences() > 0)
     UP.Partial = UnrollAllowPartial;
   if (UnrollRuntime.getNumOccurrences() > 0)
@@ -566,6 +580,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
     Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
   if (TripCount && Count > TripCount)
     Count = TripCount;
+  Count = std::min(Count, UP.FullUnrollMaxCount);
 
   unsigned NumInlineCandidates;
   bool NotDuplicatable;
@@ -633,10 +648,12 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
                    << "-unroll-allow-partial not given\n");
       return false;
     }
-    if (UP.PartialThreshold != NoThreshold &&
-        UnrolledSize > UP.PartialThreshold) {
+    if (UP.PartialThreshold != NoThreshold && Count > 1) {
       // Reduce unroll count to be modulo of TripCount for partial unrolling.
-      Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
+      if (UnrolledSize > UP.PartialThreshold)
+        Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
+      if (Count > UP.MaxCount)
+        Count = UP.MaxCount;
       while (Count != 0 && TripCount % Count != 0)
         Count--;
       if (AllowRuntime && Count <= 1) {