bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
- /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+ /// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
+ /// Allow peeling basing on profile. Uses to enable peeling off all
+ /// iterations basing on provided profile.
+ /// If the value is true the peeling cost model can decide to peel only
+ /// some iterations and in this case it will set this to false.
+ bool PeelProfiledIterations;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
UP.UpperBound = false;
UP.AllowPeeling = true;
UP.UnrollAndJam = false;
+ UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
// Override with any target specific settings
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
- (IsCountSetExplicitly || UP.PeelCount))
+ (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
L->setLoopAlreadyUnrolled();
return UnrollResult;
"unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
cl::desc("Allow peeling of loops with multiple deopt exits."));
+static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
UP.PeelCount = UnrollForcePeelCount;
+ UP.PeelProfiledIterations = true;
return;
}
if (!UP.AllowPeeling)
return;
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ // Stop if we already peeled off the maximum number of iterations.
+ if (AlreadyPeeled >= UnrollPeelMaxCount)
+ return;
+
// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
// iterations of the loop. For this we compute the number for iterations after
// which every Phi is guaranteed to become an invariant, and try to peel the
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
- << " iteration(s) to turn"
- << " some Phis into invariants.\n");
- UP.PeelCount = DesiredPeelCount;
- return;
+ if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
+ UP.PeelProfiledIterations = false;
+ return;
+ }
}
}
if (TripCount)
return;
+ // Do not apply profile base peeling if it is disabled.
+ if (!UP.PeelProfiledIterations)
+ return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
// hit the peeled section.
<< "\n");
if (*PeelCount) {
- if ((*PeelCount <= UnrollPeelMaxCount) &&
+ if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+ LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
<< "\n");
NumPeeled++;
+ // Update Metadata for count of peeled off iterations.
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+
return true;
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare void @f1()
+declare void @f2()
+
+; Check that we can peel off iterations that make conditions true.
+; The second invocation of loop-unroll will do profile based peeling of
+; remained iterations.
+define void @test1(i32 %k) !prof !4 {
+; CHECK: Loop Unroll: F[test1] Loop %for.body
+; CHECK: PEELING loop %for.body with iteration count 2!
+; CHECK: PEELING loop %for.body with iteration count 4!
+; CHECK: llvm.loop.unroll.disable
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %cmp1 = icmp ult i32 %i.05, 2
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+ call void @f1()
+ br label %for.inc
+
+if.else:
+ call void @f2()
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.05, 1
+ %cmp = icmp slt i32 %inc, %k
+ br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !2
+
+for.end:
+ ret void
+}
+
+!1 = distinct !{!1}
+!2 = !{!"branch_weights", i32 6, i32 1}
+!4 = !{!"function_entry_count", i64 1}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare void @f1()
+declare void @f2()
+
+; Check that we can peel off iterations that make conditions true.
+; The second invocation of loop-unroll will NOT do profile based peeling of
+; remained iterations because the total number of peeled iterations exceeds
+; threashold specified with -unroll-peel-max-count=7.
+define void @test2(i32 %k) !prof !4 {
+; CHECK: Loop Unroll: F[test2] Loop %for.body
+; CHECK: PEELING loop %for.body with iteration count 2!
+; CHECK-NOT: llvm.loop.unroll.disable
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %cmp1 = icmp ult i32 %i.05, 2
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+ call void @f1()
+ br label %for.inc
+
+if.else:
+ call void @f2()
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.05, 1
+ %cmp = icmp slt i32 %inc, %k
+ br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !3
+
+for.end:
+ ret void
+}
+
+!1 = distinct !{!1}
+!3 = !{!"branch_weights", i32 8, i32 1}
+!4 = !{!"function_entry_count", i64 1}
for.end:
ret void
}
+; CHECK-NOT: llvm.loop.unroll.disable
\ No newline at end of file