/// pair of exact and max expressions that are eventually summarized in
/// ExitNotTakenInfo and BackedgeTakenInfo.
struct ExitLimit {
- const SCEV *ExactNotTaken;
- const SCEV *MaxNotTaken;
+ const SCEV *ExactNotTaken; //< The exit is not taken exactly this many times
+ const SCEV *MaxNotTaken; //< The exit is not taken at most this many times
+ bool MaxOrZero; //< Not taken either exactly MaxNotTaken or zero times
/// A set of predicate guards for this ExitLimit. The result is only valid
/// if all of the predicates in \c Predicates evaluate to 'true' at
Predicates.insert(P);
}
- /*implicit*/ ExitLimit(const SCEV *E) : ExactNotTaken(E), MaxNotTaken(E) {}
+ /*implicit*/ ExitLimit(const SCEV *E)
+ : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {}
ExitLimit(
- const SCEV *E, const SCEV *M,
+ const SCEV *E, const SCEV *M, bool MaxOrZero,
ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
- : ExactNotTaken(E), MaxNotTaken(M) {
+ : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
!isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
"Exact is not allowed to be less precise than Max");
addPredicate(P);
}
- ExitLimit(const SCEV *E, const SCEV *M,
+ ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero,
const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
- : ExitLimit(E, M, {&PredSet}) {}
+ : ExitLimit(E, M, MaxOrZero, {&PredSet}) {}
- ExitLimit(const SCEV *E, const SCEV *M) : ExitLimit(E, M, None) {}
+ ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero)
+ : ExitLimit(E, M, MaxOrZero, None) {}
/// Test whether this ExitLimit contains any computed information, or
/// whether it's all SCEVCouldNotCompute values.
/// ExitNotTaken has an element for every exiting block in the loop.
PointerIntPair<const SCEV *, 1> MaxAndComplete;
+ /// True iff the backedge is taken either exactly Max or zero times.
+ bool MaxOrZero;
+
/// \name Helper projection functions on \c MaxAndComplete.
/// @{
bool isComplete() const { return MaxAndComplete.getInt(); }
/// Initialize BackedgeTakenInfo from a list of exact exit counts.
BackedgeTakenInfo(SmallVectorImpl<EdgeExitInfo> &&ExitCounts, bool Complete,
- const SCEV *MaxCount);
+ const SCEV *MaxCount, bool MaxOrZero);
/// Test whether this BackedgeTakenInfo contains any computed information,
/// or whether it's all SCEVCouldNotCompute values.
/// Get the max backedge taken count for the loop.
const SCEV *getMax(ScalarEvolution *SE) const;
+ /// Return true if the number of times this backedge is taken is either the
+ /// value returned by getMax or zero.
+ bool isMaxOrZero(ScalarEvolution *SE) const;
+
/// Return true if any backedge taken count expressions refer to the given
/// subexpression.
bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
/// that is known never to be less than the actual backedge taken count.
const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+ /// Return true if the backedge taken count is either the value returned by
+ /// getMaxBackedgeTakenCount or zero.
+ bool isBackedgeTakenCountMaxOrZero(const Loop *L);
+
/// Return true if the specified loop has an analyzable loop-invariant
/// backedge-taken count.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
- bool UseUpperBound, unsigned TripMultiple, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveCondBr, bool PreserveOnlyFirst,
+ unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
return getBackedgeTakenInfo(L).getMax(this);
}
+/// Query the backedge-taken info computed for \p L and report whether the
+/// actual backedge-taken count is either the max count or zero.
+bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
+  const auto &TakenInfo = getBackedgeTakenInfo(L);
+  return TakenInfo.isMaxOrZero(this);
+}
+
/// Push PHI nodes in the header of the given loop onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
return getMax();
}
+/// Report whether the backedge is taken either exactly the max count or zero
+/// times. The stored MaxOrZero flag is only honoured when every entry in
+/// ExitNotTaken has an always-true predicate. \p SE is not consulted here.
+bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
+  // Without the flag there is nothing to validate.
+  if (!MaxOrZero)
+    return false;
+  // A single exit whose predicate is not always true invalidates the claim.
+  for (const ExitNotTakenInfo &ENT : ExitNotTaken)
+    if (!ENT.hasAlwaysTruePredicate())
+      return false;
+  return true;
+}
+
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
ScalarEvolution *SE) const {
if (getMax() && getMax() != SE->getCouldNotCompute() &&
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
&&ExitCounts,
- bool Complete, const SCEV *MaxCount)
- : MaxAndComplete(MaxCount, Complete) {
+ bool Complete, const SCEV *MaxCount, bool MaxOrZero)
+ : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
ExitNotTaken.reserve(ExitCounts.size());
std::transform(
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
const SCEV *MayExitMaxBECount = nullptr;
+ bool MustExitMaxOrZero = false;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
// computable EL.MaxNotTaken.
if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
DT.dominates(ExitBB, Latch)) {
- if (!MustExitMaxBECount)
+ if (!MustExitMaxBECount) {
MustExitMaxBECount = EL.MaxNotTaken;
- else {
+ MustExitMaxOrZero = EL.MaxOrZero;
+ } else {
MustExitMaxBECount =
getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
}
}
const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
(MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
+ // The loop backedge will be taken the maximum or zero times if there's
+ // a single exit that must be taken the maximum or zero times.
+ bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
- MaxBECount);
+ MaxBECount, MaxOrZero);
}
ScalarEvolution::ExitLimit
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, {&EL0.Predicates, &EL1.Predicates});
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
BECount = EL0.ExactNotTaken;
}
- return ExitLimit(BECount, MaxBECount, {&EL0.Predicates, &EL1.Predicates});
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
}
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
- return ExitLimit(getCouldNotCompute(), UpperBound);
+ return ExitLimit(getCouldNotCompute(), UpperBound, false);
}
return getCouldNotCompute();
// should not accept a root of 2.
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
- return ExitLimit(R1, R1, Predicates); // We found a quadratic root!
+ // We found a quadratic root!
+ return ExitLimit(R1, R1, false, Predicates);
}
}
return getCouldNotCompute();
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount, Predicates);
+ return ExitLimit(Distance, MaxBECount, false, Predicates);
}
// As a special case, handle the instance where Step is a positive power of
const SCEV *Limit =
getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
- return ExitLimit(Limit, Limit, Predicates);
+ return ExitLimit(Limit, Limit, false, Predicates);
}
}
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, Predicates);
+ return ExitLimit(Exact, Exact, false, Predicates);
}
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
const SCEV *E = SolveLinEquationWithOverflow(
StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
- return ExitLimit(E, E, Predicates);
+ return ExitLimit(E, E, false, Predicates);
}
return getCouldNotCompute();
}
}
const SCEV *MaxBECount;
+ bool MaxOrZero = false;
if (isa<SCEVConstant>(BECount))
MaxBECount = BECount;
- else if (isa<SCEVConstant>(BECountIfBackedgeTaken))
+ else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) {
// If we know exactly how many times the backedge will be taken if it's
// taken at least once, then the backedge count will either be that or
// zero.
MaxBECount = BECountIfBackedgeTaken;
- else {
+ MaxOrZero = true;
+ } else {
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, Predicates);
+ return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
ScalarEvolution::ExitLimit
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, Predicates);
+ return ExitLimit(BECount, MaxBECount, false, Predicates);
}
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ if (SE->isBackedgeTakenCountMaxOrZero(L))
+ OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable max backedge-taken count. ";
}
if (Convergent)
UP.AllowRemainder = false;
- // Try to find the trip count upper bound if it is allowed and we cannot find
- // exact trip count.
- if (UP.UpperBound) {
- if (!TripCount) {
- MaxTripCount = SE->getSmallConstantMaxTripCount(L);
- // Only unroll with small upper bound.
- if (MaxTripCount > UnrollMaxUpperBound)
- MaxTripCount = 0;
+ // Try to find the trip count upper bound if we cannot find the exact trip
+ // count.
+ bool MaxOrZero = false;
+ if (!TripCount) {
+ MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+ // We can unroll by the upper bound amount if it's generally allowed or if
+ // we know that the loop is executed either the upper bound or zero times.
+ // (MaxOrZero unrolling keeps only the first loop test, so the number of
+ // loop tests remains the same compared to the non-unrolled version, whereas
+ // the generic upper bound unrolling keeps all but the last loop test, so the
+ // number of loop tests goes up. That may end up being worse on targets with
+ // constrained branch predictor resources, so it is controlled by an option.)
+ // In addition we only unroll small upper bounds.
+ if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
+ MaxTripCount = 0;
}
}
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
- UP.AllowExpensiveTripCount, UseUpperBound, TripMultiple, LI,
- SE, &DT, &AC, &ORE, PreserveLCSSA))
+ UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
+ TripMultiple, LI, SE, &DT, &AC, &ORE, PreserveLCSSA))
return false;
// If loop has an unroll count pragma or unrolled by explicitly set count
///
/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
/// needs to be preserved. It is needed when we use trip count upper bound to
-/// fully unroll the loop.
+/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
+/// conditional branch needs to be preserved.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
- bool PreserveCondBr, unsigned TripMultiple, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
- bool PreserveLCSSA) {
+ bool PreserveCondBr, bool PreserveOnlyFirst,
+ unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
assert(NeedConditional &&
"NeedCondition cannot be modified by both complete "
"unrolling and runtime unrolling");
- NeedConditional = (PreserveCondBr && j);
+ NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
} else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
// If we know the trip count or a multiple of it, we can safely use an
// unconditional branch for some iterations.
; CHECK-LABEL: Determining loop execution counts for: @u_0
; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) umax %rhs))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
; CHECK-LABEL: Determining loop execution counts for: @u_1
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) umax %start))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
; CHECK-LABEL: Determining loop execution counts for: @s_0
; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) smax %rhs))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
; CHECK-LABEL: Determining loop execution counts for: @s_1
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) smax %start))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
leave:
ret void
; CHECK-LABEL: Determining loop execution counts for: @s32_max1
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero.
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @s32_max2
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero.
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @s32_maxx
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}}
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @u32_max1
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) umax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero.
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @u32_max2
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) umax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero.
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @u32_maxx
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) umax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}}
do.end:
ret void
; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
do.end:
ret void
--- /dev/null
+; RUN: opt -S -loop-unroll < %s | FileCheck %s
+
+; Unroll twice, with first loop exit kept
+; CHECK-LABEL: @s32_max1
+; CHECK: do.body:
+; CHECK: store
+; CHECK: br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK: ret void
+; CHECK: do.body.1:
+; CHECK: store
+; CHECK: br label %do.end
+define void @s32_max1(i32 %n, i32* %p) { ; do-while loop: stores i to p[i], i starting at %n, while i <s %n + 1
+entry:
+ %add = add i32 %n, 1 ; loop bound: %n + 1
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; induction variable, starts at %n
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, %add ; signed compare of the pre-increment value against the bound
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+do.end:
+ ret void
+}
+
+; Unroll thrice, with first loop exit kept
+; CHECK-LABEL: @s32_max2
+; CHECK: do.body:
+; CHECK: store
+; CHECK: br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK: ret void
+; CHECK: do.body.1:
+; CHECK: store
+; CHECK: store
+; CHECK: br label %do.end
+define void @s32_max2(i32 %n, i32* %p) { ; do-while loop: stores i to p[i], i starting at %n, while i <s %n + 2
+entry:
+ %add = add i32 %n, 2 ; loop bound: %n + 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; induction variable, starts at %n
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, %add ; signed compare of the pre-increment value against the bound
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+ ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @s32_maxx
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @s32_maxx(i32 %n, i32 %x, i32* %p) { ; same loop shape, but the bound depends on the non-constant %x
+entry:
+ %add = add i32 %x, %n ; loop bound: %x + %n
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; induction variable, starts at %n
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, %add ; signed compare of the pre-increment value against the bound
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+do.end:
+ ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @s32_max2_unpredictable_exit
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) { ; two exiting blocks: an early exit on i == %x plus the latch exit
+entry:
+ %add = add i32 %n, 2 ; latch bound: %n + 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ] ; induction variable, starts at %n
+ %cmp = icmp eq i32 %i.0, %x ; early-exit condition: i == %x
+ br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp1 = icmp slt i32 %i.0, %add ; signed compare of the pre-increment value against the bound
+ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+ ret void
+}
+
+; Unroll twice, with first loop exit kept
+; CHECK-LABEL: @u32_max1
+; CHECK: do.body:
+; CHECK: store
+; CHECK: br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK: ret void
+; CHECK: do.body.1:
+; CHECK: store
+; CHECK: br label %do.end
+define void @u32_max1(i32 %n, i32* %p) { ; do-while loop: stores i to p[i], i starting at %n, while i <u %n + 1
+entry:
+ %add = add i32 %n, 1 ; loop bound: %n + 1
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; induction variable, starts at %n
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp = icmp ult i32 %i.0, %add ; unsigned compare of the pre-increment value against the bound
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+do.end:
+ ret void
+}
+
+; Unroll thrice, with first loop exit kept
+; CHECK-LABEL: @u32_max2
+; CHECK: do.body:
+; CHECK: store
+; CHECK: br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK: ret void
+; CHECK: do.body.1:
+; CHECK: store
+; CHECK: store
+; CHECK: br label %do.end
+define void @u32_max2(i32 %n, i32* %p) { ; do-while loop: stores i to p[i], i starting at %n, while i <u %n + 2
+entry:
+ %add = add i32 %n, 2 ; loop bound: %n + 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; induction variable, starts at %n
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp = icmp ult i32 %i.0, %add ; unsigned compare of the pre-increment value against the bound
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+ ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @u32_maxx
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @u32_maxx(i32 %n, i32 %x, i32* %p) { ; same loop shape, but the bound depends on the non-constant %x
+entry:
+ %add = add i32 %x, %n ; loop bound: %x + %n
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; induction variable, starts at %n
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp = icmp ult i32 %i.0, %add ; unsigned compare of the pre-increment value against the bound
+ br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+do.end:
+ ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @u32_max2_unpredictable_exit
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) { ; two exiting blocks: an early exit on i == %x plus the latch exit
+entry:
+ %add = add i32 %n, 2 ; latch bound: %n + 2
+ br label %do.body
+
+do.body:
+ %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ] ; induction variable, starts at %n
+ %cmp = icmp eq i32 %i.0, %x ; early-exit condition: i == %x
+ br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+ %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+ store i32 %i.0, i32* %arrayidx, align 4 ; p[i] = i
+ %inc = add i32 %i.0, 1
+ %cmp1 = icmp ult i32 %i.0, %add ; unsigned compare of the pre-increment value against the bound
+ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+ ret void
+}