/// which have the exact same opcode and finds all inputs which are loop
/// invariant. For some operations these can be re-associated and unswitched out
/// of the loop entirely.
-static SmallVector<Value *, 4>
+static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
LoopInfo &LI) {
- SmallVector<Value *, 4> Invariants;
assert(!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant.");
+ TinyPtrVector<Value *> Invariants;
// Build a worklist and recurse through operators collecting invariants.
SmallVector<Instruction *, 4> Worklist;
llvm_unreachable("Basic blocks should never be empty!");
}
+/// Insert code to test a set of loop invariant values, and conditionally branch
+/// on them.
+static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
+ ArrayRef<Value *> Invariants,
+ bool Direction,
+ BasicBlock &UnswitchedSucc,
+ BasicBlock &NormalSucc) {
+ IRBuilder<> IRB(&BB);
+ Value *Cond = Invariants.front();
+ for (Value *Invariant :
+ make_range(std::next(Invariants.begin()), Invariants.end()))
+ if (Direction)
+ Cond = IRB.CreateOr(Cond, Invariant);
+ else
+ Cond = IRB.CreateAnd(Cond, Invariant);
+
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc);
+}
+
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
///
/// Requires that the loop exit and unswitched basic block are the same, and
LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
// The loop invariant values that we want to unswitch.
- SmallVector<Value *, 4> Invariants;
+ TinyPtrVector<Value *> Invariants;
// When true, we're fully unswitching the branch rather than just unswitching
// some input conditions to the branch.
} else {
// Only unswitching a subset of inputs to the condition, so we will need to
// build a new branch that merges the invariant inputs.
- IRBuilder<> IRB(OldPH);
- Value *Cond = Invariants.front();
if (ExitDirection)
assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
Instruction::Or &&
assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
Instruction::And &&
"Must have an `and` of `i1`s for the condition!");
- for (Value *Invariant :
- make_range(std::next(Invariants.begin()), Invariants.end()))
- if (ExitDirection)
- Cond = IRB.CreateOr(Cond, Invariant);
- else
- Cond = IRB.CreateAnd(Cond, Invariant);
-
- BasicBlock *Succs[2];
- Succs[LoopExitSuccIdx] = UnswitchedBB;
- Succs[1 - LoopExitSuccIdx] = NewPH;
- IRB.CreateCondBr(Cond, Succs[0], Succs[1]);
+ buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
+ *UnswitchedBB, *NewPH);
}
// Rewrite the relevant PHI nodes.
/// Once unswitching has been performed it runs the provided callback to report
/// the new loops and no-longer valid loops to the caller.
static bool unswitchInvariantBranch(
- Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
- AssumptionCache &AC,
+ Loop &L, BranchInst &BI, ArrayRef<Value *> Invariants, DominatorTree &DT,
+ LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
- assert(BI.isConditional() && "Can only unswitch a conditional branch!");
- assert(L.isLoopInvariant(BI.getCondition()) &&
- "Can only unswitch an invariant branch condition!");
-
- // Constant and BBs tracking the cloned and continuing successor.
- const int ClonedSucc = 0;
auto *ParentBB = BI.getParent();
+
+ // We can only unswitch conditional branches with an invariant condition or
+ // combining invariant conditions with an instruction.
+ assert(BI.isConditional() && "Can only unswitch a conditional branch!");
+ bool FullUnswitch = BI.getCondition() == Invariants[0];
+ if (FullUnswitch)
+ assert(Invariants.size() == 1 &&
+ "Cannot have other invariants with full unswitching!");
+ else
+ assert(isa<Instruction>(BI.getCondition()) &&
+ "Partial unswitching requires an instruction as the condition!");
+
+ // Constant and BBs tracking the cloned and continuing successor. When we are
+ // unswitching the entire condition, this can just be trivially chosen to
+ // unswitch towards `true`. However, when we are unswitching a set of
+ // invariants combined with `and` or `or`, the combining operation determines
+ // the best direction to unswitch: we want to unswitch the direction that will
+ // collapse the branch.
+ bool Direction = true;
+ int ClonedSucc = 0;
+ if (!FullUnswitch) {
+ if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::Or) {
+ assert(cast<Instruction>(BI.getCondition())->getOpcode() == Instruction::And &&
+ "Only `or` and `and` instructions can combine invariants being unswitched.");
+ Direction = false;
+ ClonedSucc = 1;
+ }
+ }
auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
return true;
});
}
- // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
- // edge) dominates its target, we will end up with dead nodes in the original
- // loop and its exits that will need to be deleted. Here, we just retain that
- // the property holds and will compute the deleted set later.
+ // If we are doing full unswitching, then similarly to the above, the edge we
+ // *are* cloning in the unswitch (the unswitched edge) dominates its target,
+ // we will end up with dead nodes in the original loop and its exits that will
+ // need to be deleted. Here, we just retain that the property holds and will
+ // compute the deleted set later.
bool DeleteUnswitchedSucc =
- UnswitchedSuccBB->getUniquePredecessor() ||
- llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
- return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
- });
+ FullUnswitch &&
+ (UnswitchedSuccBB->getUniquePredecessor() ||
+ llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
+ return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
+ }));
// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
- // Remove the parent as a predecessor of the unswitched successor.
- UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
-
- // Now splice the branch from the original loop and use it to select between
- // the two loops.
+ // The stitching of the branched code back together depends on whether we're
+ // doing full unswitching or not with the exception that we always want to
+ // nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
- SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
- BI.setSuccessor(ClonedSucc, ClonedPH);
- BI.setSuccessor(1 - ClonedSucc, LoopPH);
-
- // Create a new unconditional branch to the continuing block (as opposed to
- // the one cloned).
- BranchInst::Create(ContinueSuccBB, ParentBB);
+ if (FullUnswitch) {
+ // Remove the parent as a predecessor of the
+ // unswitched successor.
+ UnswitchedSuccBB->removePredecessor(ParentBB,
+ /*DontDeleteUselessPHIs*/ true);
+ DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
+
+ // Now splice the branch from the original loop and use it to select between
+ // the two loops.
+ SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
+ BI.setSuccessor(ClonedSucc, ClonedPH);
+ BI.setSuccessor(1 - ClonedSucc, LoopPH);
+
+ // Create a new unconditional branch to the continuing block (as opposed to
+ // the one cloned).
+ BranchInst::Create(ContinueSuccBB, ParentBB);
+ } else {
+ // When doing a partial unswitch, we have to do a bit more work to build up
+ // the branch in the split block.
+ buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
+ *ClonedPH, *LoopPH);
+ }
// Before we update the dominator tree, collect the dead blocks if we're going
// to end up deleting the unswitched successor.
}
}
- // Add the remaining edges to our updates and apply them to get an up-to-date
+ // Add the remaining edge to our updates and apply them to get an up-to-date
// dominator tree. Note that this will cause the dead blocks above to be
// unreachable and no longer in the dominator tree.
- DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
DT.applyUpdates(DTUpdates);
// verification steps.
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+ // Now we want to replace all the uses of the invariants within both the
+ // original and cloned blocks. We do this here so that we can use the now
+ // updated dominator tree to identify which side the users are on.
+ ConstantInt *UnswitchedReplacement =
+ Direction ? ConstantInt::getTrue(BI.getContext())
+ : ConstantInt::getFalse(BI.getContext());
+ ConstantInt *ContinueReplacement =
+ Direction ? ConstantInt::getFalse(BI.getContext())
+ : ConstantInt::getTrue(BI.getContext());
+ for (Value *Invariant : Invariants)
+ for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
+ UI != UE;) {
+ // Grab the use and walk past it so we can clobber it in the use list.
+ Use *U = &*UI++;
+ Instruction *UserI = dyn_cast<Instruction>(U->getUser());
+ if (!UserI)
+ continue;
+
+ // Replace it with the 'continue' side if in the main loop body, and the
+ // unswitched if in the cloned blocks.
+ if (DT.dominates(LoopPH, UserI->getParent()))
+ U->set(ContinueReplacement);
+ else if (DT.dominates(ClonedPH, UserI->getParent()))
+ U->set(UnswitchedReplacement);
+ }
+
// We can change which blocks are exit blocks of all the cloned sibling
// loops, the current loop, and any parent loops which shared exit blocks
// with the current loop. As a consequence, we need to re-form LCSSA for
return Cost;
}
-/// Unswitch control flow predicated on loop invariant conditions.
-///
-/// This first hoists all branches or switches which are trivial (IE, do not
-/// require duplicating any part of the loop) out of the loop body. It then
-/// looks at other loop invariant control flows and tries to unswitch those as
-/// well by cloning the loop if the result is small enough.
-static bool
-unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
- TargetTransformInfo &TTI, bool NonTrivial,
- function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
- assert(L.isRecursivelyLCSSAForm(DT, LI) &&
- "Loops must be in LCSSA form before unswitching.");
+static bool unswitchBestCondition(
+ Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+ TargetTransformInfo &TTI,
+ function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
+ // Collect all invariant conditions within this loop (as opposed to an inner
+ // loop which would be handled when visiting that inner loop).
+ SmallVector<std::pair<TerminatorInst *, TinyPtrVector<Value *>>, 4>
+ UnswitchCandidates;
+ for (auto *BB : L.blocks()) {
+ if (LI.getLoopFor(BB) != &L)
+ continue;
- // Must be in loop simplified form: we need a preheader and dedicated exits.
- if (!L.isLoopSimplifyForm())
- return false;
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ // FIXME: Handle switches here!
+ if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
+ BI->getSuccessor(0) == BI->getSuccessor(1))
+ continue;
- // Try trivial unswitch first before loop over other basic blocks in the loop.
- if (unswitchAllTrivialConditions(L, DT, LI)) {
- // If we unswitched successfully we will want to clean up the loop before
- // processing it further so just mark it as unswitched and return.
- UnswitchCB(/*CurrentLoopValid*/ true, {});
- return true;
- }
+ if (L.isLoopInvariant(BI->getCondition())) {
+ UnswitchCandidates.push_back({BI, {BI->getCondition()}});
+ continue;
+ }
- // If we're not doing non-trivial unswitching, we're done. We both accept
- // a parameter but also check a local flag that can be used for testing
- // a debugging.
- if (!NonTrivial && !EnableNonTrivialUnswitch)
- return false;
+ Instruction &CondI = *cast<Instruction>(BI->getCondition());
+ if (CondI.getOpcode() != Instruction::And &&
+ CondI.getOpcode() != Instruction::Or)
+ continue;
- // Collect all remaining invariant branch conditions within this loop (as
- // opposed to an inner loop which would be handled when visiting that inner
- // loop).
- SmallVector<TerminatorInst *, 4> UnswitchCandidates;
- for (auto *BB : L.blocks())
- if (LI.getLoopFor(BB) == &L)
- if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
- BI->getSuccessor(0) != BI->getSuccessor(1))
- UnswitchCandidates.push_back(BI);
+ TinyPtrVector<Value *> Invariants =
+ collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
+ if (Invariants.empty())
+ continue;
+
+ UnswitchCandidates.push_back({BI, std::move(Invariants)});
+ }
// If we didn't find any candidates, we're done.
if (UnswitchCandidates.empty())
SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
// Given a terminator which might be unswitched, computes the non-duplicated
// cost for that terminator.
- auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
- BasicBlock &BB = *TI->getParent();
+ auto ComputeUnswitchedCost = [&](TerminatorInst &TI, bool FullUnswitch) {
+ BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
int Cost = LoopCost;
if (!Visited.insert(SuccBB).second)
continue;
+ // If this is a partial unswitch candidate, then it must be a conditional
+ // branch with a condition of either `or` or `and`. In that case, one of
+ // the successors is necessarily duplicated, so don't even try to remove
+ // its cost.
+ if (!FullUnswitch) {
+ auto &BI = cast<BranchInst>(TI);
+ if (cast<Instruction>(BI.getCondition())->getOpcode() ==
+ Instruction::And) {
+ if (SuccBB == BI.getSuccessor(1))
+ continue;
+ } else {
+ assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
+ Instruction::Or &&
+ "Only `and` and `or` conditions can result in a partial "
+ "unswitch!");
+ if (SuccBB == BI.getSuccessor(0))
+ continue;
+ }
+ }
+
// This successor's domtree will not need to be duplicated after
// unswitching if the edge to the successor dominates it (and thus the
// entire tree). This essentially means there is no other path into this
};
TerminatorInst *BestUnswitchTI = nullptr;
int BestUnswitchCost;
- for (TerminatorInst *CandidateTI : UnswitchCandidates) {
- int CandidateCost = ComputeUnswitchedCost(CandidateTI);
+ ArrayRef<Value *> BestUnswitchInvariants;
+ for (auto &TerminatorAndInvariants : UnswitchCandidates) {
+ TerminatorInst &TI = *TerminatorAndInvariants.first;
+ ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
+ BranchInst *BI = dyn_cast<BranchInst>(&TI);
+ int CandidateCost =
+ ComputeUnswitchedCost(TI, /*FullUnswitch*/ Invariants.size() == 1 && BI &&
+ Invariants[0] == BI->getCondition());
LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
- << " for unswitch candidate: " << *CandidateTI << "\n");
+ << " for unswitch candidate: " << TI << "\n");
if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
- BestUnswitchTI = CandidateTI;
+ BestUnswitchTI = &TI;
BestUnswitchCost = CandidateCost;
+ BestUnswitchInvariants = Invariants;
}
}
return false;
}
+ auto *UnswitchBI = dyn_cast<BranchInst>(BestUnswitchTI);
+ if (!UnswitchBI) {
+ // FIXME: Add support for unswitching a switch here!
+ LLVM_DEBUG(dbgs() << "Cannot unswitch anything but a branch!\n");
+ return false;
+ }
+
LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
- << BestUnswitchCost << ") branch: " << *BestUnswitchTI
- << "\n");
- return unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT, LI,
+ << BestUnswitchCost << ") branch: " << *UnswitchBI << "\n");
+ return unswitchInvariantBranch(L, *UnswitchBI, BestUnswitchInvariants, DT, LI,
AC, UnswitchCB);
}
+/// Unswitch control flow predicated on loop invariant conditions.
+///
+/// This first hoists all branches or switches which are trivial (IE, do not
+/// require duplicating any part of the loop) out of the loop body. It then
+/// looks at other loop invariant control flows and tries to unswitch those as
+/// well by cloning the loop if the result is small enough.
+static bool
+unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+ TargetTransformInfo &TTI, bool NonTrivial,
+ function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
+ assert(L.isRecursivelyLCSSAForm(DT, LI) &&
+ "Loops must be in LCSSA form before unswitching.");
+ bool Changed = false;
+
+ // Must be in loop simplified form: we need a preheader and dedicated exits.
+ if (!L.isLoopSimplifyForm())
+ return false;
+
+ // Try trivial unswitch first before loop over other basic blocks in the loop.
+ if (unswitchAllTrivialConditions(L, DT, LI)) {
+ // If we unswitched successfully we will want to clean up the loop before
+ // processing it further so just mark it as unswitched and return.
+ UnswitchCB(/*CurrentLoopValid*/ true, {});
+ return true;
+ }
+
+ // If we're not doing non-trivial unswitching, we're done. We both accept
+ // a parameter but also check a local flag that can be used for testing
+ // a debugging.
+ if (!NonTrivial && !EnableNonTrivialUnswitch)
+ return false;
+
+ // For non-trivial unswitching, because it often creates new loops, we rely on
+ // the pass manager to iterate on the loops rather than trying to immediately
+ // reach a fixed point. There is no substantial advantage to iterating
+ // internally, and if any of the new loops are simplified enough to contain
+ // trivial unswitching we want to prefer those.
+
+ // Try to unswitch the best invariant condition. We prefer this full unswitch to
+ // a partial unswitch when possible below the threshold.
+ if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB))
+ return true;
+
+ // No other opportunities to unswitch.
+ return Changed;
+}
+
PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
ret i32 0
; CHECK: loop_exit:
; CHECK-NEXT: ret
-}
\ No newline at end of file
+}
+
+; Non-trivial partial loop unswitching of an invariant input to an 'or'.
+define i32 @test25(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test25(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+ %v1 = load i1, i1* %ptr
+ %cond_or = or i1 %v1, %cond
+ br i1 %cond_or, label %loop_a, label %loop_b
+
+loop_a:
+ call void @a()
+ br label %latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[OR_US:.*]] = or i1 %[[V1_US]], true
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V2_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+loop_b:
+ call void @b()
+ br label %latch
+; The original loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: %[[OR:.*]] = or i1 %[[V1]], false
+; CHECK-NEXT: br i1 %[[OR]], label %loop_a, label %loop_b
+;
+; CHECK: loop_a:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %latch
+;
+; CHECK: loop_b:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %latch
+
+latch:
+ %v2 = load i1, i1* %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit
+; CHECK: latch:
+; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}
+
+; Non-trivial partial loop unswitching of multiple invariant inputs to an `and`
+; chain.
+define i32 @test26(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @test26(
+entry:
+ br label %loop_begin
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[INV_AND:.*]] = and i1 %cond3, %cond1
+; CHECK-NEXT: br i1 %[[INV_AND]], label %entry.split, label %entry.split.us
+
+loop_begin:
+ %v1 = load i1, i1* %ptr1
+ %v2 = load i1, i1* %ptr2
+ %cond_and1 = and i1 %v1, %cond1
+ %cond_or1 = or i1 %v2, %cond2
+ %cond_and2 = and i1 %cond_and1, %cond_or1
+ %cond_and3 = and i1 %cond_and2, %cond3
+ br i1 %cond_and3, label %loop_a, label %loop_b
+; The 'loop_b' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V1_US]], false
+; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V2_US]], %cond2
+; CHECK-NEXT: %[[AND2_US:.*]] = and i1 %[[AND1_US]], %[[OR1_US]]
+; CHECK-NEXT: %[[AND3_US:.*]] = and i1 %[[AND2_US]], false
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
+
+; The original loop.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V1]], true
+; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V2]], %cond2
+; CHECK-NEXT: %[[AND2:.*]] = and i1 %[[AND1]], %[[OR1]]
+; CHECK-NEXT: %[[AND3:.*]] = and i1 %[[AND2]], true
+; CHECK-NEXT: br i1 %[[AND3]], label %loop_a, label %loop_b
+
+loop_a:
+ call void @a()
+ br label %latch
+; CHECK: loop_a:
+; CHECK-NEXT: call void @a()
+; CHECK-NEXT: br label %latch
+
+loop_b:
+ call void @b()
+ br label %latch
+; CHECK: loop_b:
+; CHECK-NEXT: call void @b()
+; CHECK-NEXT: br label %latch
+
+latch:
+ %v3 = load i1, i1* %ptr3
+ br i1 %v3, label %loop_begin, label %loop_exit
+; CHECK: latch:
+; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+ ret i32 0
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
+}