const Loop &L, const LoopInfo &LI, AAResults &AA,
const MemorySSAUpdater *MSSAU) {
assert(UnswitchCandidates.empty() && "Should be!");
+
+ auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
+ Cond = skipTrivialSelect(Cond);
+ if (isa<Constant>(Cond))
+ return;
+ if (L.isLoopInvariant(Cond)) {
+ UnswitchCandidates.push_back({I, {Cond}});
+ return;
+ }
+ if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
+ TinyPtrVector<Value *> Invariants =
+ collectHomogenousInstGraphLoopInvariants(
+ L, *static_cast<Instruction *>(Cond), LI);
+ if (!Invariants.empty())
+ UnswitchCandidates.push_back({I, std::move(Invariants)});
+ }
+ };
+
// Whether or not we should also collect guards in the loop.
bool CollectGuards = false;
if (UnswitchGuards) {
for (auto &I : *BB) {
if (auto *SI = dyn_cast<SelectInst>(&I)) {
- auto *Cond = skipTrivialSelect(SI->getCondition());
- // restrict to simple boolean selects
- if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond) && Cond->getType()->isIntegerTy(1))
- UnswitchCandidates.push_back({&I, {Cond}});
+ auto *Cond = SI->getCondition();
+ // Do not unswitch vector selects and logical and/or selects
+ if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
+ AddUnswitchCandidatesForInst(SI, Cond);
} else if (CollectGuards && isGuard(&I)) {
auto *Cond =
skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
}
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
+ if (!BI || !BI->isConditional() ||
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;
- Value *Cond = skipTrivialSelect(BI->getCondition());
- if (isa<Constant>(Cond))
- continue;
-
- if (L.isLoopInvariant(Cond)) {
- UnswitchCandidates.push_back({BI, {Cond}});
- continue;
- }
-
- Instruction &CondI = *cast<Instruction>(Cond);
- if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
- TinyPtrVector<Value *> Invariants =
- collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
- if (Invariants.empty())
- continue;
-
- UnswitchCandidates.push_back({BI, std::move(Invariants)});
- continue;
- }
+ AddUnswitchCandidatesForInst(BI, BI->getCondition());
}
if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
// cost for that terminator.
auto ComputeUnswitchedCost = [&](Instruction &TI,
bool FullUnswitch) -> InstructionCost {
+ // Unswitching selects unswitches the entire loop.
+ if (isa<SelectInst>(TI))
+ return LoopCost;
+
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
// loop. This is computing the new cost of unswitching a condition.
// Note that guards always have 2 unique successors that are implicit and
// will be materialized if we decide to unswitch it.
- int SuccessorsCount =
- isGuard(&TI) || isa<SelectInst>(TI) ? 2 : Visited.size();
+ int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
assert(SuccessorsCount > 1 &&
"Cannot unswitch a condition without multiple distinct successors!");
return (LoopCost - Cost) * (SuccessorsCount - 1);
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[NUM]], 0
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
+; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]]
+; CHECK-NEXT: br i1 [[COND_FR]], label [[FOR_BODY_PREHEADER_SPLIT:%.*]], label [[FOR_BODY_PREHEADER_SPLIT_US:%.*]]
+; CHECK: for.body.preheader.split.us:
+; CHECK-NEXT: br label [[FOR_BODY_US:%.*]]
+; CHECK: for.body.us:
+; CHECK-NEXT: [[I_07_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[TMP0:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT_US]] ]
+; CHECK-NEXT: br label [[TMP0]]
+; CHECK: 0:
+; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ 0, [[FOR_BODY_US]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT_US]])
+; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_07_US]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_US:%.*]] = icmp eq i32 [[INC_US]], [[NUM]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_BODY_US]]
+; CHECK: for.cond.cleanup.loopexit.split.us:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.body.preheader.split:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit.split:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i32 undef
; CHECK: for.body:
-; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[TMP3:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT]] ]
; CHECK-NEXT: [[REM:%.*]] = and i32 [[I_07]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[REM]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND]], [[CMP1]]
-; CHECK-NEXT: [[COND2:%.*]] = select i1 [[TMP0]], i32 [[I_07]], i32 0
-; CHECK-NEXT: tail call void @bar(i32 noundef [[COND2]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[CMP1]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3]]
+; CHECK: 2:
+; CHECK-NEXT: br label [[TMP3]]
+; CHECK: 3:
+; CHECK-NEXT: [[UNSWITCHED_SELECT:%.*]] = phi i32 [ [[I_07]], [[TMP2]] ], [ 0, [[FOR_BODY]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT]])
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[NUM]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY]]
;
entry:
%cmp6 = icmp sgt i32 %num, 0
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[NUM]], 0
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
+; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]]
+; CHECK-NEXT: br i1 [[COND_FR]], label [[FOR_BODY_PREHEADER_SPLIT:%.*]], label [[FOR_BODY_PREHEADER_SPLIT_US:%.*]]
+; CHECK: for.body.preheader.split.us:
+; CHECK-NEXT: br label [[FOR_BODY_US:%.*]]
+; CHECK: for.body.us:
+; CHECK-NEXT: [[I_07_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[TMP0:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT_US]] ]
+; CHECK-NEXT: br label [[TMP0]]
+; CHECK: 0:
+; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ 0, [[FOR_BODY_US]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT_US]])
+; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_07_US]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_US:%.*]] = icmp eq i32 [[INC_US]], [[NUM]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_BODY_US]]
+; CHECK: for.cond.cleanup.loopexit.split.us:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.body.preheader.split:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit.split:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i32 undef
; CHECK: for.body:
-; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[TMP3:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT]] ]
; CHECK-NEXT: [[REM:%.*]] = and i32 [[I_07]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[REM]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP1]], [[COND]]
-; CHECK-NEXT: [[COND2:%.*]] = select i1 [[TMP0]], i32 [[I_07]], i32 0
-; CHECK-NEXT: tail call void @bar(i32 noundef [[COND2]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[CMP1]], true
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3]]
+; CHECK: 2:
+; CHECK-NEXT: br label [[TMP3]]
+; CHECK: 3:
+; CHECK-NEXT: [[UNSWITCHED_SELECT:%.*]] = phi i32 [ [[I_07]], [[TMP2]] ], [ 0, [[FOR_BODY]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT]])
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[NUM]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY]]
;
entry:
%cmp6 = icmp sgt i32 %num, 0
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[NUM]], 0
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
+; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]]
+; CHECK-NEXT: br i1 [[COND_FR]], label [[FOR_BODY_PREHEADER_SPLIT_US:%.*]], label [[FOR_BODY_PREHEADER_SPLIT:%.*]]
+; CHECK: for.body.preheader.split.us:
+; CHECK-NEXT: br label [[FOR_BODY_US:%.*]]
+; CHECK: for.body.us:
+; CHECK-NEXT: [[I_07_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[TMP1:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT_US]] ]
+; CHECK-NEXT: br label [[TMP0:%.*]]
+; CHECK: 0:
+; CHECK-NEXT: br label [[TMP1]]
+; CHECK: 1:
+; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[I_07_US]], [[TMP0]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT_US]])
+; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_07_US]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_US:%.*]] = icmp eq i32 [[INC_US]], [[NUM]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_BODY_US]]
+; CHECK: for.cond.cleanup.loopexit.split.us:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.body.preheader.split:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit.split:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i32 undef
; CHECK: for.body:
-; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[TMP4:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT]] ]
; CHECK-NEXT: [[REM:%.*]] = and i32 [[I_07]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[REM]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND]], [[CMP1]]
-; CHECK-NEXT: [[COND2:%.*]] = select i1 [[TMP0]], i32 [[I_07]], i32 0
-; CHECK-NEXT: tail call void @bar(i32 noundef [[COND2]])
+; CHECK-NEXT: [[TMP2:%.*]] = or i1 false, [[CMP1]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4]]
+; CHECK: 3:
+; CHECK-NEXT: br label [[TMP4]]
+; CHECK: 4:
+; CHECK-NEXT: [[UNSWITCHED_SELECT:%.*]] = phi i32 [ [[I_07]], [[TMP3]] ], [ 0, [[FOR_BODY]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT]])
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[NUM]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY]]
;
entry:
%cmp6 = icmp sgt i32 %num, 0
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[NUM]], 0
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
+; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]]
+; CHECK-NEXT: br i1 [[COND_FR]], label [[FOR_BODY_PREHEADER_SPLIT_US:%.*]], label [[FOR_BODY_PREHEADER_SPLIT:%.*]]
+; CHECK: for.body.preheader.split.us:
+; CHECK-NEXT: br label [[FOR_BODY_US:%.*]]
+; CHECK: for.body.us:
+; CHECK-NEXT: [[I_07_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[TMP1:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT_US]] ]
+; CHECK-NEXT: br label [[TMP0:%.*]]
+; CHECK: 0:
+; CHECK-NEXT: br label [[TMP1]]
+; CHECK: 1:
+; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i32 [ [[I_07_US]], [[TMP0]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT_US]])
+; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[I_07_US]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_US:%.*]] = icmp eq i32 [[INC_US]], [[NUM]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_BODY_US]]
+; CHECK: for.cond.cleanup.loopexit.split.us:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.body.preheader.split:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit.split:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i32 undef
; CHECK: for.body:
-; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[TMP4:%.*]] ], [ 0, [[FOR_BODY_PREHEADER_SPLIT]] ]
; CHECK-NEXT: [[REM:%.*]] = and i32 [[I_07]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[REM]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[CMP1]], [[COND]]
-; CHECK-NEXT: [[COND2:%.*]] = select i1 [[TMP0]], i32 [[I_07]], i32 0
-; CHECK-NEXT: tail call void @bar(i32 noundef [[COND2]])
+; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[CMP1]], false
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4]]
+; CHECK: 3:
+; CHECK-NEXT: br label [[TMP4]]
+; CHECK: 4:
+; CHECK-NEXT: [[UNSWITCHED_SELECT:%.*]] = phi i32 [ [[I_07]], [[TMP3]] ], [ 0, [[FOR_BODY]] ]
+; CHECK-NEXT: tail call void @bar(i32 noundef [[UNSWITCHED_SELECT]])
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[NUM]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY]]
;
entry:
%cmp6 = icmp sgt i32 %num, 0