/// Record ICmp conditions relevant to any argument in CS following Pred's
/// single predecessors. If there are conflicting conditions along a path, like
-/// x == 1 and x == 0, the first condition will be used.
+/// x == 1 and x == 0, the first condition will be used. We stop once we reach
+/// an edge to StopAt.
static void recordConditions(CallSite CS, BasicBlock *Pred,
- ConditionsTy &Conditions) {
- recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+ ConditionsTy &Conditions, BasicBlock *StopAt) {
BasicBlock *From = Pred;
BasicBlock *To = Pred;
SmallPtrSet<BasicBlock *, 4> Visited;
- while (!Visited.count(From->getSinglePredecessor()) &&
+ while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
(From = From->getSinglePredecessor())) {
recordCondition(CS, From, To, Conditions);
Visited.insert(From);
static void splitCallSite(
CallSite CS,
const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
- DominatorTree *DT) {
+ DominatorTree &DT) {
Instruction *Instr = CS.getInstruction();
BasicBlock *TailBB = Instr->getParent();
bool IsMustTailCall = CS.isMustTailCall();
BasicBlock *PredBB = Preds[i].first;
BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i],
- DT);
+ &DT);
assert(SplitBlock && "Unexpected new basic block split.");
Instruction *NewCI =
return false;
}
-static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
+static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree &DT) {
if (!isPredicatedOnPHI(CS))
return false;
return true;
}
-static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree &DT) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
if (Preds[0] == Preds[1])
return false;
+ // We can stop recording conditions once we reached the immediate dominator
+ // for the block containing the call site. Conditions in predecessors of the
+ // that node will be the same for all paths to the call site and splitting
+ // is not beneficial.
+ auto *CSDTNode = DT.getNode(CS.getInstruction()->getParent());
+ BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
+
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
ConditionsTy Conditions;
- recordConditions(CS, Pred, Conditions);
+ // Record condition on edge BB(CS) <- Pred
+ recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+ // Record conditions followng Pred's single predecessors.
+ recordConditions(CS, Pred, Conditions, StopAt);
PredsCS.push_back({Pred, Conditions});
}
}))
return false;
+ // Record common conditions starting from StopAt. Those conditions hold for
+ // all paths to CS. Adding them gives the inliner a better chance at inlining
+ // CS.
+ ConditionsTy CommonConditions;
+ if (StopAt)
+ recordConditions(CS, StopAt, CommonConditions, nullptr);
+ if (!CommonConditions.empty())
+ for (auto &Pred : PredsCS) {
+ Pred.second.insert(Pred.second.end(), CommonConditions.begin(),
+ CommonConditions.end());
+ }
+
splitCallSite(CS, PredsCS, DT);
return true;
}
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
- DominatorTree *DT) {
+ DominatorTree &DT) {
if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
return tryToSplitOnPredicatedArgument(CS, DT) ||
}
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
- TargetTransformInfo &TTI, DominatorTree *DT) {
+ TargetTransformInfo &TTI, DominatorTree &DT) {
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- return doCallSiteSplitting(F, TLI, TTI,
- DTWP ? &DTWP->getDomTree() : nullptr);
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return doCallSiteSplitting(F, TLI, TTI, DT);
}
};
} // namespace
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!doCallSiteSplitting(F, TLI, TTI, DT))
return PreservedAnalyses::all();
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_ne_eq_ne(i32* %a, i32 %v, i32 %p) {
Header:
%tobool1 = icmp ne i32* %a, null
- br i1 %tobool1, label %Header2, label %End
+ br i1 %tobool1, label %Header2, label %TBB
Header2:
%tobool2 = icmp eq i32 %p, 10
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_ne_ne_ne_constrain_same_pointer_arg(i32* %a, i32 %v, i32 %p, i32* %a2, i32* %a3) {
Header:
%tobool1 = icmp ne i32* %a, null
- br i1 %tobool1, label %Header2, label %End
+ br i1 %tobool1, label %Header2, label %TBB
Header2:
%tobool2 = icmp ne i32* %a, %a2
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_eq_eq_eq_untaken(i32* %a, i32 %v, i32 %p) {
Header:
%tobool1 = icmp eq i32* %a, null
- br i1 %tobool1, label %End, label %Header2
+ br i1 %tobool1, label %TBB, label %Header2
Header2:
%tobool2 = icmp eq i32 %p, 10
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_ne_eq_ne_untaken(i32* %a, i32 %v, i32 %p) {
Header:
%tobool1 = icmp ne i32* %a, null
- br i1 %tobool1, label %End, label %Header2
+ br i1 %tobool1, label %TBB, label %Header2
Header2:
%tobool2 = icmp eq i32 %p, 10
ret i32 %v
}
+;CHECK-LABEL: @test_cond_no_effect
+;CHECK-NOT: Header.split:
+;CHECK-NOT: TBB.split:
+;CHECK-LABEL: Tail:
+;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 0)
+;CHECK: ret i32 %r
+define i32 @test_cond_no_effect(i32* %a, i32 %v) {
+Entry:
+ %tobool1 = icmp eq i32* %a, null
+ br i1 %tobool1, label %Header, label %End
+
+Header:
+ br i1 undef, label %Tail, label %TBB
+
+TBB:
+ br i1 undef, label %Tail, label %End
+
+Tail:
+ %r = call i32 @callee(i32* %a, i32 %v, i32 0)
+ ret i32 %r
+
+End:
+ ret i32 %v
+}
+
;CHECK-LABEL: @test_unreachable
;CHECK-LABEL: Header.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10)