[CallSiteSplitting] Only record conditions up to the IDom(call site).

author Florian Hahn <florian.hahn@arm.com>

Fri, 9 Nov 2018 10:23:46 +0000 (10:23 +0000)

committer Florian Hahn <florian.hahn@arm.com>

Fri, 9 Nov 2018 10:23:46 +0000 (10:23 +0000)
author Florian Hahn <florian.hahn@arm.com>
Fri, 9 Nov 2018 10:23:46 +0000 (10:23 +0000)
committer Florian Hahn <florian.hahn@arm.com>
Fri, 9 Nov 2018 10:23:46 +0000 (10:23 +0000)
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp

index b9e8e3424ccfe61e80587adad135b83daf69b056..208dd5a5a833821aeb17592cb3fed2d1350141e1 100644 (file)
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -149,14 +149,14 @@ static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To,
  
  /// Record ICmp conditions relevant to any argument in CS following Pred's
  /// single predecessors. If there are conflicting conditions along a path, like
-/// x == 1 and x == 0, the first condition will be used.
+/// x == 1 and x == 0, the first condition will be used. We stop once we reach
+/// an edge to StopAt.
  static void recordConditions(CallSite CS, BasicBlock *Pred,
-                             ConditionsTy &Conditions) {
-  recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+                             ConditionsTy &Conditions, BasicBlock *StopAt) {
    BasicBlock *From = Pred;
    BasicBlock *To = Pred;
    SmallPtrSet<BasicBlock *, 4> Visited;
-  while (!Visited.count(From->getSinglePredecessor()) &&
+  while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
           (From = From->getSinglePredecessor())) {
      recordCondition(CS, From, To, Conditions);
      Visited.insert(From);
@@ -302,7 +302,7 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
  static void splitCallSite(
      CallSite CS,
      const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
-    DominatorTree *DT) {
+    DominatorTree &DT) {
    Instruction *Instr = CS.getInstruction();
    BasicBlock *TailBB = Instr->getParent();
    bool IsMustTailCall = CS.isMustTailCall();
@@ -327,7 +327,7 @@ static void splitCallSite(
      BasicBlock *PredBB = Preds[i].first;
      BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
          TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i],
-        DT);
+        &DT);
      assert(SplitBlock && "Unexpected new basic block split.");
  
      Instruction *NewCI =
@@ -438,7 +438,7 @@ static bool isPredicatedOnPHI(CallSite CS) {
    return false;
  }
  
-static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
+static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree &DT) {
    if (!isPredicatedOnPHI(CS))
      return false;
  
@@ -449,15 +449,25 @@ static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
    return true;
  }
  
-static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree &DT) {
    auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
    if (Preds[0] == Preds[1])
      return false;
  
+  // We can stop recording conditions once we reach the immediate dominator
+  // for the block containing the call site. Conditions in predecessors of
+  // that node will be the same for all paths to the call site and splitting
+  // is not beneficial.
+  auto *CSDTNode = DT.getNode(CS.getInstruction()->getParent());
+  BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
+
    SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
    for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
      ConditionsTy Conditions;
-    recordConditions(CS, Pred, Conditions);
+    // Record condition on edge BB(CS) <- Pred
+    recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+    // Record conditions following Pred's single predecessors.
+    recordConditions(CS, Pred, Conditions, StopAt);
      PredsCS.push_back({Pred, Conditions});
    }
  
@@ -466,12 +476,24 @@ static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
        }))
      return false;
  
+  // Record common conditions starting from StopAt. Those conditions hold for
+  // all paths to CS. Adding them gives the inliner a better chance at inlining
+  // CS.
+  ConditionsTy CommonConditions;
+  if (StopAt)
+    recordConditions(CS, StopAt, CommonConditions, nullptr);
+  if (!CommonConditions.empty())
+    for (auto &Pred : PredsCS) {
+      Pred.second.insert(Pred.second.end(), CommonConditions.begin(),
+                         CommonConditions.end());
+    }
+
    splitCallSite(CS, PredsCS, DT);
    return true;
  }
  
  static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
-                               DominatorTree *DT) {
+                               DominatorTree &DT) {
    if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
      return false;
    return tryToSplitOnPredicatedArgument(CS, DT) ||
@@ -479,7 +501,7 @@ static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
  }
  
  static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
-                                TargetTransformInfo &TTI, DominatorTree *DT) {
+                                TargetTransformInfo &TTI, DominatorTree &DT) {
    bool Changed = false;
    for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
      BasicBlock &BB = *BI++;
@@ -524,6 +546,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<TargetLibraryInfoWrapperPass>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      FunctionPass::getAnalysisUsage(AU);
    }
@@ -534,9 +557,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
  
      auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
      auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-    return doCallSiteSplitting(F, TLI, TTI,
-                               DTWP ? &DTWP->getDomTree() : nullptr);
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    return doCallSiteSplitting(F, TLI, TTI, DT);
    }
  };
  } // namespace
@@ -546,6 +568,7 @@ INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
                        "Call-site splitting", false, false)
  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
                      "Call-site splitting", false, false)
  FunctionPass *llvm::createCallSiteSplittingPass() {
@@ -556,7 +579,7 @@ PreservedAnalyses CallSiteSplittingPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
    auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
    auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  
    if (!doCallSiteSplitting(F, TLI, TTI, DT))
      return PreservedAnalyses::all();
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll

index 03f4804a2ba7c29998bf396c568d1f169a3fd4cb..ee2f0414e6285d9965e588a0ca13ad847d6f43ca 100644 (file)
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -38,13 +38,13 @@
  ; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo
  ; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo
  ; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
+; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis on foo
  ; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
  ; CHECK-O2-NEXT: PGOIndirectCallPromotion
  ; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis
  ; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
  ; CHECK-O2-NEXT: Running pass: IPSCCPPass
-; CHECK-O2-DAG: Running analysis: AssumptionAnalysis on foo
-; CHECK-O2-DAG: Running analysis: DominatorTreeAnalysis on foo
+; CHECK-O2-NEXT: Running analysis: AssumptionAnalysis on foo
  ; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
  ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
  ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll

index 33033fef1837797b225da037915b15e0fec1613a..51b6a6d43864c24632916bd888f5823a36645bda 100644 (file)
--- a/llvm/test/Other/opt-O3-pipeline.ll
+++ b/llvm/test/Other/opt-O3-pipeline.ll
@@ -28,6 +28,7 @@
  ; CHECK-NEXT:     Force set function attributes
  ; CHECK-NEXT:     Infer set function attributes
  ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Dominator Tree Construction
  ; CHECK-NEXT:       Call-site splitting
  ; CHECK-NEXT:     Interprocedural Sparse Conditional Constant Propagation
  ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
diff --git a/llvm/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll b/llvm/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll

index d5f31f9ac91c197c2b91e5ae08a058e8b3ca4e19..2a400c93ae20bc3125250f15cce2890646979184 100644 (file)
--- a/llvm/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll
+++ b/llvm/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll
@@ -123,14 +123,14 @@ End:
  ;CHECK-LABEL: Header2.split:
  ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
  ;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
  ;CHECK-LABEL: Tail
  ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
  ;CHECK: ret i32 %[[MERGED]]
  define i32 @test_ne_eq_ne(i32* %a, i32 %v, i32 %p) {
  Header:
    %tobool1 = icmp ne i32* %a, null
-  br i1 %tobool1, label %Header2, label %End
+  br i1 %tobool1, label %Header2, label %TBB
  
  Header2:
    %tobool2 = icmp eq i32 %p, 10
@@ -178,14 +178,14 @@ End:
  ;CHECK-LABEL: Header2.split:
  ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
  ;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
  ;CHECK-LABEL: Tail
  ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
  ;CHECK: ret i32 %[[MERGED]]
  define i32 @test_ne_ne_ne_constrain_same_pointer_arg(i32* %a, i32 %v, i32 %p, i32* %a2, i32* %a3) {
  Header:
    %tobool1 = icmp ne i32* %a, null
-  br i1 %tobool1, label %Header2, label %End
+  br i1 %tobool1, label %Header2, label %TBB
  
  Header2:
    %tobool2 = icmp ne i32* %a, %a2
@@ -235,14 +235,14 @@ End:
  ;CHECK-LABEL: Header2.split:
  ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
  ;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p)
  ;CHECK-LABEL: Tail
  ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
  ;CHECK: ret i32 %[[MERGED]]
  define i32 @test_eq_eq_eq_untaken(i32* %a, i32 %v, i32 %p) {
  Header:
    %tobool1 = icmp eq i32* %a, null
-  br i1 %tobool1, label %End, label %Header2
+  br i1 %tobool1, label %TBB, label %Header2
  
  Header2:
    %tobool2 = icmp eq i32 %p, 10
@@ -290,14 +290,14 @@ End:
  ;CHECK-LABEL: Header2.split:
  ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10)
  ;CHECK-LABEL: TBB.split:
-;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p)
+;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
  ;CHECK-LABEL: Tail
  ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
  ;CHECK: ret i32 %[[MERGED]]
  define i32 @test_ne_eq_ne_untaken(i32* %a, i32 %v, i32 %p) {
  Header:
    %tobool1 = icmp ne i32* %a, null
-  br i1 %tobool1, label %End, label %Header2
+  br i1 %tobool1, label %TBB, label %Header2
  
  Header2:
    %tobool2 = icmp eq i32 %p, 10
@@ -489,6 +489,31 @@ End:
    ret i32 %v
  }
  
+;CHECK-LABEL: @test_cond_no_effect
+;CHECK-NOT: Header.split:
+;CHECK-NOT: TBB.split:
+;CHECK-LABEL: Tail:
+;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 0)
+;CHECK: ret i32 %r
+define i32 @test_cond_no_effect(i32* %a, i32 %v) {
+Entry:
+  %tobool1 = icmp eq i32* %a, null
+  br i1 %tobool1, label %Header, label %End
+
+Header:
+  br i1 undef, label %Tail, label %TBB
+
+TBB:
+  br i1 undef, label %Tail, label %End
+
+Tail:
+  %r = call i32 @callee(i32* %a, i32 %v, i32 0)
+  ret i32 %r
+
+End:
+  ret i32 %v
+}
+
  ;CHECK-LABEL: @test_unreachable
  ;CHECK-LABEL: Header.split:
  ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10)
author	Florian Hahn <florian.hahn@arm.com>
	Fri, 9 Nov 2018 10:23:46 +0000 (10:23 +0000)
committer	Florian Hahn <florian.hahn@arm.com>
	Fri, 9 Nov 2018 10:23:46 +0000 (10:23 +0000)
llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp		patch \| blob \| history
llvm/test/Other/new-pm-lto-defaults.ll		patch \| blob \| history
llvm/test/Other/opt-O3-pipeline.ll		patch \| blob \| history
llvm/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll		patch \| blob \| history