From cddf3c5e1c4c99c56aebe56b3fe96d56fe2fe578 Mon Sep 17 00:00:00 2001 From: Balaram Makam Date: Thu, 26 Oct 2017 22:34:01 +0000 Subject: [PATCH] [CGP] Merge empty case blocks if no extra moves are added. Summary: Currently we skip merging when extra moves may be added in the header of switch instead of the case block, if the case block is used as an incoming block of a PHI. If all the incoming values of the PHIs are non-constants and the destination block is dominated by the switch block then extra moves are likely not added by ISel, so there is no need to skip merging in this case. Reviewers: efriedma, junbuml, davidxl, hfinkel, qcolombet Reviewed By: efriedma Subscribers: dberlin, kuhar, mcrosier, llvm-commits Differential Revision: https://reviews.llvm.org/D37343 llvm-svn: 316711 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 47 +++++++--- .../CodeGenPrepare/skip-merging-case-block.ll | 100 ++++++++++++++++++++- 2 files changed, 134 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 1e5f153..9f9bde4 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -212,6 +212,7 @@ class TypePromotionTransaction; const TargetTransformInfo *TTI = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + DominatorTree *DT; std::unique_ptr BFI; std::unique_ptr BPI; @@ -262,6 +263,7 @@ class TypePromotionTransaction; void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -343,6 +345,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis().getTLI(); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); + DT = &getAnalysis().getDomTree(); + OptSize = F.optForSize(); if (ProfileGuidedSectionPrefix) { @@ -755,6 +759,11 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, return true; SmallPtrSet SameIncomingValueBBs; + SmallVector PNs; + + for (auto DestBBI = DestBB->begin(); + auto *DestPN = dyn_cast(&*DestBBI); ++DestBBI) + PNs.push_back(DestPN); // Find all other incoming blocks from which incoming values of all PHIs in // DestBB are the same as the ones from BB. @@ -764,16 +773,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (DestBBPred == BB) continue; - bool HasAllSameValue = true; - BasicBlock::const_iterator DestBBI = DestBB->begin(); - while (const PHINode *DestPN = dyn_cast(DestBBI++)) { - if (DestPN->getIncomingValueForBlock(BB) != - DestPN->getIncomingValueForBlock(DestBBPred)) { - HasAllSameValue = false; - break; - } - } - if (HasAllSameValue) + if (llvm::all_of(PNs, [&](PHINode *PN) { + return (PN->getIncomingValueForBlock(BB) == + PN->getIncomingValueForBlock(DestBBPred)); + })) SameIncomingValueBBs.insert(DestBBPred); } @@ -783,6 +786,14 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (SameIncomingValueBBs.count(Pred)) return true; + // Check to see if none of the phis have constant incoming values for BB and + // Pred dominates DestBB, in such case extra COPYs are likely not added, so + // there is no reason to skip merging. + if (DT->dominates(Pred, DestBB) && llvm::none_of(PNs, [BB](PHINode *PN) { + return isa(PN->getIncomingValueForBlock(BB)); + })) + return true; + if (!BFI) { Function &F = *BB->getParent(); LoopInfo LI{DominatorTree(F)}; @@ -885,7 +896,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, nullptr); + MergeBasicBlockIntoOnlyPred(DestBB, DT); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -926,7 +937,21 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. + SmallVector Updates; + for (auto *PredBB : predecessors(BB)) { + if (PredBB == BB) + continue; + DominatorTree::UpdateType UT = {DominatorTree::Delete, PredBB, BB}; + if (!is_contained(Updates, UT)) { + Updates.push_back(UT); + if (PredBB != DestBB) + Updates.push_back({DominatorTree::Insert, PredBB, DestBB}); + } + } BB->replaceAllUsesWith(DestBB); + DT->applyUpdates(Updates); + BB->getTerminator()->eraseFromParent(); + DT->deleteEdge(BB, DestBB); BB->eraseFromParent(); ++NumBlocksElim; diff --git a/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll b/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll index 194c86b..b062e5f 100644 --- a/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll +++ b/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll @@ -143,11 +143,107 @@ declare void @calldefault(...) local_unnamed_addr !1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1} !2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1} +; while.cond does not dominate return, expect to skip merging empty block +; return.loopexit into return. +@b = external global i32, align 4 +@a = external global i32*, align 8 + +define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @f_switch4 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %return, label %if.end + +if.end: ; preds = %entry + %add = add i32 %i, 2 + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i32 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i32 0, label %return.loopexit +; CHECK: i32 47, label %return.loopexit +while.cond: ; preds = %while.cond.backedge, %if.end + switch i32 %magicptr, label %while.cond.if.end10_crit_edge [ + i32 0, label %return.loopexit + i32 47, label %return.loopexit + ] + +while.cond.if.end10_crit_edge: ; preds = %while.cond + br label %while.cond.backedge + +while.cond.backedge: ; preds = %while.cond.if.end10_crit_edge, %if.then9 + br label %while.cond + +return.loopexit: ; preds = %while.cond + br label %return + +; CHECK_LABEL: return: +; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] +return: ; preds = %return.loopexit, %entry + %retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] + ret void +} +declare i32 @__gxx_personality_v0(...) + +; Expect to merge empty block while.cond2.loopexit into while.cond2 +define i32 @f_switch5(i32 %i) local_unnamed_addr { +; CHECK-LABEL: @f_switch5 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %while.cond.preheader, label %sw.epilog + +while.cond.preheader: ; preds = %entry + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i64 + %arrayidx = getelementptr inbounds i32, i32* %1, i64 1 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i64 32, label %while.cond2 +; CHECK: i64 0, label %while.cond2 +while.cond: ; preds = %land.rhs, %while.cond.preheader + switch i64 %magicptr, label %land.rhs [ + i64 32, label %while.cond2.loopexit + i64 0, label %while.cond2.loopexit + ] + +land.rhs: ; preds = %while.cond + %2 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp eq i32 %2, 0 + br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond + +while.cond2thread-pre-split.loopexit: ; preds = %land.rhs + br label %while.cond2thread-pre-split + +while.cond2thread-pre-split: ; preds = %while.body4, %while.cond2thread-pre-split.loopexit + %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ] + br label %while.cond2 + +while.cond2.loopexit: ; preds = %while.cond, %while.cond + br label %while.cond2 + +; CHECK-LABEL: while.cond2: +; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ] +while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split + %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ] + %tobool3 = icmp eq i32* %3, null + br i1 %tobool3, label %sw.epilog, label %while.body4 + +while.body4: ; preds = %while.cond2 + tail call void bitcast (void (...)* @fn2 to void ()*)() + %.pr.pre = load i32*, i32** @a, align 8 + br label %while.cond2thread-pre-split + +sw.epilog: ; preds = %while.cond2, %entry + ret i32 undef +} + ; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable() ; in the case where empty blocks are removed before creating BFI/BPI. -@b = common global i32 0, align 4 -@a = common global i32* null, align 8 define i32 @should_not_assert(i32 %i) local_unnamed_addr { entry: %0 = load i32, i32* @b, align 4 -- 2.7.4