From b7599329fc28ec8f7086157ea50202fa5ba47f89 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 2 May 2016 17:22:54 +0000 Subject: [PATCH] [SimplifyCFG] Extend TryToSimplifyUncondBranchFromEmptyBlock for empty block including lifetime intrinsics Make it possible that TryToSimplifyUncondBranchFromEmptyBlock merges empty basic block including lifetime intrinsics as well as phi nodes and unconditional branch into its successor or predecessor(s). If successor of empty block has single predecessor, all contents including lifetime intrinsics are sinked into the successor. Otherwise, they are hoisted into its predecessor(s) and then merged into the predecessor(s). Patch by Josh Yoon ! Differential Revision: http://reviews.llvm.org/D19257 llvm-svn: 268254 --- llvm/include/llvm/IR/BasicBlock.h | 9 ++ llvm/lib/IR/BasicBlock.cpp | 24 +++ llvm/lib/Transforms/Utils/Local.cpp | 203 ++++++++++++++++------- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 9 +- llvm/test/Transforms/SimplifyCFG/lifetime.ll | 232 +++++++++++++++++++++++++++ 5 files changed, 418 insertions(+), 59 deletions(-) diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index e7daf6e..867b485 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -152,6 +152,15 @@ public: return const_cast(this)->getFirstNonPHIOrDbgOrLifetime(); } + /// \brief Returns a pointer to the first instruction in this block that is + /// not a PHINode, a debug intrinsic, a lifetime intrinsic, or a bitcast + /// instruction coupled with the following lifetime intrinsic. + Instruction *getFirstNonPHIOrDbgOrLifetimeOrBitCast(); + const Instruction *getFirstNonPHIOrDbgOrLifetimeOrBitCast() const { + return const_cast(this) + ->getFirstNonPHIOrDbgOrLifetimeOrBitCast(); + } + /// \brief Returns an iterator to the first instruction in this block that is /// suitable for inserting a non-PHI instruction. /// diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 9f806fa..604ea3a 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -206,6 +206,30 @@ Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() { return nullptr; } +Instruction *BasicBlock::getFirstNonPHIOrDbgOrLifetimeOrBitCast() { + for (Instruction &I : *this) { + if (isa(I) || isa(I)) + continue; + + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + + if (auto *BCI = dyn_cast(&I)) { + if (auto *II = dyn_cast(++I.getIterator())) { + if ((II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) && + II->getOperand(1) == BCI) { + continue; + } + } + } + return &I; + } + return nullptr; +} + BasicBlock::iterator BasicBlock::getFirstInsertionPt() { Instruction *FirstNonPHI = getFirstNonPHI(); if (!FirstNonPHI) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 5af3712..988717f 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -800,6 +800,55 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, replaceUndefValuesInPhi(PN, IncomingValues); } +/// Return true if BB has lifetime.end intrinsic. +/// +static bool hasLifetime(BasicBlock *BB) { + for (auto &I : *BB) { + if (const IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::lifetime_start) { + return true; + } + } + } + return false; +} + +/// hoistLifetimeFromEmptyBlockToPred - Hoist lifetime.end intrinsics and +/// related bitcast instructions from BB to predecessors of BB. +/// +static bool hoistLifetimeFromEmptyBlockToPred(BasicBlock *BB) { + // Check to see if all Preds have single successor and if not, we cannot + // hoist lifetime intrinsics because it would change semantics. + for (auto Pred : predecessors(BB)) + if (!Pred->getSingleSuccessor()) + return false; + + // Hoist all lifetime.end intrinsics and related bitcast instrunctions + // in BB to Preds. + for (auto &I : *BB) { + if (auto *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::lifetime_start) { + for (auto Pred : predecessors(BB)) { + Instruction *NewII = I.clone(); + NewII->insertBefore(Pred->getTerminator()); + + if (I.getIterator() != BB->begin()) { + if (auto BC = dyn_cast(--I.getIterator())) { + assert(BC == I.getOperand(1)); + auto NewBC = BC->clone(); + NewBC->insertBefore(NewII); + NewII->setOperand(1, NewBC); + } + } + } + } + } + } + return true; +} + /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an /// unconditional branch, and contains no instructions other than PHI nodes, /// potential side-effect free intrinsics and the branch. If possible, @@ -813,74 +862,118 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { BasicBlock *Succ = cast(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; - // Check to see if merging these blocks would cause conflicts for any of the - // phi nodes in BB or Succ. If not, we can safely merge. - if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; - - // Check for cases where Succ has multiple predecessors and a PHI node in BB - // has uses which will not disappear when the PHI nodes are merged. It is - // possible to handle such cases, but difficult: it requires checking whether - // BB dominates Succ, which is non-trivial to calculate in the case where - // Succ has multiple predecessors. Also, it requires checking whether - // constructing the necessary self-referential PHI node doesn't introduce any - // conflicts; this isn't too difficult, but the previous code for doing this - // was incorrect. - // - // Note that if this check finds a live use, BB dominates Succ, so BB is - // something like a loop pre-header (or rarely, a part of an irreducible CFG); - // folding the branch isn't profitable in that case anyway. - if (!Succ->getSinglePredecessor()) { - BasicBlock::iterator BBI = BB->begin(); - while (isa(*BBI)) { - for (Use &U : BBI->uses()) { - if (PHINode* PN = dyn_cast(U.getUser())) { - if (PN->getIncomingBlock(U) != BB) + // If BB has lifetime.end intrinsics, simplify BB under more constraints. + if (hasLifetime(BB)) { + // Check to see if BB and its predecessors and successors have PHI. + if (isa(BB->begin())) + return false; + + for (auto Pred : predecessors(BB)) + if (isa(Pred->begin())) + return false; + + for (auto Succ : successors(BB)) + if (isa(Succ->begin())) + return false; + + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); + + } else { + // Unless BB is the only predecessor of Succ, hoist lifetime intrinsics + // to predecessors of BB and simplify BB. + if (!hoistLifetimeFromEmptyBlockToPred(BB)) { + return false; + } + } + + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) + Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; + } else { + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) + return false; + + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking + // whether BB dominates Succ, which is non-trivial to calculate in the + // case where Succ has multiple predecessors. Also, it requires checking + // whether constructing the necessary self-referential PHI node doesn't + // introduce any conflicts; this isn't too difficult, but the previous code + // for doing this was incorrect. + // + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible + // CFG); + // folding the branch isn't profitable in that case anyway. + if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa(*BBI)) { + for (Use &U : BBI->uses()) { + if (PHINode *PN = dyn_cast(U.getUser())) { + if (PN->getIncomingBlock(U) != BB) + return false; + } else { return false; - } else { - return false; + } } + ++BBI; } - ++BBI; } - } - DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); - if (isa(Succ->begin())) { - // If there is more than one pred of succ, and there are PHI nodes in - // the successor, then we need to add incoming edges for the PHI nodes - // - const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + if (isa(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); - // Loop over all of the PHI nodes in the successor of BB. - for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { - PHINode *PN = cast(I); + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { + PHINode *PN = cast(I); - redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + } } - } - if (Succ->getSinglePredecessor()) { - // BB is the only predecessor of Succ, so Succ will end up with exactly - // the same predecessors BB had. + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. - // Copy over any phi, debug or lifetime instruction. - BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), - BB->getInstList()); - } else { - while (PHINode *PN = dyn_cast(&BB->front())) { - // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. - assert(PN->use_empty() && "There shouldn't be any uses here!"); - PN->eraseFromParent(); + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast(&BB->front())) { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } } - } - // Everything that jumped to BB now goes to Succ. - BB->replaceAllUsesWith(Succ); - if (!Succ->hasName()) Succ->takeName(BB); - BB->eraseFromParent(); // Delete the old basic block. - return true; + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) + Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; + } } /// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6ac039d..7454a96 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5056,14 +5056,15 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; - - // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block is a loop header + // If the Terminator is the only non-phi instruction except for bitcast + // instruction coupled with the following lifetime intrinsic, simplify the + // block. If LoopHeader is provided, check if the block is a loop header // (This is for early invocations before loop simplify and vectorization // to keep canonical loop forms for nested loops. // These blocks can be eliminated when the pass is invoked later // in the back-end.) - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); + BasicBlock::iterator I = + BB->getFirstNonPHIOrDbgOrLifetimeOrBitCast()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && (!LoopHeaders || !LoopHeaders->count(BB)) && TryToSimplifyUncondBranchFromEmptyBlock(BB)) diff --git a/llvm/test/Transforms/SimplifyCFG/lifetime.ll b/llvm/test/Transforms/SimplifyCFG/lifetime.ll index 7c66be5..5cb0a40 100644 --- a/llvm/test/Transforms/SimplifyCFG/lifetime.ll +++ b/llvm/test/Transforms/SimplifyCFG/lifetime.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -simplifycfg -S | FileCheck %s ; Test that a lifetime intrinsic isn't removed because that would change semantics +; This case is that predecessor(s) of the target empty block (bb0) has multiple +; successors (bb0 and bb1) and its successor has multiple predecessors (entry and +; bb0). ; CHECK: foo ; CHECK: entry: @@ -27,3 +30,232 @@ declare void @f() declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +; Test that empty block including lifetime intrinsic and not related bitcast +; instruction cannot be removed. It is because the block is not empty. + +; CHECK-LABEL: coo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-LABEL: if.else: +; CHECK-LABEL: if.end: +; CHECK-LABEL: bb: +; CHECK: ret + +define void @coo(i1 %x, i1 %y) { +entry: + %a = alloca i8, align 4 + %b = alloca i32, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %entry + br i1 %y, label %while.body, label %bb + +while.body: ; preds = %while.cond + call void @llvm.lifetime.start(i64 4, i8* %a) + %c = load i8, i8* %a, align 4 + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %while.body + %d = add i8 %c, 1 + br label %if.end + +if.else: ; preds = %while.body + %e = sub i8 %c, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %f = bitcast i32* %b to i8* + call void @llvm.lifetime.end(i64 4, i8* %a) + br label %while.cond + +bb: ; preds = %while.cond + ret void +} + +; Test that empty block including lifetime intrinsic can be removed. +; Lifetime.end intrinsic is moved to predecessors because successor has +; multiple predecessors. + +; CHECK-LABEL: soo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: %e +; CHECK-NEXT: call void @llvm.lifetime.end +; CHECK-LABEL: if.else: +; CHECK-NEXT: %g +; CHECK-NEXT: call void @llvm.lifetime.end +; CHECK-NEXT: br label %while.cond +; CHECK-NOT: if.end: +; CHECK: ret + +define void @soo(i1 %x, i1 %y) { +entry: + %a = alloca i8, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %entry + br i1 %y, label %while.body, label %bb + +while.body: ; preds = %while.cond + call void @llvm.lifetime.start(i64 4, i8* %a) + %d = load i8, i8* %a, align 4 + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %while.body + %e = add i8 %d, 1 + br label %if.end + +if.else: ; preds = %while.body + %g = sub i8 %d, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void @llvm.lifetime.end(i64 4, i8* %a) + br label %while.cond + +bb: ; preds = %while.cond + ret void +} + +; Test that empty block including lifetime intrinsic and related bitcast +; instruction can be removed. Lifetime.end intrinsic and related bitcast +; instruction are moved to predecessors because successor has multiple +; predecessors. + +; CHECK-LABEL: boo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: %e +; CHECK-NEXT: %[[T:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* %[[T]]) +; CHECK-LABEL: if.else: +; CHECK-NEXT: %g +; CHECK-NEXT: %[[B:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* %[[B]]) +; CHECK-NEXT: br label %while.cond +; CHECK-NOT: if.end: +; CHECK: ret + +define void @boo(i1 %x, i1 %y) { +entry: + %a = alloca i32, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %entry + br i1 %y, label %while.body, label %bb + +while.body: ; preds = %while.cond + %b = bitcast i32* %a to i8* + call void @llvm.lifetime.start(i64 4, i8* %b) + %d = load i32, i32* %a, align 4 + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %while.body + %e = add i32 %d, 1 + br label %if.end + +if.else: ; preds = %while.body + %g = sub i32 %d, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c = bitcast i32* %a to i8* + call void @llvm.lifetime.end(i64 4, i8* %c) + br label %while.cond + +bb: ; preds = %while.cond + ret void +} + +; Test that empty block including lifetime intrinsic can be removed. +; Lifetime.start intrinsic is moved to predecessors because successor has +; multiple predecessors. + +; CHECK-LABEL: koo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: call void @f +; CHECK-NEXT: call void @llvm.lifetime.start +; CHECK-LABEL: if.else: +; CHECK-NEXT: call void @g +; CHECK-NEXT: call void @llvm.lifetime.start +; CHECK-NEXT: br label %bb +; CHECK-NOT: if.end: +; CHECK: ret + +define void @koo(i1 %x, i1 %y, i1 %z) { +entry: + %a = alloca i8, align 4 + br i1 %z, label %bb, label %bb0 + +bb0: ; preds = %entry + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %bb0 + call void @f() + br label %if.end + +if.else: ; preds = %bb0 + call void @g() + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void @llvm.lifetime.start(i64 4, i8* %a) + br label %bb + +bb: ; preds = %if.end, %entry + %d = load i8, i8* %a, align 4 + call void @llvm.lifetime.end(i64 4, i8* %a) + ret void +} + +declare void @g() + +; Test that empty block including lifetime intrinsic and related bitcast +; instruction can be removed. Lifetime.start intrinsic and related bitcast +; instruction are moved to predecessors because successor has multiple +; predecessors. + + +; CHECK-LABEL: goo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: call void @f +; CHECK-NEXT: %[[T:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %[[T]]) +; CHECK-LABEL: if.else: +; CHECK-NEXT: call void @g +; CHECK-NEXT: %[[B:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %[[B]]) +; CHECK-NEXT: br label %bb +; CHECK-NOT: if.end: +; CHECK: ret + +define void @goo(i1 %x, i1 %y, i1 %z) { +entry: + %a = alloca i32, align 4 + br i1 %z, label %bb, label %bb0 + +bb0: ; preds = %entry + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %bb0 + call void @f() + br label %if.end + +if.else: ; preds = %bb0 + call void @g() + br label %if.end + +if.end: ; preds = %if.else, %if.then + %b = bitcast i32* %a to i8* + call void @llvm.lifetime.start(i64 4, i8* %b) + br label %bb + +bb: ; preds = %if.end, %entry + %d = load i32, i32* %a, align 4 + %c = bitcast i32* %a to i8* + call void @llvm.lifetime.end(i64 4, i8* %c) + ret void +} -- 2.7.4