/// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc.
void ReplaceInstWithInst(Instruction *From, Instruction *To);
+/// Check if we can prove that all paths starting from this block converge
+/// to a block that either has a @llvm.experimental.deoptimize call
+/// prior to its terminating return instruction or is terminated by unreachable.
+/// All blocks in the traversed sequence must have a single predecessor, maybe
+/// except for the last one.
+bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB);
+
/// Option class for critical edge splitting.
///
/// This provides a builder interface for overriding the default options used
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "basicblock-utils"
+static cl::opt<unsigned> MaxDeoptimizingCheckDepth(
+ "max-deopt-check-depth", cl::init(8), cl::Hidden,
+ cl::desc("Set the maximum path length when checking whether a basic block "
+ "is deoptimizing"));
+
void llvm::DetatchDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
BI = New;
}
+bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
+ // Remember visited blocks to avoid infinite loop
+ SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
+ unsigned Depth = 0;
+ while (BB && Depth++ < MaxDeoptimizingCheckDepth &&
+ VisitedBlocks.insert(BB).second) {
+ if (BB->getTerminatingDeoptimizeCall() ||
+ isa<UnreachableInst>(BB->getTerminator()))
+ return true;
+ BB = BB->getSingleSuccessor();
+ }
+ return false;
+}
+
void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
BasicBlock::iterator BI(From);
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
SmallVector<BasicBlock *, 4> Exits;
L->getUniqueNonLatchExitBlocks(Exits);
// The latch must either be the only exiting block or all non-latch exit
- // blocks have either a deopt or unreachable terminator. Both deopt and
- // unreachable terminators are a strong indication they are not taken. Note
- // that this is a profitability check, not a legality check. Also note that
- // LoopPeeling currently can only update the branch weights of latch blocks
- // and branch weights to blocks with deopt or unreachable do not need
+ // blocks have either a deopt or unreachable terminator or compose a chain of
+ // blocks where the last one is either deopt or unreachable terminated. Both
+ // deopt and unreachable terminators are a strong indication they are not
+ // taken. Note that this is a profitability check, not a legality check. Also
+ // note that LoopPeeling currently can only update the branch weights of latch
+ // blocks and branch weights to blocks with deopt or unreachable do not need
// updating.
- return all_of(Exits, [](const BasicBlock *BB) {
- return BB->getTerminatingDeoptimizeCall() ||
- isa<UnreachableInst>(BB->getTerminator());
+ return all_of(Exits, [&](const BasicBlock *BB) {
+ return IsBlockFollowedByDeoptOrUnreachable(BB);
});
}
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
L->getExitEdges(ExitEdges);
- DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
- if (DT) {
- // We'd like to determine the idom of exit block after peeling one
- // iteration.
- // Let Exit is exit block.
- // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
- // blocks.
- // Let Latch' and ExitingSet' are copies after a peeling.
- // We'd like to find an idom'(Exit) - idom of Exit after peeling.
- // It is an evident that idom'(Exit) will be the nearest common dominator
- // of ExitingSet and ExitingSet'.
- // idom(Exit) is a nearest common dominator of ExitingSet.
- // idom(Exit)' is a nearest common dominator of ExitingSet'.
- // Taking into account that we have a single Latch, Latch' will dominate
- // Header and idom(Exit).
- // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
- // All these basic blocks are in the same loop, so what we find is
- // (nearest common dominator of idom(Exit) and Latch)'.
- // In the loop below we remember nearest common dominator of idom(Exit) and
- // Latch to update idom of Exit later.
- assert(L->hasDedicatedExits() && "No dedicated exits?");
- for (auto Edge : ExitEdges) {
- if (ExitIDom.count(Edge.second))
- continue;
- BasicBlock *BB = DT->findNearestCommonDominator(
- DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
- assert(L->contains(BB) && "IDom is not in a loop");
- ExitIDom[Edge.second] = BB;
- }
- }
-
Function *F = Header->getParent();
// Set up all the necessary basic blocks. It is convenient to split the
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+ SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
// previous one.
remapInstructionsInBlocks(NewBlocks, VMap);
- if (DT) {
- // Latches of the cloned loops dominate over the loop exit, so idom of the
- // latter is the first cloned loop body, as original PreHeader dominates
- // the original loop body.
- if (Iter == 0)
- for (auto Exit : ExitIDom)
- DT->changeImmediateDominator(Exit.first,
- cast<BasicBlock>(LVMap[Exit.second]));
-#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
-#endif
- }
+ // If DT is available, insert edges from cloned exiting blocks to the exits
+ if (DT)
+ for (auto Exit : ExitEdges)
+ DTUpdates.push_back(
+ {DT->Insert, cast<BasicBlock>(LVMap[Exit.first]), Exit.second});
auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
// We modified the loop, update SE.
SE->forgetTopmostLoop(L);
+ DT->applyUpdates(DTUpdates);
+
// Finally DomtTree must be correct.
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
define void @peel_exits_to_blocks_branch_to_unreachable_block(i32* %ptr, i32 %N, i32 %x, i1 %c.1) {
; CHECK-LABEL: @peel_exits_to_blocks_branch_to_unreachable_block(
; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]]
+; CHECK: loop.header.peel.begin:
+; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]]
+; CHECK: loop.header.peel:
+; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2
+; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]]
+; CHECK: else.peel:
+; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]]
+; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[EXIT_2:%.*]], label [[LOOP_LATCH_PEEL:%.*]]
+; CHECK: then.peel:
+; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH_PEEL]]
+; CHECK: loop.latch.peel:
+; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ]
+; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4
+; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1
+; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000
+; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.header.peel.next:
+; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]]
+; CHECK: loop.header.peel.next1:
+; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK: entry.peel.newph:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2
-; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
-; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: else:
-; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]]
-; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]]
+; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]]
; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000
-; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: exit.loopexit:
+; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
+; CHECK: exit.1.loopexit:
+; CHECK-NEXT: br label [[EXIT_1]]
; CHECK: exit.1:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[UNREACHABLE_TERM:%.*]]
+; CHECK: exit.2.loopexit:
+; CHECK-NEXT: br label [[EXIT_2]]
; CHECK: exit.2:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[UNREACHABLE_TERM]]