From 8a959625c433f311233682afa7bfe1c76367700d Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 8 Oct 2021 09:09:22 +0700 Subject: [PATCH] [LoopPeel] Peel loops with deoptimizing exits Added support for peeling loops with "deoptimizing" exits - such exits that it or any of its children (or any of their children, etc) either has a @llvm.experimental.deoptimize call prior to the terminating return instruction of this basic block or is terminated with unreachable. All blocks in the the sequence must have a single successor, maybe except for the last one. Previously we only checked the exit block for being deoptimizing. Now we check if the last reachable block from the exit is deoptimizing. Patch by Dmitry Makogon! Differential Revision: https://reviews.llvm.org/D110922 Reviewed By: mkazantsev --- .../llvm/Transforms/Utils/BasicBlockUtils.h | 7 +++ llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 20 +++++++ llvm/lib/Transforms/Utils/LoopPeel.cpp | 62 ++++++---------------- .../LoopUnroll/peel-multiple-unreachable-exits.ll | 44 ++++++++++++--- 4 files changed, 79 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 4bea4c2..52a3fcc 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL, /// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc. void ReplaceInstWithInst(Instruction *From, Instruction *To); +/// Check if we can prove that all paths starting from this block converge +/// to a block that either has a @llvm.experimental.deoptimize call +/// prior to its terminating return instruction or is terminated by unreachable. +/// All blocks in the traversed sequence must have a single predecessor, maybe +/// except for the last one. +bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB); + /// Option class for critical edge splitting. /// /// This provides a builder interface for overriding the default options used diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 1a07697..de6c2af 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -52,6 +53,11 @@ using namespace llvm; #define DEBUG_TYPE "basicblock-utils" +static cl::opt MaxDeoptimizingCheckDepth( + "max-deopt-check-depth", cl::init(8), cl::Hidden, + cl::desc("Set the maximum path length when checking whether a basic block " + "is deoptimizing")); + void llvm::DetatchDeadBlocks( ArrayRef BBs, SmallVectorImpl *Updates, @@ -485,6 +491,20 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, BI = New; } +bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) { + // Remember visited blocks to avoid infinite loop + SmallPtrSet VisitedBlocks; + unsigned Depth = 0; + while (BB && Depth++ < MaxDeoptimizingCheckDepth && + VisitedBlocks.insert(BB).second) { + if (BB->getTerminatingDeoptimizeCall() || + isa(BB->getTerminator())) + return true; + BB = BB->getSingleSuccessor(); + } + return false; +} + void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { BasicBlock::iterator BI(From); ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index a6cdce1..41b4ca7 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -102,15 +102,15 @@ bool llvm::canPeel(Loop *L) { SmallVector Exits; L->getUniqueNonLatchExitBlocks(Exits); // The latch must either be the only exiting block or all non-latch exit - // blocks have either a deopt or unreachable terminator. Both deopt and - // unreachable terminators are a strong indication they are not taken. Note - // that this is a profitability check, not a legality check. Also note that - // LoopPeeling currently can only update the branch weights of latch blocks - // and branch weights to blocks with deopt or unreachable do not need + // blocks have either a deopt or unreachable terminator or compose a chain of + // blocks where the last one is either deopt or unreachable terminated. Both + // deopt and unreachable terminators are a strong indication they are not + // taken. Note that this is a profitability check, not a legality check. Also + // note that LoopPeeling currently can only update the branch weights of latch + // blocks and branch weights to blocks with deopt or unreachable do not need // updating. - return all_of(Exits, [](const BasicBlock *BB) { - return BB->getTerminatingDeoptimizeCall() || - isa(BB->getTerminator()); + return all_of(Exits, [&](const BasicBlock *BB) { + return IsBlockFollowedByDeoptOrUnreachable(BB); }); } @@ -667,37 +667,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SmallVector, 4> ExitEdges; L->getExitEdges(ExitEdges); - DenseMap ExitIDom; - if (DT) { - // We'd like to determine the idom of exit block after peeling one - // iteration. - // Let Exit is exit block. - // Let ExitingSet - is a set of predecessors of Exit block. They are exiting - // blocks. - // Let Latch' and ExitingSet' are copies after a peeling. - // We'd like to find an idom'(Exit) - idom of Exit after peeling. - // It is an evident that idom'(Exit) will be the nearest common dominator - // of ExitingSet and ExitingSet'. - // idom(Exit) is a nearest common dominator of ExitingSet. - // idom(Exit)' is a nearest common dominator of ExitingSet'. - // Taking into account that we have a single Latch, Latch' will dominate - // Header and idom(Exit). - // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'. - // All these basic blocks are in the same loop, so what we find is - // (nearest common dominator of idom(Exit) and Latch)'. - // In the loop below we remember nearest common dominator of idom(Exit) and - // Latch to update idom of Exit later. - assert(L->hasDedicatedExits() && "No dedicated exits?"); - for (auto Edge : ExitEdges) { - if (ExitIDom.count(Edge.second)) - continue; - BasicBlock *BB = DT->findNearestCommonDominator( - DT->getNode(Edge.second)->getIDom()->getBlock(), Latch); - assert(L->contains(BB) && "IDom is not in a loop"); - ExitIDom[Edge.second] = BB; - } - } - Function *F = Header->getParent(); // Set up all the necessary basic blocks. It is convenient to split the @@ -769,6 +738,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SmallVector LoopLocalNoAliasDeclScopes; identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + SmallVector DTUpdates; + // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector NewBlocks; @@ -782,14 +753,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // previous one. remapInstructionsInBlocks(NewBlocks, VMap); + // If DT is available, insert edges from cloned exiting blocks to the exits if (DT) { - // Latches of the cloned loops dominate over the loop exit, so idom of the - // latter is the first cloned loop body, as original PreHeader dominates - // the original loop body. - if (Iter == 0) - for (auto Exit : ExitIDom) - DT->changeImmediateDominator(Exit.first, - cast(LVMap[Exit.second])); + for (auto Exit : ExitEdges) + DTUpdates.push_back( + {DT->Insert, cast(LVMap[Exit.first]), Exit.second}); #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); #endif @@ -836,6 +804,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // We modified the loop, update SE. SE->forgetTopmostLoop(L); + DT->applyUpdates(DTUpdates); + // Finally DomtTree must be correct. assert(DT->verify(DominatorTree::VerificationLevel::Fast)); diff --git a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll index 98c78ff..bf264aa 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll @@ -193,28 +193,56 @@ unreachable.exit: define void @peel_exits_to_blocks_branch_to_unreachable_block(i32* %ptr, i32 %N, i32 %x, i1 %c.1) { ; CHECK-LABEL: @peel_exits_to_blocks_branch_to_unreachable_block( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]] +; CHECK: loop.header.peel.begin: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]] +; CHECK: loop.header.peel: +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]] +; CHECK: else.peel: +; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]] +; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[EXIT_2:%.*]], label [[LOOP_LATCH_PEEL:%.*]] +; CHECK: then.peel: +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH_PEEL]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ] +; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1 +; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4 +; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1 +; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000 +; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: loop.header.peel.next: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]] +; CHECK: loop.header.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: else: -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]] ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void +; CHECK: exit.1.loopexit: +; CHECK-NEXT: br label [[EXIT_1]] ; CHECK: exit.1: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[UNREACHABLE_TERM:%.*]] +; CHECK: exit.2.loopexit: +; CHECK-NEXT: br label [[EXIT_2]] ; CHECK: exit.2: ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[UNREACHABLE_TERM]] -- 2.7.4