From 9258e9d190af643ed5d770fa8a965af3b7090502 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 27 Apr 2018 17:29:10 +0000 Subject: [PATCH] [LoopGuardWidening] Split out a loop pass version of GuardWidening The idea is to have a pass which performs the same transformation as GuardWidening, but can be run within a loop pass manager without disrupting the pass manager structure. As demonstrated by the test case, this doesn't quite get there because of issues with post dom, but it gives a good step in the right direction. the motivation is purely to reduce compile time since we can now preserve locality during the loop walk. This patch only includes a legacy pass. A follow up will add a new style pass as well. llvm-svn: 331060 --- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Transforms/Scalar.h | 10 +++ llvm/lib/Transforms/Scalar/GuardWidening.cpp | 83 +++++++++++++++++++--- llvm/lib/Transforms/Scalar/Scalar.cpp | 1 + .../test/Transforms/GuardWidening/loop-schedule.ll | 41 +++++++++++ 6 files changed, 128 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/GuardWidening/loop-schedule.ll diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index e6ceeff..e7a9b96 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -162,6 +162,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&); void initializeGlobalSplitPass(PassRegistry&); void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&); +void initializeLoopGuardWideningLegacyPassPass(PassRegistry&); void initializeIPCPPass(PassRegistry&); void initializeIPSCCPLegacyPassPass(PassRegistry&); void initializeIRTranslatorPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index d3d1841..a49c4aa 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -111,6 +111,7 @@ namespace { (void) llvm::createGlobalOptimizerPass(); (void) llvm::createGlobalsAAWrapperPass(); (void) llvm::createGuardWideningPass(); + (void) llvm::createLoopGuardWideningPass(); (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createInductiveRangeCheckEliminationPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 599332d..37b229e 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -99,6 +99,16 @@ FunctionPass *createGuardWideningPass(); //===----------------------------------------------------------------------===// // +// LoopGuardWidening - Analogous to the GuardWidening pass, but restricted to a +// single loop at a time for use within a LoopPassManager. Desired effect is +// to widen guards into preheader or a single guard within loop if that's not +// possible. +// +Pass *createLoopGuardWideningPass(); + + +//===----------------------------------------------------------------------===// +// // BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to // remove computations of dead bits. // diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index e7697e7..b1d11af 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -40,9 +40,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/GuardWidening.h" +#include #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/ConstantRange.h" @@ -53,6 +55,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -65,6 +68,11 @@ class GuardWideningImpl { PostDominatorTree &PDT; LoopInfo &LI; + /// Together, these describe the region of interest. This might be all of + /// the blocks within a function, or only a given loop's blocks and preheader. + DomTreeNode *Root; + std::function BlockFilter; + /// The set of guards whose conditions have been widened into dominating /// guards. SmallVector EliminatedGuards; @@ -205,9 +213,11 @@ class GuardWideningImpl { } public: + explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT, - LoopInfo &LI) - : DT(DT), PDT(PDT), LI(LI) {} + LoopInfo &LI, DomTreeNode *Root, + std::function BlockFilter) + : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {} /// The entry point for this pass. bool run(); @@ -220,9 +230,12 @@ bool GuardWideningImpl::run() { DenseMap> GuardsInBlock; bool Changed = false; - for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode()); + for (auto DFI = df_begin(Root), DFE = df_end(Root); DFI != DFE; ++DFI) { auto *BB = (*DFI)->getBlock(); + if (!BlockFilter(BB)) + continue; + auto &CurrentList = GuardsInBlock[BB]; for (auto &I : *BB) @@ -233,6 +246,7 @@ bool GuardWideningImpl::run() { Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock); } + assert(EliminatedGuards.empty() || Changed); for (auto *II : EliminatedGuards) if (!WidenedGuards.count(II)) II->eraseFromParent(); @@ -252,6 +266,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening( // for the most profit. for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { auto *CurBB = DFSI.getPath(i)->getBlock(); + if (!BlockFilter(CurBB)) + break; auto *CurLoop = LI.getLoopFor(CurBB); assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; @@ -647,7 +663,8 @@ PreservedAnalyses GuardWideningPass::run(Function &F, auto &DT = AM.getResult(F); auto &LI = AM.getResult(F); auto &PDT = AM.getResult(F); - if (!GuardWideningImpl(DT, PDT, LI).run()) + if (!GuardWideningImpl(DT, PDT, LI, DT.getRootNode(), + [](BasicBlock*) { return true; } ).run()) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -658,7 +675,6 @@ PreservedAnalyses GuardWideningPass::run(Function &F, namespace { struct GuardWideningLegacyPass : public FunctionPass { static char ID; - GuardWideningPass Impl; GuardWideningLegacyPass() : FunctionPass(ID) { initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); @@ -667,10 +683,11 @@ struct GuardWideningLegacyPass : public FunctionPass { bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; - return GuardWideningImpl( - getAnalysis().getDomTree(), - getAnalysis().getPostDomTree(), - getAnalysis().getLoopInfo()).run(); + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &PDT = getAnalysis().getPostDomTree(); + return GuardWideningImpl(DT, PDT, LI, DT.getRootNode(), + [](BasicBlock*) { return true; } ).run(); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -680,9 +697,43 @@ struct GuardWideningLegacyPass : public FunctionPass { AU.addRequired(); } }; + +/// Same as above, but restricted to a single loop at a time. Can be +/// scheduled with other loop passes w/o breaking out of LPM +struct LoopGuardWideningLegacyPass : public LoopPass { + static char ID; + + LoopGuardWideningLegacyPass() : LoopPass(ID) { + initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L)) + return false; + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &PDT = getAnalysis().getPostDomTree(); + BasicBlock *RootBB = L->getLoopPredecessor(); + if (!RootBB) + RootBB = L->getHeader(); + auto BlockFilter = [&](BasicBlock *BB) { + return BB == RootBB || L->contains(BB); + }; + return GuardWideningImpl(DT, PDT, LI, + DT.getNode(RootBB), BlockFilter).run(); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + getLoopAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + } +}; } char GuardWideningLegacyPass::ID = 0; +char LoopGuardWideningLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards", false, false) @@ -692,6 +743,20 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards", false, false) +INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening", + "Widen guards (within a single loop, as a loop pass)", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening", + "Widen guards (within a single loop, as a loop pass)", + false, false) + FunctionPass *llvm::createGuardWideningPass() { return new GuardWideningLegacyPass(); } + +Pass *llvm::createLoopGuardWideningPass() { + return new LoopGuardWideningLegacyPass(); +} diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 24cbdc7..36f5ba7 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -45,6 +45,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeScalarizerPass(Registry); initializeDSELegacyPassPass(Registry); initializeGuardWideningLegacyPassPass(Registry); + initializeLoopGuardWideningLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeNewGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); diff --git a/llvm/test/Transforms/GuardWidening/loop-schedule.ll b/llvm/test/Transforms/GuardWidening/loop-schedule.ll new file mode 100644 index 0000000..163fb52 --- /dev/null +++ b/llvm/test/Transforms/GuardWidening/loop-schedule.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -licm -loop-guard-widening -licm -debug-pass=Structure < %s 2>&1 | FileCheck %s + +; Main point of this test is to check the scheduling -- there should be +; no analysis passes needed between LICM and LoopGuardWidening + +; TODO: Because guard widdening currently requires post-dom, we end up +; breaking the loop pass manager to compute it. Need to either make all +; loop passes preserve postdom (hard) or make it optional in guard widdening +; CHECK: Loop Pass Manager +; CHECK: Loop Invariant Code Motion +; CHECK: Post-Dominator Tree Construction +; CHECK: Loop Pass Manager +; CHECK: Widen guards (within a single loop, as a loop pass) +; CHECK: Loop Invariant Code Motion + +declare void @llvm.experimental.guard(i1,...) + +define void @iter(i32 %a, i32 %b, i1* %c_p) { +; CHECK-LABEL @iter +; CHECK: %cond_0 = icmp ult i32 %a, 10 +; CHECK: %cond_1 = icmp ult i32 %b, 10 +; CHECK: %wide.chk = and i1 %cond_0, %cond_1 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] + +entry: + %cond_0 = icmp ult i32 %a, 10 + call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ] + br label %loop + +loop: ; preds = %loop.preheader, %loop + %cond_1 = icmp ult i32 %b, 10 + call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ] + %cnd = load i1, i1* %c_p + br i1 %cnd, label %loop, label %leave.loopexit + +leave.loopexit: ; preds = %loop + br label %leave + +leave: ; preds = %leave.loopexit, %entry + ret void +} -- 2.7.4