From b2593f78cabbea2703d6442c016638740fe5f660 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Mon, 18 Jul 2016 16:29:27 +0000 Subject: [PATCH] [LoopDist] Port to new PM Summary: The direct motivation for the port is to ensure that the OptRemarkEmitter tests work with the new PM. This remains a function pass because we not only create multiple loops but could also version the original loop. In the test I need to invoke opt with -passes='require,loop-distribute'. LoopDistribute does not directly depend on AA however LAA does. LAA uses getCachedResult so I *think* we need manually pull in 'aa'. Reviewers: davidxl, silvas Subscribers: sanjoy, llvm-commits, mzolotukhin Differential Revision: https://reviews.llvm.org/D22437 llvm-svn: 275811 --- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Transforms/Scalar/LoopDistribute.h | 30 ++++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 108 ++++++++++++++------- llvm/lib/Transforms/Scalar/Scalar.cpp | 2 +- .../diagnostics-with-hotness-lazy-BFI.ll | 5 + .../LoopDistribute/diagnostics-with-hotness.ll | 5 + 8 files changed, 119 insertions(+), 35 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Scalar/LoopDistribute.h diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 86ea80e..90ff82f 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -177,7 +177,7 @@ void initializeLocalStackSlotPassPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&); -void initializeLoopDistributePass(PassRegistry&); +void initializeLoopDistributeLegacyPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopDistribute.h b/llvm/include/llvm/Transforms/Scalar/LoopDistribute.h new file mode 100644 index 0000000..ddde595 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopDistribute.h @@ -0,0 +1,30 @@ +//===- LoopDistribute.cpp - Loop Distribution Pass --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Loop Distribution Pass. Its main focus is to +// distribute loops that cannot be vectorized due to dependence cycles. It +// tries to isolate the offending dependences into a new loop allowing +// vectorization of the remaining parts. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H +#define LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class LoopDistributePass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index aa276be..0e64df8 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -92,6 +92,7 @@ #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopRotation.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 96f6cb8..b717057 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -151,6 +151,7 @@ FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) +FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index f1e0ec4..7eca28e 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -22,6 +22,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/STLExtras.h" @@ -29,6 +30,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -597,7 +599,7 @@ public: } /// \brief Try to distribute an inner-most loop. - bool processLoop(LoopAccessLegacyAnalysis *LAA) { + bool processLoop(std::function &GetLAA) { assert(L->empty() && "Only process inner loops."); DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName() @@ -610,7 +612,7 @@ public: return fail("multiple exit blocks"); // LAA will check that we only have a single exiting block. - LAI = &LAA->getInfo(L); + LAI = &GetLAA(*L); // Currently, we only distribute to isolate the part of the loop with // dependence cycles to enable partial vectorization. @@ -860,19 +862,50 @@ private: Optional IsForced; }; +/// Shared implementation between new and old PMs. +static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, + std::function &GetLAA, + bool ProcessAllLoops) { + // Build up a worklist of inner-loops to vectorize. This is necessary as the + // act of distributing a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->empty()) + Worklist.push_back(L); + + // Now walk the identified inner loops. + bool Changed = false; + for (Loop *L : Worklist) { + LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE); + + // If distribution was forced for the specific loop to be + // enabled/disabled, follow that. Otherwise use the global flag. + if (LDL.isForced().getValueOr(ProcessAllLoops)) + Changed |= LDL.processLoop(GetLAA); + } + + // Process each loop nest in the function. + return Changed; +} + /// \brief The pass class. -class LoopDistribute : public FunctionPass { +class LoopDistributeLegacy : public FunctionPass { public: /// \p ProcessAllLoopsByDefault specifies whether loop distribution should be /// performed by default. Pass -enable-loop-distribute={0,1} overrides this /// default. We use this to keep LoopDistribution off by default when invoked /// from the optimization pipeline but on when invoked explicitly from opt. - LoopDistribute(bool ProcessAllLoopsByDefault = true) + LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true) : FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) { // The default is set by the caller. if (EnableLoopDistribute.getNumOccurrences() > 0) ProcessAllLoops = EnableLoopDistribute; - initializeLoopDistributePass(*PassRegistry::getPassRegistry()); + initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { @@ -884,31 +917,10 @@ public: auto *DT = &getAnalysis().getDomTree(); auto *SE = &getAnalysis().getSE(); auto *ORE = &getAnalysis().getORE(); + std::function GetLAA = + [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; - // Build up a worklist of inner-loops to vectorize. This is necessary as the - // act of distributing a loop creates new loops and can invalidate iterators - // across the loops. - SmallVector Worklist; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) - // We only handle inner-most loops. - if (L->empty()) - Worklist.push_back(L); - - // Now walk the identified inner loops. - bool Changed = false; - for (Loop *L : Worklist) { - LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE); - - // If distribution was forced for the specific loop to be - // enabled/disabled, follow that. Otherwise use the global flag. - if (LDL.isForced().getValueOr(ProcessAllLoops)) - Changed |= LDL.processLoop(LAA); - } - - // Process each loop nest in the function. - return Changed; + return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -930,19 +942,49 @@ private: }; } // anonymous namespace -char LoopDistribute::ID; +PreservedAnalyses LoopDistributePass::run(Function &F, + FunctionAnalysisManager &AM) { + // FIXME: This does not currently match the behavior from the old PM. + // ProcessAllLoops with the old PM defaults to true when invoked from opt and + // false when invoked from the optimization pipeline. + bool ProcessAllLoops = false; + if (EnableLoopDistribute.getNumOccurrences() > 0) + ProcessAllLoops = EnableLoopDistribute; + + auto &LI = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &SE = AM.getResult(F); + auto &ORE = AM.getResult(F); + + auto &LAM = AM.getResult(F).getManager(); + std::function GetLAA = + [&](Loop &L) -> const LoopAccessInfo & { + return LAM.getResult(L); + }; + + bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + return PA; +} + +char LoopDistributeLegacy::ID; static const char ldist_name[] = "Loop Distribition"; -INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false) +INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, + false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false) +INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) namespace llvm { FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) { - return new LoopDistribute(ProcessAllLoopsByDefault); + return new LoopDistributeLegacy(ProcessAllLoopsByDefault); } } diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index dd87273..f235b12 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -86,7 +86,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); initializeFloat2IntLegacyPassPass(Registry); - initializeLoopDistributePass(Registry); + initializeLoopDistributeLegacyPass(Registry); initializeLoopLoadEliminationPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); initializeLoopVersioningPassPass(Registry); diff --git a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll index a67a71c..819c002 100644 --- a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll +++ b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll @@ -5,6 +5,11 @@ ; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \ ; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS +; RUN: opt -passes='require,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS +; RUN: opt -passes='require,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS + ; REQUIRES: asserts ; HOTNESS: block-frequency: forced diff --git a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll index 21ddfcd..8689bfe 100644 --- a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll +++ b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll @@ -3,6 +3,11 @@ ; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \ ; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS +; RUN: opt -passes='require,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS +; RUN: opt -passes='require,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS + ; REQUIRES: asserts ; This is the input program: -- 2.7.4