From 9548b74a831ea005649465797f359e0521f3b8a9 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 6 Nov 2019 23:20:06 -0600 Subject: [PATCH] [OpenMP] Introduce the OpenMPOpt transformation pass The OpenMPOpt pass is a CGSCC pass in which OpenMP specific optimizations can reside. The OpenMPOpt pass uses the OpenMPKinds.def file to identify runtime calls and their uses. This allows targeted transformations and eases their implementation. This initial patch deduplicates `__kmpc_global_thread_num` and `omp_get_thread_num` calls. We can also identify arguments that are equivalent to such a call result and use it instead. Later we can determine "gtid" arguments based on the use in kernel functions etc. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D69930 --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 2 + llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Transforms/IPO.h | 4 + llvm/include/llvm/Transforms/IPO/OpenMPOpt.h | 54 ++++ llvm/lib/LTO/LTOCodeGenerator.cpp | 1 + llvm/lib/Passes/PassBuilder.cpp | 6 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/IPO.cpp | 1 + llvm/lib/Transforms/IPO/LLVMBuild.txt | 2 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 419 +++++++++++++++++++++++++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 10 + llvm/test/Other/new-pm-defaults.ll | 1 + llvm/test/Other/new-pm-thinlto-defaults.ll | 1 + llvm/test/Other/opt-O2-pipeline.ll | 1 + llvm/test/Other/opt-O3-pipeline.ll | 1 + llvm/test/Other/opt-Os-pipeline.ll | 1 + llvm/test/Other/pass-pipelines.ll | 1 + llvm/test/Transforms/OpenMP/gtid.ll | 86 +++++ 20 files changed, 594 insertions(+), 1 deletion(-) create mode 100644 llvm/include/llvm/Transforms/IPO/OpenMPOpt.h create mode 100644 llvm/lib/Transforms/IPO/OpenMPOpt.cpp create mode 100644 llvm/test/Transforms/OpenMP/gtid.ll diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def 
b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 7fc2dbf..8e01676 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -177,6 +177,8 @@ __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(omp_get_thread_num, false, Int32, ) +__OMP_RTL(__last, false, Void, ) + #undef __OMP_RTL #undef OMP_RTL diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index a5e1310..a8245d8 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -71,6 +71,7 @@ void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); +void initializeOpenMPOptLegacyPassPass(PassRegistry &); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index aa64296..5b3bf3e 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -71,6 +71,7 @@ namespace { (void) llvm::createAggressiveDCEPass(); (void) llvm::createAggressiveInstCombinerPass(); (void) llvm::createBitTrackingDCEPass(); + (void) llvm::createOpenMPOptLegacyPass(); (void) llvm::createArgumentPromotionPass(); (void) llvm::createAlignmentFromAssumptionsPass(); (void) llvm::createBasicAAWrapperPass(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index ee411a1..16ce814 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -153,6 +153,10 @@ ModulePass *createDeadArgHackingPass(); Pass *createArgumentPromotionPass(unsigned maxElements = 3); //===----------------------------------------------------------------------===// 
+/// createOpenMPOptLegacyPass - OpenMP specific optimizations. +Pass *createOpenMPOptLegacyPass(); + +//===----------------------------------------------------------------------===// /// createIPConstantPropagationPass - This pass propagates constants from call /// sites into the bodies of functions. /// diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h new file mode 100644 index 0000000..0bd81ea --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -0,0 +1,54 @@ +//===- IPO/OpenMPOpt.h - Collection of OpenMP optimizations -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_OPENMP_OPT_H +#define LLVM_TRANSFORMS_IPO_OPENMP_OPT_H + +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +namespace omp { + +/// Helper to remember if the module contains OpenMP (runtime calls), to be used +/// foremost with containsOpenMP. +struct OpenMPInModule { + OpenMPInModule &operator=(bool Found) { + if (Found) + Value = OpenMPInModule::OpenMP::FOUND; + else + Value = OpenMPInModule::OpenMP::NOT_FOUND; + return *this; + } + bool isKnown() { return Value != OpenMP::UNKNOWN; } + operator bool() { return Value != OpenMP::NOT_FOUND; } + +private: + enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN; +}; + +/// Helper to determine if \p M contains OpenMP (runtime calls). +bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule); + +} // namespace omp + +/// OpenMP optimizations pass. +class OpenMPOptPass : public PassInfoMixin { + /// Helper to remember if the module contains OpenMP (runtime calls). 
+ omp::OpenMPInModule OMPInModule; + +public: + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_OPENMP_OPT_H diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index a79a523..88f433a 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -134,6 +134,7 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeSimpleInlinerPass(R); initializePruneEHPass(R); initializeGlobalDCELegacyPassPass(R); + initializeOpenMPOptLegacyPassPass(R); initializeArgPromotionPass(R); initializeJumpThreadingPass(R); initializeSROALegacyPassPass(R); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 8dc1468..2ab0445 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -87,6 +87,7 @@ #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/MergeFunctions.h" +#include "llvm/Transforms/IPO/OpenMPOpt.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/SampleProfile.h" @@ -837,6 +838,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, if (Level == OptimizationLevel::O3) MainCGPipeline.addPass(ArgumentPromotionPass()); + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) + MainCGPipeline.addPass(OpenMPOptPass()); + // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. 
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 6bffe1a..83ed93d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -109,6 +109,7 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass()) CGSCC_PASS("invalidate", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) CGSCC_PASS("inline", InlinerPass()) +CGSCC_PASS("openmpopt", OpenMPOptPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 375d2fb..034450f 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_component_library(LLVMipo LoopExtractor.cpp LowerTypeTests.cpp MergeFunctions.cpp + OpenMPOpt.cpp PartialInlining.cpp PassManagerBuilder.cpp PruneEH.cpp diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index 8a15800..3b6038e 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -23,6 +23,7 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { + initializeOpenMPOptLegacyPassPass(Registry); initializeArgPromotionPass(Registry); initializeCalledValuePropagationLegacyPassPass(Registry); initializeConstantMergeLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/IPO/LLVMBuild.txt b/llvm/lib/Transforms/IPO/LLVMBuild.txt index 14aa4e4..ea207e8 100644 --- a/llvm/lib/Transforms/IPO/LLVMBuild.txt +++ b/llvm/lib/Transforms/IPO/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core FrontendOpenMP 
InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp new file mode 100644 index 0000000..1822b5e --- /dev/null +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -0,0 +1,419 @@ +//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// OpenMP specific optimizations: +// +// - Deduplication of runtime calls, e.g., omp_get_thread_num. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/OpenMPOpt.h" + +#include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/IR/CallSite.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/CallGraphUpdater.h" + +using namespace llvm; +using namespace omp; +using namespace types; + +#define DEBUG_TYPE "openmp-opt" + +static cl::opt DisableOpenMPOptimizations( + "openmp-opt-disable", cl::ZeroOrMore, + cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, + cl::init(false)); + +STATISTIC(NumOpenMPRuntimeCallsDeduplicated, + "Number of OpenMP runtime calls deduplicated"); +STATISTIC(NumOpenMPRuntimeFunctionsIdentified, + "Number of OpenMP runtime functions identified"); +STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, + "Number of OpenMP runtime function uses identified"); + +static constexpr auto TAG = "[" DEBUG_TYPE "]"; + +namespace { +struct OpenMPOpt { + + 
OpenMPOpt(SmallPtrSetImpl &SCC, + SmallPtrSetImpl &ModuleSlice, + CallGraphUpdater &CGUpdater) + : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice), + CGUpdater(CGUpdater) { + initializeTypes(M); + initializeRuntimeFunctions(); + } + + /// Generic information that describes a runtime function + struct RuntimeFunctionInfo { + /// The kind, as described by the RuntimeFunction enum. + RuntimeFunction Kind; + + /// The name of the function. + StringRef Name; + + /// Flag to indicate a variadic function. + bool IsVarArg; + + /// The return type of the function. + Type *ReturnType; + + /// The argument types of the function. + SmallVector ArgumentTypes; + + /// The declaration if available. + Function *Declaration; + + /// Uses of this runtime function per function containing the use. + DenseMap> UsesMap; + + /// Return the number of arguments (or the minimal number for variadic + /// functions). + size_t getNumArgs() const { return ArgumentTypes.size(); } + + /// Run the callback \p CB on each use and forget the use if the result is + /// true. The callback will be fed the function in which the use was + /// encountered as second argument. + void foreachUse(function_ref CB) { + SmallVector ToBeDeleted; + for (auto &It : UsesMap) { + ToBeDeleted.clear(); + for (Use *U : It.second) + if (CB(*U, *It.first)) + ToBeDeleted.push_back(U); + for (Use *U : ToBeDeleted) + It.second.erase(U); + } + } + }; + + /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. + bool run() { + bool Changed = false; + + LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() + << " functions in a slice with " << ModuleSlice.size() + << " functions\n"); + + Changed |= deduplicateRuntimeCalls(); + + return Changed; + } + +private: + /// Try to eliminiate runtime calls by reusing existing ones. 
+ bool deduplicateRuntimeCalls() { + bool Changed = false; + + SmallSetVector<Value *, 16> GTIdArgs; + collectGlobalThreadIdArguments(GTIdArgs); + LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() + << " global thread ID arguments\n"); + + for (Function *F : SCC) { + Value *GTIdArg = nullptr; + for (Argument &Arg : F->args()) + if (GTIdArgs.count(&Arg)) { + GTIdArg = &Arg; + break; + } + Changed |= deduplicateRuntimeCalls( + *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); + Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]); + } + + return Changed; + } + + /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or + /// \p ReplVal if given. + bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, + Value *ReplVal = nullptr) { + auto &Uses = RFI.UsesMap[&F]; + if (Uses.size() + (ReplVal != nullptr) < 2) + return false; + + LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of " + << RFI.Name + << (ReplVal ? " with an existing value\n" : "\n") + << "\n"); + assert(!ReplVal || (isa<Argument>(ReplVal) && + cast<Argument>(ReplVal)->getParent() == &F) && + "Unexpected replacement value!"); + if (!ReplVal) { + for (Use *U : Uses) + if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); + ReplVal = CI; + break; + } + if (!ReplVal) + return false; + } + + bool Changed = false; + auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || CI == ReplVal || &F != &Caller) + return false; + assert(CI->getCaller() == &F && "Unexpected call!"); + CGUpdater.removeCallSite(*CI); + CI->replaceAllUsesWith(ReplVal); + CI->eraseFromParent(); + ++NumOpenMPRuntimeCallsDeduplicated; + Changed = true; + return true; + }; + RFI.foreachUse(ReplaceAndDeleteCB); + + return Changed; + } + + /// Collect arguments that represent the global thread id in \p GTIdArgs. 
+ void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) { + // TODO: Below we basically perform a fixpoint iteration with a pessimistic + // initialization. We could define an AbstractAttribute instead and + // run the Attributor here once it can be run as an SCC pass. + + // Helper to check the argument \p ArgNo at all call sites of \p F for + // a GTId. + auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { + if (!F.hasLocalLinkage()) + return false; + for (Use &U : F.uses()) { + if (CallInst *CI = getCallIfRegularCall(U)) { + Value *ArgOp = CI->getArgOperand(ArgNo); + if (CI == &RefCI || GTIdArgs.count(ArgOp) || + getCallIfRegularCall(*ArgOp, + &RFIs[OMPRTL___kmpc_global_thread_num])) + continue; + } + return false; + } + return true; + }; + + // Helper to identify uses of a GTId as GTId arguments. + auto AddUserArgs = [&](Value &GTId) { + for (Use &U : GTId.uses()) + if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) + if (CI->isArgOperand(&U)) + if (Function *Callee = CI->getCalledFunction()) + if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) + GTIdArgs.insert(Callee->getArg(U.getOperandNo())); + }; + + // The argument users of __kmpc_global_thread_num calls are GTIds. + RuntimeFunctionInfo &GlobThreadNumRFI = + RFIs[OMPRTL___kmpc_global_thread_num]; + for (auto &It : GlobThreadNumRFI.UsesMap) + for (Use *U : It.second) + if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI)) + AddUserArgs(*CI); + + // Transitively search for more arguments by looking at the users of the + // ones we know already. During the search the GTIdArgs vector is extended + // so we cannot cache the size nor can we use a range based for. + for (unsigned u = 0; u < GTIdArgs.size(); ++u) + AddUserArgs(*GTIdArgs[u]); + } + + /// Return the call if \p U is a callee use in a regular call. If \p RFI is + /// given it has to be the callee or a nullptr is returned. 
+ CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) { + CallInst *CI = dyn_cast<CallInst>(U.getUser()); + if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; + } + + /// Return the call if \p V is a regular call. If \p RFI is given it has to be + /// the callee or a nullptr is returned. + CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) { + CallInst *CI = dyn_cast<CallInst>(&V); + if (CI && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; + } + + /// Helper to initialize all runtime function information for those defined in + /// OMPKinds.def. + void initializeRuntimeFunctions() { + // Helper to collect all uses of the declaration in the UsesMap. + auto CollectUses = [&](RuntimeFunctionInfo &RFI) { + unsigned NumUses = 0; + if (!RFI.Declaration) + return NumUses; + + NumOpenMPRuntimeFunctionsIdentified += 1; + NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); + + // TODO: We directly convert uses into proper calls and unknown uses. + for (Use &U : RFI.Declaration->uses()) { + if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { + if (ModuleSlice.count(UserI->getFunction())) { + RFI.UsesMap[UserI->getFunction()].insert(&U); + ++NumUses; + } + } else { + RFI.UsesMap[nullptr].insert(&U); + ++NumUses; + } + } + return NumUses; + }; + +#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ + { \ + auto &RFI = RFIs[_Enum]; \ + RFI.Kind = _Enum; \ + RFI.Name = _Name; \ + RFI.IsVarArg = _IsVarArg; \ + RFI.ReturnType = _ReturnType; \ + RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__}); \ + RFI.Declaration = M.getFunction(_Name); \ + unsigned NumUses = CollectUses(RFI); \ + (void)NumUses; \ + LLVM_DEBUG({ \ + dbgs() << TAG << RFI.Name << (RFI.Declaration ? 
"" : " not") \ + << " found\n"; \ + if (RFI.Declaration) \ + dbgs() << TAG << "-> got " << NumUses << " uses in " \ + << RFI.UsesMap.size() << " different functions.\n"; \ + }); \ + } +#include "llvm/Frontend/OpenMP/OMPKinds.def" + + // TODO: We should validate the declaration against the types we expect. + // TODO: We should attach the attributes defined in OMPKinds.def. + } + + /// The underlying module. + Module &M; + + /// The SCC we are operating on. + SmallPtrSetImpl<Function *> &SCC; + + /// The slice of the module we are allowed to look at. + SmallPtrSetImpl<Function *> &ModuleSlice; + + /// Callback to update the call graph, the first argument is a removed call, + /// the second an optional replacement call. + CallGraphUpdater &CGUpdater; + + /// Map from runtime function kind to the runtime function description. + EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, + RuntimeFunction::OMPRTL___last> + RFIs; +}; +} // namespace + +PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR) { + if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) + return PreservedAnalyses::all(); + + if (DisableOpenMPOptimizations) + return PreservedAnalyses::all(); + + SmallPtrSet<Function *, 16> SCC; + for (LazyCallGraph::Node &N : C) + SCC.insert(&N.getFunction()); + + if (SCC.empty()) + return PreservedAnalyses::all(); + + CallGraphUpdater CGUpdater; + CGUpdater.initialize(CG, C, AM, UR); + // TODO: Compute the module slice we are allowed to look at. 
+ OpenMPOpt OMPOpt(SCC, SCC, CGUpdater); + bool Changed = OMPOpt.run(); + (void)Changed; + return PreservedAnalyses::all(); +} + +namespace { + +struct OpenMPOptLegacyPass : public CallGraphSCCPass { + CallGraphUpdater CGUpdater; + OpenMPInModule OMPInModule; + static char ID; + + OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { + initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + CallGraphSCCPass::getAnalysisUsage(AU); + } + + bool doInitialization(CallGraph &CG) override { + // Disable the pass if there is no OpenMP (runtime call) in the module. + containsOpenMP(CG.getModule(), OMPInModule); + return false; + } + + bool runOnSCC(CallGraphSCC &CGSCC) override { + if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) + return false; + if (DisableOpenMPOptimizations || skipSCC(CGSCC)) + return false; + + SmallPtrSet SCC; + for (CallGraphNode *CGN : CGSCC) + if (Function *Fn = CGN->getFunction()) + if (!Fn->isDeclaration()) + SCC.insert(Fn); + + if (SCC.empty()) + return false; + + CallGraph &CG = getAnalysis().getCallGraph(); + CGUpdater.initialize(CG, CGSCC); + + // TODO: Compute the module slice we are allowed to look at. + OpenMPOpt OMPOpt(SCC, SCC, CGUpdater); + return OMPOpt.run(); + } + + bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } +}; + +} // end anonymous namespace + +bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { + if (OMPInModule.isKnown()) + return OMPInModule; + +#define OMP_RTL(_Enum, _Name, ...) 
\ + if (M.getFunction(_Name)) \ + return OMPInModule = true; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + return OMPInModule = false; +} + +char OpenMPOptLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", + "OpenMP specific optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", + "OpenMP specific optimizations", false, false) + +Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 7cfc29f..86ea65f 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -599,6 +599,11 @@ void PassManagerBuilder::populateModulePassManager( RunInliner = true; } + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (OptLevel > 1) + MPM.add(createOpenMPOptLegacyPass()); + MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -930,6 +935,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // CSFDO instrumentation and use pass. addPGOInstrPasses(PM, /* IsCS */ true); + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (OptLevel > 1) + PM.add(createOpenMPOptLegacyPass()); + // Optimize globals again if we ran the inliner. 
if (RunInliner) PM.add(createGlobalOptimizerPass()); diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 1dc96ef..a386272 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -142,6 +142,7 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 48d59dd..e44f206 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -107,6 +107,7 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. 
; CHECK-O-NEXT: Running pass: SROA diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 37ffab4..2e42119 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -59,6 +59,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 3deea84..5796358 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -62,6 +62,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars ; CHECK-NEXT: FunctionPass Manager diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index 59b0720..8417223 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -59,6 +59,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index 6853fd9..9be9823 100644 --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -46,6 +46,7 @@ ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info ; CHECK-O2-NEXT: Function Integration/Inlining +; CHECK-O2-NEXT: OpenMP specific 
optimizations ; CHECK-O2-NEXT: Deduce function attributes ; Next up is the main function pass pipeline. It shouldn't be split up and ; should contain the main loop pass pipeline as well. diff --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll new file mode 100644 index 0000000..93a72ca --- /dev/null +++ b/llvm/test/Transforms/OpenMP/gtid.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; RUN: opt -openmpopt -S < %s | FileCheck %s +; RUN: opt -passes=openmpopt -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 + +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare void @useI32(i32) + +define void @external(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@external +; CHECK-SAME: (i1 [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] +; CHECK: t: +; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: e: +; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @internal(i32 0, i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %t, label %e +t: + %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 %c0, i32 %c0) + call void @useI32(i32 %c0) + br 
label %m +e: + %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 %c1, i32 %c1) + call void @useI32(i32 %c1) + br label %m +m: + %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 0, i32 %c2) + call void @useI32(i32 %c2) + ret void +} + +define internal void @internal(i32 %not_gtid, i32 %gtid) { +; CHECK-LABEL: define {{[^@]+}}@internal +; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]] +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] +; CHECK: t: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: call void @external(i1 [[C]]) +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: e: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: ret void +; +entry: + %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c = icmp eq i32 %cc, %gtid + br i1 %c, label %t, label %e +t: + %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c0) + call void @external(i1 %c) + br label %m +e: + %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c1) + br label %m +m: + %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c2) + ret void +} -- 2.7.4