[OpenMP] Introduce the OpenMPOpt transformation pass
author Johannes Doerfert <johannes@jdoerfert.de>
Thu, 7 Nov 2019 05:20:06 +0000 (23:20 -0600)
committer Johannes Doerfert <johannes@jdoerfert.de>
Sat, 8 Feb 2020 20:47:03 +0000 (14:47 -0600)
The OpenMPOpt pass is a CGSCC pass in which OpenMP specific
optimizations can reside.

The OpenMPOpt pass uses the OMPKinds.def file to identify runtime
calls and their uses. This allows targeted transformations and eases
their implementation.

This initial patch deduplicates `__kmpc_global_thread_num` and
`omp_get_thread_num` calls. We can also identify arguments that are
equivalent to such a call result and use it instead. Later we can
determine "gtid" arguments based on the use in kernel functions etc.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D69930

20 files changed:
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
llvm/include/llvm/InitializePasses.h
llvm/include/llvm/LinkAllPasses.h
llvm/include/llvm/Transforms/IPO.h
llvm/include/llvm/Transforms/IPO/OpenMPOpt.h [new file with mode: 0644]
llvm/lib/LTO/LTOCodeGenerator.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassRegistry.def
llvm/lib/Transforms/IPO/CMakeLists.txt
llvm/lib/Transforms/IPO/IPO.cpp
llvm/lib/Transforms/IPO/LLVMBuild.txt
llvm/lib/Transforms/IPO/OpenMPOpt.cpp [new file with mode: 0644]
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/test/Other/new-pm-defaults.ll
llvm/test/Other/new-pm-thinlto-defaults.ll
llvm/test/Other/opt-O2-pipeline.ll
llvm/test/Other/opt-O3-pipeline.ll
llvm/test/Other/opt-Os-pipeline.ll
llvm/test/Other/pass-pipelines.ll
llvm/test/Transforms/OpenMP/gtid.ll [new file with mode: 0644]

index 7fc2dbf1466496e6f55fd3bc87c5252b6555f6bd..8e016760dd675679491f8a5d22633809df7b0d8e 100644 (file)
@@ -177,6 +177,8 @@ __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
 
 __OMP_RTL(omp_get_thread_num, false, Int32, )
 
+__OMP_RTL(__last, false, Void, )
+
 #undef __OMP_RTL
 #undef OMP_RTL
 
index a5e1310e28b98e75ee107bc3a8af0cf6f0a82e9a..a8245d8823cd40942bf59b6929f75046e4b2c0de 100644 (file)
@@ -71,6 +71,7 @@ void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
 void initializeAliasSetPrinterPass(PassRegistry&);
 void initializeAlignmentFromAssumptionsPass(PassRegistry&);
 void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
+void initializeOpenMPOptLegacyPassPass(PassRegistry &);
 void initializeArgPromotionPass(PassRegistry&);
 void initializeAssumptionCacheTrackerPass(PassRegistry&);
 void initializeAtomicExpandPass(PassRegistry&);
index aa64296f9428e44592563797b6b22c6f9ac1f7ac..5b3bf3e66a439a5aa9846cfb10887b1e86f08784 100644 (file)
@@ -71,6 +71,7 @@ namespace {
       (void) llvm::createAggressiveDCEPass();
       (void) llvm::createAggressiveInstCombinerPass();
       (void) llvm::createBitTrackingDCEPass();
+      (void) llvm::createOpenMPOptLegacyPass();
       (void) llvm::createArgumentPromotionPass();
       (void) llvm::createAlignmentFromAssumptionsPass();
       (void) llvm::createBasicAAWrapperPass();
index ee411a10740aee4727770e02a07cd5f5a7731918..16ce814000195c443713b22d176ddb382237f6db 100644 (file)
@@ -152,6 +152,10 @@ ModulePass *createDeadArgHackingPass();
 ///
 Pass *createArgumentPromotionPass(unsigned maxElements = 3);
 
+//===----------------------------------------------------------------------===//
+/// createOpenMPOptLegacyPass - OpenMP specific optimizations.
+Pass *createOpenMPOptLegacyPass();
+
 //===----------------------------------------------------------------------===//
 /// createIPConstantPropagationPass - This pass propagates constants from call
 /// sites into the bodies of functions.
diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
new file mode 100644 (file)
index 0000000..0bd81ea
--- /dev/null
@@ -0,0 +1,54 @@
+//===- IPO/OpenMPOpt.h - Collection of OpenMP optimizations -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
+#define LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+namespace omp {
+
+/// Tri-state flag remembering whether a module contains OpenMP (runtime calls).
+/// It starts out as "unknown" and becomes "found" or "not found" once a bool is
+/// assigned; meant to be used foremost with containsOpenMP.
+struct OpenMPInModule {
+  /// Record the outcome of a search: true means OpenMP was found.
+  OpenMPInModule &operator=(bool Found) {
+    Value = Found ? OpenMP::FOUND : OpenMP::NOT_FOUND;
+    return *this;
+  }
+
+  /// Return true once a search result has been recorded.
+  bool isKnown() const { return Value != OpenMP::UNKNOWN; }
+
+  /// Return false only if OpenMP is known to be absent; the unknown state
+  /// conservatively converts to true.
+  operator bool() const { return Value != OpenMP::NOT_FOUND; }
+
+private:
+  enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
+};
+
+/// Helper to determine if \p M contains OpenMP (runtime calls).
+bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
+
+} // namespace omp
+
+/// CGSCC pass (new pass manager) for OpenMP specific optimizations.
+class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
+public:
+  /// Run the OpenMP optimizations on the SCC \p C.
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+private:
+  /// Cached answer to "does the module contain OpenMP (runtime calls)?".
+  omp::OpenMPInModule OMPInModule;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
index a79a52311c2740b6741006bb92c12bae015d62fa..88f433a9e03525f3654eaebf97e869982f3cb78a 100644 (file)
@@ -134,6 +134,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
   initializeSimpleInlinerPass(R);
   initializePruneEHPass(R);
   initializeGlobalDCELegacyPassPass(R);
+  initializeOpenMPOptLegacyPassPass(R);
   initializeArgPromotionPass(R);
   initializeJumpThreadingPass(R);
   initializeSROALegacyPassPass(R);
index 8dc14680e6badd7906cc1e75905b98fbc90c7c2b..2ab0445bc6de0ffedb4cebad40fc12bec77e9618 100644 (file)
@@ -87,6 +87,7 @@
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
 #include "llvm/Transforms/IPO/PartialInlining.h"
 #include "llvm/Transforms/IPO/SCCP.h"
 #include "llvm/Transforms/IPO/SampleProfile.h"
@@ -837,6 +838,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   if (Level == OptimizationLevel::O3)
     MainCGPipeline.addPass(ArgumentPromotionPass());
 
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+    MainCGPipeline.addPass(OpenMPOptPass());
+
   // Lastly, add the core function simplification pipeline nested inside the
   // CGSCC walk.
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
index 6bffe1a5b5c775b7a8e696b2bccfebf4e1ab5cfc..83ed93dd4718fc4cd1ebbf36e8fe3291f69dfa21 100644 (file)
@@ -109,6 +109,7 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass())
 CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
 CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
 CGSCC_PASS("inline", InlinerPass())
+CGSCC_PASS("openmpopt", OpenMPOptPass())
 CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
 #undef CGSCC_PASS
 
index 375d2fb3063be8ed81aea0949bc7dc6c76a4f963..034450f440ba52374abf9c9dfec69712fe9071d4 100644 (file)
@@ -26,6 +26,7 @@ add_llvm_component_library(LLVMipo
   LoopExtractor.cpp
   LowerTypeTests.cpp
   MergeFunctions.cpp
+  OpenMPOpt.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
   PruneEH.cpp
index 8a15800cbdb5bcf4ac38a8178844e2de32baeba7..3b6038e830712926eda0f2cbda626a69c25ec7f9 100644 (file)
@@ -23,6 +23,7 @@
 using namespace llvm;
 
 void llvm::initializeIPO(PassRegistry &Registry) {
+  initializeOpenMPOptLegacyPassPass(Registry);
   initializeArgPromotionPass(Registry);
   initializeCalledValuePropagationLegacyPassPass(Registry);
   initializeConstantMergeLegacyPassPass(Registry);
index 14aa4e4ba3725161a179396de9c0ecdd22ece94e..ea207e81fce88fca0ef9fb1ed0883a7cb94f7d3a 100644 (file)
@@ -19,4 +19,4 @@ type = Library
 name = IPO
 parent = Transforms
 library_name = ipo
-required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core FrontendOpenMP InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
new file mode 100644 (file)
index 0000000..1822b5e
--- /dev/null
@@ -0,0 +1,419 @@
+//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OpenMP specific optimizations:
+//
+// - Deduplication of runtime calls, e.g., omp_get_thread_num.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+
+#include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+
+using namespace llvm;
+using namespace omp;
+using namespace types;
+
+#define DEBUG_TYPE "openmp-opt"
+
+// Hidden command line switch ("openmp-opt-disable") that turns the OpenMP
+// specific optimizations off; it defaults to false, i.e., they are enabled.
+static cl::opt<bool> DisableOpenMPOptimizations(
+    "openmp-opt-disable", cl::ZeroOrMore,
+    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
+    cl::init(false));
+
+// Statistics collected by this pass.
+STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
+          "Number of OpenMP runtime calls deduplicated");
+STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
+          "Number of OpenMP runtime functions identified");
+STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
+          "Number of OpenMP runtime function uses identified");
+
+// Prefix put in front of the debug messages emitted by this pass.
+static constexpr auto TAG = "[" DEBUG_TYPE "]";
+
+namespace {
+struct OpenMPOpt {
+
+  /// Set up the optimization state for \p SCC within \p ModuleSlice. The module
+  /// is taken from the first function in \p SCC, so \p SCC must not be empty.
+  OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
+            SmallPtrSetImpl<Function *> &ModuleSlice,
+            CallGraphUpdater &CGUpdater)
+      : M(*(**SCC.begin()).getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
+        CGUpdater(CGUpdater) {
+    // The types have to be set up before the runtime function descriptions
+    // are filled in.
+    initializeTypes(M);
+    initializeRuntimeFunctions();
+  }
+
+  /// Generic information that describes a runtime function
+  struct RuntimeFunctionInfo {
+    /// The kind, as described by the RuntimeFunction enum.
+    RuntimeFunction Kind;
+
+    /// The name of the function.
+    StringRef Name;
+
+    /// Flag to indicate a variadic function.
+    bool IsVarArg;
+
+    /// The return type of the function.
+    Type *ReturnType;
+
+    /// The argument types of the function.
+    SmallVector<Type *, 8> ArgumentTypes;
+
+    /// The declaration if available.
+    Function *Declaration;
+
+    /// Uses of this runtime function per function containing the use.
+    DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;
+
+    /// Return the number of arguments (or the minimal number for variadic
+    /// functions).
+    size_t getNumArgs() const { return ArgumentTypes.size(); }
+
+    /// Run the callback \p CB on each use and forget the use if the result is
+    /// true. The callback will be fed the function in which the use was
+    /// encountered as second argument.
+    void foreachUse(function_ref<bool(Use &, Function &)> CB) {
+      SmallVector<Use *, 8> ToBeDeleted;
+      for (auto &It : UsesMap) {
+        ToBeDeleted.clear();
+        for (Use *U : It.second)
+          if (CB(*U, *It.first))
+            ToBeDeleted.push_back(U);
+        for (Use *U : ToBeDeleted)
+          It.second.erase(U);
+      }
+    }
+  };
+
+  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
+  bool run() {
+    bool Changed = false;
+
+    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
+                      << " functions in a slice with " << ModuleSlice.size()
+                      << " functions\n");
+
+    Changed |= deduplicateRuntimeCalls();
+
+    return Changed;
+  }
+
+private:
+  /// Try to eliminiate runtime calls by reusing existing ones.
+  bool deduplicateRuntimeCalls() {
+    bool Changed = false;
+
+    SmallSetVector<Value *, 16> GTIdArgs;
+    collectGlobalThreadIdArguments(GTIdArgs);
+    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
+                      << " global thread ID arguments\n");
+
+    for (Function *F : SCC) {
+      Value *GTIdArg = nullptr;
+      for (Argument &Arg : F->args())
+        if (GTIdArgs.count(&Arg)) {
+          GTIdArg = &Arg;
+          break;
+        }
+      Changed |= deduplicateRuntimeCalls(
+          *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
+    }
+
+    return Changed;
+  }
+
+  /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or
+  /// \p ReplVal if given.
+  bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
+                               Value *ReplVal = nullptr) {
+    auto &Uses = RFI.UsesMap[&F];
+    if (Uses.size() + (ReplVal != nullptr) < 2)
+      return false;
+
+    LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of "
+                      << RFI.Name
+                      << (ReplVal ? " with an existing value\n" : "\n")
+                      << "\n");
+    assert(!ReplVal || (isa<Argument>(ReplVal) &&
+                        cast<Argument>(ReplVal)->getParent() == &F) &&
+                           "Unexpected replacement value!");
+    if (!ReplVal) {
+      for (Use *U : Uses)
+        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
+          ReplVal = CI;
+          break;
+        }
+      if (!ReplVal)
+        return false;
+    }
+
+    bool Changed = false;
+    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
+      CallInst *CI = getCallIfRegularCall(U, &RFI);
+      if (!CI || CI == ReplVal || &F != &Caller)
+        return false;
+      assert(CI->getCaller() == &F && "Unexpected call!");
+      CGUpdater.removeCallSite(*CI);
+      CI->replaceAllUsesWith(ReplVal);
+      CI->eraseFromParent();
+      ++NumOpenMPRuntimeCallsDeduplicated;
+      Changed = true;
+      return true;
+    };
+    RFI.foreachUse(ReplaceAndDeleteCB);
+
+    return Changed;
+  }
+
+  /// Collect arguments that represent the global thread id in \p GTIdArgs.
+  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
+    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
+    //       initialization. We could define an AbstractAttribute instead and
+    //       run the Attributor here once it can be run as an SCC pass.
+
+    // Helper to check the argument \p ArgNo at all call sites of \p F for
+    // a GTId.
+    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
+      if (!F.hasLocalLinkage())
+        return false;
+      for (Use &U : F.uses()) {
+        if (CallInst *CI = getCallIfRegularCall(U)) {
+          Value *ArgOp = CI->getArgOperand(ArgNo);
+          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
+              getCallIfRegularCall(*ArgOp,
+                                   &RFIs[OMPRTL___kmpc_global_thread_num]))
+            continue;
+        }
+        return false;
+      }
+      return true;
+    };
+
+    // Helper to identify uses of a GTId as GTId arguments.
+    auto AddUserArgs = [&](Value &GTId) {
+      for (Use &U : GTId.uses())
+        if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
+          if (CI->isArgOperand(&U))
+            if (Function *Callee = CI->getCalledFunction())
+              if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
+                GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
+    };
+
+    // The argument users of __kmpc_global_thread_num calls are GTIds.
+    RuntimeFunctionInfo &GlobThreadNumRFI =
+        RFIs[OMPRTL___kmpc_global_thread_num];
+    for (auto &It : GlobThreadNumRFI.UsesMap)
+      for (Use *U : It.second)
+        if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI))
+          AddUserArgs(*CI);
+
+    // Transitively search for more arguments by looking at the users of the
+    // ones we know already. During the search the GTIdArgs vector is extended
+    // so we cannot cache the size nor can we use a range based for.
+    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
+      AddUserArgs(*GTIdArgs[u]);
+  }
+
+  /// Return the call if \p U is a callee use in a regular call, that is a call
+  /// without operand bundles. If \p RFI is given, its declaration has to be
+  /// the callee or a nullptr is returned.
+  CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
+    CallInst *CI = dyn_cast<CallInst>(U.getUser());
+    if (!CI || !CI->isCallee(&U) || CI->hasOperandBundles())
+      return nullptr;
+    // A runtime function without a declaration is never the callee. This is
+    // checked explicitly because an indirect call has no called function
+    // either and would otherwise compare equal to the missing declaration.
+    if (RFI &&
+        (!RFI->Declaration || CI->getCalledFunction() != RFI->Declaration))
+      return nullptr;
+    return CI;
+  }
+
+  /// Return the call if \p V is a regular call, that is a call without operand
+  /// bundles. If \p RFI is given, its declaration has to be the callee or a
+  /// nullptr is returned.
+  CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
+    CallInst *CI = dyn_cast<CallInst>(&V);
+    if (!CI || CI->hasOperandBundles())
+      return nullptr;
+    // A runtime function without a declaration is never the callee. This is
+    // checked explicitly because an indirect call has no called function
+    // either and would otherwise compare equal to the missing declaration.
+    if (RFI &&
+        (!RFI->Declaration || CI->getCalledFunction() != RFI->Declaration))
+      return nullptr;
+    return CI;
+  }
+
+  /// Helper to initialize all runtime function information for those defined in
+  /// OMPKinds.def. For each runtime function the description in RFIs is filled
+  /// in, the declaration is looked up by name in the module, and the uses of
+  /// the declaration are collected.
+  void initializeRuntimeFunctions() {
+    // Helper to collect all uses of the declaration in the UsesMap. Returns
+    // the number of uses that were recorded.
+    auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
+      unsigned NumUses = 0;
+      if (!RFI.Declaration)
+        return NumUses;
+
+      NumOpenMPRuntimeFunctionsIdentified += 1;
+      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
+
+      // TODO: We directly convert uses into proper calls and unknown uses.
+      for (Use &U : RFI.Declaration->uses()) {
+        if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
+          // Uses in functions outside of the module slice are ignored.
+          if (ModuleSlice.count(UserI->getFunction())) {
+            RFI.UsesMap[UserI->getFunction()].insert(&U);
+            ++NumUses;
+          }
+        } else {
+          // Uses by non-instructions are recorded under the nullptr key.
+          RFI.UsesMap[nullptr].insert(&U);
+          ++NumUses;
+        }
+      }
+      return NumUses;
+    };
+
+    // The macro below is expanded once for every runtime function listed in
+    // OMPKinds.def.
+#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
+  {                                                                            \
+    auto &RFI = RFIs[_Enum];                                                   \
+    RFI.Kind = _Enum;                                                          \
+    RFI.Name = _Name;                                                          \
+    RFI.IsVarArg = _IsVarArg;                                                  \
+    RFI.ReturnType = _ReturnType;                                              \
+    RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__});                 \
+    RFI.Declaration = M.getFunction(_Name);                                    \
+    unsigned NumUses = CollectUses(RFI);                                       \
+    (void)NumUses;                                                             \
+    LLVM_DEBUG({                                                               \
+      dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")             \
+             << " found\n";                                                    \
+      if (RFI.Declaration)                                                     \
+        dbgs() << TAG << "-> got " << NumUses << " uses in "                   \
+               << RFI.UsesMap.size() << " different functions.\n";             \
+    });                                                                        \
+  }
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+    // TODO: We should validate the declaration against the types we expect.
+    // TODO: We should attach the attributes defined in OMPKinds.def.
+  }
+
+  /// The underlying module.
+  Module &M;
+
+  /// The SCC we are operating on.
+  SmallPtrSetImpl<Function *> &SCC;
+
+  /// The slice of the module we are allowed to look at.
+  SmallPtrSetImpl<Function *> &ModuleSlice;
+
+  /// Helper used to keep the call graph up to date, e.g., it is informed when
+  /// a call site is removed.
+  CallGraphUpdater &CGUpdater;
+
+  /// Map from runtime function kind to the runtime function description.
+  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
+                  RuntimeFunction::OMPRTL___last>
+      RFIs;
+};
+} // namespace
+
+PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
+                                     CGSCCAnalysisManager &AM,
+                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+  if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
+    return PreservedAnalyses::all();
+
+  if (DisableOpenMPOptimizations)
+    return PreservedAnalyses::all();
+
+  SmallPtrSet<Function *, 16> SCC;
+  for (LazyCallGraph::Node &N : C)
+    SCC.insert(&N.getFunction());
+
+  if (SCC.empty())
+    return PreservedAnalyses::all();
+
+  CallGraphUpdater CGUpdater;
+  CGUpdater.initialize(CG, C, AM, UR);
+  // TODO: Compute the module slice we are allowed to look at.
+  OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
+  bool Changed = OMPOpt.run();
+  (void)Changed;
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct OpenMPOptLegacyPass : public CallGraphSCCPass {
+  CallGraphUpdater CGUpdater;
+  OpenMPInModule OMPInModule;
+  static char ID;
+
+  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
+    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+
+  bool doInitialization(CallGraph &CG) override {
+    // Disable the pass if there is no OpenMP (runtime call) in the module.
+    containsOpenMP(CG.getModule(), OMPInModule);
+    return false;
+  }
+
+  bool runOnSCC(CallGraphSCC &CGSCC) override {
+    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
+      return false;
+    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
+      return false;
+
+    SmallPtrSet<Function *, 16> SCC;
+    for (CallGraphNode *CGN : CGSCC)
+      if (Function *Fn = CGN->getFunction())
+        if (!Fn->isDeclaration())
+          SCC.insert(Fn);
+
+    if (SCC.empty())
+      return false;
+
+    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+    CGUpdater.initialize(CG, CGSCC);
+
+    // TODO: Compute the module slice we are allowed to look at.
+    OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
+    return OMPOpt.run();
+  }
+
+  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
+};
+
+} // end anonymous namespace
+
+bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
+  if (OMPInModule.isKnown())
+    return OMPInModule;
+
+#define OMP_RTL(_Enum, _Name, ...)                                             \
+  if (M.getFunction(_Name))                                                    \
+    return OMPInModule = true;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+  return OMPInModule = false;
+}
+
+// Definition of the static pass ID member; it is handed to the
+// CallGraphSCCPass base class in the constructor.
+char OpenMPOptLegacyPass::ID = 0;
+
+// Register the legacy pass under the name "openmpopt" and declare its
+// dependency on the CallGraphWrapperPass.
+INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
+                      "OpenMP specific optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
+                    "OpenMP specific optimizations", false, false)
+
+/// Create a new instance of the legacy OpenMP optimization pass.
+Pass *llvm::createOpenMPOptLegacyPass() {
+  return new OpenMPOptLegacyPass();
+}
index 7cfc29f7bf7a752be9887e114f47b1531564fca1..86ea65fe8e30f7da12b8321d6151a5d0b99d064a 100644 (file)
@@ -599,6 +599,11 @@ void PassManagerBuilder::populateModulePassManager(
     RunInliner = true;
   }
 
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (OptLevel > 1)
+    MPM.add(createOpenMPOptLegacyPass());
+
   MPM.add(createPostOrderFunctionAttrsLegacyPass());
   if (OptLevel > 2)
     MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -930,6 +935,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // CSFDO instrumentation and use pass.
   addPGOInstrPasses(PM, /* IsCS */ true);
 
+  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+  // there are no OpenMP runtime calls present in the module.
+  if (OptLevel > 1)
+    PM.add(createOpenMPOptLegacyPass());
+
   // Optimize globals again if we ran the inliner.
   if (RunInliner)
     PM.add(createGlobalOptimizerPass());
index 1dc96ef3a14c8657c89df06a475178621c40a0a8..a386272428d86b47cc7e6ae9a81d08454179993e 100644 (file)
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: SROA
index 48d59dd6aa773badc49b16aeae0506c01d9ddf65..e44f206e299d2df9f48d4a0d28ef64e49af93242 100644 (file)
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: SROA
index 37ffab4bee5c1b4409c0497476e620583bba58c7..2e42119c6c526ac8bd68217b57079267d99245d3 100644 (file)
@@ -59,6 +59,7 @@
 ; CHECK-NEXT:     Call Graph SCC Pass Manager
 ; CHECK-NEXT:       Remove unused exception handling info
 ; CHECK-NEXT:       Function Integration/Inlining
+; CHECK-NEXT:       OpenMP specific optimizations
 ; CHECK-NEXT:       Deduce function attributes
 ; CHECK-NEXT:       FunctionPass Manager
 ; CHECK-NEXT:         Dominator Tree Construction
index 3deea84eb069298a08f92b39ba6635574cad8122..57963580be541d5f7b562a2d82293e0a38e4f476 100644 (file)
@@ -62,6 +62,7 @@
 ; CHECK-NEXT:     Call Graph SCC Pass Manager
 ; CHECK-NEXT:       Remove unused exception handling info
 ; CHECK-NEXT:       Function Integration/Inlining
+; CHECK-NEXT:       OpenMP specific optimizations
 ; CHECK-NEXT:       Deduce function attributes
 ; CHECK-NEXT:       Promote 'by reference' arguments to scalars
 ; CHECK-NEXT:       FunctionPass Manager
index 59b0720fd0e84e09eb11711c545c4cd9737502c0..84172237b93c14e53734f7365af4097a5fd68166 100644 (file)
@@ -59,6 +59,7 @@
 ; CHECK-NEXT:     Call Graph SCC Pass Manager
 ; CHECK-NEXT:       Remove unused exception handling info
 ; CHECK-NEXT:       Function Integration/Inlining
+; CHECK-NEXT:       OpenMP specific optimizations
 ; CHECK-NEXT:       Deduce function attributes
 ; CHECK-NEXT:       FunctionPass Manager
 ; CHECK-NEXT:         Dominator Tree Construction
index 6853fd9cbab388f9b27d288a425a4e104b8251d0..9be9823a5dc8e614982e863ce3a667df7b9f365c 100644 (file)
@@ -46,6 +46,7 @@
 ; CHECK-O2-NEXT: Call Graph SCC Pass Manager
 ; CHECK-O2-NEXT: Remove unused exception handling info
 ; CHECK-O2-NEXT: Function Integration/Inlining
+; CHECK-O2-NEXT: OpenMP specific optimizations
 ; CHECK-O2-NEXT: Deduce function attributes
 ; Next up is the main function pass pipeline. It shouldn't be split up and
 ; should contain the main loop pass pipeline as well.
diff --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll
new file mode 100644 (file)
index 0000000..93a72ca
--- /dev/null
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -openmpopt -S < %s | FileCheck %s
+; RUN: opt -passes=openmpopt -S < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+
+declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare void @useI32(i32)
+
+; The three __kmpc_global_thread_num calls in the blocks t, e, and m are
+; expected to be replaced by a single call in the entry block (see the CHECK
+; lines below).
+define void @external(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@external
+; CHECK-SAME: (i1 [[C:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
+; CHECK:       t:
+; CHECK-NEXT:    call void @internal(i32 [[C2]], i32 [[C2]])
+; CHECK-NEXT:    call void @useI32(i32 [[C2]])
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       e:
+; CHECK-NEXT:    call void @internal(i32 [[C2]], i32 [[C2]])
+; CHECK-NEXT:    call void @useI32(i32 [[C2]])
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    call void @internal(i32 0, i32 [[C2]])
+; CHECK-NEXT:    call void @useI32(i32 [[C2]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %c, label %t, label %e
+t:
+  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @internal(i32 %c0, i32 %c0)
+  call void @useI32(i32 %c0)
+  br label %m
+e:
+  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @internal(i32 %c1, i32 %c1)
+  call void @useI32(i32 %c1)
+  br label %m
+m:
+  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @internal(i32 0, i32 %c2)
+  call void @useI32(i32 %c2)
+  ret void
+}
+
+; Every call site in @external passes a __kmpc_global_thread_num result as
+; %gtid, while %not_gtid also receives the constant 0. The runtime calls in
+; here are therefore expected to be replaced by %gtid (see the CHECK lines
+; below).
+define internal void @internal(i32 %not_gtid, i32 %gtid) {
+; CHECK-LABEL: define {{[^@]+}}@internal
+; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
+; CHECK:       t:
+; CHECK-NEXT:    call void @useI32(i32 [[GTID]])
+; CHECK-NEXT:    call void @external(i1 [[C]])
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       e:
+; CHECK-NEXT:    call void @useI32(i32 [[GTID]])
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    call void @useI32(i32 [[GTID]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c = icmp eq i32 %cc, %gtid
+  br i1 %c, label %t, label %e
+t:
+  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @useI32(i32 %c0)
+  call void @external(i1 %c)
+  br label %m
+e:
+  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @useI32(i32 %c1)
+  br label %m
+m:
+  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @useI32(i32 %c2)
+  ret void
+}