Temporarily revert "[ThinLTO] Re-order modules for optimal multi-threaded processing"

author Jordan Rupprecht <rupprecht@google.com>
Fri, 9 Oct 2020 21:36:20 +0000 (14:36 -0700)
committer Jordan Rupprecht <rupprecht@google.com>
Fri, 9 Oct 2020 21:36:20 +0000 (14:36 -0700)
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h

index a47f0cc..93456c0 100644 (file)
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -91,10 +91,6 @@ setupLLVMOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
  Expected<std::unique_ptr<ToolOutputFile>>
  setupStatsFile(StringRef StatsFilename);
  
-/// Produces a container ordering for optimal multi-threaded processing. Returns
-/// ordered indices to elements in the input array.
-std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
-
  class LTO;
  struct SymbolResolution;
  class ThinBackendProc;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp

index 4fbb3ad..6230216 100644 (file)
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1443,21 +1443,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
    auto &ModuleMap =
        ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
  
-  std::vector<BitcodeModule *> ModulesVec;
-  ModulesVec.reserve(ModuleMap.size());
-  for (auto &Mod : ModuleMap)
-    ModulesVec.push_back(&Mod.second);
-  std::vector<int> ModulesOrdering = generateModulesOrdering(ModulesVec);
-
    // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
    // module and parallel code generation partitions.
-  for (auto IndexCount : ModulesOrdering) {
-    auto &Mod = *(ModuleMap.begin() + IndexCount);
-    if (Error E = BackendProc->start(
-            RegularLTO.ParallelCodeGenParallelismLevel + IndexCount, Mod.second,
-            ImportLists[Mod.first], ExportLists[Mod.first],
-            ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+  unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
+  for (auto &Mod : ModuleMap) {
+    if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
+                                     ExportLists[Mod.first],
+                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
        return E;
+    ++Task;
    }
  
    return BackendProc->wait();
@@ -1501,18 +1495,3 @@ lto::setupStatsFile(StringRef StatsFilename) {
    StatsFile->keep();
    return std::move(StatsFile);
  }
-
-// Compute the ordering we will process the inputs: the rough heuristic here
-// is to sort them per size so that the largest module get schedule as soon as
-// possible. This is purely a compile-time optimization.
-std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
-  std::vector<int> ModulesOrdering;
-  ModulesOrdering.resize(R.size());
-  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
-  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
-    auto LSize = R[LeftIndex]->getBuffer().size();
-    auto RSize = R[RightIndex]->getBuffer().size();
-    return LSize > RSize;
-  });
-  return ModulesOrdering;
-}
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp

index 3f71487..14dae84 100644 (file)
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1054,11 +1054,19 @@ void ThinLTOCodeGenerator::run() {
      ModuleToDefinedGVSummaries[ModuleIdentifier];
    }
  
-  std::vector<BitcodeModule *> ModulesVec;
-  ModulesVec.reserve(Modules.size());
-  for (auto &Mod : Modules)
-    ModulesVec.push_back(&Mod->getSingleBitcodeModule());
-  std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
+  // Compute the ordering we will process the inputs: the rough heuristic here
+  // is to sort them per size so that the largest module get schedule as soon as
+  // possible. This is purely a compile-time optimization.
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(Modules.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+    auto LSize =
+        Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
+    auto RSize =
+        Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
+    return LSize > RSize;
+  });
  
    // Parallel optimizer + codegen
    {
author	Jordan Rupprecht <rupprecht@google.com>
	Fri, 9 Oct 2020 21:36:20 +0000 (14:36 -0700)
committer	Jordan Rupprecht <rupprecht@google.com>
	Fri, 9 Oct 2020 21:36:20 +0000 (14:36 -0700)
llvm/include/llvm/LTO/LTO.h		patch | blob | history
llvm/lib/LTO/LTO.cpp		patch | blob | history
llvm/lib/LTO/ThinLTOCodeGenerator.cpp		patch | blob | history