From 0e13a0331fb90078bf71cc0c4612492a6954a5d0 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Fri, 24 Apr 2020 15:28:01 -0400
Subject: [PATCH] [llvm-cov] Prevent llvm-cov from using too many threads

As reported here: https://reviews.llvm.org/D75153#1987272

Before, each instance of llvm-cov was creating one thread per hardware core, which wasn't needed probably because the number of inputs were small. This was probably causing a thread rlimit issue on large core count systems.

After this patch, the previous behavior is restored (to what was before rG8404aeb5):

If --num-threads is not specified, we create one thread per input, up to num.cores.
When specified, --num-threads indicates any number of threads, with no upper limit.

Differential Revision: https://reviews.llvm.org/D78408
---
 llvm/include/llvm/Support/Threading.h        |  4 ++++
 llvm/lib/Support/Threading.cpp               |  9 +++++----
 llvm/tools/dsymutil/dsymutil.cpp             | 14 +++++++++-----
 llvm/tools/llvm-cov/CodeCoverage.cpp         | 16 +++++++++-------
 llvm/tools/llvm-cov/CoverageExporterJson.cpp | 12 ++++++++----
 llvm/tools/llvm-cov/CoverageReport.cpp       | 13 ++++++++-----
 6 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h
index f133c79..1300057 100644
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@@ -157,6 +157,10 @@ void llvm_execute_on_thread_async(
     // std::thread per core.
     bool UseHyperThreads = true;
 
+    // If set, will constrain 'ThreadsRequested' to the number of hardware
+    // threads, or hardware cores.
+    bool Limit = false;
+
     /// Retrieves the max available threads for the current strategy. This
     /// accounts for affinity masks and takes advantage of all CPU sockets.
     unsigned compute_thread_count() const;
diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp
index 4bf373d..61f8ee5 100644
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@@ -84,14 +84,15 @@ void llvm::llvm_execute_on_thread_async(
 int computeHostNumHardwareThreads();
 
 unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
-  if (ThreadsRequested > 0)
-    return ThreadsRequested;
-
   int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads()
                                        : sys::getHostNumPhysicalCores();
   if (MaxThreadCount <= 0)
     MaxThreadCount = 1;
-  return MaxThreadCount;
+  if (ThreadsRequested == 0)
+    return MaxThreadCount;
+  if (!Limit)
+    return ThreadsRequested;
+  return std::min((unsigned)MaxThreadCount, ThreadsRequested);
 }
 
 namespace {
diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp
index 8b70673..b0a6e7a 100644
--- a/llvm/tools/dsymutil/dsymutil.cpp
+++ b/llvm/tools/dsymutil/dsymutil.cpp
@@ -547,10 +547,14 @@ int main(int argc, char **argv) {
     // Shared a single binary holder for all the link steps.
     BinaryHolder BinHolder;
 
-    unsigned ThreadCount = Options.LinkOpts.Threads;
-    if (!ThreadCount)
-      ThreadCount = DebugMapPtrsOrErr->size();
-    ThreadPool Threads(hardware_concurrency(ThreadCount));
+    ThreadPoolStrategy S = hardware_concurrency(Options.LinkOpts.Threads);
+    if (Options.LinkOpts.Threads == 0) {
+      // If NumThreads is not specified, create one thread for each input, up to
+      // the number of hardware threads.
+      S.ThreadsRequested = DebugMapPtrsOrErr->size();
+      S.Limit = true;
+    }
+    ThreadPool Threads(S);
 
     // If there is more than one link to execute, we need to generate
     // temporary files.
@@ -625,7 +629,7 @@ int main(int argc, char **argv) {
       // FIXME: The DwarfLinker can have some very deep recursion that can max
       // out the (significantly smaller) stack when using threads. We don't
       // want this limitation when we only have a single thread.
-      if (ThreadCount == 1)
+      if (S.ThreadsRequested == 1)
         LinkLambda(OS, Options.LinkOpts);
       else
         Threads.async(LinkLambda, OS, Options.LinkOpts);
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index b2024e9..1d46443 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -943,19 +943,21 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
       (SourceFiles.size() != 1) || ViewOpts.hasOutputDirectory() ||
       (ViewOpts.Format == CoverageViewOptions::OutputFormat::HTML);
 
-  auto NumThreads = ViewOpts.NumThreads;
-
-  // If NumThreads is not specified, auto-detect a good default.
-  if (NumThreads == 0)
-    NumThreads = SourceFiles.size();
+  ThreadPoolStrategy S = hardware_concurrency(ViewOpts.NumThreads);
+  if (ViewOpts.NumThreads == 0) {
+    // If NumThreads is not specified, create one thread for each input, up to
+    // the number of hardware cores.
+    S = heavyweight_hardware_concurrency(SourceFiles.size());
+    S.Limit = true;
+  }
 
-  if (!ViewOpts.hasOutputDirectory() || NumThreads == 1) {
+  if (!ViewOpts.hasOutputDirectory() || S.ThreadsRequested == 1) {
     for (const std::string &SourceFile : SourceFiles)
       writeSourceFileView(SourceFile, Coverage.get(), Printer.get(),
                           ShowFilenames);
   } else {
     // In -output-dir mode, it's safe to use multiple threads to print files.
-    ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
+    ThreadPool Pool(S);
     for (const std::string &SourceFile : SourceFiles)
       Pool.async(&CodeCoverageTool::writeSourceFileView, this, SourceFile,
                  Coverage.get(), Printer.get(), ShowFilenames);
diff --git a/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/llvm/tools/llvm-cov/CoverageExporterJson.cpp
index c5d1b24..c8bb1aa 100644
--- a/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -163,10 +163,14 @@ json::Array renderFiles(const coverage::CoverageMapping &Coverage,
                         ArrayRef<std::string> SourceFiles,
                         ArrayRef<FileCoverageSummary> FileReports,
                         const CoverageViewOptions &Options) {
-  auto NumThreads = Options.NumThreads;
-  if (NumThreads == 0)
-    NumThreads = SourceFiles.size();
-  ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
+  ThreadPoolStrategy S = hardware_concurrency(Options.NumThreads);
+  if (Options.NumThreads == 0) {
+    // If NumThreads is not specified, create one thread for each input, up to
+    // the number of hardware cores.
+    S = heavyweight_hardware_concurrency(SourceFiles.size());
+    S.Limit = true;
+  }
+  ThreadPool Pool(S);
   json::Array FileArray;
   std::mutex FileArrayMutex;
 
diff --git a/llvm/tools/llvm-cov/CoverageReport.cpp b/llvm/tools/llvm-cov/CoverageReport.cpp
index 187e2dc..8509710 100644
--- a/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/llvm/tools/llvm-cov/CoverageReport.cpp
@@ -352,12 +352,15 @@ std::vector<FileCoverageSummary> CoverageReport::prepareFileReports(
     ArrayRef<std::string> Files, const CoverageViewOptions &Options,
     const CoverageFilter &Filters) {
   unsigned LCP = getRedundantPrefixLen(Files);
-  auto NumThreads = Options.NumThreads;
 
-  // If NumThreads is not specified, auto-detect a good default.
-  if (NumThreads == 0)
-    NumThreads = Files.size();
-  ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
+  ThreadPoolStrategy S = hardware_concurrency(Options.NumThreads);
+  if (Options.NumThreads == 0) {
+    // If NumThreads is not specified, create one thread for each input, up to
+    // the number of hardware cores.
+    S = heavyweight_hardware_concurrency(Files.size());
+    S.Limit = true;
+  }
+  ThreadPool Pool(S);
 
   std::vector<FileCoverageSummary> FileReports;
   FileReports.reserve(Files.size());
-- 
2.7.4