[Support] On Windows 11 and Windows Server 2022, fix an affinity mask issue on large...

author Alexandre Ganea <alex_toresh@yahoo.fr>

Thu, 5 Jan 2023 20:27:30 +0000 (15:27 -0500)

committer Alexandre Ganea <alex_toresh@yahoo.fr>

Fri, 6 Jan 2023 22:03:43 +0000 (17:03 -0500)
author Alexandre Ganea <alex_toresh@yahoo.fr>
Thu, 5 Jan 2023 20:27:30 +0000 (15:27 -0500)
committer Alexandre Ganea <alex_toresh@yahoo.fr>
Fri, 6 Jan 2023 22:03:43 +0000 (17:03 -0500)
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst

index 30d3d7e5be6af7b053fe76f4a9cd72acae649e87..53ea9cd3f0bef97324bd6e9de9e7db0dbe68101d 100644 (file)
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -176,6 +176,14 @@ Changes to the Windows Target
    This roughly makes hidden visibility work like it does for other object
    file formats.
  
+* When using multi-threaded LLVM tools (such as LLD) on a Windows host with a
+  large number of processors or CPU sockets, previously the LLVM ThreadPool
+  would span out threads to use all processors.
+  Starting with Windows Server 2022 and Windows 11, the behavior has changed:
+  the OS now spans out threads automatically to all processors. This also fixes
+  an affinity mask issue.
+  (`D138747 <https://reviews.llvm.org/D138747>`_)
+
  Changes to the X86 Backend
  --------------------------
  
diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h

index 917822678e979690dc09173510f8c62569e467f1..d3aacd14b2097b1e7e13c1003987c1fd52e0cf76 100644 (file)
--- a/llvm/include/llvm/Support/Windows/WindowsSupport.h
+++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h
@@ -59,6 +59,9 @@ namespace llvm {
  /// yet have VersionHelpers.h, so we have our own helper.
  bool RunningWindows8OrGreater();
  
+/// Determines if the program is running on Windows 11 / Server 2022 or later.
+bool RunningWindows11OrGreater();
+
  /// Returns the Windows version as Major.Minor.0.BuildNumber. Uses
  /// RtlGetVersion or GetVersionEx under the hood depending on what is available.
  /// GetVersionEx is deprecated, but this API exposes the build number which can
diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc

index 4786199b4d9e3ab25a03d812d928ac126e78befe..493209052a1c547bd99086eabd43e85f53d1db0e 100644 (file)
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -477,20 +477,30 @@ unsigned Process::GetRandomNumber() {
  typedef NTSTATUS(WINAPI *RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
  #define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
  
-llvm::VersionTuple llvm::GetWindowsOSVersion() {
-  HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
-  if (hMod) {
+static RTL_OSVERSIONINFOEXW GetWindowsVer() {
+  auto getVer = []() -> RTL_OSVERSIONINFOEXW {
+    HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
+    assert(hMod);
+
      auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
-    if (getVer) {
-      RTL_OSVERSIONINFOEXW info{};
-      info.dwOSVersionInfoSize = sizeof(info);
-      if (getVer((PRTL_OSVERSIONINFOW)&info) == STATUS_SUCCESS) {
-        return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
-                                  info.dwBuildNumber);
-      }
-    }
-  }
-  return llvm::VersionTuple(0, 0, 0, 0);
+    assert(getVer);
+
+    RTL_OSVERSIONINFOEXW info{};
+    info.dwOSVersionInfoSize = sizeof(info);
+    NTSTATUS r = getVer((PRTL_OSVERSIONINFOW)&info);
+    (void)r;
+    assert(r == STATUS_SUCCESS);
+
+    return info;
+  };
+  static RTL_OSVERSIONINFOEXW info = getVer();
+  return info;
+}
+
+llvm::VersionTuple llvm::GetWindowsOSVersion() {
+  RTL_OSVERSIONINFOEXW info = GetWindowsVer();
+  return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
+                            info.dwBuildNumber);
  }
  
  bool llvm::RunningWindows8OrGreater() {
@@ -498,6 +508,19 @@ bool llvm::RunningWindows8OrGreater() {
    return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0);
  }
  
+bool llvm::RunningWindows11OrGreater() {
+  RTL_OSVERSIONINFOEXW info = GetWindowsVer();
+  auto ver = llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
+                                info.dwBuildNumber);
+
+  // Windows Server 2022
+  if (info.wProductType == VER_NT_SERVER)
+    return ver >= llvm::VersionTuple(10, 0, 0, 20348);
+
+  // Windows 11
+  return ver >= llvm::VersionTuple(10, 0, 0, 22000);
+}
+
  [[noreturn]] void Process::ExitNoCleanup(int RetCode) {
    TerminateProcess(GetCurrentProcess(), RetCode);
    llvm_unreachable("TerminateProcess doesn't return");
diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc

index 2c16fe442b70318eb58a933a2aa03ccff41b5a40..aa47484cb5ceca2d07bcc62f665a2e186536aae0 100644 (file)
--- a/llvm/lib/Support/Windows/Threading.inc
+++ b/llvm/lib/Support/Windows/Threading.inc
@@ -159,6 +159,22 @@ static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
    return true;
  }
  
+static std::optional<std::vector<USHORT>> getActiveGroups() {
+  USHORT Count = 0;
+  if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
+    return std::nullopt;
+
+  if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    return std::nullopt;
+
+  std::vector<USHORT> Groups;
+  Groups.resize(Count);
+  if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
+    return std::nullopt;
+
+  return Groups;
+}
+
  static ArrayRef<ProcessorGroup> getProcessorGroups() {
    auto computeGroups = []() {
      SmallVector<ProcessorGroup, 4> Groups;
@@ -193,22 +209,28 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
      if (!IterateProcInfo(RelationProcessorCore, HandleProc))
        return std::vector<ProcessorGroup>();
  
+    auto ActiveGroups = getActiveGroups();
+    if (!ActiveGroups)
+      return std::vector<ProcessorGroup>();
+
      // If there's an affinity mask set, assume the user wants to constrain the
      // current process to only a single CPU group. On Windows, it is not
      // possible for affinity masks to cross CPU group boundaries.
      DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
      if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
-                                 &SystemAffinityMask) &&
-        ProcessAffinityMask != SystemAffinityMask) {
-      // We don't expect more that 4 CPU groups on Windows (256 processors).
-      USHORT GroupCount = 4;
-      USHORT GroupArray[4]{};
-      if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
-                                    GroupArray)) {
-        assert(GroupCount == 1 &&
-               "On startup, a program is expected to be assigned only to "
-               "one processor group!");
-        unsigned CurrentGroupID = GroupArray[0];
+                                 &SystemAffinityMask)) {
+
+      if (ProcessAffinityMask != SystemAffinityMask) {
+        if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
+          // The process affinity mask is spurious, due to an OS bug, ignore it.
+          return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
+        }
+
+        assert(ActiveGroups->size() == 1 &&
+               "When an affinity mask is set, the process is expected to be "
+               "assigned to a single processor group!");
+
+        unsigned CurrentGroupID = (*ActiveGroups)[0];
          ProcessorGroup NewG{Groups[CurrentGroupID]};
          NewG.Affinity = ProcessAffinityMask;
          NewG.UsableThreads = countPopulation(ProcessAffinityMask);
@@ -216,7 +238,6 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
          Groups.push_back(NewG);
        }
      }
-
      return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
    };
    static auto Groups = computeGroups();
@@ -273,6 +294,12 @@ llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
  // Assign the current thread to a more appropriate CPU socket or CPU group
  void llvm::ThreadPoolStrategy::apply_thread_strategy(
      unsigned ThreadPoolNum) const {
+
+  // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
+  // since a process automatically gains access to all processor groups.
+  if (llvm::RunningWindows11OrGreater())
+    return;
+
    std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
    if (!Socket)
      return;
diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp

index fd9d7272e7e0b6bef4cdd37cc530d6137dd0d4f4..faaeea6d80fd90cf736e27413d6f2dcf916732a7 100644 (file)
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -18,6 +18,10 @@
  #include "llvm/Support/TargetSelect.h"
  #include "llvm/Support/Threading.h"
  
+#ifdef _WIN32
+#include "llvm/Support/Windows/WindowsSupport.h"
+#endif
+
  #include <chrono>
  #include <thread>
  
@@ -378,12 +382,22 @@ ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
  
  TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
    CHECK_UNSUPPORTED();
+  // After Windows 11, the OS is free to deploy the threads on any CPU socket.
+  // We cannot reliably ensure that all thread affinity masks are covered,
+  // therefore this test should not run.
+  if (llvm::RunningWindows11OrGreater())
+    return;
    std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
    ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
  }
  
  TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
    CHECK_UNSUPPORTED();
+  // After Windows 11, the OS is free to deploy the threads on any CPU socket.
+  // We cannot reliably ensure that all thread affinity masks are covered,
+  // therefore this test should not run.
+  if (llvm::RunningWindows11OrGreater())
+    return;
    std::vector<llvm::BitVector> ThreadsUsed =
        RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
    ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
author	Alexandre Ganea <alex_toresh@yahoo.fr>
	Thu, 5 Jan 2023 20:27:30 +0000 (15:27 -0500)
committer	Alexandre Ganea <alex_toresh@yahoo.fr>
	Fri, 6 Jan 2023 22:03:43 +0000 (17:03 -0500)
llvm/docs/ReleaseNotes.rst		patch \| blob \| history
llvm/include/llvm/Support/Windows/WindowsSupport.h		patch \| blob \| history
llvm/lib/Support/Windows/Process.inc		patch \| blob \| history
llvm/lib/Support/Windows/Threading.inc		patch \| blob \| history
llvm/unittests/Support/ThreadPool.cpp		patch \| blob \| history