This roughly makes hidden visibility work like it does for other object
file formats.
+* When using multi-threaded LLVM tools (such as LLD) on a Windows host with a
+ large number of processors or CPU sockets, the LLVM ThreadPool previously
+ spread its threads across all processors itself.
+ Starting with Windows Server 2022 and Windows 11, the OS distributes threads
+ across all processors automatically, so LLVM no longer does this on those
+ systems. This also fixes an affinity mask issue.
+ (`D138747 <https://reviews.llvm.org/D138747>`_)
+
Changes to the X86 Backend
--------------------------
/// yet have VersionHelpers.h, so we have our own helper.
bool RunningWindows8OrGreater();
+/// Determines if the program is running on Windows 11, Windows Server 2022,
+/// or a later release.
+bool RunningWindows11OrGreater();
+
/// Returns the Windows version as Major.Minor.0.BuildNumber. Uses
/// RtlGetVersion or GetVersionEx under the hood depending on what is available.
/// GetVersionEx is deprecated, but this API exposes the build number which can
typedef NTSTATUS(WINAPI *RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
-llvm::VersionTuple llvm::GetWindowsOSVersion() {
- HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
- if (hMod) {
+static RTL_OSVERSIONINFOEXW GetWindowsVer() {
+ auto getVer = []() -> RTL_OSVERSIONINFOEXW {
+ HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
+ assert(hMod);
+
auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
- if (getVer) {
- RTL_OSVERSIONINFOEXW info{};
- info.dwOSVersionInfoSize = sizeof(info);
- if (getVer((PRTL_OSVERSIONINFOW)&info) == STATUS_SUCCESS) {
- return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
- info.dwBuildNumber);
- }
- }
- }
- return llvm::VersionTuple(0, 0, 0, 0);
+ assert(getVer);
+
+ RTL_OSVERSIONINFOEXW info{};
+ info.dwOSVersionInfoSize = sizeof(info);
+ NTSTATUS r = getVer((PRTL_OSVERSIONINFOW)&info);
+ (void)r;
+ assert(r == STATUS_SUCCESS);
+
+ return info;
+ };
+ static RTL_OSVERSIONINFOEXW info = getVer();
+ return info;
+}
+
+llvm::VersionTuple llvm::GetWindowsOSVersion() {
+ RTL_OSVERSIONINFOEXW info = GetWindowsVer();
+ return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
+ info.dwBuildNumber);
}
bool llvm::RunningWindows8OrGreater() {
return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0);
}
+/// Returns true when the host is a Windows Server release at build 20348
+/// (Windows Server 2022) or newer, or otherwise at build 22000 (Windows 11)
+/// or newer.
+bool llvm::RunningWindows11OrGreater() {
+  RTL_OSVERSIONINFOEXW info = GetWindowsVer();
+  auto ver = llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
+                                info.dwBuildNumber);
+
+  // Windows Server 2022. A server acting as a domain controller reports
+  // VER_NT_DOMAIN_CONTROLLER rather than VER_NT_SERVER, so accept both;
+  // otherwise such a host would be held to the Windows 11 build number below.
+  if (info.wProductType == VER_NT_SERVER ||
+      info.wProductType == VER_NT_DOMAIN_CONTROLLER)
+    return ver >= llvm::VersionTuple(10, 0, 0, 20348);
+
+  // Windows 11
+  return ver >= llvm::VersionTuple(10, 0, 0, 22000);
+}
+
[[noreturn]] void Process::ExitNoCleanup(int RetCode) {
TerminateProcess(GetCurrentProcess(), RetCode);
llvm_unreachable("TerminateProcess doesn't return");
return true;
}
+/// Returns the processor groups the current process is assigned to, or
+/// std::nullopt if they cannot be queried.
+static std::optional<std::vector<USHORT>> getActiveGroups() {
+  // Probe with an empty buffer: the call is expected to fail with
+  // ERROR_INSUFFICIENT_BUFFER and report the required element count in Count.
+  USHORT Count = 0;
+  if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
+    return std::nullopt;
+
+  if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    return std::nullopt;
+
+  std::vector<USHORT> Groups(Count);
+  if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
+    return std::nullopt;
+
+  // On success Count holds the number of entries actually written; drop any
+  // unused tail so that size() reflects the real number of groups.
+  Groups.resize(Count);
+  return Groups;
+}
+
static ArrayRef<ProcessorGroup> getProcessorGroups() {
auto computeGroups = []() {
SmallVector<ProcessorGroup, 4> Groups;
if (!IterateProcInfo(RelationProcessorCore, HandleProc))
return std::vector<ProcessorGroup>();
+ auto ActiveGroups = getActiveGroups();
+ if (!ActiveGroups)
+ return std::vector<ProcessorGroup>();
+
// If there's an affinity mask set, assume the user wants to constrain the
// current process to only a single CPU group. On Windows, it is not
// possible for affinity masks to cross CPU group boundaries.
DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
- &SystemAffinityMask) &&
- ProcessAffinityMask != SystemAffinityMask) {
- // We don't expect more that 4 CPU groups on Windows (256 processors).
- USHORT GroupCount = 4;
- USHORT GroupArray[4]{};
- if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
- GroupArray)) {
- assert(GroupCount == 1 &&
- "On startup, a program is expected to be assigned only to "
- "one processor group!");
- unsigned CurrentGroupID = GroupArray[0];
+ &SystemAffinityMask)) {
+
+ if (ProcessAffinityMask != SystemAffinityMask) {
+ if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
+ // The process affinity mask is spurious due to an OS bug; ignore it.
+ return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
+ }
+
+ assert(ActiveGroups->size() == 1 &&
+ "When an affinity mask is set, the process is expected to be "
+ "assigned to a single processor group!");
+
+ unsigned CurrentGroupID = (*ActiveGroups)[0];
ProcessorGroup NewG{Groups[CurrentGroupID]};
NewG.Affinity = ProcessAffinityMask;
NewG.UsableThreads = countPopulation(ProcessAffinityMask);
Groups.push_back(NewG);
}
}
-
return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
};
static auto Groups = computeGroups();
// Assign the current thread to a more appropriate CPU socket or CPU group
void llvm::ThreadPoolStrategy::apply_thread_strategy(
unsigned ThreadPoolNum) const {
+
+ // On Windows 11 and Windows Server 2022 or later, let the OS do the
+ // scheduling, since a process automatically gains access to all processor
+ // groups.
+ if (llvm::RunningWindows11OrGreater())
+ return;
+
std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
if (!Socket)
return;
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
+#ifdef _WIN32
+#include "llvm/Support/Windows/WindowsSupport.h"
+#endif
+
#include <chrono>
#include <thread>
TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
CHECK_UNSUPPORTED();
+ // Starting with Windows 11, the OS is free to deploy the threads on any CPU
+ // socket. We cannot reliably ensure that all thread affinity masks are
+ // covered, therefore this test should not run.
+ if (llvm::RunningWindows11OrGreater())
+ return;
std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
}
TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
CHECK_UNSUPPORTED();
+ // Starting with Windows 11, the OS is free to deploy the threads on any CPU
+ // socket. We cannot reliably ensure that all thread affinity masks are
+ // covered, therefore this test should not run.
+ if (llvm::RunningWindows11OrGreater())
+ return;
std::vector<llvm::BitVector> ThreadsUsed =
RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());