BEGIN_QCALL;
+#ifndef FEATURE_PAL
CPUGroupInfo::EnsureInitialized();
if(CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
{
processorCount = CPUGroupInfo::GetNumActiveProcessors();
}
-
+#endif // !FEATURE_PAL
// Processor count will be 0 if CPU groups are disabled/not supported
if(processorCount == 0)
{
public:
+ static const size_t BitsetDataSize = MAX_SUPPORTED_CPUS / BitsPerBitsetEntry;
+
AffinitySet()
{
memset(m_bitset, 0, sizeof(m_bitset));
}
+ uintptr_t* GetBitsetData()
+ {
+ return m_bitset;
+ }
+
// Check if the set contains a processor
bool Contains(size_t cpuIndex) const
{
// Is NUMA support available
static bool CanEnableGCNumaAware();
- // Gets the NUMA node for the processor
- static bool GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no);
-
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
// heap_number - heap number to get the result for
#include "globals.h"
#include "cgroup.h"
+#if HAVE_NUMA_H
+
+#include <numa.h>
+#include <numaif.h>
+#include <dlfcn.h>
+
+// List of all functions from the numa library that are used
+#define FOR_ALL_NUMA_FUNCTIONS \
+ PER_FUNCTION_BLOCK(mbind) \
+ PER_FUNCTION_BLOCK(numa_available) \
+ PER_FUNCTION_BLOCK(numa_max_node) \
+ PER_FUNCTION_BLOCK(numa_node_of_cpu)
+
+// Declare pointers to all the used numa functions
+#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+// Redefine all calls to numa functions as calls through pointers that are set
+// to the functions of libnuma in the initialization.
+#define mbind(...) mbind_ptr(__VA_ARGS__)
+#define numa_available() numa_available_ptr()
+#define numa_max_node() numa_max_node_ptr()
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
+
+#endif // HAVE_NUMA_H
+
#if defined(_ARM_) || defined(_ARM64_)
#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF
#else
AffinitySet g_processAffinitySet;
+#if HAVE_CPUSET_T
+typedef cpuset_t cpu_set_t;
+#endif
+
+// The highest NUMA node available
+int g_highestNumaNode = 0;
+// Is numa available
+bool g_numaAvailable = false;
+
+void* g_numaHandle = nullptr;
+
+#if HAVE_NUMA_H
+#define PER_FUNCTION_BLOCK(fn) decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+#endif // HAVE_NUMA_H
+
+
+// Initialize data structures for getting and setting thread affinities to processors and
+// querying NUMA related processor information.
+// On systems with no NUMA support, it behaves as if there was a single NUMA node with
+// a single group of processors.
+void NUMASupportInitialize()
+{
+#if HAVE_NUMA_H
+ g_numaHandle = dlopen("libnuma.so", RTLD_LAZY);
+ if (g_numaHandle == 0)
+ {
+ g_numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
+ }
+ if (g_numaHandle != 0)
+ {
+ dlsym(g_numaHandle, "numa_allocate_cpumask");
+#define PER_FUNCTION_BLOCK(fn) \
+ fn##_ptr = (decltype(fn)*)dlsym(g_numaHandle, #fn); \
+ if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+ if (numa_available() == -1)
+ {
+ dlclose(g_numaHandle);
+ }
+ else
+ {
+ g_numaAvailable = true;
+ g_highestNumaNode = numa_max_node();
+ }
+ }
+#endif // HAVE_NUMA_H
+ if (!g_numaAvailable)
+ {
+ // No NUMA
+ g_highestNumaNode = 0;
+ }
+}
+
+// Cleanup of the NUMA support data structures
+void NUMASupportCleanup()
+{
+#if HAVE_NUMA_H
+ if (g_numaAvailable)
+ {
+ dlclose(g_numaHandle);
+ }
+#endif // HAVE_NUMA_H
+}
+
// Initialize the interface implementation
// Return:
// true if it has succeeded, false if it has failed
#endif // HAVE_SCHED_GETAFFINITY
+ NUMASupportInitialize();
+
return true;
}
munmap(g_helperPage, OS_PAGE_SIZE);
CleanupCGroup();
+ NUMASupportCleanup();
}
// Get numeric id of the current thread if possible on the
// true if it has succeeded, false if it has failed
bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
{
- assert(node == NUMA_NODE_UNDEFINED && "Numa allocation is not ported to local GC on unix yet");
- return mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+ bool success = mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+
+#if HAVE_NUMA_H
+ if (success && g_numaAvailable && (node != NUMA_NODE_UNDEFINED))
+ {
+ if ((int)node <= g_highestNumaNode)
+ {
+ int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long);
+ unsigned long *nodeMask = (unsigned long*)alloca(nodeMaskLength * sizeof(unsigned long));
+ memset(nodeMask, 0, nodeMaskLength);
+
+ int index = node / sizeof(unsigned long);
+ int mask = ((unsigned long)1) << (node & (sizeof(unsigned long) - 1));
+ nodeMask[index] = mask;
+
+ int st = mbind(address, size, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0);
+ assert(st == 0);
+ // If the mbind fails, we still return the allocated memory since the node is just a hint
+ }
+ }
+#endif // HAVE_NUMA_H
+
+ return success;
}
// Decomit virtual memory range.
bool GCToOSInterface::CanEnableGCNumaAware()
{
- return false;
-}
-
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
- assert(!"Numa has not been ported to local GC for unix");
- return false;
+ return g_numaAvailable;
}
// Get processor number and optionally its NUMA node number for the specified heap number
if (GCToOSInterface::CanEnableGCNumaAware())
{
- if (!GCToOSInterface::GetNumaProcessorNode(procNumber, node_no))
- {
- *node_no = NUMA_NODE_UNDEFINED;
- }
+ int result = numa_node_of_cpu(procNumber);
+ *node_no = (result >= 0) ? (uint16_t)result : NUMA_NODE_UNDEFINED;
}
else
{
return g_fEnableGCNumaAware;
}
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
- GroupProcNo groupProcNo(proc_no);
-
- PROCESSOR_NUMBER procNumber;
- procNumber.Group = groupProcNo.GetGroup();
- procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
- procNumber.Reserved = 0;
-
- assert(g_fEnableGCNumaAware);
- return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
-}
-
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
// heap_number - heap number to get the result for
{
bool success = false;
- if (CanEnableGCCPUGroups())
+ // Locate heap_number-th available processor
+ uint16_t procNumber;
+ size_t cnt = heap_number;
+ for (uint16_t i = 0; i < GCToOSInterface::GetTotalProcessorCount(); i++)
{
- uint16_t gn, gpn;
- GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-
- *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
-
- if (GCToOSInterface::CanEnableGCNumaAware())
+ if (g_processAffinitySet.Contains(i))
{
- if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+ if (cnt == 0)
{
- *node_no = NUMA_NODE_UNDEFINED;
+ procNumber = i;
+ success = true;
+ break;
}
+
+ cnt--;
+ }
+ }
+
+ if (success)
+ {
+ WORD gn, gpn;
+
+ if (CanEnableGCCPUGroups())
+ {
+ GetGroupForProcessor(procNumber, &gn, &gpn);
}
else
- { // no numa setting, each cpu group is treated as a node
- *node_no = gn;
+ {
+ gn = GroupProcNo::NoGroup;
+ gpn = procNumber;
}
- success = true;
- }
- else
- {
- int bit_number = 0;
- uint8_t proc_number = 0;
- for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+ GroupProcNo groupProcNo(gn, gpn);
+ *proc_no = groupProcNo.GetCombinedValue();
+
+ if (GCToOSInterface::CanEnableGCNumaAware())
{
- if (g_processAffinitySet.Contains(proc_number))
+ PROCESSOR_NUMBER procNumber;
+
+ if (CanEnableGCCPUGroups())
{
- if (bit_number == heap_number)
- {
- *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
+ procNumber.Group = gn;
+ }
+ else
+ {
+ // Get the current processor group
+ PROCESSOR_NUMBER procNumber;
+ GetCurrentProcessorNumberEx(&procNumber);
+ }
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
- {
- *node_no = NUMA_NODE_UNDEFINED;
- }
- }
+ procNumber.Number = (BYTE)gpn;
+ procNumber.Reserved = 0;
- success = true;
- break;
- }
- bit_number++;
+ if (GetNumaProcessorNodeEx(&procNumber, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
}
- proc_number++;
+ }
+ else
+ { // no numa setting, each cpu group is treated as a node
+ *node_no = groupProcNo.GetGroup();
}
}
// Allocate free memory with specific alignment
//
LPVOID ClrVirtualAllocAligned(LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, SIZE_T alignment);
-
-//******************************************************************************
-// Returns the number of processors that a process has been configured to run on
-//******************************************************************************
+
class NumaNodeInfo
{
private:
static LPVOID VirtualAllocExNuma(HANDLE hProc, LPVOID lpAddr, SIZE_T size,
DWORD allocType, DWORD prot, DWORD node);
+#ifndef FEATURE_PAL
static BOOL GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, PUSHORT node_no);
+#else // !FEATURE_PAL
+ static BOOL GetNumaProcessorNodeEx(USHORT proc_no, PUSHORT node_no);
+#endif // !FEATURE_PAL
#endif
};
+#ifndef FEATURE_PAL
+
struct CPU_Group_Info
{
WORD nr_active; // at most 64
}
};
-int GetCurrentProcessCpuCount();
DWORD_PTR GetCurrentProcessCpuMask();
+#endif // !FEATURE_PAL
+
+//******************************************************************************
+// Returns the number of processors that a process has been configured to run on
+//******************************************************************************
+int GetCurrentProcessCpuCount();
+
uint32_t GetOsPageSize();
// NUMA related APIs
//
-typedef enum _PROCESSOR_CACHE_TYPE {
- CacheUnified,
- CacheInstruction,
- CacheData,
- CacheTrace
-} PROCESSOR_CACHE_TYPE;
-
-typedef struct _PROCESSOR_NUMBER {
- WORD Group;
- BYTE Number;
- BYTE Reserved;
-} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-
-typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
- RelationProcessorCore,
- RelationNumaNode,
- RelationCache,
- RelationProcessorPackage,
- RelationGroup,
- RelationAll = 0xffff
-} LOGICAL_PROCESSOR_RELATIONSHIP;
-
-typedef ULONG_PTR KAFFINITY;
-
-#define ANYSIZE_ARRAY 1
-
-typedef struct _GROUP_AFFINITY {
- KAFFINITY Mask;
- WORD Group;
- WORD Reserved[3];
-} GROUP_AFFINITY, *PGROUP_AFFINITY;
-
-typedef struct _PROCESSOR_GROUP_INFO {
- BYTE MaximumProcessorCount;
- BYTE ActiveProcessorCount;
- BYTE Reserved[38];
- KAFFINITY ActiveProcessorMask;
-} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO;
-
-typedef struct _PROCESSOR_RELATIONSHIP {
- BYTE Flags;
- BYTE EfficiencyClass;
- BYTE Reserved[21];
- WORD GroupCount;
- GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
-} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
-
-typedef struct _GROUP_RELATIONSHIP {
- WORD MaximumGroupCount;
- WORD ActiveGroupCount;
- BYTE Reserved[20];
- PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY];
-} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
-
-typedef struct _NUMA_NODE_RELATIONSHIP {
- DWORD NodeNumber;
- BYTE Reserved[20];
- GROUP_AFFINITY GroupMask;
-} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
-
-typedef struct _CACHE_RELATIONSHIP {
- BYTE Level;
- BYTE Associativity;
- WORD LineSize;
- DWORD CacheSize;
- PROCESSOR_CACHE_TYPE Type;
- BYTE Reserved[20];
- GROUP_AFFINITY GroupMask;
-} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
-
-typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
- LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
- DWORD Size;
- union {
- PROCESSOR_RELATIONSHIP Processor;
- NUMA_NODE_RELATIONSHIP NumaNode;
- CACHE_RELATIONSHIP Cache;
- GROUP_RELATIONSHIP Group;
- };
-} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
-
-
PALIMPORT
BOOL
PALAPI
PALIMPORT
BOOL
PALAPI
-GetNumaProcessorNodeEx(
- IN PPROCESSOR_NUMBER Processor,
- OUT PUSHORT NodeNumber
-);
+PAL_GetNumaProcessorNode(WORD procNo, WORD* node);
PALIMPORT
LPVOID
PALIMPORT
BOOL
PALAPI
-GetLogicalProcessorInformationEx(
- IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
- OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
- IN OUT PDWORD ReturnedLength
-);
-
-PALIMPORT
-DWORD_PTR
-PALAPI
-SetThreadAffinityMask(
- IN HANDLE hThread,
- IN DWORD_PTR dwThreadAffinityMask
-);
-
-PALIMPORT
-BOOL
-PALAPI
-SetThreadGroupAffinity(
- IN HANDLE hThread,
- IN const GROUP_AFFINITY *GroupAffinity,
- OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
-);
+PAL_SetCurrentThreadAffinity(WORD procNo);
PALIMPORT
BOOL
PALAPI
-GetThreadGroupAffinity(
- IN HANDLE hThread,
- OUT PGROUP_AFFINITY GroupAffinity
-);
-
-PALIMPORT
-VOID
-PALAPI
-GetCurrentProcessorNumberEx(
- OUT PPROCESSOR_NUMBER ProcNumber
-);
-
-PALIMPORT
-BOOL
-PALAPI
-GetProcessAffinityMask(
- IN HANDLE hProcess,
- OUT PDWORD_PTR lpProcessAffinityMask,
- OUT PDWORD_PTR lpSystemAffinityMask
-);
-
-PALIMPORT
-BOOL
-PALAPI
-SetThreadIdealProcessorEx(
- IN HANDLE hThread,
- IN PPROCESSOR_NUMBER lpIdealProcessor,
- OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor
-);
+PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data);
//
// The types of events that can be logged.
bool
ReadMemoryValueFromFile(const char* filename, size_t* val);
+DWORD
+GetTotalCpuCount();
+
#ifdef __APPLE__
bool
GetApplicationContainerFolder(PathCharString& buffer, const char *applicationGroupId, int applicationGroupIdLength);
#endif
#endif // __APPLE__
-
-DWORD
-PALAPI
-PAL_GetLogicalCpuCountFromOS()
+DWORD GetTotalCpuCount()
{
int nrcpus = 0;
-#if HAVE_SCHED_GETAFFINITY
-
- cpu_set_t cpuSet;
- int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
- if (st != 0)
- {
- ASSERT("sched_getaffinity failed (%d)\n", errno);
- }
-
- nrcpus = CPU_COUNT(&cpuSet);
-#elif HAVE_SYSCONF
+#if HAVE_SYSCONF
#if defined(_ARM_) || defined(_ARM64_)
#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF
{
ASSERT("sysctl failed for HW_NCPU (%d)\n", errno);
}
+#else // HAVE_SYSCONF
+#error "Don't know how to get total CPU count on this platform"
#endif // HAVE_SYSCONF
return nrcpus;
}
+DWORD
+PALAPI
+PAL_GetLogicalCpuCountFromOS()
+{
+ int nrcpus = 0;
+
+#if HAVE_SCHED_GETAFFINITY
+
+ cpu_set_t cpuSet;
+ int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
+ if (st != 0)
+ {
+ ASSERT("sched_getaffinity failed (%d)\n", errno);
+ }
+
+ nrcpus = CPU_COUNT(&cpuSet);
+#else // HAVE_SCHED_GETAFFINITY
+ nrcpus = GetTotalCpuCount();
+#endif // HAVE_SCHED_GETAFFINITY
+
+ return nrcpus;
+}
+
/*++
Function:
GetSystemInfo
typedef cpuset_t cpu_set_t;
#endif
-// CPU affinity descriptor
-struct CpuAffinity
-{
- // NUMA node
- BYTE Node;
- // CPU number relative to the group the CPU is in
- BYTE Number;
- // CPU group
- WORD Group;
-};
-
-// Array mapping global CPU index to its affinity
-CpuAffinity *g_cpuToAffinity = NULL;
-
-// Array mapping CPU group and index in the group to the global CPU index
-short *g_groupAndIndexToCpu = NULL;
-// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group
-KAFFINITY *g_groupToCpuMask = NULL;
-// Array mapping CPU group to the number of processors in the group
-BYTE *g_groupToCpuCount = NULL;
-
-// Total number of processors in the system
-int g_cpuCount = 0;
-// Total number of possible processors in the system
-int g_possibleCpuCount = 0;
-// Total number of CPU groups
-int g_groupCount = 0;
// The highest NUMA node available
int g_highestNumaNode = 0;
// Is numa available
#undef PER_FUNCTION_BLOCK
#endif // HAVE_NUMA_H
-static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY);
-static const WORD NO_GROUP = 0xffff;
-
-/*++
-Function:
- FreeLookupArrays
-
-Free CPU and group lookup arrays
---*/
-VOID
-FreeLookupArrays()
-{
- free(g_groupAndIndexToCpu);
- free(g_cpuToAffinity);
- free(g_groupToCpuMask);
- free(g_groupToCpuCount);
-
- g_groupAndIndexToCpu = NULL;
- g_cpuToAffinity = NULL;
- g_groupToCpuMask = NULL;
- g_groupToCpuCount = NULL;
-}
-
-/*++
-Function:
- AllocateLookupArrays
-
-Allocate CPU and group lookup arrays
-Return TRUE if the allocation succeeded
---*/
-BOOL
-AllocateLookupArrays()
-{
- g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short));
- if (g_groupAndIndexToCpu == NULL)
- {
- goto FAILED;
- }
-
- g_cpuToAffinity = (CpuAffinity*)malloc(g_possibleCpuCount * sizeof(CpuAffinity));
- if (g_cpuToAffinity == NULL)
- {
- goto FAILED;
- }
-
- g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY));
- if (g_groupToCpuMask == NULL)
- {
- goto FAILED;
- }
-
- g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE));
- if (g_groupToCpuCount == NULL)
- {
- goto FAILED;
- }
-
- memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short));
- memset(g_cpuToAffinity, 0xff, g_possibleCpuCount * sizeof(CpuAffinity));
- memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY));
- memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE));
-
- return TRUE;
-
-FAILED:
- FreeLookupArrays();
-
- return FALSE;
-}
-
-/*++
-Function:
- GetFullAffinityMask
-
-Get affinity mask for the specified number of processors with all
-the processors enabled.
---*/
-KAFFINITY GetFullAffinityMask(int cpuCount)
-{
- if ((size_t)cpuCount < sizeof(KAFFINITY) * 8)
- {
- return ((KAFFINITY)1 << (cpuCount)) - 1;
- }
-
- return ~(KAFFINITY)0;
-}
/*++
Function:
else
{
g_numaAvailable = true;
-
- struct bitmask *mask = numa_allocate_cpumask();
- int numaNodesCount = numa_max_node() + 1;
-
- g_possibleCpuCount = numa_num_possible_cpus();
- g_cpuCount = 0;
- g_groupCount = 0;
-
- for (int i = 0; i < numaNodesCount; i++)
- {
- int st = numa_node_to_cpus(i, mask);
- // The only failure that can happen is that the mask is not large enough
- // but that cannot happen since the mask was allocated by numa_allocate_cpumask
- _ASSERTE(st == 0);
- unsigned int nodeCpuCount = numa_bitmask_weight(mask);
- g_cpuCount += nodeCpuCount;
- unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
- g_groupCount += nodeGroupCount;
- }
-
- if (!AllocateLookupArrays())
- {
- dlclose(numaHandle);
- return FALSE;
- }
-
- WORD currentGroup = 0;
- int currentGroupCpus = 0;
-
- for (int i = 0; i < numaNodesCount; i++)
- {
- int st = numa_node_to_cpus(i, mask);
- // The only failure that can happen is that the mask is not large enough
- // but that cannot happen since the mask was allocated by numa_allocate_cpumask
- _ASSERTE(st == 0);
- unsigned int nodeCpuCount = numa_bitmask_weight(mask);
- unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
- for (int j = 0; j < g_possibleCpuCount; j++)
- {
- if (numa_bitmask_isbitset(mask, j))
- {
- if (currentGroupCpus == MaxCpusPerGroup)
- {
- g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
- g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
- currentGroupCpus = 0;
- currentGroup++;
- }
- g_cpuToAffinity[j].Node = i;
- g_cpuToAffinity[j].Group = currentGroup;
- g_cpuToAffinity[j].Number = currentGroupCpus;
- g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
- currentGroupCpus++;
- }
- }
-
- if (currentGroupCpus != 0)
- {
- g_groupToCpuCount[currentGroup] = currentGroupCpus;
- g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
- currentGroupCpus = 0;
- currentGroup++;
- }
- }
-
- numa_free_cpumask(mask);
-
g_highestNumaNode = numa_max_node();
}
}
if (!g_numaAvailable)
{
// No NUMA
- g_possibleCpuCount = PAL_GetLogicalCpuCountFromOS();
- g_cpuCount = PAL_GetLogicalCpuCountFromOS();
- g_groupCount = 1;
g_highestNumaNode = 0;
-
- if (!AllocateLookupArrays())
- {
- return FALSE;
- }
-
- for (int i = 0; i < g_possibleCpuCount; i++)
- {
- g_cpuToAffinity[i].Number = i;
- g_cpuToAffinity[i].Group = 0;
- }
}
return TRUE;
VOID
NUMASupportCleanup()
{
- FreeLookupArrays();
#if HAVE_NUMA_H
if (g_numaAvailable)
{
/*++
Function:
- GetNumaProcessorNodeEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetNumaProcessorNodeEx(
- IN PPROCESSOR_NUMBER Processor,
- OUT PUSHORT NodeNumber
-)
-{
- PERF_ENTRY(GetNumaProcessorNodeEx);
- ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
+ PAL_GetNumaProcessorNode
- BOOL success = FALSE;
+Abstract
+ Get NUMA node of a processor
- if ((Processor->Group < g_groupCount) &&
- (Processor->Number < MaxCpusPerGroup) &&
- (Processor->Reserved == 0))
- {
- short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
- if (cpu != -1)
- {
- *NodeNumber = g_cpuToAffinity[cpu].Node;
- success = TRUE;
- }
- }
-
- if (!success)
- {
- *NodeNumber = 0xffff;
- SetLastError(ERROR_INVALID_PARAMETER);
- }
+Parameters:
+ procNo - number of the processor to get the NUMA node for
+ node - the resulting NUMA node
- LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
- PERF_EXIT(GetNumaProcessorNodeEx);
-
- return success;
-}
-
-/*++
-Function:
- GetLogicalProcessorInformationEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetLogicalProcessorInformationEx(
- IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
- OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
- IN OUT PDWORD ReturnedLength
-)
-{
- PERF_ENTRY(GetLogicalProcessorInformationEx);
- ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
-
- BOOL success = FALSE;
-
- if (RelationshipType == RelationGroup)
- {
- size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
- requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
- requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
-
- if (*ReturnedLength >= requiredSize)
- {
- Buffer->Relationship = RelationGroup;
- Buffer->Size = requiredSize;
- Buffer->Group.MaximumGroupCount = g_groupCount;
- Buffer->Group.ActiveGroupCount = g_groupCount;
- for (int i = 0; i < g_groupCount; i++)
- {
- Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
- Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
- Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
- }
-
- success = TRUE;
- }
- else
- {
- SetLastError(ERROR_INSUFFICIENT_BUFFER);
- }
-
- *ReturnedLength = requiredSize;
- }
- else
- {
- // We only support the group relationship
- SetLastError(ERROR_INVALID_PARAMETER);
- }
-
- LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
- PERF_EXIT(GetLogicalProcessorInformationEx);
-
- return success;
-}
-
-/*++
-Function:
- GetThreadGroupAffinityInternal
-
-Get the group affinity for the specified pthread
---*/
-BOOL
-GetThreadGroupAffinityInternal(
- IN pthread_t thread,
- OUT PGROUP_AFFINITY GroupAffinity
-)
-{
- BOOL success = FALSE;
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- cpu_set_t cpuSet;
-
- int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st == 0)
- {
- WORD group = NO_GROUP;
- KAFFINITY mask = 0;
-
- for (int i = 0; i < g_possibleCpuCount; i++)
- {
- if (CPU_ISSET(i, &cpuSet))
- {
- WORD g = g_cpuToAffinity[i].Group;
- // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
- // the current thread has affinity with processors from multiple groups. So we report just the
- // first group we find.
- if (group == NO_GROUP || g == group)
- {
- group = g;
- mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number;
- }
- }
- }
-
- GroupAffinity->Group = group;
- GroupAffinity->Mask = mask;
- success = TRUE;
- }
- else
- {
- SetLastError(ERROR_GEN_FAILURE);
- }
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's return a group affinity
- // with all the CPUs on the system.
- GroupAffinity->Group = 0;
- GroupAffinity->Mask = GetFullAffinityMask(g_possibleCpuCount);
- success = TRUE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
- return success;
-}
-
-/*++
-Function:
- GetThreadGroupAffinity
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetThreadGroupAffinity(
- IN HANDLE hThread,
- OUT PGROUP_AFFINITY GroupAffinity
-)
-{
- PERF_ENTRY(GetThreadGroupAffinity);
- ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return FALSE;
- }
-
- BOOL success = GetThreadGroupAffinityInternal(
- pTargetThread->GetPThreadSelf(), GroupAffinity);
- LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
- PERF_EXIT(GetThreadGroupAffinity);
-
- return success;
-}
-
-
-/*++
-Function:
- SetThreadGroupAffinity
-
-See MSDN doc.
+Return value:
+ TRUE if the function was able to get the NUMA node, FALSE if it has failed.
--*/
BOOL
PALAPI
-SetThreadGroupAffinity(
- IN HANDLE hThread,
- IN const GROUP_AFFINITY *GroupAffinity,
- OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
-)
+PAL_GetNumaProcessorNode(WORD procNo, WORD* node)
{
- PERF_ENTRY(SetThreadGroupAffinity);
- ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
-
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return FALSE;
- }
-
- pthread_t thread = pTargetThread->GetPThreadSelf();
-
- if (PreviousGroupAffinity != NULL)
- {
- GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
- }
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
- KAFFINITY mask = 1;
- cpu_set_t cpuSet;
- CPU_ZERO(&cpuSet);
-
- for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1)
- {
- if (GroupAffinity->Mask & mask)
- {
- int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
- if (cpu != -1)
- {
- CPU_SET(cpu, &cpuSet);
- }
- }
- }
-
- int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st != 0)
- {
- switch (st)
- {
- case EINVAL:
- // There is no processor in the mask that is allowed to execute the process
- SetLastError(ERROR_INVALID_PARAMETER);
- break;
- case ESRCH:
- SetLastError(ERROR_INVALID_HANDLE);
- break;
- default:
- SetLastError(ERROR_GEN_FAILURE);
- break;
- }
- }
-
- BOOL success = (st == 0);
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's ignore the request
- BOOL success = TRUE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
- LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
- PERF_EXIT(SetThreadGroupAffinity);
-
- return success;
-}
-
-/*++
-Function:
- SetThreadAffinityMask
-
-See MSDN doc.
---*/
-DWORD_PTR
-PALAPI
-SetThreadAffinityMask(
- IN HANDLE hThread,
- IN DWORD_PTR dwThreadAffinityMask
-)
-{
- PERF_ENTRY(SetThreadAffinityMask);
- ENTRY("SetThreadAffinityMask(hThread=%p, dwThreadAffinityMask=%p)\n", hThread, dwThreadAffinityMask);
-
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return 0;
- }
-
- pthread_t thread = pTargetThread->GetPThreadSelf();
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- cpu_set_t prevCpuSet;
- CPU_ZERO(&prevCpuSet);
- KAFFINITY prevMask = 0;
-
- int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
-
- if (st == 0)
- {
- for (int i = 0; i < std::min(8 * (int)sizeof(KAFFINITY), g_possibleCpuCount); i++)
- {
- if (CPU_ISSET(i, &prevCpuSet))
- {
- prevMask |= ((KAFFINITY)1) << i;
- }
- }
- }
-
- cpu_set_t cpuSet;
- CPU_ZERO(&cpuSet);
-
- int cpu = 0;
- while (dwThreadAffinityMask)
- {
- if (dwThreadAffinityMask & 1)
- {
- CPU_SET(cpu, &cpuSet);
- }
- cpu++;
- dwThreadAffinityMask >>= 1;
- }
-
- st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st != 0)
- {
- switch (st)
- {
- case EINVAL:
- // There is no processor in the mask that is allowed to execute the
- // process
- SetLastError(ERROR_INVALID_PARAMETER);
- break;
- case ESRCH:
- SetLastError(ERROR_INVALID_HANDLE);
- break;
- default:
- SetLastError(ERROR_GEN_FAILURE);
- break;
- }
- }
-
- DWORD_PTR ret = (st == 0) ? prevMask : 0;
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's ignore the request
- DWORD_PTR ret = 0;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
- LOGEXIT("SetThreadAffinityMask returns %lu\n", ret);
- PERF_EXIT(SetThreadAffinityMask);
-
- return ret;
-}
-
-/*++
-Function:
- GetCurrentProcessorNumberEx
-
-See MSDN doc.
---*/
-VOID
-PALAPI
-GetCurrentProcessorNumberEx(
- OUT PPROCESSOR_NUMBER ProcNumber
-)
-{
- PERF_ENTRY(GetCurrentProcessorNumberEx);
- ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
-
- DWORD cpu = GetCurrentProcessorNumber();
- _ASSERTE((int)cpu < g_possibleCpuCount);
- ProcNumber->Group = g_cpuToAffinity[cpu].Group;
- ProcNumber->Number = g_cpuToAffinity[cpu].Number;
-
- LOGEXIT("GetCurrentProcessorNumberEx\n");
- PERF_EXIT(GetCurrentProcessorNumberEx);
-}
-
-/*++
-Function:
- GetProcessAffinityMask
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetProcessAffinityMask(
- IN HANDLE hProcess,
- OUT PDWORD_PTR lpProcessAffinityMask,
- OUT PDWORD_PTR lpSystemAffinityMask
-)
-{
- PERF_ENTRY(GetProcessAffinityMask);
- ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
-
- BOOL success = FALSE;
-
- if (hProcess == GetCurrentProcess())
+#if HAVE_NUMA_H
+ if (g_numaAvailable)
{
- int cpuCountInMask = (g_cpuCount > 64) ? 64 : g_cpuCount;
-
- DWORD_PTR systemMask = GetFullAffinityMask(cpuCountInMask);
-
-#if HAVE_SCHED_GETAFFINITY
- int pid = getpid();
- cpu_set_t cpuSet;
- int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
- if (st == 0)
+ int result = numa_node_of_cpu(procNo);
+ if (result >= 0)
{
- DWORD_PTR processMask = 0;
-
- for (int i = 0; i < cpuCountInMask; i++)
- {
- if (CPU_ISSET(i, &cpuSet))
- {
- processMask |= ((DWORD_PTR)1) << i;
- }
- }
-
- success = TRUE;
-
- *lpProcessAffinityMask = processMask;
- *lpSystemAffinityMask = systemMask;
- }
- else if (errno == EINVAL)
- {
- // There are more processors than can fit in a cpu_set_t
- // return all bits set for all processors (upto 64) for both masks.
- *lpProcessAffinityMask = systemMask;
- *lpSystemAffinityMask = systemMask;
- success = TRUE;
- }
- else
- {
- // We should not get any of the errors that the sched_getaffinity can return since none
- // of them applies for the current thread, so this is an unexpected kind of failure.
- SetLastError(ERROR_GEN_FAILURE);
+ *node = (WORD)result;
+ return TRUE;
}
-#else // HAVE_SCHED_GETAFFINITY
- // There is no API to manage thread affinity, so let's return both affinity masks
- // with all the CPUs on the system set.
- *lpSystemAffinityMask = systemMask;
- *lpProcessAffinityMask = systemMask;
-
- success = TRUE;
-#endif // HAVE_SCHED_GETAFFINITY
- }
- else
- {
- // PAL supports getting affinity mask for the current process only
- SetLastError(ERROR_INVALID_PARAMETER);
}
+#endif // HAVE_NUMA_H
- LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
- PERF_EXIT(GetProcessAffinityMask);
-
- return success;
+ return FALSE;
}
/*++
return result;
}
-
-/*++
-Function:
- SetThreadIdealProcessorEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-SetThreadIdealProcessorEx(
- IN HANDLE hThread,
- IN PPROCESSOR_NUMBER lpIdealProcessor,
- OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor)
-{
- PERF_ENTRY(SetThreadIdealProcessorEx);
- ENTRY("SetThreadIdealProcessorEx(hThread=%p, lpIdealProcessor=%p)\n", hThread, lpIdealProcessor);
-
- CPalThread *pCurrentThread = InternalGetCurrentThread();
- CPalThread *pTargetThread = NULL;
- IPalObject *pTargetThreadObject = NULL;
-
- PAL_ERROR palErr =
- InternalGetThreadDataFromHandle(pCurrentThread, hThread,
- 0, // THREAD_SET_CONTEXT
- &pTargetThread, &pTargetThreadObject);
-
- if (NO_ERROR != palErr)
- {
- ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
- palErr);
- return 0;
- }
-
- pthread_t thread = pTargetThread->GetPThreadSelf();
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
- int cpu = -1;
- if ((lpIdealProcessor->Group < g_groupCount) &&
- (lpIdealProcessor->Number < MaxCpusPerGroup) &&
- (lpIdealProcessor->Reserved == 0))
- {
- cpu = g_groupAndIndexToCpu[lpIdealProcessor->Group * MaxCpusPerGroup + lpIdealProcessor->Number];
- }
-
- if (cpu == -1)
- {
- SetLastError(ERROR_INVALID_PARAMETER);
- return FALSE;
- }
-
- if (lpPreviousIdealProcessor != NULL)
- {
- cpu_set_t prevCpuSet;
- CPU_ZERO(&prevCpuSet);
- DWORD prevCpu = GetCurrentProcessorNumber();
-
- int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
-
- if (st == 0)
- {
- for (int i = 0; i < g_possibleCpuCount; i++)
- {
- if (CPU_ISSET(i, &prevCpuSet))
- {
- prevCpu = i;
- break;
- }
- }
- }
-
- _ASSERTE((int)prevCpu < g_possibleCpuCount);
- lpPreviousIdealProcessor->Group = g_cpuToAffinity[prevCpu].Group;
- lpPreviousIdealProcessor->Number = g_cpuToAffinity[prevCpu].Number;
- lpPreviousIdealProcessor->Reserved = 0;
- }
-
- cpu_set_t cpuSet;
- CPU_ZERO(&cpuSet);
- CPU_SET(cpu, &cpuSet);
-
- int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
- if (st != 0)
- {
- switch (st)
- {
- case EINVAL:
- // There is no processor in the mask that is allowed to execute the
- // process
- SetLastError(ERROR_INVALID_PARAMETER);
- break;
- case ESRCH:
- SetLastError(ERROR_INVALID_HANDLE);
- break;
- default:
- SetLastError(ERROR_GEN_FAILURE);
- break;
- }
- }
-
- BOOL success = (st == 0);
-
-#else // HAVE_PTHREAD_GETAFFINITY_NP
- // There is no API to manage thread affinity, so let's ignore the request
- BOOL success = FALSE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
- LOGEXIT("SetThreadIdealProcessorEx returns BOOL %d\n", success);
- PERF_EXIT(SetThreadIdealProcessorEx);
-
- return success;
-}
#include <numa.h>
#include <numaif.h>
-#define numa_free_cpumask numa_bitmask_free
-
// List of all functions from the numa library that are used
#define FOR_ALL_NUMA_FUNCTIONS \
PER_FUNCTION_BLOCK(numa_available) \
PER_FUNCTION_BLOCK(mbind) \
- PER_FUNCTION_BLOCK(numa_num_possible_cpus) \
PER_FUNCTION_BLOCK(numa_max_node) \
- PER_FUNCTION_BLOCK(numa_allocate_cpumask) \
- PER_FUNCTION_BLOCK(numa_node_to_cpus) \
- PER_FUNCTION_BLOCK(numa_bitmask_weight) \
- PER_FUNCTION_BLOCK(numa_bitmask_isbitset) \
- PER_FUNCTION_BLOCK(numa_bitmask_free)
+ PER_FUNCTION_BLOCK(numa_node_of_cpu)
// Declare pointers to all the used numa functions
#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
// to the functions of libnuma in the initialization.
#define numa_available() numa_available_ptr()
#define mbind(...) mbind_ptr(__VA_ARGS__)
-#define numa_num_possible_cpus() numa_num_possible_cpus_ptr()
#define numa_max_node() numa_max_node_ptr()
-#define numa_allocate_cpumask() numa_allocate_cpumask_ptr()
-#define numa_node_to_cpus(...) numa_node_to_cpus_ptr(__VA_ARGS__)
-#define numa_bitmask_weight(...) numa_bitmask_weight_ptr(__VA_ARGS__)
-#define numa_bitmask_isbitset(...) numa_bitmask_isbitset_ptr(__VA_ARGS__)
-#define numa_bitmask_free(...) numa_bitmask_free_ptr(__VA_ARGS__)
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
#endif // HAVE_NUMA_H
#include "pal/fakepoll.h"
#endif // HAVE_POLL
#include <limits.h>
+#include <algorithm>
#if HAVE_SYS_LWP_H
#include <sys/lwp.h>
}
#endif // HAVE_MACH_EXCEPTIONS
+
+/*++
+Function:
+ PAL_SetCurrentThreadAffinity
+
+Abstract
+ Set affinity of the current thread to the specified processor.
+
+Parameters:
+ procNo - number of the processor to affinitize the current thread to
+
+Return value:
+ TRUE if the function was able to set the affinity, FALSE if it has failed.
+--*/
+BOOL
+PALAPI
+PAL_SetCurrentThreadAffinity(WORD procNo)
+{
+#if HAVE_PTHREAD_GETAFFINITY_NP
+ cpu_set_t cpuSet;
+ CPU_ZERO(&cpuSet);
+
+ int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+ if (st == 0)
+ {
+ CPU_SET(procNo, &cpuSet);
+ st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+ }
+
+ return st == 0;
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+ // There is no API to manage thread affinity, so let's ignore the request
+ return FALSE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
+
+/*++
+Function:
+ PAL_SetCurrentThreadAffinity
+
+Abstract
+ Get affinity set of the current thread. The set is represented by an array of "size" entries of UINT_PTR type.
+
+Parameters:
+ size - number of entries in the "data" array
+ data - pointer to the data of the resulting set, the LSB of the first entry in the array represents processor 0
+
+Return value:
+ TRUE if the function was able to get the affinity set, FALSE if it has failed.
+--*/
+BOOL
+PALAPI
+PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data)
+{
+ cpu_set_t cpuSet;
+ CPU_ZERO(&cpuSet);
+
+#if HAVE_PTHREAD_GETAFFINITY_NP
+ int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+ if (st == 0)
+ {
+ const SIZE_T BitsPerBitsetEntry = 8 * sizeof(UINT_PTR);
+ int nrcpus = GetTotalCpuCount();
+
+ // Get info for as much processors as it is possible to fit into the resulting set
+ SIZE_T remainingCount = std::min(size * BitsPerBitsetEntry, (SIZE_T)nrcpus);
+ SIZE_T i = 0;
+ while (remainingCount != 0)
+ {
+ UINT_PTR entry = 0;
+ SIZE_T bitsToCopy = std::min(remainingCount, BitsPerBitsetEntry);
+ SIZE_T cpuSetOffset = i * BitsPerBitsetEntry;
+ for (SIZE_T j = 0; j < bitsToCopy; j++)
+ {
+ if (CPU_ISSET(cpuSetOffset + j, &cpuSet))
+ {
+ entry |= (UINT_PTR)1 << j;
+ }
+ }
+ remainingCount -= bitsToCopy;
+ data[i++] = entry;
+ }
+ }
+
+ return st == 0;
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+ // There is no API to manage thread affinity, so let's ignore the request
+ return FALSE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
return ::VirtualAllocExNuma(hProc, lpAddr, dwSize, allocType, prot, node);
}
+#ifndef FEATURE_PAL
/*static*/ BOOL NumaNodeInfo::GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, PUSHORT node_no)
{
return ::GetNumaProcessorNodeEx(proc_no, node_no);
}
+#else // !FEATURE_PAL
+/*static*/ BOOL NumaNodeInfo::GetNumaProcessorNodeEx(USHORT proc_no, PUSHORT node_no)
+{
+ return PAL_GetNumaProcessorNode(proc_no, node_no);
+}
+#endif // !FEATURE_PAL
#endif
/*static*/ BOOL NumaNodeInfo::m_enableGCNumaAware = FALSE;
if (CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCNumaAware) == 0)
return FALSE;
-#ifndef FEATURE_PAL
- // check if required APIs are supported
- HMODULE hMod = GetModuleHandleW(WINDOWS_KERNEL32_DLLNAME_W);
-#else
- HMODULE hMod = GetCLRModule();
-#endif
- if (hMod == NULL)
- return FALSE;
-
// fail to get the highest numa node number
if (!::GetNumaHighestNodeNumber(&highest) || (highest == 0))
return FALSE;
m_enableGCNumaAware = InitNumaNodeInfoAPI();
}
+#ifndef FEATURE_PAL
+
//******************************************************************************
-// NumaNodeInfo
+// CPUGroupInfo
//******************************************************************************
#if !defined(FEATURE_REDHAWK)
/*static*/ //CPUGroupInfo::PNTQSIEx CPUGroupInfo::m_pNtQuerySystemInformationEx = NULL;
LIMITED_METHOD_CONTRACT;
return m_threadUseAllCpuGroups;
}
+#endif // !FEATURE_PAL
//******************************************************************************
// Returns the number of processors that a process has been configured to run on
return cCPUs;
unsigned int count = 0;
+
+#ifndef FEATURE_PAL
DWORD_PTR pmask, smask;
if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
count = 64;
}
-#ifdef FEATURE_PAL
- uint32_t cpuLimit;
+#else // !FEATURE_PAL
+ count = PAL_GetLogicalCpuCountFromOS();
+ uint32_t cpuLimit;
if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < count)
count = cpuLimit;
-#endif
+#endif // !FEATURE_PAL
cCPUs = count;
return count;
}
+#ifndef FEATURE_PAL
DWORD_PTR GetCurrentProcessCpuMask()
{
CONTRACTL
return 0;
#endif
}
+#endif // !FEATURE_PAL
uint32_t GetOsPageSizeUncached()
{
// Need to do this as early as possible. Used by creating object handle
// table inside Ref_Initialization() before GC is initialized.
NumaNodeInfo::InitNumaNodeInfo();
+#ifndef FEATURE_PAL
CPUGroupInfo::EnsureInitialized();
-
+#endif // !FEATURE_PAL
// Initialize global configuration settings based on startup flags
// This needs to be done before the EE has started
tieredCompilation_StartupTier_CallCountingDelayMs =
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TC_StartupTier_CallCountingDelayMs);
- if (CPUGroupInfo::HadSingleProcessorAtStartup())
+
+#ifndef FEATURE_PAL
+ bool hadSingleProcessorAtStartup = g_SystemInfo.dwNumberOfProcessors == 1;//CPUGroupInfo::HadSingleProcessorAtStartup();
+#else // !FEATURE_PAL
+ bool hadSingleProcessorAtStartup = g_SystemInfo.dwNumberOfProcessors == 1;
+#endif // !FEATURE_PAL
+
+ if (hadSingleProcessorAtStartup)
{
DWORD delayMultiplier =
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TC_StartupTier_DelaySingleProcMultiplier);
static AffinitySet g_processAffinitySet;
+#ifdef FEATURE_PAL
+static uint32_t g_currentProcessCpuCount;
+#endif // FEATURE_PAL
+
class GroupProcNo
{
uint16_t m_groupProc;
#ifdef FEATURE_PAL
g_pageSizeUnixInl = GetOsPageSize();
-#endif
+ g_currentProcessCpuCount = PAL_GetLogicalCpuCountFromOS();
+ if (PAL_GetCurrentThreadAffinitySet(AffinitySet::BitsetDataSize, g_processAffinitySet.GetBitsetData()))
+ {
+ assert(g_currentProcessCpuCount == g_processAffinitySet.Count());
+ }
+ else
+ {
+ // There is no way to get affinity on the current OS, set the affinity set to reflect all processors
+ for (size_t i = 0; i < g_currentProcessCpuCount; i++)
+ {
+ g_processAffinitySet.Add(i);
+ }
+ }
+#else // FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups())
{
// When CPU groups are enabled, then the process is not bound by the process affinity set at process launch.
}
}
}
+#endif // FEATURE_PAL
return true;
}
LIMITED_METHOD_CONTRACT;
bool success = true;
-
+#ifndef FEATURE_PAL
GroupProcNo srcGroupProcNo(srcProcNo);
GroupProcNo dstGroupProcNo(dstProcNo);
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
}
-#if !defined(FEATURE_PAL)
else
{
if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
}
}
-#endif // !defined(FEATURE_PAL)
-#endif
-
+#endif // !FEATURE_CORESYSTEM
return success;
+
+#else // !FEATURE_PAL
+ return GCToOSInterface::SetThreadAffinity(dstProcNo);
+
+#endif // !FEATURE_PAL
}
// Get the number of the current processor
bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
{
LIMITED_METHOD_CONTRACT;
-
+#ifndef FEATURE_PAL
GroupProcNo groupProcNo(procNo);
if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
{
return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
}
+#else // FEATURE_PAL
+ return PAL_SetCurrentThreadAffinity(procNo);
+#endif // FEATURE_PAL
}
// Boosts the calling thread's thread priority to a level higher than the default
// set of enabled processors
const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffinityMask, const AffinitySet* configAffinitySet)
{
+#ifndef FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups())
+#endif // !FEATURE_PAL
{
if (!configAffinitySet->IsEmpty())
{
}
}
}
+#ifndef FEATURE_PAL
else
{
if (configAffinityMask != 0)
}
}
}
+#endif // !FEATURE_PAL
return &g_processAffinitySet;
}
{
LIMITED_METHOD_CONTRACT;
+#ifndef FEATURE_PAL
// GetCurrentProcessCpuCount only returns up to 64 procs.
return CPUGroupInfo::CanEnableGCCPUGroups() ?
GCToOSInterface::GetTotalProcessorCount():
::GetCurrentProcessCpuCount();
+#else // !FEATURE_PAL
+ return g_currentProcessCpuCount;
+#endif // !FEATURE_PAL
}
// Return the size of the user-mode portion of the virtual address space of this process.
{
LIMITED_METHOD_CONTRACT;
+#ifndef FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups())
{
return CPUGroupInfo::GetNumActiveProcessors();
{
return g_SystemInfo.dwNumberOfProcessors;
}
+#else // !FEATURE_PAL
+ return g_currentProcessCpuCount;
+#endif // !FEATURE_PAL
}
bool GCToOSInterface::CanEnableGCNumaAware()
return NumaNodeInfo::CanEnableGCNumaAware() != FALSE;
}
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
- LIMITED_METHOD_CONTRACT;
-
- GroupProcNo groupProcNo(proc_no);
-
- PROCESSOR_NUMBER procNumber;
- procNumber.Group = groupProcNo.GetGroup();
- procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
- procNumber.Reserved = 0;
-
- return NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
-}
-
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
// heap_number - heap number to get the result for
{
bool success = false;
- if (CPUGroupInfo::CanEnableGCCPUGroups())
+ // Locate heap_number-th available processor
+ uint16_t procNumber;
+ size_t cnt = heap_number;
+ for (uint16_t i = 0; i < GCToOSInterface::GetTotalProcessorCount(); i++)
{
- uint16_t gn, gpn;
- CPUGroupInfo::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
+ if (g_processAffinitySet.Contains(i))
+ {
+ if (cnt == 0)
+ {
+ procNumber = i;
+ success = true;
+ break;
+ }
+
+ cnt--;
+ }
+ }
+
+ if (success)
+ {
+#ifndef FEATURE_PAL
+ WORD gn, gpn;
+
+ if (CPUGroupInfo::CanEnableGCCPUGroups())
+ {
+ CPUGroupInfo::GetGroupForProcessor(procNumber, &gn, &gpn);
+ }
+ else
+ {
+ gn = GroupProcNo::NoGroup;
+ gpn = procNumber;
+ }
+
+ GroupProcNo groupProcNo(gn, gpn);
+ *proc_no = groupProcNo.GetCombinedValue();
- *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
if (GCToOSInterface::CanEnableGCNumaAware())
{
- if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+ PROCESSOR_NUMBER procNumber;
+
+ if (CPUGroupInfo::CanEnableGCCPUGroups())
+ {
+ procNumber.Group = gn;
+ }
+ else
+ {
+ // Get the current processor group
+ PROCESSOR_NUMBER procNumber;
+ GetCurrentProcessorNumberEx(&procNumber);
+ }
+
+ procNumber.Number = (BYTE)gpn;
+ procNumber.Reserved = 0;
+
+ if (NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no))
{
*node_no = NUMA_NODE_UNDEFINED;
}
}
else
{ // no numa setting, each cpu group is treated as a node
- *node_no = gn;
+ *node_no = groupProcNo.GetGroup();
}
-
- success = true;
- }
- else
- {
- int bit_number = 0;
- uint8_t proc_number = 0;
- for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+#else // !FEATURE_PAL
+ *proc_no = procNumber;
+ if (!GCToOSInterface::CanEnableGCNumaAware() || !NumaNodeInfo::GetNumaProcessorNodeEx(procNumber, (WORD*)node_no))
{
- if (g_processAffinitySet.Contains(proc_number))
- {
- if (bit_number == heap_number)
- {
- *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
-
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
- {
- *node_no = NUMA_NODE_UNDEFINED;
- }
- }
-
- success = true;
- break;
- }
- bit_number++;
- }
- proc_number++;
+ *node_no = NUMA_NODE_UNDEFINED;
}
+#endif // !FEATURE_PAL
}
return success;
size_t index_offset = 0;
char* number_end;
+#ifndef FEATURE_PAL
size_t group_number = strtoul(*config_string, &number_end, 10);
if ((number_end == *config_string) || (*number_end != ':'))
index_offset = group_begin;
*config_string = number_end + 1;
+#endif // !FEATURE_PAL
size_t start, end;
if (!ParseIndexOrRange(config_string, &start, &end))
return false;
}
+#ifndef FEATURE_PAL
if ((start >= group_size) || (end >= group_size))
{
// Invalid CPU index values or range
return false;
}
+#endif // !FEATURE_PAL
*start_index = index_offset + start;
*end_index = index_offset + end;
GC_TRIGGERS;
}
CONTRACTL_END;
-
+#ifndef FEATURE_PAL
if (!CPUGroupInfo::CanEnableGCCPUGroups() || !CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
return;
CPUGroupInfo::SetThreadGroupAffinity(GetThreadHandle(), &groupAffinity, NULL);
m_wCPUGroup = groupAffinity.Group;
m_pAffinityMask = groupAffinity.Mask;
+#endif // !FEATURE_PAL
}
void Thread::ClearThreadCPUGroupAffinity()
GC_NOTRIGGER;
}
CONTRACTL_END;
-
+#ifndef FEATURE_PAL
if (!CPUGroupInfo::CanEnableGCCPUGroups() || !CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
return;
m_wCPUGroup = 0;
m_pAffinityMask = 0;
+#endif // !FEATURE_PAL
}
DWORD Thread::StartThread()
m_fGCSpecial = FALSE;
+#ifndef FEATURE_PAL
m_wCPUGroup = 0;
m_pAffinityMask = 0;
+#endif // !FEATURE_PAL
m_pAllLoggedTypes = NULL;
void SetGCSpecial(bool fGCSpecial);
private:
+#ifndef FEATURE_PAL
WORD m_wCPUGroup;
DWORD_PTR m_pAffinityMask;
-
+#endif // !FEATURE_PAL
public:
void ChooseThreadCPUGroupAffinity();
void ClearThreadCPUGroupAffinity();
UnManagedPerAppDomainTPCount* pADTPCount;
pADTPCount = PerAppDomainTPCountList::GetUnmanagedTPCount();
+#ifndef FEATURE_PAL
//ThreadPool_CPUGroup
CPUGroupInfo::EnsureInitialized();
if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
NumberOfProcessors = CPUGroupInfo::GetNumActiveProcessors();
else
NumberOfProcessors = GetCurrentProcessCpuCount();
+#else // !FEATURE_PAL
+ NumberOfProcessors = GetCurrentProcessCpuCount();
+#endif // !FEATURE_PAL
InitPlatformVariables();
EX_TRY
RetiredWorkerSemaphore = new CLRLifoSemaphore();
RetiredWorkerSemaphore->Create(0, ThreadCounter::MaxPossibleCount);
+#ifndef FEATURE_PAL
//ThreadPool_CPUGroup
if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
RecycledLists.Initialize( CPUGroupInfo::GetNumActiveProcessors() );
else
RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
- /*
- {
- SYSTEM_INFO sysInfo;
-
- ::GetSystemInfo( &sysInfo );
-
- RecycledLists.Initialize( sysInfo.dwNumberOfProcessors );
- }
- */
+#else // !FEATURE_PAL
+ RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
+#endif // !FEATURE_PAL
}
EX_CATCH
{
return 0;
}
+#ifndef FEATURE_PAL
//GateThread can start before EESetup, so ensure CPU group information is initialized;
CPUGroupInfo::EnsureInitialized();
-
+#endif // !FEATURE_PAL
// initialize CPU usage information structure;
prevCPUInfo.idleTime.QuadPart = 0;
prevCPUInfo.kernelTime.QuadPart = 0;
{
LIMITED_METHOD_CONTRACT;
+ DWORD processorNumber = 0;
+
+#ifndef FEATURE_PAL
if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
- return pRecycledListPerProcessor[CPUGroupInfo::CalculateCurrentProcessorNumber()][memType];
+ processorNumber = CPUGroupInfo::CalculateCurrentProcessorNumber();
else
// Turns out GetCurrentProcessorNumber can return a value greater than the number of processors reported by
// GetSystemInfo, if we're running in WOW64 on a machine with >32 processors.
- return pRecycledListPerProcessor[GetCurrentProcessorNumber()%NumberOfProcessors][memType];
+ processorNumber = GetCurrentProcessorNumber()%NumberOfProcessors;
+#else // !FEATURE_PAL
+ if (PAL_HasGetCurrentProcessorNumber())
+ {
+ processorNumber = GetCurrentProcessorNumber();
+ }
+#endif // !FEATURE_PAL
+ return pRecycledListPerProcessor[processorNumber][memType];
}
};