#define E_FAIL 0x80004005
#define E_OUTOFMEMORY 0x8007000E
#define COR_E_EXECUTIONENGINE 0x80131506
+#define CLR_E_GC_BAD_AFFINITY_CONFIG 0x8013200A
#define NOERROR 0x0
#define ERROR_TIMEOUT 1460
#define YieldProcessor System_YieldProcessor
#endif
-#define NUMA_NODE_UNDEFINED UINT32_MAX
+#define NUMA_NODE_UNDEFINED UINT16_MAX
// Critical section used by the GC
class CLRCriticalSection
};
};
-// Affinity of a GC thread
-struct GCThreadAffinity
-{
- static const int None = -1;
-
- // Processor group index, None if no group is specified
- int Group;
- // Processor index, None if no affinity is specified
- int Processor;
-};
-
// An event is a synchronization object whose state can be set and reset
// indicating that an event has occured. It is used pervasively throughout
// the GC.
// GC thread function prototype
typedef void (*GCThreadFunction)(void* param);
+#ifdef BIT64
+// Right now we support maximum 1024 procs - meaning that we will create at most
+// that many GC threads and GC heaps.
+#define MAX_SUPPORTED_CPUS 1024
+#else
+#define MAX_SUPPORTED_CPUS 64
+#endif // BIT64
+
+// Set of processor indices used to store affinity.
+class AffinitySet
+{
+    // Width in bits of one bitset word.
+    static const size_t BitsPerBitsetEntry = 8 * sizeof(uintptr_t);
+
+    // Fixed-size bitmap: bit i set <=> processor i is in the set.
+    // NOTE(review): there is no bounds check anywhere in this class, so
+    // callers must guarantee cpuIndex < MAX_SUPPORTED_CPUS; a larger index
+    // reads/writes past m_bitset.
+ uintptr_t m_bitset[MAX_SUPPORTED_CPUS / BitsPerBitsetEntry];
+
+    // Mask selecting cpuIndex's bit within its bitset word.
+ uintptr_t GetBitsetEntryMask(size_t cpuIndex)
+ {
+ return (uintptr_t)1 << (cpuIndex & (BitsPerBitsetEntry - 1));
+ }
+
+    // Index of the bitset word that holds cpuIndex's bit.
+ size_t GetBitsetEntryIndex(size_t cpuIndex)
+ {
+ return cpuIndex / BitsPerBitsetEntry;
+ }
+
+public:
+
+    // Constructs an empty set (all bits cleared).
+ AffinitySet()
+ {
+ memset(m_bitset, 0, sizeof(m_bitset));
+ }
+
+ // Check if the set contains a processor
+    // NOTE(review): the query methods (Contains/IsEmpty/Count) could be const.
+ bool Contains(size_t cpuIndex)
+ {
+ return (m_bitset[GetBitsetEntryIndex(cpuIndex)] & GetBitsetEntryMask(cpuIndex)) != 0;
+ }
+
+ // Add a processor to the set
+ void Add(size_t cpuIndex)
+ {
+ m_bitset[GetBitsetEntryIndex(cpuIndex)] |= GetBitsetEntryMask(cpuIndex);
+ }
+
+ // Remove a processor from the set
+ void Remove(size_t cpuIndex)
+ {
+ m_bitset[GetBitsetEntryIndex(cpuIndex)] &= ~GetBitsetEntryMask(cpuIndex);
+ }
+
+ // Check if the set is empty
+ bool IsEmpty()
+ {
+ for (size_t i = 0; i < MAX_SUPPORTED_CPUS / BitsPerBitsetEntry; i++)
+ {
+ if (m_bitset[i] != 0)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ // Return number of processors in the affinity set
+    // NOTE(review): O(MAX_SUPPORTED_CPUS) bit-by-bit scan; acceptable for the
+    // init-time callers, but a per-word popcount would be cheaper if this
+    // ever runs on a hot path.
+ size_t Count()
+ {
+ size_t count = 0;
+ for (size_t i = 0; i < MAX_SUPPORTED_CPUS; i++)
+ {
+ if (Contains(i))
+ {
+ count++;
+ }
+ }
+
+ return count;
+ }
+};
+
// Interface that the GC uses to invoke OS specific functionality
class GCToOSInterface
{
// size - size of the virtual memory range
// Return:
// true if it has succeeded, false if it has failed
- static bool VirtualCommit(void *address, size_t size, uint32_t node = NUMA_NODE_UNDEFINED);
+ static bool VirtualCommit(void *address, size_t size, uint16_t node = NUMA_NODE_UNDEFINED);
// Decommit virtual memory range.
// Parameters:
// Check if the OS supports getting current processor number
static bool CanGetCurrentProcessorNumber();
- // Set ideal processor for the current thread
+ // Add ideal processor for the current thread
// Parameters:
- // processorIndex - index of the processor in the group
- // affinity - ideal processor affinity for the thread
+ // srcProcNo - processor number the thread currently runs on
+ // dstProcNo - processor number the thread should be migrated to
// Return:
// true if it has succeeded, false if it has failed
- static bool SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity);
+ static bool MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo);
// Get numeric id of the current thread if possible on the
// current platform. It is indended for logging purposes only.
// The number of processors
static uint32_t GetCurrentProcessCpuCount();
- // Sets the calling thread's affinity to only run on the processor specified
- // in the GCThreadAffinity structure.
+ // Sets the calling thread's affinity to only run on the processor specified.
// Parameters:
- // affinity - The requested affinity for the calling thread. At most one processor
- // can be provided.
+ // procNo - The requested affinity for the calling thread.
+ //
// Return:
// true if setting the affinity was successful, false otherwise.
- static bool SetThreadAffinity(GCThreadAffinity* affinity);
+ static bool SetThreadAffinity(uint16_t procNo);
// Boosts the calling thread's thread priority to a level higher than the default
// for new threads.
// true if the priority boost was successful, false otherwise.
static bool BoostThreadPriority();
- // Get affinity mask of the current process
- // Parameters:
- // processMask - affinity mask for the specified process
- // systemMask - affinity mask for the system
+ // Get set of processors enabled for GC for the current process
// Return:
- // true if it has succeeded, false if it has failed
- // Remarks:
- // A process affinity mask is a bit vector in which each bit represents the processors that
- // a process is allowed to run on. A system affinity mask is a bit vector in which each bit
- // represents the processors that are configured into a system.
- // A process affinity mask is a subset of the system affinity mask. A process is only allowed
- // to run on the processors configured into a system. Therefore, the process affinity mask cannot
- // specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
- static bool GetCurrentProcessAffinityMask(uintptr_t *processMask, uintptr_t *systemMask);
+ // set of enabled processors
+ static AffinitySet* GetCurrentProcessAffinitySet();
//
// Global memory info
static bool CanEnableGCNumaAware();
// Gets the NUMA node for the processor
- static bool GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no);
-
- // Are CPU groups enabled
- static bool CanEnableGCCPUGroups();
+ static bool GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no);
- // Get the CPU group for the specified processor
- static void GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number);
+ // Get processor number and optionally its NUMA node number for the specified heap number
+ // Parameters:
+ // heap_number - heap number to get the result for
+ // proc_no - set to the selected processor number
+ // node_no - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
+ // Return:
+ // true if it succeeded
+ static bool GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no);
};
#define LOH_PIN_QUEUE_LENGTH 100
#define LOH_PIN_DECAY 10
-#ifdef BIT64
-// Right now we support maximum 1024 procs - meaning that we will create at most
-// that many GC threads and GC heaps.
-#define MAX_SUPPORTED_CPUS 1024
-#else
-#define MAX_SUPPORTED_CPUS 64
-#endif // BIT64
-
uint32_t yp_spin_count_unit = 0;
size_t loh_size_threshold = LARGE_OBJECT_SIZE;
static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
- static uint16_t heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
- static uint16_t heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
heap_no_to_numa_node[heap_number] = numa_node;
}
- static uint16_t find_cpu_group_from_heap_no(int heap_number)
- {
- return heap_no_to_cpu_group[heap_number];
- }
-
- static void set_cpu_group_for_heap(int heap_number, uint16_t group_number)
- {
- heap_no_to_cpu_group[heap_number] = group_number;
- }
-
- static uint16_t find_group_proc_from_heap_no(int heap_number)
- {
- return heap_no_to_group_proc[heap_number];
- }
-
- static void set_group_proc_for_heap(int heap_number, uint16_t group_proc)
- {
- heap_no_to_group_proc[heap_number] = group_proc;
- }
-
static void init_numa_node_to_heap_map(int nheaps)
{ // called right after GCHeap::Init() for each heap is finished
// when numa is not enabled, heap_no_to_numa_node[] are all filled
uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
BOOL gc_heap::create_thread_support (unsigned number_of_heaps)
}
}
-void set_thread_group_affinity_for_heap(int heap_number, GCThreadAffinity* affinity)
+// Affinitize the calling server GC thread to the processor selected for
+// heap_number and record the proc-no / NUMA-node mapping in heap_select.
+// Replaces the former pair of group-aware / mask-based helpers; the
+// group-vs-flat distinction now lives behind GetProcessorForHeap.
+void set_thread_affinity_for_heap(int heap_number)
{
- affinity->Group = GCThreadAffinity::None;
- affinity->Processor = GCThreadAffinity::None;
+ uint16_t proc_no;
+ uint16_t node_no;
- uint16_t gn, gpn;
- GCToOSInterface::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-
- int bit_number = 0;
- for (uintptr_t mask = 1; mask !=0; mask <<=1)
+ // NOTE(review): if GetProcessorForHeap fails (heap_number beyond the
+ // enabled-processor count) this function silently does nothing — no
+ // mapping is recorded and no affinity is set; confirm callers size the
+ // heap count from the same affinity set so this cannot happen.
+ if (GCToOSInterface::GetProcessorForHeap(heap_number, &proc_no, &node_no))
{
- if (bit_number == gpn)
+ heap_select::set_proc_no_for_heap(heap_number, proc_no);
+ // NUMA_NODE_UNDEFINED means the platform could not report a node;
+ // leave the heap's default node mapping untouched in that case.
+ if (node_no != NUMA_NODE_UNDEFINED)
{
- dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, mask, heap_number));
- affinity->Processor = gpn;
- affinity->Group = gn;
- heap_select::set_cpu_group_for_heap(heap_number, gn);
- heap_select::set_group_proc_for_heap(heap_number, gpn);
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- PROCESSOR_NUMBER proc_no;
- proc_no.Group = gn;
- proc_no.Number = (uint8_t)gpn;
- proc_no.Reserved = 0;
-
- uint16_t node_no = 0;
- if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
- heap_select::set_numa_node_for_heap(heap_number, node_no);
- }
- else
- { // no numa setting, each cpu group is treated as a node
- heap_select::set_numa_node_for_heap(heap_number, gn);
- }
- return;
+ heap_select::set_numa_node_for_heap(heap_number, node_no);
}
- bit_number++;
- }
-}
-
-void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affinity)
-{
- affinity->Group = GCThreadAffinity::None;
- affinity->Processor = GCThreadAffinity::None;
-
- uintptr_t pmask = process_mask;
- int bit_number = 0;
- uint8_t proc_number = 0;
- for (uintptr_t mask = 1; mask != 0; mask <<= 1)
- {
- if ((mask & pmask) != 0)
+ // Failure to affinitize is logged but not fatal; the thread keeps
+ // running without a pinned processor.
+ if (!GCToOSInterface::SetThreadAffinity(proc_no))
{
- if (bit_number == heap_number)
- {
- dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
- affinity->Processor = proc_number;
- heap_select::set_proc_no_for_heap(heap_number, proc_number);
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- uint16_t node_no = 0;
- PROCESSOR_NUMBER proc_no;
- proc_no.Group = 0;
- proc_no.Number = (uint8_t)proc_number;
- proc_no.Reserved = 0;
- if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
- {
- heap_select::set_numa_node_for_heap(heap_number, node_no);
- }
- }
- return;
- }
- bit_number++;
+ dprintf(1, ("Failed to set thread affinity for server GC thread"));
}
- proc_number++;
}
}
{
if (GCToOSInterface::CanEnableGCNumaAware())
{
- uint32_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
+ uint16_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
if (GCToOSInterface::VirtualCommit(addr, size, numa_node))
return true;
}
acontext->set_alloc_heap(GCHeap::GetHeap(max_hp->heap_number));
if (!gc_thread_no_affinitize_p)
{
- if (GCToOSInterface::CanEnableGCCPUGroups())
- { //only set ideal processor when max_hp and org_hp are in the same cpu
- //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
- uint16_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number);
- uint16_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number);
- if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough
- {
- uint16_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number);
-
- GCThreadAffinity affinity;
- affinity.Processor = group_proc_no;
- affinity.Group = org_gn;
- if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
- {
- dprintf (3, ("Failed to set the ideal processor and group for heap %d.",
- org_hp->heap_number));
- }
- }
- }
- else
- {
- uint16_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
-
- GCThreadAffinity affinity;
- affinity.Processor = proc_no;
- affinity.Group = GCThreadAffinity::None;
+ uint16_t src_proc_no = heap_select::find_proc_no_from_heap_no(org_hp->heap_number);
+ uint16_t dst_proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
- if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
- {
- dprintf (3, ("Failed to set the ideal processor for heap %d.",
- org_hp->heap_number));
- }
+ if (!GCToOSInterface::MigrateThread(src_proc_no, dst_proc_no))
+ {
+ dprintf (3, ("Failed to set the ideal processor for heap %d.",
+ org_hp->heap_number));
}
}
dprintf (3, ("Switching context %p (home heap %d) ",
acontext,
- acontext->get_home_heap()->pGenGCHeap->heap_number));
+ acontext->get_home_heap()->pGenGCHeap->heap_number));
dprintf (3, (" from heap %d (%Id free bytes, %d contexts) ",
org_hp->heap_number,
org_size,
gc_heap* heap = (gc_heap*)arg;
if (!gc_thread_no_affinitize_p)
{
- GCThreadAffinity affinity;
- affinity.Group = GCThreadAffinity::None;
- affinity.Processor = GCThreadAffinity::None;
-
// We are about to set affinity for GC threads. It is a good place to set up NUMA and
// CPU groups because the process mask, processor number, and group number are all
// readily available.
- if (GCToOSInterface::CanEnableGCCPUGroups())
- set_thread_group_affinity_for_heap(heap->heap_number, &affinity);
- else
- set_thread_affinity_mask_for_heap(heap->heap_number, &affinity);
-
- if (!GCToOSInterface::SetThreadAffinity(&affinity))
- {
- dprintf(1, ("Failed to set thread affinity for server GC thread"));
- }
+ set_thread_affinity_for_heap(heap->heap_number);
}
// server GC threads run at a higher priority than normal.
uint32_t nhp_from_config = 0;
#ifdef MULTIPLE_HEAPS
+ AffinitySet config_affinity_set;
+
+ // Get the affinity set configured by the user
+ uintptr_t heap_affinity_mask = GCConfig::GetGCHeapAffinitizeMask();
+ if (heap_affinity_mask != 0)
+ {
+ for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
+ {
+ if (heap_affinity_mask & ((uintptr_t)1 << i))
+ {
+ config_affinity_set.Add(i);
+ }
+ }
+ }
+ else
+ {
+ GCConfigStringHolder cpu_index_ranges_holder(GCConfig::GetGCHeapAffinitizeRanges());
+ const char* cpu_index_ranges = cpu_index_ranges_holder.Get();
+
+ // The cpu index ranges string is a comma-separated list of indices or ranges of indices (e.g. 1-5).
+ // Example 1,3,5,7-9,12
+
+ if (cpu_index_ranges != NULL)
+ {
+ char* number_end;
+
+ do
+ {
+ size_t start_index = strtoul(cpu_index_ranges, &number_end, 10);
+
+ if (number_end == cpu_index_ranges)
+ {
+ // No number found, invalid format
+ break;
+ }
+
+ size_t end_index = start_index;
+
+ if (*number_end == '-')
+ {
+ char* range_end_start = number_end + 1;
+ end_index = strtoul(range_end_start, &number_end, 10);
+ if (number_end == range_end_start)
+ {
+ // No number found, invalid format
+ break;
+ }
+ }
+
+ if ((start_index < MAX_SUPPORTED_CPUS) && end_index < (MAX_SUPPORTED_CPUS))
+ {
+ for (size_t i = start_index; i <= end_index; i++)
+ {
+ config_affinity_set.Add(i);
+ }
+ }
+
+ cpu_index_ranges = number_end + 1;
+ }
+ while (*number_end == ',');
+ }
+ }
+
+ AffinitySet* process_affinity_set = GCToOSInterface::GetCurrentProcessAffinitySet();
+
+ if (!config_affinity_set.IsEmpty())
+ {
+ // Update the process affinity set using the configured set
+ for (size_t i = 0; i < MAX_SUPPORTED_CPUS; i++)
+ {
+ if (process_affinity_set->Contains(i) && !config_affinity_set.Contains(i))
+ {
+ process_affinity_set->Remove(i);
+ }
+ }
+ }
+
+ if (process_affinity_set->IsEmpty())
+ {
+ return CLR_E_GC_BAD_AFFINITY_CONFIG;
+ }
+
nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());
- // GetCurrentProcessCpuCount only returns up to 64 procs.
- uint32_t nhp_from_process = GCToOSInterface::CanEnableGCCPUGroups() ?
- GCToOSInterface::GetTotalProcessorCount():
- GCToOSInterface::GetCurrentProcessCpuCount();
+ uint32_t nhp_from_process = GCToOSInterface::GetCurrentProcessCpuCount();
if (nhp_from_config)
{
#ifndef FEATURE_REDHAWK
gc_heap::gc_thread_no_affinitize_p = (gc_heap::heap_hard_limit ? false : (GCConfig::GetNoAffinitize() != 0));
- size_t gc_thread_affinity_mask = static_cast<size_t>(GCConfig::GetGCHeapAffinitizeMask());
-
if (gc_heap::heap_hard_limit)
{
- gc_heap::gc_thread_no_affinitize_p = (gc_thread_affinity_mask == 0);
+ gc_heap::gc_thread_no_affinitize_p = (config_affinity_set.Count() == 0);
}
if (!(gc_heap::gc_thread_no_affinitize_p))
{
- if (!(GCToOSInterface::CanEnableGCCPUGroups()))
- {
- uintptr_t pmask, smask;
- if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask))
- {
- pmask &= smask;
-
-#ifdef FEATURE_PAL
- // GetCurrentProcessAffinityMask can return pmask=0 and smask=0 on
- // systems with more than 1 NUMA node. The pmask decides the
- // number of GC heaps to be used and the processors they are
- // affinitized with. So pmask is now set to reflect that 64
- // processors are available to begin with. The actual processors in
- // the system may be lower and are taken into account before
- // finalizing the number of heaps.
- if (!pmask)
- {
- pmask = SIZE_T_MAX;
- }
-#endif // FEATURE_PAL
-
- if (gc_thread_affinity_mask)
- {
- pmask &= gc_thread_affinity_mask;
- }
-
- process_mask = pmask;
-
- unsigned int set_bits_in_pmask = 0;
- while (pmask)
- {
- if (pmask & 1)
- set_bits_in_pmask++;
- pmask >>= 1;
- }
-
- nhp = min (nhp, set_bits_in_pmask);
+ uint32_t num_affinitized_processors = (uint32_t)process_affinity_set->Count();
+ if (num_affinitized_processors != 0)
+ {
+ nhp = min(nhp, num_affinitized_processors);
+ }
#ifdef FEATURE_PAL
- // Limit the GC heaps to the number of processors available in the system.
- nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
+ // Limit the GC heaps to the number of processors available in the system.
+ nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
#endif // FEATURE_PAL
- }
- else
- {
- gc_heap::gc_thread_no_affinitize_p = true;
- }
- }
}
#endif //!FEATURE_REDHAWK
#endif //MULTIPLE_HEAPS
"Specifies the ratio compacting GCs vs sweeping") \
INT_CONFIG(GCHeapAffinitizeMask, "GCHeapAffinitizeMask", 0, \
"Specifies processor mask for Server GC threads") \
+ STRING_CONFIG(GCHeapAffinitizeRanges, "GCHeapAffinitizeRanges", \
+ "Specifies list of processors for Server GC threads") \
INT_CONFIG(GCHighMemPercent, "GCHighMemPercent", 0, \
"The percent for GC to consider as high memory") \
INT_CONFIG(GCProvModeStress, "GCProvModeStress", 0, \
#cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK
#cmakedefine01 HAVE_MACH_ABSOLUTE_TIME
#cmakedefine01 HAVE_SCHED_GETAFFINITY
+#cmakedefine01 HAVE_PTHREAD_GETAFFINITY_NP
#endif // __CONFIG_H__
}
" HAVE_MACH_ABSOLUTE_TIME)
+
check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY)
+check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
+
+if (HAVE_LIBPTHREAD)
+ set(PTHREAD_LIBRARY pthread)
+elseif (HAVE_PTHREAD_IN_LIBC)
+ set(PTHREAD_LIBRARY c)
+endif()
+
+check_library_exists(${PTHREAD_LIBRARY} pthread_getaffinity_np "" HAVE_PTHREAD_GETAFFINITY_NP)
configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_ONLN
#endif
-// The cachced number of logical CPUs observed.
+// The cached number of logical CPUs observed.
static uint32_t g_logicalCpuCount = 0;
+// The cached number of CPUs available for the current process.
+static uint32_t g_currentProcessCpuCount = 0;
+
// Helper memory page used by the FlushProcessWriteBuffers
static uint8_t* g_helperPage = 0;
uint32_t g_pageSizeUnixInl = 0;
+AffinitySet g_processAffinitySet;
+
// Initialize the interface implementation
// Return:
// true if it has succeeded, false if it has failed
InitializeCGroup();
+#if HAVE_SCHED_GETAFFINITY
+
+ g_currentProcessCpuCount = 0;
+
+ cpu_set_t cpuSet;
+ int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
+
+ if (st == 0)
+ {
+ for (size_t i = 0; i < g_logicalCpuCount; i++)
+ {
+ if (CPU_ISSET(i, &cpuSet))
+ {
+ g_currentProcessCpuCount++;
+ g_processAffinitySet.Add(i);
+ }
+ }
+ }
+ else
+ {
+ // We should not get any of the errors that the sched_getaffinity can return since none
+ // of them applies for the current thread, so this is an unexpected kind of failure.
+ assert(false);
+ }
+
+#else // HAVE_SCHED_GETAFFINITY
+
+ g_currentProcessCpuCount = g_logicalCpuCount;
+
+ for (size_t i = 0; i < g_logicalCpuCount; i++)
+ {
+ g_processAffinitySet.Add(i);
+ }
+
+#endif // HAVE_SCHED_GETAFFINITY
+
return true;
}
return getpid();
}
-// Set ideal affinity for the current thread
+// Set ideal processor for the current thread
// Parameters:
-// affinity - ideal processor affinity for the thread
+// srcProcNo - processor number the thread currently runs on
+// dstProcNo - processor number the thread should be migrated to
// Return:
// true if it has succeeded, false if it has failed
-bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo)
{
- // TODO(segilles)
- return false;
+ return GCToOSInterface::SetThreadAffinity(dstProcNo);
}
// Get the number of the current processor
// size - size of the virtual memory range
// Return:
// true if it has succeeded, false if it has failed
-bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint32_t node)
+// Commit the virtual memory range by making it readable/writable.
+// NUMA-aware commit is not implemented on this platform yet, so callers must
+// pass NUMA_NODE_UNDEFINED (now UINT16_MAX to match the narrower node type).
+bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
{
assert(node == NUMA_NODE_UNDEFINED && "Numa allocation is not ported to local GC on unix yet");
return mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
}
// Sets the calling thread's affinity to only run on the processor specified
-// in the GCThreadAffinity structure.
// Parameters:
-// affinity - The requested affinity for the calling thread. At most one processor
-// can be provided.
+// procNo - The requested processor for the calling thread.
// Return:
// true if setting the affinity was successful, false otherwise.
-bool GCToOSInterface::SetThreadAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
{
- // [LOCALGC TODO] Thread affinity for unix
+#if HAVE_PTHREAD_GETAFFINITY_NP
+ // NOTE(review): the configure guard probes pthread_getaffinity_np but the
+ // call below is pthread_setaffinity_np; the two are presumably always
+ // provided together — confirm the cmake check covers the setter too.
+ cpu_set_t cpuSet;
+ CPU_ZERO(&cpuSet);
+ CPU_SET((int)procNo, &cpuSet);
+
+ // Pin the calling thread to exactly the one requested processor.
+ int st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+ return (st == 0);
+
+#else // HAVE_PTHREAD_GETAFFINITY_NP
+ // There is no API to manage thread affinity, so let's ignore the request
return false;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
}
// Boosts the calling thread's thread priority to a level higher than the default
return false;
}
-/*++
-Function:
- GetFullAffinityMask
-
-Get affinity mask for the specified number of processors with all
-the processors enabled.
---*/
-static uintptr_t GetFullAffinityMask(int cpuCount)
-{
- if ((size_t)cpuCount < sizeof(uintptr_t) * 8)
- {
- return ((uintptr_t)1 << cpuCount) - 1;
- }
-
- return ~(uintptr_t)0;
-}
-
-// Get affinity mask of the current process
-// Parameters:
-// processMask - affinity mask for the specified process
-// systemMask - affinity mask for the system
+// Get set of processors enabled for GC for the current process
// Return:
-// true if it has succeeded, false if it has failed
-// Remarks:
-// A process affinity mask is a bit vector in which each bit represents the processors that
-// a process is allowed to run on. A system affinity mask is a bit vector in which each bit
-// represents the processors that are configured into a system.
-// A process affinity mask is a subset of the system affinity mask. A process is only allowed
-// to run on the processors configured into a system. Therefore, the process affinity mask cannot
-// specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
-bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processAffinityMask, uintptr_t* systemAffinityMask)
+// set of enabled processors
+AffinitySet* GCToOSInterface::GetCurrentProcessAffinitySet()
{
- unsigned int cpuCountInMask = (g_logicalCpuCount > 64) ? 64 : g_logicalCpuCount;
-
- uintptr_t systemMask = GetFullAffinityMask(cpuCountInMask);
-
-#if HAVE_SCHED_GETAFFINITY
-
- int pid = getpid();
- cpu_set_t cpuSet;
- int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
- if (st == 0)
- {
- uintptr_t processMask = 0;
-
- for (unsigned int i = 0; i < cpuCountInMask; i++)
- {
- if (CPU_ISSET(i, &cpuSet))
- {
- processMask |= ((uintptr_t)1) << i;
- }
- }
-
- *processAffinityMask = processMask;
- *systemAffinityMask = systemMask;
- return true;
- }
- else if (errno == EINVAL)
- {
- // There are more processors than can fit in a cpu_set_t
- // return all bits set for all processors (upto 64) for both masks
- *processAffinityMask = systemMask;
- *systemAffinityMask = systemMask;
- return true;
- }
- else
- {
- // We should not get any of the errors that the sched_getaffinity can return since none
- // of them applies for the current thread, so this is an unexpected kind of failure.
- return false;
- }
-
-#else // HAVE_SCHED_GETAFFINITY
-
- // There is no API to manage thread affinity, so let's return both affinity masks
- // with all the CPUs on the system set.
- *systemAffinityMask = systemMask;
- *processAffinityMask = systemMask;
- return true;
-
-#endif // HAVE_SCHED_GETAFFINITY
+ // The set is populated once during GCToOSInterface::Initialize (from
+ // sched_getaffinity when available, otherwise all logical CPUs) and is
+ // returned by pointer so the GC can prune it in place against the
+ // user-configured affinity set.
+ return &g_processAffinitySet;
}
// Get number of processors assigned to the current process
// The number of processors
uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
{
- uintptr_t pmask, smask;
- uint32_t cpuLimit;
-
- if (!GetCurrentProcessAffinityMask(&pmask, &smask))
- return 1;
-
- pmask &= smask;
-
- unsigned int count = 0;
- while (pmask)
- {
- pmask &= (pmask - 1);
- count++;
- }
-
- // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
- // than 64 processors, which would leave us with a count of 0. Since the GC
- // expects there to be at least one processor to run on (and thus at least one
- // heap), we'll return 64 here if count is 0, since there are likely a ton of
- // processors available in that case. The GC also cannot (currently) handle
- // the case where there are more than 64 processors, so we will return a
- // maximum of 64 here.
- if (count == 0 || count > 64)
- count = 64;
-
- if (GetCpuLimit(&cpuLimit) && cpuLimit < count)
- count = cpuLimit;
-
- return count;
+ // The count is cached during Initialize from the process affinity mask.
+ // NOTE(review): the removed code additionally clamped the count by
+ // GetCpuLimit (cgroup CPU quota); the cached value does not. Confirm the
+ // cgroup limit is applied elsewhere, otherwise this is a behavior change
+ // for containerized processes.
+ return g_currentProcessCpuCount;
}
// Return the size of the user-mode portion of the virtual address space of this process.
return false;
}
-bool GCToOSInterface::GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no)
+// Gets the NUMA node for the given processor. Not implemented on this
+// platform: always fails, and callers fall back to NUMA_NODE_UNDEFINED.
+bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
{
assert(!"Numa has not been ported to local GC for unix");
return false;
}
-bool GCToOSInterface::CanEnableGCCPUGroups()
+// Get processor number and optionally its NUMA node number for the specified heap number
+// Parameters:
+// heap_number - heap number to get the result for
+// proc_no - set to the selected processor number
+// node_no - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
+// Return:
+// true if it succeeded
+bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
{
- return false;
-}
+ bool success = false;
-void GCToOSInterface::GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
-{
- assert(!"CpuGroup has not been ported to local GC for unix");
+ // heap_number is an index into the processors *enabled* for this process:
+ // walk all logical CPUs and count only those present in the process
+ // affinity set until the heap_number-th one is reached.
+ // NOTE(review): `*proc_no = procNumber` narrows size_t to uint16_t
+ // implicitly — an explicit cast would silence conversion warnings. Also,
+ // if g_logicalCpuCount ever exceeds MAX_SUPPORTED_CPUS, Contains() would
+ // index past the AffinitySet bitmap; confirm Initialize clamps the count.
+ uint16_t availableProcNumber = 0;
+ for (size_t procNumber = 0; procNumber < g_logicalCpuCount; procNumber++)
+ {
+ if (g_processAffinitySet.Contains(procNumber))
+ {
+ if (availableProcNumber == heap_number)
+ {
+ *proc_no = procNumber;
+
+ // Report the processor's NUMA node when the platform supports
+ // it; otherwise leave node_no as NUMA_NODE_UNDEFINED.
+ if (GCToOSInterface::CanEnableGCNumaAware())
+ {
+ if (!GCToOSInterface::GetNumaProcessorNode(procNumber, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
+ }
+ }
+ else
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
+ }
+
+ success = true;
+ break;
+ }
+ availableProcNumber++;
+ }
+ }
+
+ return success;
}
// Initialize the critical section
// memory on the machine/in the container, we need to restrict by the VM.
static bool g_UseRestrictedVirtualMemory = false;
+static AffinitySet g_processAffinitySet;
+
typedef BOOL (WINAPI *PIS_PROCESS_IN_JOB)(HANDLE processHandle, HANDLE jobHandle, BOOL* result);
typedef BOOL (WINAPI *PQUERY_INFORMATION_JOB_OBJECT)(HANDLE jobHandle, JOBOBJECTINFOCLASS jobObjectInfoClass, void* lpJobObjectInfo, DWORD cbJobObjectInfoLength, LPDWORD lpReturnLength);
static bool g_fEnableGCNumaAware;
+// Packs a Windows processor-group number and an in-group processor index into
+// the single uint16_t "proc no" values used across the GCToOSInterface API:
+// upper 10 bits = group, lower 6 bits = processor index (so at most 64
+// processors per group, matching the OS limit).
+class GroupProcNo
+{
+ uint16_t m_groupProc;
+
+public:
+
+    // Sentinel group meaning "flat processor number, CPU groups not in use".
+ static const uint16_t NoGroup = 0x3ff;
+
+    // Wrap an already-combined value.
+    // NOTE(review): this single-argument constructor is implicit; marking it
+    // explicit would prevent accidental conversions from raw integers.
+ GroupProcNo(uint16_t groupProc) : m_groupProc(groupProc)
+ {
+ }
+
+    // Combine a group number (<= 0x3ff) and a processor index (<= 0x3f).
+ GroupProcNo(uint16_t group, uint16_t procIndex) : m_groupProc((group << 6) | procIndex)
+ {
+ assert(group <= 0x3ff);
+ assert(procIndex <= 0x3f);
+ }
+
+ uint16_t GetGroup() { return m_groupProc >> 6; }
+ uint16_t GetProcIndex() { return m_groupProc & 0x3f; }
+ uint16_t GetCombinedValue() { return m_groupProc; }
+};
+
struct CPU_Group_Info
{
WORD nr_active; // at most 64
return cache_size;
}
+// Whether CPU-group awareness is enabled for the GC (moved file-local since
+// the GCToOSInterface method of the same name was removed).
+bool CanEnableGCCPUGroups()
+{
+ return g_fEnableGCCPUGroups;
+}
+
+// Get the CPU group for the specified processor
+// Maps a flat processor number onto (group, index-within-group) by walking
+// the cumulative active-processor counts of each group.
+// NOTE(review): on the AMD64/ARM64 path, if processor_number is >= the total
+// number of active processors the loop falls through without writing either
+// output, leaving the caller with uninitialized values — confirm inputs are
+// always in range, or initialize the outputs up front.
+void GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
+{
+ assert(g_fEnableGCCPUGroups);
+
+#if !defined(FEATURE_REDHAWK) && (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
+ WORD bTemp = 0;
+ WORD bDiff = processor_number - bTemp;
+
+ for (WORD i=0; i < g_nGroups; i++)
+ {
+ bTemp += g_CPUGroupInfoArray[i].nr_active;
+ if (bTemp > processor_number)
+ {
+ *group_number = i;
+ *group_processor_number = bDiff;
+ break;
+ }
+ bDiff = processor_number - bTemp;
+ }
+#else
+ // Without CPU-group support everything is in group 0.
+ *group_number = 0;
+ *group_processor_number = 0;
+#endif
+}
+
} // anonymous namespace
// Initialize the interface implementation
InitNumaNodeInfo();
InitCPUGroupInfo();
+ uintptr_t pmask, smask;
+ if (!!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)&pmask, (PDWORD_PTR)&smask))
+ {
+ pmask &= smask;
+
+ for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
+ {
+ if ((pmask & ((uintptr_t)1 << i)) != 0)
+ {
+ g_processAffinitySet.Add(i);
+ }
+ }
+ }
+
return true;
}
return ::GetCurrentThreadId();
}
-// Set ideal affinity for the current thread
+// Set ideal processor for the current thread
// Parameters:
-// affinity - ideal processor affinity for the thread
+// srcProcNo - processor number the thread currently runs on
+// dstProcNo - processor number the thread should be migrated to
// Return:
// true if it has succeeded, false if it has failed
-bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo)
{
+ LIMITED_METHOD_CONTRACT;
+
bool success = true;
+ GroupProcNo srcGroupProcNo(srcProcNo);
+ GroupProcNo dstGroupProcNo(dstProcNo);
+
+ if (CanEnableGCCPUGroups())
+ {
+ if (srcGroupProcNo.GetGroup() != dstGroupProcNo.GetGroup())
+ {
+ //only set ideal processor when srcProcNo and dstProcNo are in the same cpu
+ //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
+ return true;
+ }
+ }
+
#if !defined(FEATURE_CORESYSTEM)
- SetThreadIdealProcessor(GetCurrentThread(), (DWORD)affinity->Processor);
+ SetThreadIdealProcessor(GetCurrentThread(), (DWORD)dstGroupProcNo.GetProcIndex());
#else
PROCESSOR_NUMBER proc;
- if (affinity->Group != -1)
+ if (dstGroupProcNo.GetGroup() != GroupProcNo::NoGroup)
{
- proc.Group = (WORD)affinity->Group;
- proc.Number = (BYTE)affinity->Processor;
+ proc.Group = (WORD)dstGroupProcNo.GetGroup();
+ proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
proc.Reserved = 0;
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
{
if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
{
- proc.Number = affinity->Processor;
- success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
+ proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
+ success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
}
}
#endif
// size - size of the virtual memory range
// Return:
// true if it has succeeded, false if it has failed
-bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint32_t node)
+bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
{
if (node == NUMA_NODE_UNDEFINED)
{
}
// Sets the calling thread's affinity to only run on the processor specified
-// in the GCThreadAffinity structure.
// Parameters:
-// affinity - The requested affinity for the calling thread. At most one processor
-// can be provided.
+// procNo - The requested processor for the calling thread.
// Return:
// true if setting the affinity was successful, false otherwise.
-bool GCToOSInterface::SetThreadAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
{
- assert(affinity != nullptr);
- if (affinity->Group != GCThreadAffinity::None)
- {
- assert(affinity->Processor != GCThreadAffinity::None);
+ GroupProcNo groupProcNo(procNo);
+ if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
+ {
GROUP_AFFINITY ga;
- ga.Group = (WORD)affinity->Group;
+ ga.Group = (WORD)groupProcNo.GetGroup();
ga.Reserved[0] = 0; // reserve must be filled with zero
ga.Reserved[1] = 0; // otherwise call may fail
ga.Reserved[2] = 0;
- ga.Mask = (size_t)1 << affinity->Processor;
+ ga.Mask = (size_t)1 << groupProcNo.GetProcIndex();
return !!SetThreadGroupAffinity(GetCurrentThread(), &ga, nullptr);
}
- else if (affinity->Processor != GCThreadAffinity::None)
+ else
{
- return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << affinity->Processor);
+ return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
}
-
- // Given affinity must specify at least one processor to use.
- return false;
}
// Boosts the calling thread's thread priority to a level higher than the default
return !!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
}
-// Get affinity mask of the current process
-// Parameters:
-// processMask - affinity mask for the specified process
-// systemMask - affinity mask for the system
+// Get set of processors enabled for GC for the current process
// Return:
-// true if it has succeeded, false if it has failed
-// Remarks:
-// A process affinity mask is a bit vector in which each bit represents the processors that
-// a process is allowed to run on. A system affinity mask is a bit vector in which each bit
-// represents the processors that are configured into a system.
-// A process affinity mask is a subset of the system affinity mask. A process is only allowed
-// to run on the processors configured into a system. Therefore, the process affinity mask cannot
-// specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
-bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uintptr_t* systemMask)
-{
- return !!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)processMask, (PDWORD_PTR)systemMask);
+// set of enabled processors
+AffinitySet* GCToOSInterface::GetCurrentProcessAffinitySet()
+{
+ return &g_processAffinitySet;
}
// Get number of processors assigned to the current process
if (cCPUs != 0)
return cCPUs;
- int count = 0;
- DWORD_PTR pmask, smask;
+ int count;
- if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
+ if (CanEnableGCCPUGroups())
{
- count = 1;
+ count = GCToOSInterface::GetTotalProcessorCount();
}
else
{
- pmask &= smask;
+ DWORD_PTR pmask, smask;
- while (pmask)
+ if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
{
- pmask &= (pmask - 1);
- count++;
+ count = 1;
}
+ else
+ {
+ count = 0;
+ pmask &= smask;
- // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
- // than 64 processors, which would leave us with a count of 0. Since the GC
- // expects there to be at least one processor to run on (and thus at least one
- // heap), we'll return 64 here if count is 0, since there are likely a ton of
- // processors available in that case. The GC also cannot (currently) handle
- // the case where there are more than 64 processors, so we will return a
- // maximum of 64 here.
- if (count == 0 || count > 64)
- count = 64;
+ while (pmask)
+ {
+ pmask &= (pmask - 1);
+ count++;
+ }
+
+ // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
+ // than 64 processors, which would leave us with a count of 0. Since the GC
+ // expects there to be at least one processor to run on (and thus at least one
+ // heap), we'll return 64 here if count is 0, since there are likely a ton of
+ // processors available in that case. The GC also cannot (currently) handle
+ // the case where there are more than 64 processors, so we will return a
+ // maximum of 64 here.
+ if (count == 0 || count > 64)
+ count = 64;
+ }
}
cCPUs = count;
return g_fEnableGCNumaAware;
}
-bool GCToOSInterface::GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no)
+bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
{
+ GroupProcNo groupProcNo(proc_no);
+
+ PROCESSOR_NUMBER procNumber;
+ procNumber.Group = groupProcNo.GetGroup();
+ procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
+ procNumber.Reserved = 0;
+
assert(g_fEnableGCNumaAware);
- return ::GetNumaProcessorNodeEx(proc_no, node_no) != FALSE;
+ return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
}
-bool GCToOSInterface::CanEnableGCCPUGroups()
+// Get processor number and optionally its NUMA node number for the specified heap number
+// Parameters:
+// heap_number - heap number to get the result for
+// proc_no - set to the selected processor number
+// node_no - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
+// Return:
+// true if it succeeded
+bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
{
- return g_fEnableGCCPUGroups;
-}
+ bool success = false;
-void GCToOSInterface::GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
-{
- assert(g_fEnableGCCPUGroups);
+ if (CanEnableGCCPUGroups())
+ {
+ uint16_t gn, gpn;
+ GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-#if !defined(FEATURE_REDHAWK) && (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
- WORD bTemp = 0;
- WORD bDiff = processor_number - bTemp;
+// dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, (uintptr_t)1 << gpn, heap_number));
+ *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
- for (WORD i=0; i < g_nGroups; i++)
+ if (GCToOSInterface::CanEnableGCNumaAware())
+ {
+ if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
+ }
+ }
+ else
+ { // no numa setting, each cpu group is treated as a node
+ *node_no = gn;
+ }
+
+ success = true;
+ }
+ else
{
- bTemp += g_CPUGroupInfoArray[i].nr_active;
- if (bTemp > processor_number)
+ int bit_number = 0;
+ uint8_t proc_number = 0;
+ for (uintptr_t mask = 1; mask != 0; mask <<= 1)
{
- *group_number = i;
- *group_processor_number = bDiff;
- break;
+ if (g_processAffinitySet.Contains(proc_number))
+ {
+ if (bit_number == heap_number)
+ {
+ //dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
+ *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
+
+ if (GCToOSInterface::CanEnableGCNumaAware())
+ {
+ if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
+ }
+ }
+
+ success = true;
+ break;
+ }
+ bit_number++;
+ }
+ proc_number++;
}
- bDiff = processor_number - bTemp;
}
-#else
- *group_number = 0;
- *group_processor_number = 0;
-#endif
+
+ return success;
}
// Parameters of the GC thread stub
<Comment>During a GC when we try to allocate memory for GC datastructures we could not.</Comment>
</HRESULT>
+<HRESULT NumericValue="0x8013200A">
+ <SymbolicName>CLR_E_GC_BAD_AFFINITY_CONFIG</SymbolicName>
+ <Message>"GC affinity mask or set doesn't contain any CPUs the current process is affinitized to."</Message>
+ <Comment>During a GC initialization, the affinity mask specified via GCHeapAffinitizeMask or GCHeapAffinitizeRanges didn't contain any CPUs the current process is affinitized to.</Comment>
+</HRESULT>
+
<HRESULT NumericValue="E_ACCESSDENIED">
<SymbolicName>COR_E_UNAUTHORIZEDACCESS</SymbolicName>
<Comment> 0x80070005 // Access is denied.</Comment>
MSG_FOR_URT_HR(CLR_E_BIND_NI_SECURITY_FAILURE) "Native image was generated in a different trust level than present at runtime"
MSG_FOR_URT_HR(CLR_E_BIND_NI_DEP_IDENTITY_MISMATCH) "Native image identity mismatch with respect to its dependencies"
MSG_FOR_URT_HR(CLR_E_GC_OOM) "Failfast due to an OOM during a GC"
+ MSG_FOR_URT_HR(CLR_E_GC_BAD_AFFINITY_CONFIG) "GC affinity mask or set doesn't contain any CPUs the current process is affinitized to."
MSG_FOR_URT_HR(COR_E_BADIMAGEFORMAT) "The format of a DLL or executable being loaded is invalid."
END
#define CLR_E_BIND_NI_SECURITY_FAILURE EMAKEHR(0x2007)
#define CLR_E_BIND_NI_DEP_IDENTITY_MISMATCH EMAKEHR(0x2008)
#define CLR_E_GC_OOM EMAKEHR(0x2009)
+#define CLR_E_GC_BAD_AFFINITY_CONFIG EMAKEHR(0x200a)
#define COR_E_UNAUTHORIZEDACCESS E_ACCESSDENIED
#define COR_E_ARGUMENT E_INVALIDARG
#define COR_E_INVALIDCAST E_NOINTERFACE
return g_pFreeObjectMethodTable;
}
-// These are arbitrary, we shouldn't ever be having confrig keys or values
+// This is arbitrary, we shouldn't ever be having config keys
// longer than these lengths.
const size_t MaxConfigKeyLength = 255;
-const size_t MaxConfigValueLength = 255;
bool GCToEEInterface::GetBooleanConfigValue(const char* key, bool* value)
{
return false;
}
+ int charCount = WideCharToMultiByte(CP_ACP, 0, out, -1 /* out is null-terminated */, NULL, 0, nullptr, nullptr);
+ if (charCount == 0)
+ {
+ // this should only happen if the config subsystem gives us a string that's not valid
+ // unicode.
+ CLRConfig::FreeConfigString(out);
+ return false;
+ }
+
// not allocated on the stack since it escapes this function
- AStringHolder configResult = new (nothrow) char[MaxConfigValueLength];
+ AStringHolder configResult = new (nothrow) char[charCount];
if (!configResult)
{
CLRConfig::FreeConfigString(out);
}
if (WideCharToMultiByte(CP_ACP, 0, out, -1 /* out is null-terminated */,
- configResult.GetValue(), MaxConfigKeyLength, nullptr, nullptr) == 0)
+ configResult.GetValue(), charCount, nullptr, nullptr) == 0)
{
- // this should only happen if the config subsystem gives us a string that's not valid
- // unicode.
+ // this should never happen, the previous call to WideCharToMultiByte that computed the charCount should
+ // have caught all issues.
+ assert(false);
CLRConfig::FreeConfigString(out);
return false;
}
uint32_t g_pageSizeUnixInl = 0;
#endif
+static AffinitySet g_processAffinitySet;
+
+class GroupProcNo
+{
+ uint16_t m_groupProc;
+
+public:
+
+ static const uint16_t NoGroup = 0x3ff;
+
+ GroupProcNo(uint16_t groupProc) : m_groupProc(groupProc)
+ {
+ }
+
+ GroupProcNo(uint16_t group, uint16_t procIndex) : m_groupProc((group << 6) | procIndex)
+ {
+ assert(group <= 0x3ff);
+ assert(procIndex <= 0x3f);
+ }
+
+ uint16_t GetGroup() { return m_groupProc >> 6; }
+ uint16_t GetProcIndex() { return m_groupProc & 0x3f; }
+ uint16_t GetCombinedValue() { return m_groupProc; }
+};
// Initialize the interface implementation
// Return:
g_pageSizeUnixInl = GetOsPageSize();
#endif
+ uintptr_t pmask, smask;
+ if (!!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)&pmask, (PDWORD_PTR)&smask))
+ {
+ pmask &= smask;
+
+ for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
+ {
+ if ((pmask & ((uintptr_t)1 << i)) != 0)
+ {
+ g_processAffinitySet.Add(i);
+ }
+ }
+ }
+
return true;
}
return ::GetCurrentProcessId();
}
-// Set ideal affinity for the current thread
+// Set ideal processor for the current thread
// Parameters:
-// affinity - ideal processor affinity for the thread
+// srcProcNo - processor number the thread currently runs on
+// dstProcNo - processor number the thread should be migrated to
// Return:
// true if it has succeeded, false if it has failed
-bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::MigrateThread(uint16_t srcProcNo, uint16_t dstProcNo)
{
LIMITED_METHOD_CONTRACT;
bool success = true;
+ GroupProcNo srcGroupProcNo(srcProcNo);
+ GroupProcNo dstGroupProcNo(dstProcNo);
+
+ if (CPUGroupInfo::CanEnableGCCPUGroups())
+ {
+ if (srcGroupProcNo.GetGroup() != dstGroupProcNo.GetGroup())
+ {
+ //only set ideal processor when srcProcNo and dstProcNo are in the same cpu
+ //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
+ return true;
+ }
+ }
+
#if !defined(FEATURE_CORESYSTEM)
- SetThreadIdealProcessor(GetCurrentThread(), (DWORD)affinity->Processor);
+ SetThreadIdealProcessor(GetCurrentThread(), (DWORD)dstGroupProcNo.GetProcIndex());
#else
PROCESSOR_NUMBER proc;
- if (affinity->Group != -1)
+ if (dstGroupProcNo.GetGroup() != GroupProcNo::NoGroup)
{
- proc.Group = (WORD)affinity->Group;
- proc.Number = (BYTE)affinity->Processor;
+ proc.Group = (WORD)dstGroupProcNo.GetGroup();
+ proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
proc.Reserved = 0;
-
+
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
}
#if !defined(FEATURE_PAL)
{
if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
{
- proc.Number = (BYTE)affinity->Processor;
+ proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
- }
+ }
}
#endif // !defined(FEATURE_PAL)
#endif
// size - size of the virtual memory range
// Return:
// true if it has succeeded, false if it has failed
-bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint32_t node)
+bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
{
LIMITED_METHOD_CONTRACT;
}
// Sets the calling thread's affinity to only run on the processor specified
-// in the GCThreadAffinity structure.
// Parameters:
-// affinity - The requested affinity for the calling thread. At most one processor
-// can be provided.
+// procNo - The requested processor for the calling thread.
// Return:
// true if setting the affinity was successful, false otherwise.
-bool GCToOSInterface::SetThreadAffinity(GCThreadAffinity* affinity)
+bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
{
LIMITED_METHOD_CONTRACT;
- assert(affinity != nullptr);
- if (affinity->Group != GCThreadAffinity::None)
+ GroupProcNo groupProcNo(procNo);
+
+ if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
{
- assert(affinity->Processor != GCThreadAffinity::None);
-
GROUP_AFFINITY ga;
- ga.Group = (WORD)affinity->Group;
+ ga.Group = (WORD)groupProcNo.GetGroup();
ga.Reserved[0] = 0; // reserve must be filled with zero
ga.Reserved[1] = 0; // otherwise call may fail
ga.Reserved[2] = 0;
- ga.Mask = (size_t)1 << affinity->Processor;
+ ga.Mask = (size_t)1 << groupProcNo.GetProcIndex();
return !!SetThreadGroupAffinity(GetCurrentThread(), &ga, nullptr);
}
- else if (affinity->Processor != GCThreadAffinity::None)
+ else
{
- return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << affinity->Processor);
+ return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
}
-
- // Given affinity must specify at least one processor to use.
- return false;
}
// Boosts the calling thread's thread priority to a level higher than the default
return !!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
}
-// Get affinity mask of the current process
-// Parameters:
-// processMask - affinity mask for the specified process
-// systemMask - affinity mask for the system
+// Get set of processors enabled for GC for the current process
// Return:
-// true if it has succeeded, false if it has failed
-// Remarks:
-// A process affinity mask is a bit vector in which each bit represents the processors that
-// a process is allowed to run on. A system affinity mask is a bit vector in which each bit
-// represents the processors that are configured into a system.
-// A process affinity mask is a subset of the system affinity mask. A process is only allowed
-// to run on the processors configured into a system. Therefore, the process affinity mask cannot
-// specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor.
-bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uintptr_t* systemMask)
+// set of enabled processors
+AffinitySet* GCToOSInterface::GetCurrentProcessAffinitySet()
{
- LIMITED_METHOD_CONTRACT;
-
- return !!::GetProcessAffinityMask(GetCurrentProcess(), (PDWORD_PTR)processMask, (PDWORD_PTR)systemMask);
+ return &g_processAffinitySet;
}
// Get number of processors assigned to the current process
{
LIMITED_METHOD_CONTRACT;
- return ::GetCurrentProcessCpuCount();
+ // GetCurrentProcessCpuCount only returns up to 64 procs.
+ return CPUGroupInfo::CanEnableGCCPUGroups() ?
+ GCToOSInterface::GetTotalProcessorCount():
+ ::GetCurrentProcessCpuCount();
}
// Return the size of the user-mode portion of the virtual address space of this process.
return NumaNodeInfo::CanEnableGCNumaAware() != FALSE;
}
-bool GCToOSInterface::GetNumaProcessorNode(PPROCESSOR_NUMBER proc_no, uint16_t *node_no)
+bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
{
LIMITED_METHOD_CONTRACT;
- return NumaNodeInfo::GetNumaProcessorNodeEx(proc_no, node_no) != FALSE;
-}
+ GroupProcNo groupProcNo(proc_no);
-bool GCToOSInterface::CanEnableGCCPUGroups()
-{
- LIMITED_METHOD_CONTRACT;
+ PROCESSOR_NUMBER procNumber;
+ procNumber.Group = groupProcNo.GetGroup();
+ procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
+ procNumber.Reserved = 0;
- return CPUGroupInfo::CanEnableGCCPUGroups() != FALSE;
+ return NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
}
-void GCToOSInterface::GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
+// Get processor number and optionally its NUMA node number for the specified heap number
+// Parameters:
+// heap_number - heap number to get the result for
+// proc_no - set to the selected processor number
+// node_no - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
+// Return:
+// true if it succeeded
+bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
{
- LIMITED_METHOD_CONTRACT;
+ bool success = false;
- return CPUGroupInfo::GetGroupForProcessor(processor_number, group_number, group_processor_number);
+ if (CPUGroupInfo::CanEnableGCCPUGroups())
+ {
+ uint16_t gn, gpn;
+ CPUGroupInfo::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
+
+// dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, (uintptr_t)1 << gpn, heap_number));
+ *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
+ if (GCToOSInterface::CanEnableGCNumaAware())
+ {
+ if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
+ }
+ }
+ else
+ { // no numa setting, each cpu group is treated as a node
+ *node_no = gn;
+ }
+
+ success = true;
+ }
+ else
+ {
+ int bit_number = 0;
+ uint8_t proc_number = 0;
+ for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+ {
+ if (g_processAffinitySet.Contains(proc_number))
+ {
+ if (bit_number == heap_number)
+ {
+ //dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
+ *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
+
+ if (GCToOSInterface::CanEnableGCNumaAware())
+ {
+ if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
+ {
+ *node_no = NUMA_NODE_UNDEFINED;
+ }
+ }
+
+ success = true;
+ break;
+ }
+ bit_number++;
+ }
+ proc_number++;
+ }
+ }
+
+ return success;
}
// Initialize the critical section