1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
11 #include "env/gcenv.structs.h"
12 #include "env/gcenv.base.h"
13 #include "env/gcenv.os.h"
14 #include "env/gcenv.ee.h"
15 #include "env/gcenv.windows.inl"
16 #include "env/volatile.h"
// Cached system information (processor count, page size, allocation granularity).
19 GCSystemInfo g_SystemInfo;
// Dynamically-resolved pointer to (K32)GetProcessMemoryInfo; looked up via
// GetProcAddress because the export name varies across Windows versions.
21 typedef BOOL (WINAPI *PGET_PROCESS_MEMORY_INFO)(HANDLE handle, PROCESS_MEMORY_COUNTERS* memCounters, uint32_t cb);
22 static PGET_PROCESS_MEMORY_INFO GCGetProcessMemoryInfo = 0;
// Cached job-object physical memory limit; UINTPTR_MAX means "not computed yet".
24 static size_t g_RestrictedPhysicalMemoryLimit = (size_t)UINTPTR_MAX;
26 // For 32-bit processes the virtual address range could be smaller than the amount of physical
27 // memory on the machine/in the container, we need to restrict by the VM.
28 static bool g_UseRestrictedVirtualMemory = false;
// Set of processors the GC is allowed to run threads on.
30 static AffinitySet g_processAffinitySet;
// Dynamically-resolved job-object query APIs (see GetRestrictedPhysicalMemoryLimit).
32 typedef BOOL (WINAPI *PIS_PROCESS_IN_JOB)(HANDLE processHandle, HANDLE jobHandle, BOOL* result);
33 typedef BOOL (WINAPI *PQUERY_INFORMATION_JOB_OBJECT)(HANDLE jobHandle, JOBOBJECTINFOCLASS jobObjectInfoClass, void* lpJobObjectInfo, DWORD cbJobObjectInfoLength, LPDWORD lpReturnLength);
// True when NUMA-aware allocation is enabled (set by InitNumaNodeInfo).
37 static bool g_fEnableGCNumaAware;
// Packs a CPU group number and an in-group processor index into a single
// 16-bit value: group in the upper 10 bits, processor index in the lower 6.
// NoGroup (all 10 group bits set) marks a proc number with no group component.
45 static const uint16_t NoGroup = 0x3ff;
// Wrap an already-combined value.
47 GroupProcNo(uint16_t groupProc) : m_groupProc(groupProc)
// Combine a group (<= 0x3ff) and a processor index (<= 0x3f).
51 GroupProcNo(uint16_t group, uint16_t procIndex) : m_groupProc((group << 6) | procIndex)
53 assert(group <= 0x3ff);
54 assert(procIndex <= 0x3f);
57 uint16_t GetGroup() { return m_groupProc >> 6; }
58 uint16_t GetProcIndex() { return m_groupProc & 0x3f; }
59 uint16_t GetCombinedValue() { return m_groupProc; }
// Per-CPU-group bookkeeping used when CPU groups are enabled.
64 WORD nr_active; // at most 64
// Bitmask of the active logical processors within this group.
68 DWORD_PTR active_mask;
// Running weight used to balance thread assignment across groups.
70 DWORD activeThreadWeight;
// True when more than one CPU group exists and GC CPU-group support is on.
73 static bool g_fEnableGCCPUGroups;
// True when the process was affinitized to a single processor at startup.
74 static bool g_fHadSingleProcessorAtStartup;
75 static DWORD g_nGroups;
76 static DWORD g_nProcessors;
77 static CPU_Group_Info *g_CPUGroupInfoArray;
// Decides whether NUMA-aware allocation should be used: requires the config
// switch to be on and the machine to report more than one NUMA node.
79 void InitNumaNodeInfo()
// Default to disabled; only flipped on when all checks below pass.
83 g_fEnableGCNumaAware = false;
85 if (!GCConfig::GetGCNumaAware())
88 // fail to get the highest numa node number
// highest == 0 means a single node, so NUMA awareness buys nothing.
89 if (!GetNumaHighestNodeNumber(&highest) || (highest == 0))
92 g_fEnableGCNumaAware = true;
96 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
97 // Calculate greatest common divisor
98 DWORD GCD(DWORD u, DWORD v)
110 // Calculate least common multiple
111 DWORD LCM(DWORD u, DWORD v)
// Divide before multiplying to reduce the chance of overflowing u * v.
113 return u / GCD(u, v) * v;
// Builds g_CPUGroupInfoArray from the OS processor-group topology.
// On success, fills in each group's active processor count and mask, computes
// g_nGroups and g_nProcessors, and assigns each group a weight (LCM of all
// group sizes divided by the group size) used for balanced thread placement.
// Returns true on success, false on API or allocation failure.
bool InitCPUGroupInfoArray()
{
#if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
    BYTE *bBuffer = NULL;
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pSLPIEx = NULL;
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pRecord = NULL;
    DWORD cbSLPIEx = 0;
    DWORD byteOffset = 0;
    DWORD dwWeight = 1;

    // The first call is made with a null buffer so it is expected to fail with
    // ERROR_INSUFFICIENT_BUFFER and return the required size in cbSLPIEx.
    // (Bug fix: the call must be negated — without the '!' the early-out fired
    // on success and a genuine failure fell through with a bogus buffer size.)
    if (!GetLogicalProcessorInformationEx(RelationGroup, pSLPIEx, &cbSLPIEx) &&
        GetLastError() != ERROR_INSUFFICIENT_BUFFER)
        return false;

    // Fail to allocate buffer
    bBuffer = new (std::nothrow) BYTE[ cbSLPIEx ];
    if (bBuffer == NULL)
        return false;

    pSLPIEx = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)bBuffer;
    if (!GetLogicalProcessorInformationEx(RelationGroup, pSLPIEx, &cbSLPIEx))
    {
        delete[] bBuffer;
        return false;
    }

    // Walk the variable-sized records looking for the RelationGroup entry,
    // which carries the number of active processor groups.
    pRecord = pSLPIEx;
    while (byteOffset < cbSLPIEx)
    {
        if (pRecord->Relationship == RelationGroup)
        {
            g_nGroups = pRecord->Group.ActiveGroupCount;
            break;
        }

        byteOffset += pRecord->Size;
        pRecord = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(bBuffer + byteOffset);
    }

    g_CPUGroupInfoArray = new (std::nothrow) CPU_Group_Info[g_nGroups];
    if (g_CPUGroupInfoArray == NULL)
    {
        delete[] bBuffer;
        return false;
    }

    // Record each group's size and mask; dwWeight accumulates the LCM of all
    // group sizes so that each per-group weight below divides evenly.
    for (DWORD i = 0; i < g_nGroups; i++)
    {
        g_CPUGroupInfoArray[i].nr_active = (WORD)pRecord->Group.GroupInfo[i].ActiveProcessorCount;
        g_CPUGroupInfoArray[i].active_mask = pRecord->Group.GroupInfo[i].ActiveProcessorMask;
        g_nProcessors += g_CPUGroupInfoArray[i].nr_active;
        dwWeight = LCM(dwWeight, (DWORD)g_CPUGroupInfoArray[i].nr_active);
    }

    // The number of threads per group that can be supported will depend on the number of CPU groups
    // and the number of LPs within each processor group. For example, when the number of LPs in
    // CPU groups is the same and is 64, the number of threads per group before weight overflow
    // would be 2^32/2^6 = 2^26 (64M threads)
    for (DWORD i = 0; i < g_nGroups; i++)
    {
        g_CPUGroupInfoArray[i].groupWeight = dwWeight / (DWORD)g_CPUGroupInfoArray[i].nr_active;
        g_CPUGroupInfoArray[i].activeThreadWeight = 0;
    }

    delete[] bBuffer; // done with it; free it

    return true;
#else
    return false;
#endif // _TARGET_AMD64_ || _TARGET_ARM64_
}
// Assigns each CPU group a contiguous [begin, end] range of global processor
// numbers by accumulating the active processor counts group by group.
190 bool InitCPUGroupInfoRange()
192 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
196 for (WORD i = 0; i < g_nGroups; i++)
// nr_proc is the running total; this group covers [begin, nr_proc - 1].
198 nr_proc += g_CPUGroupInfoArray[i].nr_active;
199 g_CPUGroupInfoArray[i].begin = begin;
200 g_CPUGroupInfoArray[i].end = nr_proc - 1;
// Top-level CPU-group initialization: populates the group info array and
// ranges, then enables GC CPU-group support only when multiple groups exist.
// Also records whether the process started affinitized to a single processor.
210 void InitCPUGroupInfo()
212 g_fEnableGCCPUGroups = false;
214 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
215 if (!GCConfig::GetGCCpuGroup())
218 if (!InitCPUGroupInfoArray())
221 if (!InitCPUGroupInfoRange())
224 // only enable CPU groups if more than one group exists
225 g_fEnableGCCPUGroups = g_nGroups > 1;
226 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
228 // Determine if the process is affinitized to a single processor (or if the system has a single processor)
229 DWORD_PTR processAffinityMask, systemAffinityMask;
230 if (::GetProcessAffinityMask(::GetCurrentProcess(), &processAffinityMask, &systemAffinityMask))
232 processAffinityMask &= systemAffinityMask;
233 if (processAffinityMask != 0 && // only one CPU group is involved
// x & (x - 1) clears the lowest set bit; zero result means x was a power of
// two, i.e. exactly one processor is enabled.
234 (processAffinityMask & (processAffinityMask - 1)) == 0) // only one bit is set
236 g_fHadSingleProcessorAtStartup = true;
// Fills *pMSEX with the current global memory status via GlobalMemoryStatusEx.
// dwLength must be set before the call per the Win32 API contract.
241 void GetProcessMemoryLoad(LPMEMORYSTATUSEX pMSEX)
243 pMSEX->dwLength = sizeof(MEMORYSTATUSEX);
244 BOOL fRet = ::GlobalMemoryStatusEx(pMSEX);
// Computes (and caches in g_RestrictedPhysicalMemoryLimit) the physical memory
// limit imposed on this process by an NT job object, further clamped by the
// process's virtual address range when that is smaller. Returns 0 when no
// restriction applies. Job APIs are resolved dynamically from kernel32.
248 static size_t GetRestrictedPhysicalMemoryLimit()
250 LIMITED_METHOD_CONTRACT;
252 // The limit was cached already
253 if (g_RestrictedPhysicalMemoryLimit != (size_t)UINTPTR_MAX)
254 return g_RestrictedPhysicalMemoryLimit;
256 size_t job_physical_memory_limit = (size_t)UINTPTR_MAX;
257 uint64_t total_virtual = 0;
258 uint64_t total_physical = 0;
259 BOOL in_job_p = FALSE;
260 HINSTANCE hinstKernel32 = 0;
262 PIS_PROCESS_IN_JOB GCIsProcessInJob = 0;
263 PQUERY_INFORMATION_JOB_OBJECT GCQueryInformationJobObject = 0;
265 hinstKernel32 = LoadLibraryEx(L"kernel32.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32);
269 GCIsProcessInJob = (PIS_PROCESS_IN_JOB)GetProcAddress(hinstKernel32, "IsProcessInJob");
270 if (!GCIsProcessInJob)
273 if (!GCIsProcessInJob(GetCurrentProcess(), NULL, &in_job_p))
// Cache the memory-info entry point for later use by GetMemoryStatus.
278 GCGetProcessMemoryInfo = (PGET_PROCESS_MEMORY_INFO)GetProcAddress(hinstKernel32, "K32GetProcessMemoryInfo");
280 if (!GCGetProcessMemoryInfo)
283 GCQueryInformationJobObject = (PQUERY_INFORMATION_JOB_OBJECT)GetProcAddress(hinstKernel32, "QueryInformationJobObject");
285 if (!GCQueryInformationJobObject)
288 JOBOBJECT_EXTENDED_LIMIT_INFORMATION limit_info;
// NULL job handle queries the job the current process belongs to.
289 if (GCQueryInformationJobObject (NULL, JobObjectExtendedLimitInformation, &limit_info,
290 sizeof(limit_info), NULL))
292 size_t job_memory_limit = (size_t)UINTPTR_MAX;
293 size_t job_process_memory_limit = (size_t)UINTPTR_MAX;
294 size_t job_workingset_limit = (size_t)UINTPTR_MAX;
296 // Notes on the NT job object:
298 // You can specify a bigger process commit or working set limit than
299 // job limit which is pointless so we use the smallest of all 3 as
300 // to calculate our "physical memory load" or "available physical memory"
301 // when running inside a job object, ie, we treat this as the amount of physical memory
302 // our process is allowed to use.
304 // The commit limit is already reflected by default when you run in a
305 // job but the physical memory load is not.
// Each limit is only meaningful when its corresponding flag is set.
307 if ((limit_info.BasicLimitInformation.LimitFlags & JOB_OBJECT_LIMIT_JOB_MEMORY) != 0)
308 job_memory_limit = limit_info.JobMemoryLimit;
309 if ((limit_info.BasicLimitInformation.LimitFlags & JOB_OBJECT_LIMIT_PROCESS_MEMORY) != 0)
310 job_process_memory_limit = limit_info.ProcessMemoryLimit;
311 if ((limit_info.BasicLimitInformation.LimitFlags & JOB_OBJECT_LIMIT_WORKINGSET) != 0)
312 job_workingset_limit = limit_info.BasicLimitInformation.MaximumWorkingSetSize;
314 if ((job_memory_limit != (size_t)UINTPTR_MAX) ||
315 (job_process_memory_limit != (size_t)UINTPTR_MAX) ||
316 (job_workingset_limit != (size_t)UINTPTR_MAX))
// Effective limit is the smallest of whichever limits are set.
318 job_physical_memory_limit = min (job_memory_limit, job_process_memory_limit);
319 job_physical_memory_limit = min (job_physical_memory_limit, job_workingset_limit);
322 ::GetProcessMemoryLoad(&ms);
323 total_virtual = ms.ullTotalVirtual;
324 total_physical = ms.ullAvailPhys;
326 // A sanity check in case someone set a larger limit than there is actual physical memory.
327 job_physical_memory_limit = (size_t) min (job_physical_memory_limit, ms.ullTotalPhys);
// UINTPTR_MAX here means no job limit was found: normalize to 0 (unrestricted).
333 if (job_physical_memory_limit == (size_t)UINTPTR_MAX)
335 job_physical_memory_limit = 0;
337 if (hinstKernel32 != 0)
339 FreeLibrary(hinstKernel32);
341 GCGetProcessMemoryInfo = 0;
345 // Check to see if we are limited by VM.
346 if (total_virtual == 0)
349 ::GetProcessMemoryLoad(&ms);
351 total_virtual = ms.ullTotalVirtual;
352 total_physical = ms.ullTotalPhys;
355 if (job_physical_memory_limit != 0)
357 total_physical = job_physical_memory_limit;
// If the address space is smaller than the physical limit, the address space
// is the real constraint — use it instead (mainly relevant for 32-bit).
360 if (total_virtual < total_physical)
362 if (hinstKernel32 != 0)
364 // We can also free the lib here - if we are limited by VM we will not be calling
365 // GetProcessMemoryInfo.
366 FreeLibrary(hinstKernel32);
367 GCGetProcessMemoryInfo = 0;
369 g_UseRestrictedVirtualMemory = true;
370 job_physical_memory_limit = (size_t)total_virtual;
// Publish the result so concurrent readers see a fully-computed value.
373 VolatileStore(&g_RestrictedPhysicalMemoryLimit, job_physical_memory_limit);
374 return g_RestrictedPhysicalMemoryLimit;
377 // This function checks to see if GetLogicalProcessorInformation API is supported.
378 // On success, this function allocates a SLPI array, sets nEntries to number
379 // of elements in the SLPI array and returns a pointer to the SLPI array after filling it with information.
381 // Note: If successful, GetLPI allocates memory for the SLPI array and expects the caller to
382 // free the memory once the caller is done using the information in the SLPI array.
// Returns NULL on any failure (unexpected error code, allocation failure, or
// a failing second call).
383 SYSTEM_LOGICAL_PROCESSOR_INFORMATION *GetLPI(PDWORD nEntries)
386 DWORD dwNumElements = 0;
387 SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = NULL;
389 // We setup the first call to GetLogicalProcessorInformation to fail so that we can obtain
390 // the size of the buffer required to allocate for the SLPI array that is returned
392 if (!GetLogicalProcessorInformation(pslpi, &cbslpi) &&
393 GetLastError() != ERROR_INSUFFICIENT_BUFFER)
395 // If we fail with anything other than an ERROR_INSUFFICIENT_BUFFER here, we punt with failure.
401 // compute the number of SLPI entries required to hold the information returned from GLPI
403 dwNumElements = cbslpi / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
405 // allocate a buffer in the free heap to hold an array of SLPI entries from GLPI, number of elements in the array is dwNumElements
407 pslpi = new (std::nothrow) SYSTEM_LOGICAL_PROCESSOR_INFORMATION[ dwNumElements ];
411 // the memory allocation failed
415 // Make call to GetLogicalProcessorInformation. Returns array of SLPI structures
417 if (!GetLogicalProcessorInformation(pslpi, &cbslpi))
419 // GetLogicalProcessorInformation failed
420 delete[] pslpi ; //Allocation was fine but the API call itself failed and so we are releasing the memory before the return NULL.
424 // GetLogicalProcessorInformation successful, set nEntries to number of entries in the SLPI array
425 *nEntries = dwNumElements;
427 return pslpi; // return pointer to SLPI array
431 // This function returns the size of highest level cache on the physical chip. If it cannot
432 // determine the cachesize this function returns 0.
433 size_t GetLogicalProcessorCacheSizeFromOS()
435 size_t cache_size = 0;
438 // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. Returns NULL
439 // if API not present or on failure.
441 SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = GetLPI(&nEntries) ;
445 // GetLogicalProcessorInformation not supported or failed.
449 // Crack the information. Iterate through all the SLPI array entries for all processors in system.
450 // Will return the greatest of all the processor cache sizes or zero
452 size_t last_cache_size = 0;
454 for (DWORD i=0; i < nEntries; i++)
// Only cache-relationship entries carry a Cache.Size; take the maximum seen.
456 if (pslpi[i].Relationship == RelationCache)
458 last_cache_size = max(last_cache_size, pslpi[i].Cache.Size);
461 cache_size = last_cache_size;
// GetLPI transferred ownership of the array to us.
466 delete[] pslpi; // release the memory allocated for the SLPI array.
// Returns whether GC CPU-group support was enabled by InitCPUGroupInfo.
471 bool CanEnableGCCPUGroups()
473 return g_fEnableGCCPUGroups;
476 // Get the CPU group for the specified processor
// Translates a global processor number into (group, index-within-group) by
// walking the cumulative active-processor counts of each group.
477 void GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
479 assert(g_fEnableGCCPUGroups);
481 #if !defined(FEATURE_REDHAWK) && (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
// bDiff tracks the offset of processor_number within the current group.
483 WORD bDiff = processor_number - bTemp;
485 for (WORD i=0; i < g_nGroups; i++)
487 bTemp += g_CPUGroupInfoArray[i].nr_active;
// Once the cumulative count passes processor_number, it lives in group i.
488 if (bTemp > processor_number)
491 *group_processor_number = bDiff;
494 bDiff = processor_number - bTemp;
// Fallback when CPU groups are unavailable on this target.
498 *group_processor_number = 0;
502 } // anonymous namespace
504 // Initialize the interface implementation
// Caches basic system info and builds the process affinity set, either from
// all processors (CPU groups enabled) or from the process affinity mask.
506 // true if it has succeeded, false if it has failed
507 bool GCToOSInterface::Initialize()
509 SYSTEM_INFO systemInfo;
510 GetSystemInfo(&systemInfo);
512 g_SystemInfo.dwNumberOfProcessors = systemInfo.dwNumberOfProcessors;
513 g_SystemInfo.dwPageSize = systemInfo.dwPageSize;
514 g_SystemInfo.dwAllocationGranularity = systemInfo.dwAllocationGranularity;
// The GC assumes 4KB pages on Windows.
516 assert(systemInfo.dwPageSize == 0x1000);
521 if (CanEnableGCCPUGroups())
523 // When CPU groups are enabled, then the process is not bound by the process affinity set at process launch.
524 // Set the initial affinity mask so that all processors are enabled.
525 for (size_t i = 0; i < g_nProcessors; i++)
527 g_processAffinitySet.Add(i)
532 // When CPU groups are disabled, the process affinity mask specified at the process launch cannot be
534 uintptr_t pmask, smask;
535 if (!!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)&pmask, (PDWORD_PTR)&smask))
// Add each processor whose bit is set in the process affinity mask.
539 for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
541 if ((pmask & ((uintptr_t)1 << i)) != 0)
543 g_processAffinitySet.Add(i);
552 // Shutdown the interface implementation
// No teardown is needed on Windows; all cached state is process-lifetime.
553 void GCToOSInterface::Shutdown()
558 // Get numeric id of the current thread if possible on the
559 // current platform. It is intended for logging purposes only.
561 // Numeric id of the current thread or 0 if the
562 uint64_t GCToOSInterface::GetCurrentThreadIdForLogging()
564 return ::GetCurrentThreadId();
567 // Get id of the process
568 uint32_t GCToOSInterface::GetCurrentProcessId()
570 return ::GetCurrentThreadId();
573 // Set ideal processor for the current thread
575 // srcProcNo - processor number the thread currently runs on
576 // dstProcNo - processor number the thread should be migrated to
578 // true if it has succeeded, false if it has failed
579 bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t dstProcNo)
581 LIMITED_METHOD_CONTRACT;
// Both arguments are GroupProcNo-encoded (group in upper bits, index in lower).
585 GroupProcNo srcGroupProcNo(srcProcNo);
586 GroupProcNo dstGroupProcNo(dstProcNo);
588 if (CanEnableGCCPUGroups())
590 if (srcGroupProcNo.GetGroup() != dstGroupProcNo.GetGroup())
592 //only set ideal processor when srcProcNo and dstProcNo are in the same cpu
593 //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
// Older platforms without CoreSystem only support the non-Ex API, which takes
// a plain processor index.
598 #if !defined(FEATURE_CORESYSTEM)
599 SetThreadIdealProcessor(GetCurrentThread(), (DWORD)dstGroupProcNo.GetProcIndex());
601 PROCESSOR_NUMBER proc;
603 if (dstGroupProcNo.GetGroup() != GroupProcNo::NoGroup)
605 proc.Group = (WORD)dstGroupProcNo.GetGroup();
606 proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
609 success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
// No group encoded: keep the thread's current group and change only the index.
613 if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
615 proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
616 success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
624 // Get the number of the current processor
625 uint32_t GCToOSInterface::GetCurrentProcessorNumber()
// Callers must first check availability via CanGetCurrentProcessorNumber().
627 assert(GCToOSInterface::CanGetCurrentProcessorNumber());
628 return ::GetCurrentProcessorNumber();
631 // Check if the OS supports getting current processor number
632 bool GCToOSInterface::CanGetCurrentProcessorNumber()
634 // on all Windows platforms we support this API exists
638 // Flush write buffers of processors that are executing threads of the current process
// Thin wrapper over the Win32 API of the same name.
639 void GCToOSInterface::FlushProcessWriteBuffers()
641 ::FlushProcessWriteBuffers();
644 // Break into a debugger
645 void GCToOSInterface::DebugBreak()
650 // Causes the calling thread to sleep for the specified number of milliseconds
652 // sleepMSec - time to sleep before switching to another thread
653 void GCToOSInterface::Sleep(uint32_t sleepMSec)
655 // TODO(segilles) CLR implementation of __SwitchToThread spins for short sleep durations
656 // to avoid context switches - is that interesting or useful here?
// FALSE: the sleep is not alertable, so APCs do not cut it short.
659 ::SleepEx(sleepMSec, FALSE);
663 // Causes the calling thread to yield execution to another thread that is ready to run on the current processor.
665 // switchCount - number of times the YieldThread was called in a loop
666 void GCToOSInterface::YieldThread(uint32_t switchCount)
// switchCount is unused on Windows; the plain yield is sufficient here.
668 UNREFERENCED_PARAMETER(switchCount);
672 // Reserve virtual memory range.
674 // address - starting virtual address, it can be NULL to let the function choose the starting address
675 // size - size of the virtual memory range
676 // alignment - requested memory alignment, 0 means no specific alignment requested
677 // flags - flags to control special settings like write watching
679 // Starting virtual address of the reserved range
680 void* GCToOSInterface::VirtualReserve(size_t size, size_t alignment, uint32_t flags)
682 // Windows already ensures 64kb alignment on VirtualAlloc. The current CLR
683 // implementation ignores it on Windows, other than making some sanity checks on it.
684 UNREFERENCED_PARAMETER(alignment);
// Alignment must be a power of two and no larger than the 64KB granularity.
685 assert((alignment & (alignment - 1)) == 0);
686 assert(alignment <= 0x10000);
687 DWORD memFlags = (flags & VirtualReserveFlags::WriteWatch) ? (MEM_RESERVE | MEM_WRITE_WATCH) : MEM_RESERVE;
688 return ::VirtualAlloc(nullptr, size, memFlags, PAGE_READWRITE);
691 // Release virtual memory range previously reserved using VirtualReserve
693 // address - starting virtual address
694 // size - size of the virtual memory range
696 // true if it has succeeded, false if it has failed
697 bool GCToOSInterface::VirtualRelease(void* address, size_t size)
// MEM_RELEASE requires dwSize == 0; the whole reservation is released.
699 return !!::VirtualFree(address, 0, MEM_RELEASE);
702 // Commit virtual memory range. It must be part of a range reserved using VirtualReserve.
704 // address - starting virtual address
705 // size - size of the virtual memory range
707 // true if it has succeeded, false if it has failed
708 bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
710 if (node == NUMA_NODE_UNDEFINED)
712 return ::VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE) != nullptr;
// A specific node may only be requested when NUMA awareness was enabled.
716 assert(g_fEnableGCNumaAware);
717 return ::VirtualAllocExNuma(::GetCurrentProcess(), address, size, MEM_COMMIT, PAGE_READWRITE, node) != nullptr;
721 // Decomit virtual memory range.
723 // address - starting virtual address
724 // size - size of the virtual memory range
726 // true if it has succeeded, false if it has failed
727 bool GCToOSInterface::VirtualDecommit(void* address, size_t size)
729 return !!::VirtualFree(address, size, MEM_DECOMMIT);
732 // Reset virtual memory range. Indicates that data in the memory range specified by address and size is no
733 // longer of interest, but it should not be decommitted.
735 // address - starting virtual address
736 // size - size of the virtual memory range
737 // unlock - true if the memory range should also be unlocked
739 // true if it has succeeded, false if it has failed. Returns false also if
740 // unlocking was requested but the unlock failed.
741 bool GCToOSInterface::VirtualReset(void * address, size_t size, bool unlock)
743 bool success = ::VirtualAlloc(address, size, MEM_RESET, PAGE_READWRITE) != nullptr;
// Only attempt the unlock when the reset itself succeeded.
744 if (success && unlock)
746 ::VirtualUnlock(address, size);
752 // Check if the OS supports write watching
// Probed empirically: try to reserve a minimal write-watched region and
// release it again; support is indicated by the reservation succeeding.
753 bool GCToOSInterface::SupportsWriteWatch()
755 void* mem = GCToOSInterface::VirtualReserve(g_SystemInfo.dwAllocationGranularity, 0, VirtualReserveFlags::WriteWatch);
758 GCToOSInterface::VirtualRelease(mem, g_SystemInfo.dwAllocationGranularity);
765 // Reset the write tracking state for the specified virtual memory range.
767 // address - starting virtual address
768 // size - size of the virtual memory range
769 void GCToOSInterface::ResetWriteWatch(void* address, size_t size)
771 ::ResetWriteWatch(address, size);
774 // Retrieve addresses of the pages that are written to in a region of virtual memory
776 // resetState - true indicates to reset the write tracking state
777 // address - starting virtual address
778 // size - size of the virtual memory range
779 // pageAddresses - buffer that receives an array of page addresses in the memory region
780 // pageAddressesCount - on input, size of the lpAddresses array, in array elements
781 // on output, the number of page addresses that are returned in the array.
783 // true if it has succeeded, false if it has failed
784 bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size, void** pageAddresses, uintptr_t* pageAddressesCount)
// 1 == WRITE_WATCH_FLAG_RESET; ::GetWriteWatch returns 0 on success.
786 uint32_t flags = resetState ? 1 : 0;
789 bool success = ::GetWriteWatch(flags, address, size, pageAddresses, (ULONG_PTR*)pageAddressesCount, &granularity) == 0;
// Write-watch granularity is expected to match the GC's page size.
792 assert(granularity == OS_PAGE_SIZE);
798 // Get size of the largest cache on the processor die
800 // trueSize - true to return true cache size, false to return scaled up size based on
801 // the processor architecture
// Results are computed once and cached in the function-local statics below.
804 size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
806 static size_t maxSize;
807 static size_t maxTrueSize;
811 // maxSize and maxTrueSize cached
// cpuid leaf 0: EAX = highest supported leaf, EBX/EDX/ECX = vendor string.
825 __cpuid(dwBuffer, 0);
827 int maxCpuId = dwBuffer[0];
// 'uneG'/'Ieni'/'letn' spell "GenuineIntel" across EBX/EDX/ECX.
829 if (dwBuffer[1] == 'uneG')
831 if (dwBuffer[3] == 'Ieni')
833 if (dwBuffer[2] == 'letn')
835 maxTrueSize = GetLogicalProcessorCacheSizeFromOS(); //use OS API for cache enumeration on LH and above
839 // If we're running on a Prescott or greater core, EM64T tests
840 // show that starting with a gen0 larger than LLC improves performance.
841 // Thus, start with a gen0 size that is larger than the cache. The value of
842 // 3 is a reasonable tradeoff between workingset and performance.
843 maxSize = maxTrueSize * 3;
848 maxSize = maxTrueSize;
// 'htuA'/'itne'/'DMAc' spell "AuthenticAMD" across EBX/EDX/ECX.
854 if (dwBuffer[1] == 'htuA') {
855 if (dwBuffer[3] == 'itne') {
856 if (dwBuffer[2] == 'DMAc') {
// Extended leaf 0x80000006 reports L2/L3 cache descriptors when available.
857 __cpuid(dwBuffer, 0x80000000);
858 if (dwBuffer[0] >= 0x80000006)
860 __cpuid(dwBuffer, 0x80000006);
862 DWORD dwL2CacheBits = dwBuffer[2];
863 DWORD dwL3CacheBits = dwBuffer[3];
865 maxTrueSize = (size_t)((dwL2CacheBits >> 16) * 1024); // L2 cache size in ECX bits 31-16
// Leaf 1 EAX encodes family/model; extended family applies when base == 0xF.
867 __cpuid(dwBuffer, 0x1);
868 DWORD dwBaseFamily = (dwBuffer[0] & (0xF << 8)) >> 8;
869 DWORD dwExtFamily = (dwBuffer[0] & (0xFF << 20)) >> 20;
870 DWORD dwFamily = dwBaseFamily >= 0xF ? dwBaseFamily + dwExtFamily : dwBaseFamily;
872 if (dwFamily >= 0x10)
874 BOOL bSkipAMDL3 = FALSE;
876 if (dwFamily == 0x10) // are we running on a Barcelona (Family 10h) processor?
879 DWORD dwBaseModel = (dwBuffer[0] & (0xF << 4)) >> 4 ;
880 DWORD dwExtModel = (dwBuffer[0] & (0xF << 16)) >> 16;
881 DWORD dwModel = dwBaseFamily >= 0xF ? (dwExtModel << 4) | dwBaseModel : dwBaseModel;
886 // 65nm parts do not benefit from larger Gen0
898 // 45nm Greyhound parts (and future parts based on newer northbridge) benefit
899 // from increased gen0 size, taking L3 into account
900 __cpuid(dwBuffer, 0x80000008);
901 DWORD dwNumberOfCores = (dwBuffer[2] & (0xFF)) + 1; // NC is in ECX bits 7-0
903 DWORD dwL3CacheSize = (size_t)((dwL3CacheBits >> 18) * 512 * 1024); // L3 size in EDX bits 31-18 * 512KB
904 // L3 is shared between cores
905 dwL3CacheSize = dwL3CacheSize / dwNumberOfCores;
906 maxTrueSize += dwL3CacheSize; // due to exclusive caches, add L3 size (possibly zero) to L2
907 // L1 is too small to worry about, so ignore it
912 maxSize = maxTrueSize;
// Non-x86 (e.g. arm64) path: no cpuid, rely on the OS cache enumeration.
919 maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache
923 // Bigger gen0 size helps arm64 targets
924 maxSize = maxTrueSize * 3;
927 // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
934 // Sets the calling thread's affinity to only run on the processor specified
936 // procNo - The requested processor for the calling thread.
938 // true if setting the affinity was successful, false otherwise.
939 bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
// procNo is GroupProcNo-encoded; choose the group-aware or legacy API
// depending on whether a group component is present.
941 GroupProcNo groupProcNo(procNo);
943 if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
946 ga.Group = (WORD)groupProcNo.GetGroup();
947 ga.Reserved[0] = 0; // reserve must be filled with zero
948 ga.Reserved[1] = 0; // otherwise call may fail
950 ga.Mask = (size_t)1 << groupProcNo.GetProcIndex();
951 return !!SetThreadGroupAffinity(GetCurrentThread(), &ga, nullptr);
955 return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
959 // Boosts the calling thread's thread priority to a level higher than the default
964 // true if the priority boost was successful, false otherwise.
965 bool GCToOSInterface::BoostThreadPriority()
967 return !!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
970 // Set the set of processors enabled for GC threads for the current process based on config specified affinity mask and set
972 // configAffinityMask - mask specified by the GCHeapAffinitizeMask config
973 // configAffinitySet - affinity set specified by the GCHeapAffinitizeRanges config
975 // set of enabled processors
// Intersects g_processAffinitySet with the configured set/mask: the set form
// is used when CPU groups are enabled, the mask form otherwise.
976 const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffinityMask, const AffinitySet* configAffinitySet)
978 // When the configAffinitySet is not empty, enforce the cpu groups
979 if (CanEnableGCCPUGroups())
981 if (!configAffinitySet->IsEmpty())
983 // Update the process affinity set using the configured set
984 for (size_t i = 0; i < MAX_SUPPORTED_CPUS; i++)
// Drop any processor that is not also present in the configured set.
986 if (g_processAffinitySet.Contains(i) && !configAffinitySet->Contains(i))
988 g_processAffinitySet.Remove(i);
995 if (configAffinityMask != 0)
997 // Update the process affinity set using the configured mask
998 for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
1000 if (g_processAffinitySet.Contains(i) && ((configAffinityMask & ((uintptr_t)1 << i)) == 0))
1002 g_processAffinitySet.Remove(i);
1008 return &g_processAffinitySet;
1011 // Get number of processors assigned to the current process
1013 // The number of processors
// Result is computed once and cached in the function-local static.
1014 uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
1016 static int cCPUs = 0;
1023 if (CanEnableGCCPUGroups())
// With CPU groups the process is not limited by its launch affinity mask.
1025 count = GCToOSInterface::GetTotalProcessorCount();
1029 DWORD_PTR pmask, smask;
1031 if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
// Population count: clearing the lowest set bit once per iteration.
1042 pmask &= (pmask - 1);
1046 // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
1047 // than 64 processors, which would leave us with a count of 0. Since the GC
1048 // expects there to be at least one processor to run on (and thus at least one
1049 // heap), we'll return 64 here if count is 0, since there are likely a ton of
1050 // processors available in that case. The GC also cannot (currently) handle
1051 // the case where there are more than 64 processors, so we will return a
1052 // maximum of 64 here.
1053 if (count == 0 || count > 64)
1063 // Return the size of the user-mode portion of the virtual address space of this process.
1065 // non zero if it has succeeded, 0 if it has failed
1066 size_t GCToOSInterface::GetVirtualMemoryLimit()
1068 MEMORYSTATUSEX memStatus;
1069 GetProcessMemoryLoad(&memStatus);
// The user-mode address space can never be fully exhausted here.
1070 assert(memStatus.ullAvailVirtual != 0);
1071 return (size_t)memStatus.ullAvailVirtual;
1074 // Get the physical memory that this process can use.
1076 // non zero if it has succeeded, 0 if it has failed
1078 // If a process runs with a restricted memory limit, it returns the limit. If there's no limit
1079 // specified, it returns amount of actual physical memory.
1080 uint64_t GCToOSInterface::GetPhysicalMemoryLimit(bool* is_restricted)
1083 *is_restricted = false;
1085 size_t restricted_limit = GetRestrictedPhysicalMemoryLimit();
1086 if (restricted_limit != 0)
// A limit imposed via the virtual address range is not reported as
// "restricted" to the caller; only a genuine job-object limit is.
1088 if (is_restricted && !g_UseRestrictedVirtualMemory)
1089 *is_restricted = true;
1091 return restricted_limit;
1094 MEMORYSTATUSEX memStatus;
1095 GetProcessMemoryLoad(&memStatus);
1096 assert(memStatus.ullTotalPhys != 0);
1097 return memStatus.ullTotalPhys;
1100 // Get memory status
1102 // memory_load - A number between 0 and 100 that specifies the approximate percentage of physical memory
1103 // that is in use (0 indicates no memory use and 100 indicates full memory use).
1104 // available_physical - The amount of physical memory currently available, in bytes.
1105 // available_page_file - The maximum amount of memory the current process can commit, in bytes.
// Three cases: job-object restricted (use working set vs. limit), virtual-
// address restricted (use VM numbers), or unrestricted (use global numbers).
1106 void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available_physical, uint64_t* available_page_file)
1108 uint64_t restricted_limit = GetRestrictedPhysicalMemoryLimit();
1109 if (restricted_limit != 0)
1111 size_t workingSetSize;
1112 BOOL status = FALSE;
1113 if (!g_UseRestrictedVirtualMemory)
// GCGetProcessMemoryInfo was resolved in GetRestrictedPhysicalMemoryLimit.
1115 PROCESS_MEMORY_COUNTERS pmc;
1116 status = GCGetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
1117 workingSetSize = pmc.WorkingSetSize;
// Load is the working set as a percentage of the restricted limit.
1123 *memory_load = (uint32_t)((float)workingSetSize * 100.0 / (float)restricted_limit);
1124 if (available_physical)
1126 if(workingSetSize > restricted_limit)
1127 *available_physical = 0;
1129 *available_physical = restricted_limit - workingSetSize;
1131 // Available page file doesn't mean much when physical memory is restricted since
1132 // we don't know how much of it is available to this process so we are not going to
1133 // bother to make another OS call for it.
1134 if (available_page_file)
1135 *available_page_file = 0;
1142 ::GetProcessMemoryLoad(&ms);
1144 if (g_UseRestrictedVirtualMemory)
1146 _ASSERTE (ms.ullTotalVirtual == restricted_limit);
1147 if (memory_load != NULL)
1148 *memory_load = (uint32_t)((float)(ms.ullTotalVirtual - ms.ullAvailVirtual) * 100.0 / (float)ms.ullTotalVirtual);
1149 if (available_physical != NULL)
1150 *available_physical = ms.ullTotalVirtual;
1152 // Available page file isn't helpful when we are restricted by virtual memory
1153 // since the amount of memory we can reserve is less than the amount of
1154 // memory we can commit.
1155 if (available_page_file != NULL)
1156 *available_page_file = 0;
// Unrestricted path: report the OS-global numbers directly.
1160 if (memory_load != NULL)
1161 *memory_load = ms.dwMemoryLoad;
1162 if (available_physical != NULL)
1163 *available_physical = ms.ullAvailPhys;
1164 if (available_page_file != NULL)
1165 *available_page_file = ms.ullAvailPageFile;
1169 // Get a high precision performance counter
1171 // The counter value
1172 int64_t GCToOSInterface::QueryPerformanceCounter()
// The Win32 call cannot fail on supported platforms; assert in debug builds.
1175 if (!::QueryPerformanceCounter(&ts))
1177 assert(false && "Failed to query performance counter");
1183 // Get a frequency of the high precision performance counter
1185 // The counter frequency
1186 int64_t GCToOSInterface::QueryPerformanceFrequency()
// The Win32 call cannot fail on supported platforms; assert in debug builds.
1189 if (!::QueryPerformanceFrequency(&ts))
1191 assert(false && "Failed to query performance counter");
1197 // Get a time stamp with a low precision
1199 // Time stamp in milliseconds
// Note: GetTickCount wraps around roughly every 49.7 days.
1200 uint32_t GCToOSInterface::GetLowPrecisionTimeStamp()
1202 return ::GetTickCount();
1205 // Gets the total number of processors on the machine, not taking
1206 // into account current process affinity.
1208 // Number of processors on the machine
1209 uint32_t GCToOSInterface::GetTotalProcessorCount()
1211 if (CanEnableGCCPUGroups())
// g_nProcessors sums the active processors across all CPU groups.
1213 return g_nProcessors;
1217 return g_SystemInfo.dwNumberOfProcessors;
1221 bool GCToOSInterface::CanEnableGCNumaAware()
1223 return g_fEnableGCNumaAware;
1226 bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
1228 GroupProcNo groupProcNo(proc_no);
1230 PROCESSOR_NUMBER procNumber;
1231 procNumber.Group = groupProcNo.GetGroup();
1232 procNumber.Number = (BYTE)groupProcNo.GetProcIndex();
1233 procNumber.Reserved = 0;
1235 assert(g_fEnableGCNumaAware);
1236 return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
// Get processor number and optionally its NUMA node number for the specified heap number
// Parameters:
//  heap_number - heap number to get the result for
//  proc_no - set to the selected processor number
//  node_no - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
// Return:
//  true if it succeeded
bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
    bool success = false;

    // CPU-groups path: map the flat heap number onto a (group, in-group index)
    // pair and encode both into the single combined proc_no value.
    if (CanEnableGCCPUGroups())
        GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);

        *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();

        if (GCToOSInterface::CanEnableGCNumaAware())
            // Ask the OS for the NUMA node; fall back to "undefined" on failure
            // rather than failing the whole lookup.
            if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
                *node_no = NUMA_NODE_UNDEFINED;
        { // no numa setting, each cpu group is treated as a node
        // Non-CPU-groups path: walk the process affinity mask bit by bit and
        // pick the heap_number-th processor the process is affinitized to.
        uint8_t proc_number = 0;
        for (uintptr_t mask = 1; mask != 0; mask <<= 1)
            if (g_processAffinitySet.Contains(proc_number))
                if (bit_number == heap_number)
                    // NoGroup marks that the combined value carries a plain
                    // processor number rather than a (group, index) pair.
                    *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();

                    if (GCToOSInterface::CanEnableGCNumaAware())
                        if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
                            *node_no = NUMA_NODE_UNDEFINED;
// Parse the config string describing affinitization ranges and update the passed in affinitySet accordingly
// Parameters:
//  config_string - string describing the affinitization range, platform specific;
//                  on Windows each entry is "<group>:<index-or-range>"
//  start_index - the range start index extracted from the config_string
//  end_index - the range end index extracted from the config_string, equal to the start_index if only an index and not a range was passed in
// Return:
//  true if the configString was successfully parsed, false if it was not correct
bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_string, size_t* start_index, size_t* end_index)
    // This entry format only exists in CPU-groups mode.
    assert(g_fEnableGCCPUGroups);

    // Leading decimal number is the processor group; it must be followed by ':'.
    size_t group_number = strtoul(*config_string, &number_end, 10);

    if ((number_end == *config_string) || (*number_end != ':'))
        // No number or no colon after the number found, invalid format

    if (group_number >= g_nGroups)
        // Group number out of range

    // Advance past the ':' so the index/range parser sees just the indices.
    *config_string = number_end + 1;

    if (!ParseIndexOrRange(config_string, &start, &end))

    // Indices are relative to the group, so validate them against the group's
    // active processor count before translating to machine-wide indices.
    uint16_t group_processor_count = g_CPUGroupInfoArray[group_number].nr_active;
    if ((start >= group_processor_count) || (end >= group_processor_count))
        // Invalid CPU index values or range

    // Translate the group-relative range into machine-wide processor indices.
    uint16_t group_begin = g_CPUGroupInfoArray[group_number].begin;

    *start_index = group_begin + start;
    *end_index = group_begin + end;
1352 // Parameters of the GC thread stub
1353 struct GCThreadStubParam
1355 GCThreadFunction GCThreadFunction;
1356 void* GCThreadParam;
// GC thread stub to convert GC thread function to an OS specific thread function
static DWORD GCThreadStub(void* param)
    // The creator passes a heap-allocated GCThreadStubParam via the opaque
    // OS thread argument.
    GCThreadStubParam *stubParam = (GCThreadStubParam*)param;

    // Copy the function and its argument out of the stub param before use.
    // NOTE(review): stubParam is presumably freed once these copies are taken
    // (the cleanup line is not visible in this view) — confirm ownership.
    GCThreadFunction function = stubParam->GCThreadFunction;
    void* threadParam = stubParam->GCThreadParam;

    // Run the actual GC thread body.
    function(threadParam);
1373 // Initialize the critical section
1374 void CLRCriticalSection::Initialize()
1376 ::InitializeCriticalSection(&m_cs);
1379 // Destroy the critical section
1380 void CLRCriticalSection::Destroy()
1382 ::DeleteCriticalSection(&m_cs);
1385 // Enter the critical section. Blocks until the section can be entered.
1386 void CLRCriticalSection::Enter()
1388 ::EnterCriticalSection(&m_cs);
1391 // Leave the critical section
1392 void CLRCriticalSection::Leave()
1394 ::LeaveCriticalSection(&m_cs);
// WindowsEvent is an implementation of GCEvent that forwards
// directly to Win32 APIs.
// Start in the invalid state; a Create*Event call must succeed before use.
Impl() : m_hEvent(INVALID_HANDLE_VALUE) {}

// True once an underlying Win32 event handle has been stored.
// NOTE(review): Win32 CreateEvent reports failure with NULL rather than
// INVALID_HANDLE_VALUE — confirm a failed creation is detected as invalid.
bool IsValid() const
return m_hEvent != INVALID_HANDLE_VALUE;

// Signal the event; waiters are released per auto/manual-reset semantics.
BOOL result = SetEvent(m_hEvent);
assert(result && "SetEvent failed");

// Return the event to the non-signaled state.
BOOL result = ResetEvent(m_hEvent);
assert(result && "ResetEvent failed");

// Block until the event is signaled or 'timeout' milliseconds elapse.
// The 'alertable' flag is deliberately ignored on this implementation.
uint32_t Wait(uint32_t timeout, bool alertable)
UNREFERENCED_PARAMETER(alertable);

// Propagate the raw WaitForSingleObject result (WAIT_OBJECT_0, WAIT_TIMEOUT, ...).
return WaitForSingleObject(m_hEvent, timeout);

// Close the OS handle and mark the wrapper invalid again.
BOOL result = CloseHandle(m_hEvent);
assert(result && "CloseHandle failed");
m_hEvent = INVALID_HANDLE_VALUE;

// Create an auto-reset Win32 event (bManualReset == false).
bool CreateAutoEvent(bool initialState)
m_hEvent = CreateEvent(nullptr, false, initialState, nullptr);

// Create a manual-reset Win32 event (bManualReset == true).
bool CreateManualEvent(bool initialState)
m_hEvent = CreateEvent(nullptr, true, initialState, nullptr);
1460 void GCEvent::CloseEvent()
1462 assert(m_impl != nullptr);
1463 m_impl->CloseEvent();
// Precondition shared by GCEvent forwarding methods: the impl must exist
// (i.e. one of the Create*EventNoThrow methods succeeded earlier).
assert(m_impl != nullptr);

// Return the event to the non-signaled state by forwarding to the impl.
void GCEvent::Reset()
assert(m_impl != nullptr);
1478 uint32_t GCEvent::Wait(uint32_t timeout, bool alertable)
1480 assert(m_impl != nullptr);
1481 return m_impl->Wait(timeout, alertable);
1484 bool GCEvent::CreateAutoEventNoThrow(bool initialState)
1486 // [DESKTOP TODO] The difference between events and OS events is
1487 // whether or not the hosting API is made aware of them. When (if)
1488 // we implement hosting support for Local GC, we will need to be
1489 // aware of the host here.
1490 return CreateOSAutoEventNoThrow(initialState);
1493 bool GCEvent::CreateManualEventNoThrow(bool initialState)
1495 // [DESKTOP TODO] The difference between events and OS events is
1496 // whether or not the hosting API is made aware of them. When (if)
1497 // we implement hosting support for Local GC, we will need to be
1498 // aware of the host here.
1499 return CreateOSManualEventNoThrow(initialState);
1502 bool GCEvent::CreateOSAutoEventNoThrow(bool initialState)
1504 assert(m_impl == nullptr);
1505 std::unique_ptr<GCEvent::Impl> event(new (std::nothrow) GCEvent::Impl());
1511 if (!event->CreateAutoEvent(initialState))
1516 m_impl = event.release();
1520 bool GCEvent::CreateOSManualEventNoThrow(bool initialState)
1522 assert(m_impl == nullptr);
1523 std::unique_ptr<GCEvent::Impl> event(new (std::nothrow) GCEvent::Impl());
1529 if (!event->CreateManualEvent(initialState))
1534 m_impl = event.release();