src/gc/windows/gcenv.windows.cpp

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 #include <cstdint>
   6 #include <cassert>
   7 #include <cstddef>
   8 #include <memory>
   9 #include "windows.h"
  10 #include "psapi.h"
  11 #include "env/gcenv.structs.h"
  12 #include "env/gcenv.base.h"
  13 #include "env/gcenv.os.h"
  14 #include "env/gcenv.ee.h"
  15 #include "env/gcenv.windows.inl"
  16 #include "env/volatile.h"
  17 #include "gcconfig.h"
  18
  19 GCSystemInfo g_SystemInfo;
  20
  21 typedef BOOL (WINAPI *PGET_PROCESS_MEMORY_INFO)(HANDLE handle, PROCESS_MEMORY_COUNTERS* memCounters, uint32_t cb);
  22 static PGET_PROCESS_MEMORY_INFO GCGetProcessMemoryInfo = 0;
  23
  24 static size_t g_RestrictedPhysicalMemoryLimit = (size_t)UINTPTR_MAX;
  25
  26 // For 32-bit processes the virtual address range could be smaller than the amount of physical
  27 // memory on the machine/in the container, we need to restrict by the VM.
  28 static bool g_UseRestrictedVirtualMemory = false;
  29
  30 static AffinitySet g_processAffinitySet;
  31
  32 typedef BOOL (WINAPI *PIS_PROCESS_IN_JOB)(HANDLE processHandle, HANDLE jobHandle, BOOL* result);
  33 typedef BOOL (WINAPI *PQUERY_INFORMATION_JOB_OBJECT)(HANDLE jobHandle, JOBOBJECTINFOCLASS jobObjectInfoClass, void* lpJobObjectInfo, DWORD cbJobObjectInfoLength, LPDWORD lpReturnLength);
  34
  35 namespace {
  36
  37 static bool g_fEnableGCNumaAware;
  38
  39 class GroupProcNo
  40 {
  41     uint16_t m_groupProc;
  42
  43 public:
  44
  45     static const uint16_t NoGroup = 0x3ff;
  46
  47     GroupProcNo(uint16_t groupProc) : m_groupProc(groupProc)
  48     {
  49     }
  50
  51     GroupProcNo(uint16_t group, uint16_t procIndex) : m_groupProc((group << 6) | procIndex)
  52     {
  53         assert(group <= 0x3ff);
  54         assert(procIndex <= 0x3f);
  55     }
  56
  57     uint16_t GetGroup() { return m_groupProc >> 6; }
  58     uint16_t GetProcIndex() { return m_groupProc & 0x3f; }
  59     uint16_t GetCombinedValue() { return m_groupProc; }
  60 };
  61
  62 struct CPU_Group_Info
  63 {
  64     WORD    nr_active;  // at most 64
  65     WORD    reserved[1];
  66     WORD    begin;
  67     WORD    end;
  68     DWORD_PTR   active_mask;
  69     DWORD   groupWeight;
  70     DWORD   activeThreadWeight;
  71 };
  72
  73 static bool g_fEnableGCCPUGroups;
  74 static bool g_fHadSingleProcessorAtStartup;
  75 static DWORD  g_nGroups;
  76 static DWORD g_nProcessors;
  77 static CPU_Group_Info *g_CPUGroupInfoArray;
  78
  79 void InitNumaNodeInfo()
  80 {
  81     ULONG highest = 0;
  82
  83     g_fEnableGCNumaAware = false;
  84
  85     if (!GCConfig::GetGCNumaAware())
  86         return;
  87
  88     // fail to get the highest numa node number
  89     if (!GetNumaHighestNodeNumber(&highest) || (highest == 0))
  90         return;
  91
  92     g_fEnableGCNumaAware = true;
  93     return;
  94 }
  95
  96 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
  97 // Calculate greatest common divisor
  98 DWORD GCD(DWORD u, DWORD v)
  99 {
 100     while (v != 0)
 101     {
 102         DWORD dwTemp = v;
 103         v = u % v;
 104         u = dwTemp;
 105     }
 106
 107     return u;
 108 }
 109
 110 // Calculate least common multiple
 111 DWORD LCM(DWORD u, DWORD v)
 112 {
 113     return u / GCD(u, v) * v;
 114 }
 115 #endif
 116
 117 bool InitCPUGroupInfoArray()
 118 {
 119 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
 120     BYTE *bBuffer = NULL;
 121     SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pSLPIEx = NULL;
 122     SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pRecord = NULL;
 123     DWORD cbSLPIEx = 0;
 124     DWORD byteOffset = 0;
 125     DWORD dwNumElements = 0;
 126     DWORD dwWeight = 1;
 127
 128     if (GetLogicalProcessorInformationEx(RelationGroup, pSLPIEx, &cbSLPIEx) &&
 129                       GetLastError() != ERROR_INSUFFICIENT_BUFFER)
 130         return false;
 131
 132     assert(cbSLPIEx);
 133
 134     // Fail to allocate buffer
 135     bBuffer = new (std::nothrow) BYTE[ cbSLPIEx ];
 136     if (bBuffer == NULL)
 137         return false;
 138
 139     pSLPIEx = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)bBuffer;
 140     if (!GetLogicalProcessorInformationEx(RelationGroup, pSLPIEx, &cbSLPIEx))
 141     {
 142         delete[] bBuffer;
 143         return false;
 144     }
 145
 146     pRecord = pSLPIEx;
 147     while (byteOffset < cbSLPIEx)
 148     {
 149         if (pRecord->Relationship == RelationGroup)
 150         {
 151             g_nGroups = pRecord->Group.ActiveGroupCount;
 152             break;
 153         }
 154         byteOffset += pRecord->Size;
 155         pRecord = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(bBuffer + byteOffset);
 156     }
 157
 158     g_CPUGroupInfoArray = new (std::nothrow) CPU_Group_Info[g_nGroups];
 159     if (g_CPUGroupInfoArray == NULL)
 160     {
 161         delete[] bBuffer;
 162         return false;
 163     }
 164
 165     for (DWORD i = 0; i < g_nGroups; i++)
 166     {
 167         g_CPUGroupInfoArray[i].nr_active   = (WORD)pRecord->Group.GroupInfo[i].ActiveProcessorCount;
 168         g_CPUGroupInfoArray[i].active_mask = pRecord->Group.GroupInfo[i].ActiveProcessorMask;
 169         g_nProcessors += g_CPUGroupInfoArray[i].nr_active;
 170         dwWeight = LCM(dwWeight, (DWORD)g_CPUGroupInfoArray[i].nr_active);
 171     }
 172
 173     // The number of threads per group that can be supported will depend on the number of CPU groups
 174     // and the number of LPs within each processor group. For example, when the number of LPs in
 175     // CPU groups is the same and is 64, the number of threads per group before weight overflow
 176     // would be 2^32/2^6 = 2^26 (64M threads)
 177     for (DWORD i = 0; i < g_nGroups; i++)
 178     {
 179         g_CPUGroupInfoArray[i].groupWeight = dwWeight / (DWORD)g_CPUGroupInfoArray[i].nr_active;
 180         g_CPUGroupInfoArray[i].activeThreadWeight = 0;
 181     }
 182
 183     delete[] bBuffer;  // done with it; free it
 184     return true;
 185 #else
 186     return false;
 187 #endif
 188 }
 189
 190 bool InitCPUGroupInfoRange()
 191 {
 192 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
 193     WORD begin   = 0;
 194     WORD nr_proc = 0;
 195
 196     for (WORD i = 0; i < g_nGroups; i++)
 197     {
 198         nr_proc += g_CPUGroupInfoArray[i].nr_active;
 199         g_CPUGroupInfoArray[i].begin = begin;
 200         g_CPUGroupInfoArray[i].end   = nr_proc - 1;
 201         begin = nr_proc;
 202     }
 203
 204     return true;
 205 #else
 206     return false;
 207 #endif
 208 }
 209
 210 void InitCPUGroupInfo()
 211 {
 212     g_fEnableGCCPUGroups = false;
 213
 214 #if (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
 215     if (!GCConfig::GetGCCpuGroup())
 216         return;
 217
 218     if (!InitCPUGroupInfoArray())
 219         return;
 220
 221     if (!InitCPUGroupInfoRange())
 222         return;
 223
 224     // only enable CPU groups if more than one group exists
 225     g_fEnableGCCPUGroups = g_nGroups > 1;
 226 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
 227
 228     // Determine if the process is affinitized to a single processor (or if the system has a single processor)
 229     DWORD_PTR processAffinityMask, systemAffinityMask;
 230     if (::GetProcessAffinityMask(::GetCurrentProcess(), &processAffinityMask, &systemAffinityMask))
 231     {
 232         processAffinityMask &= systemAffinityMask;
 233         if (processAffinityMask != 0 && // only one CPU group is involved
 234             (processAffinityMask & (processAffinityMask - 1)) == 0) // only one bit is set
 235         {
 236             g_fHadSingleProcessorAtStartup = true;
 237         }
 238     }
 239 }
 240
 241 void GetProcessMemoryLoad(LPMEMORYSTATUSEX pMSEX)
 242 {
 243     pMSEX->dwLength = sizeof(MEMORYSTATUSEX);
 244     BOOL fRet = ::GlobalMemoryStatusEx(pMSEX);
 245     assert(fRet);
 246 }
 247
 248 static size_t GetRestrictedPhysicalMemoryLimit()
 249 {
 250     LIMITED_METHOD_CONTRACT;
 251
 252     // The limit was cached already
 253     if (g_RestrictedPhysicalMemoryLimit != (size_t)UINTPTR_MAX)
 254         return g_RestrictedPhysicalMemoryLimit;
 255
 256     size_t job_physical_memory_limit = (size_t)UINTPTR_MAX;
 257     uint64_t total_virtual = 0;
 258     uint64_t total_physical = 0;
 259     BOOL in_job_p = FALSE;
 260     HINSTANCE hinstKernel32 = 0;
 261
 262     PIS_PROCESS_IN_JOB GCIsProcessInJob = 0;
 263     PQUERY_INFORMATION_JOB_OBJECT GCQueryInformationJobObject = 0;
 264
 265     hinstKernel32 = LoadLibraryEx(L"kernel32.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32);
 266     if (!hinstKernel32)
 267         goto exit;
 268
 269     GCIsProcessInJob = (PIS_PROCESS_IN_JOB)GetProcAddress(hinstKernel32, "IsProcessInJob");
 270     if (!GCIsProcessInJob)
 271         goto exit;
 272
 273     if (!GCIsProcessInJob(GetCurrentProcess(), NULL, &in_job_p))
 274         goto exit;
 275
 276     if (in_job_p)
 277     {
 278         GCGetProcessMemoryInfo = (PGET_PROCESS_MEMORY_INFO)GetProcAddress(hinstKernel32, "K32GetProcessMemoryInfo");
 279
 280         if (!GCGetProcessMemoryInfo)
 281             goto exit;
 282
 283         GCQueryInformationJobObject = (PQUERY_INFORMATION_JOB_OBJECT)GetProcAddress(hinstKernel32, "QueryInformationJobObject");
 284
 285         if (!GCQueryInformationJobObject)
 286             goto exit;
 287
 288         JOBOBJECT_EXTENDED_LIMIT_INFORMATION limit_info;
 289         if (GCQueryInformationJobObject (NULL, JobObjectExtendedLimitInformation, &limit_info,
 290             sizeof(limit_info), NULL))
 291         {
 292             size_t job_memory_limit = (size_t)UINTPTR_MAX;
 293             size_t job_process_memory_limit = (size_t)UINTPTR_MAX;
 294             size_t job_workingset_limit = (size_t)UINTPTR_MAX;
 295
 296             // Notes on the NT job object:
 297             //
 298             // You can specific a bigger process commit or working set limit than
 299             // job limit which is pointless so we use the smallest of all 3 as
 300             // to calculate our "physical memory load" or "available physical memory"
 301             // when running inside a job object, ie, we treat this as the amount of physical memory
 302             // our process is allowed to use.
 303             //
 304             // The commit limit is already reflected by default when you run in a
 305             // job but the physical memory load is not.
 306             //
 307             if ((limit_info.BasicLimitInformation.LimitFlags & JOB_OBJECT_LIMIT_JOB_MEMORY) != 0)
 308                 job_memory_limit = limit_info.JobMemoryLimit;
 309             if ((limit_info.BasicLimitInformation.LimitFlags & JOB_OBJECT_LIMIT_PROCESS_MEMORY) != 0)
 310                 job_process_memory_limit = limit_info.ProcessMemoryLimit;
 311             if ((limit_info.BasicLimitInformation.LimitFlags & JOB_OBJECT_LIMIT_WORKINGSET) != 0)
 312                 job_workingset_limit = limit_info.BasicLimitInformation.MaximumWorkingSetSize;
 313
 314             if ((job_memory_limit != (size_t)UINTPTR_MAX) ||
 315                 (job_process_memory_limit != (size_t)UINTPTR_MAX) ||
 316                 (job_workingset_limit != (size_t)UINTPTR_MAX))
 317             {
 318                 job_physical_memory_limit = min (job_memory_limit, job_process_memory_limit);
 319                 job_physical_memory_limit = min (job_physical_memory_limit, job_workingset_limit);
 320
 321                 MEMORYSTATUSEX ms;
 322                 ::GetProcessMemoryLoad(&ms);
 323                 total_virtual = ms.ullTotalVirtual;
 324                 total_physical = ms.ullAvailPhys;
 325
 326                 // A sanity check in case someone set a larger limit than there is actual physical memory.
 327                 job_physical_memory_limit = (size_t) min (job_physical_memory_limit, ms.ullTotalPhys);
 328             }
 329         }
 330     }
 331
 332 exit:
 333     if (job_physical_memory_limit == (size_t)UINTPTR_MAX)
 334     {
 335         job_physical_memory_limit = 0;
 336
 337         if (hinstKernel32 != 0)
 338         {
 339             FreeLibrary(hinstKernel32);
 340             hinstKernel32 = 0;
 341             GCGetProcessMemoryInfo = 0;
 342         }
 343     }
 344
 345     // Check to see if we are limited by VM.
 346     if (total_virtual == 0)
 347     {
 348         MEMORYSTATUSEX ms;
 349         ::GetProcessMemoryLoad(&ms);
 350
 351         total_virtual = ms.ullTotalVirtual;
 352         total_physical = ms.ullTotalPhys;
 353     }
 354
 355     if (job_physical_memory_limit != 0)
 356     {
 357         total_physical = job_physical_memory_limit;
 358     }
 359
 360     if (total_virtual < total_physical)
 361     {
 362         if (hinstKernel32 != 0)
 363         {
 364             // We can also free the lib here - if we are limited by VM we will not be calling
 365             // GetProcessMemoryInfo.
 366             FreeLibrary(hinstKernel32);
 367             GCGetProcessMemoryInfo = 0;
 368         }
 369         g_UseRestrictedVirtualMemory = true;
 370         job_physical_memory_limit = (size_t)total_virtual;
 371     }
 372
 373     VolatileStore(&g_RestrictedPhysicalMemoryLimit, job_physical_memory_limit);
 374     return g_RestrictedPhysicalMemoryLimit;
 375 }
 376
 377 // This function checks to see if GetLogicalProcessorInformation API is supported.
 378 // On success, this function allocates a SLPI array, sets nEntries to number
 379 // of elements in the SLPI array and returns a pointer to the SLPI array after filling it with information.
 380 //
 381 // Note: If successful, GetLPI allocates memory for the SLPI array and expects the caller to
 382 // free the memory once the caller is done using the information in the SLPI array.
 383 SYSTEM_LOGICAL_PROCESSOR_INFORMATION *GetLPI(PDWORD nEntries)
 384 {
 385     DWORD cbslpi = 0;
 386     DWORD dwNumElements = 0;
 387     SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = NULL;
 388
 389     // We setup the first call to GetLogicalProcessorInformation to fail so that we can obtain
 390     // the size of the buffer required to allocate for the SLPI array that is returned
 391
 392     if (!GetLogicalProcessorInformation(pslpi, &cbslpi) &&
 393             GetLastError() != ERROR_INSUFFICIENT_BUFFER)
 394     {
 395         // If we fail with anything other than an ERROR_INSUFFICIENT_BUFFER here, we punt with failure.
 396         return NULL;
 397     }
 398
 399     _ASSERTE(cbslpi);
 400
 401     // compute the number of SLPI entries required to hold the information returned from GLPI
 402
 403     dwNumElements = cbslpi / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
 404
 405     // allocate a buffer in the free heap to hold an array of SLPI entries from GLPI, number of elements in the array is dwNumElements
 406
 407     pslpi = new (std::nothrow) SYSTEM_LOGICAL_PROCESSOR_INFORMATION[ dwNumElements ];
 408
 409     if (pslpi == NULL)
 410     {
 411         // the memory allocation failed
 412         return NULL;
 413     }
 414
 415     // Make call to GetLogicalProcessorInformation. Returns array of SLPI structures
 416
 417     if (!GetLogicalProcessorInformation(pslpi, &cbslpi))
 418     {
 419         // GetLogicalProcessorInformation failed
 420         delete[] pslpi ; //Allocation was fine but the API call itself failed and so we are releasing the memory before the return NULL.
 421         return NULL ;
 422     }
 423
 424     // GetLogicalProcessorInformation successful, set nEntries to number of entries in the SLPI array
 425     *nEntries  = dwNumElements;
 426
 427     return pslpi;    // return pointer to SLPI array
 428
 429 }//GetLPI
 430
 431 // This function returns the size of highest level cache on the physical chip.   If it cannot
 432 // determine the cachesize this function returns 0.
 433 size_t GetLogicalProcessorCacheSizeFromOS()
 434 {
 435     size_t cache_size = 0;
 436     DWORD nEntries = 0;
 437
 438     // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful.  Returns NULL
 439     // if API not present or on failure.
 440
 441     SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = GetLPI(&nEntries) ;
 442
 443     if (pslpi == NULL)
 444     {
 445         // GetLogicalProcessorInformation not supported or failed.
 446         goto Exit;
 447     }
 448
 449     // Crack the information. Iterate through all the SLPI array entries for all processors in system.
 450     // Will return the greatest of all the processor cache sizes or zero
 451     {
 452         size_t last_cache_size = 0;
 453
 454         for (DWORD i=0; i < nEntries; i++)
 455         {
 456             if (pslpi[i].Relationship == RelationCache)
 457             {
 458                 last_cache_size = max(last_cache_size, pslpi[i].Cache.Size);
 459             }
 460         }
 461         cache_size = last_cache_size;
 462     }
 463 Exit:
 464
 465     if(pslpi)
 466         delete[] pslpi;  // release the memory allocated for the SLPI array.
 467
 468     return cache_size;
 469 }
 470
 471 bool CanEnableGCCPUGroups()
 472 {
 473     return g_fEnableGCCPUGroups;
 474 }
 475
 476 // Get the CPU group for the specified processor
 477 void GetGroupForProcessor(uint16_t processor_number, uint16_t* group_number, uint16_t* group_processor_number)
 478 {
 479     assert(g_fEnableGCCPUGroups);
 480
 481 #if !defined(FEATURE_REDHAWK) && (defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_))
 482     WORD bTemp = 0;
 483     WORD bDiff = processor_number - bTemp;
 484
 485     for (WORD i=0; i < g_nGroups; i++)
 486     {
 487         bTemp += g_CPUGroupInfoArray[i].nr_active;
 488         if (bTemp > processor_number)
 489         {
 490             *group_number = i;
 491             *group_processor_number = bDiff;
 492             break;
 493         }
 494         bDiff = processor_number - bTemp;
 495     }
 496 #else
 497     *group_number = 0;
 498     *group_processor_number = 0;
 499 #endif
 500 }
 501
 502 } // anonymous namespace
 503
 504 // Initialize the interface implementation
 505 // Return:
 506 //  true if it has succeeded, false if it has failed
 507 bool GCToOSInterface::Initialize()
 508 {
 509     SYSTEM_INFO systemInfo;
 510     GetSystemInfo(&systemInfo);
 511
 512     g_SystemInfo.dwNumberOfProcessors = systemInfo.dwNumberOfProcessors;
 513     g_SystemInfo.dwPageSize = systemInfo.dwPageSize;
 514     g_SystemInfo.dwAllocationGranularity = systemInfo.dwAllocationGranularity;
 515
 516     assert(systemInfo.dwPageSize == 0x1000);
 517
 518     InitNumaNodeInfo();
 519     InitCPUGroupInfo();
 520
 521     if (CanEnableGCCPUGroups())
 522     {
 523         // When CPU groups are enabled, then the process is not bound by the process affinity set at process launch.
 524         // Set the initial affinity mask so that all processors are enabled.
 525         for (size_t i = 0; i < g_nProcessors; i++)
 526         {
 527             g_processAffinitySet.Add(i);
 528         }
 529     }
 530     else
 531     {
 532         // When CPU groups are disabled, the process affinity mask specified at the process launch cannot be
 533         // escaped.
 534         uintptr_t pmask, smask;
 535         if (!!::GetProcessAffinityMask(::GetCurrentProcess(), (PDWORD_PTR)&pmask, (PDWORD_PTR)&smask))
 536         {
 537             pmask &= smask;
 538
 539             for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
 540             {
 541                 if ((pmask & ((uintptr_t)1 << i)) != 0)
 542                 {
 543                     g_processAffinitySet.Add(i);
 544                 }
 545             }
 546         }
 547     }
 548
 549     return true;
 550 }
 551
 552 // Shutdown the interface implementation
 553 void GCToOSInterface::Shutdown()
 554 {
 555     // nothing to do.
 556 }
 557
 558 // Get numeric id of the current thread if possible on the
 559 // current platform. It is indended for logging purposes only.
 560 // Return:
 561 //  Numeric id of the current thread or 0 if the
 562 uint64_t GCToOSInterface::GetCurrentThreadIdForLogging()
 563 {
 564     return ::GetCurrentThreadId();
 565 }
 566
 567 // Get id of the process
 568 uint32_t GCToOSInterface::GetCurrentProcessId()
 569 {
 570     return ::GetCurrentThreadId();
 571 }
 572
 573 // Set ideal processor for the current thread
 574 // Parameters:
 575 //  srcProcNo - processor number the thread currently runs on
 576 //  dstProcNo - processor number the thread should be migrated to
 577 // Return:
 578 //  true if it has succeeded, false if it has failed
 579 bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t dstProcNo)
 580 {
 581     LIMITED_METHOD_CONTRACT;
 582
 583     bool success = true;
 584
 585     GroupProcNo srcGroupProcNo(srcProcNo);
 586     GroupProcNo dstGroupProcNo(dstProcNo);
 587
 588     if (CanEnableGCCPUGroups())
 589     {
 590         if (srcGroupProcNo.GetGroup() != dstGroupProcNo.GetGroup())
 591         {
 592             //only set ideal processor when srcProcNo and dstProcNo are in the same cpu
 593             //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
 594             return true;
 595         }
 596     }
 597
 598 #if !defined(FEATURE_CORESYSTEM)
 599     SetThreadIdealProcessor(GetCurrentThread(), (DWORD)dstGroupProcNo.GetProcIndex());
 600 #else
 601     PROCESSOR_NUMBER proc;
 602
 603     if (dstGroupProcNo.GetGroup() != GroupProcNo::NoGroup)
 604     {
 605         proc.Group = (WORD)dstGroupProcNo.GetGroup();
 606         proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
 607         proc.Reserved = 0;
 608
 609         success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
 610     }
 611     else
 612     {
 613         if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
 614         {
 615             proc.Number = (BYTE)dstGroupProcNo.GetProcIndex();
 616             success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
 617         }
 618     }
 619 #endif
 620
 621     return success;
 622 }
 623
 624 // Get the number of the current processor
 625 uint32_t GCToOSInterface::GetCurrentProcessorNumber()
 626 {
 627     assert(GCToOSInterface::CanGetCurrentProcessorNumber());
 628     return ::GetCurrentProcessorNumber();
 629 }
 630
 631 // Check if the OS supports getting current processor number
 632 bool GCToOSInterface::CanGetCurrentProcessorNumber()
 633 {
 634     // on all Windows platforms we support this API exists
 635     return true;
 636 }
 637
 638 // Flush write buffers of processors that are executing threads of the current process
 639 void GCToOSInterface::FlushProcessWriteBuffers()
 640 {
 641     ::FlushProcessWriteBuffers();
 642 }
 643
 644 // Break into a debugger
 645 void GCToOSInterface::DebugBreak()
 646 {
 647     ::DebugBreak();
 648 }
 649
 650 // Causes the calling thread to sleep for the specified number of milliseconds
 651 // Parameters:
 652 //  sleepMSec   - time to sleep before switching to another thread
 653 void GCToOSInterface::Sleep(uint32_t sleepMSec)
 654 {
 655     // TODO(segilles) CLR implementation of __SwitchToThread spins for short sleep durations
 656     // to avoid context switches - is that interesting or useful here?
 657     if (sleepMSec > 0)
 658     {
 659         ::SleepEx(sleepMSec, FALSE);
 660     }
 661 }
 662
 663 // Causes the calling thread to yield execution to another thread that is ready to run on the current processor.
 664 // Parameters:
 665 //  switchCount - number of times the YieldThread was called in a loop
 666 void GCToOSInterface::YieldThread(uint32_t switchCount)
 667 {
 668     UNREFERENCED_PARAMETER(switchCount);
 669     SwitchToThread();
 670 }
 671
 672 // Reserve virtual memory range.
 673 // Parameters:
 674 //  address   - starting virtual address, it can be NULL to let the function choose the starting address
 675 //  size      - size of the virtual memory range
 676 //  alignment - requested memory alignment, 0 means no specific alignment requested
 677 //  flags     - flags to control special settings like write watching
 678 // Return:
 679 //  Starting virtual address of the reserved range
 680 void* GCToOSInterface::VirtualReserve(size_t size, size_t alignment, uint32_t flags)
 681 {
 682     // Windows already ensures 64kb alignment on VirtualAlloc. The current CLR
 683     // implementation ignores it on Windows, other than making some sanity checks on it.
 684     UNREFERENCED_PARAMETER(alignment);
 685     assert((alignment & (alignment - 1)) == 0);
 686     assert(alignment <= 0x10000);
 687     DWORD memFlags = (flags & VirtualReserveFlags::WriteWatch) ? (MEM_RESERVE | MEM_WRITE_WATCH) : MEM_RESERVE;
 688     return ::VirtualAlloc(nullptr, size, memFlags, PAGE_READWRITE);
 689 }
 690
 691 // Release virtual memory range previously reserved using VirtualReserve
 692 // Parameters:
 693 //  address - starting virtual address
 694 //  size    - size of the virtual memory range
 695 // Return:
 696 //  true if it has succeeded, false if it has failed
 697 bool GCToOSInterface::VirtualRelease(void* address, size_t size)
 698 {
 699     return !!::VirtualFree(address, 0, MEM_RELEASE);
 700 }
 701
 702 // Commit virtual memory range. It must be part of a range reserved using VirtualReserve.
 703 // Parameters:
 704 //  address - starting virtual address
 705 //  size    - size of the virtual memory range
 706 // Return:
 707 //  true if it has succeeded, false if it has failed
 708 bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
 709 {
 710     if (node == NUMA_NODE_UNDEFINED)
 711     {
 712         return ::VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE) != nullptr;
 713     }
 714     else
 715     {
 716         assert(g_fEnableGCNumaAware);
 717         return ::VirtualAllocExNuma(::GetCurrentProcess(), address, size, MEM_COMMIT, PAGE_READWRITE, node) != nullptr;
 718     }
 719 }
 720
 721 // Decomit virtual memory range.
 722 // Parameters:
 723 //  address - starting virtual address
 724 //  size    - size of the virtual memory range
 725 // Return:
 726 //  true if it has succeeded, false if it has failed
 727 bool GCToOSInterface::VirtualDecommit(void* address, size_t size)
 728 {
 729     return !!::VirtualFree(address, size, MEM_DECOMMIT);
 730 }
 731
 732 // Reset virtual memory range. Indicates that data in the memory range specified by address and size is no
 733 // longer of interest, but it should not be decommitted.
 734 // Parameters:
 735 //  address - starting virtual address
 736 //  size    - size of the virtual memory range
 737 //  unlock  - true if the memory range should also be unlocked
 738 // Return:
 739 //  true if it has succeeded, false if it has failed. Returns false also if
 740 //  unlocking was requested but the unlock failed.
 741 bool GCToOSInterface::VirtualReset(void * address, size_t size, bool unlock)
 742 {
 743     bool success = ::VirtualAlloc(address, size, MEM_RESET, PAGE_READWRITE) != nullptr;
 744     if (success && unlock)
 745     {
 746         ::VirtualUnlock(address, size);
 747     }
 748
 749     return success;
 750 }
 751
 752 // Check if the OS supports write watching
 753 bool GCToOSInterface::SupportsWriteWatch()
 754 {
 755     void* mem = GCToOSInterface::VirtualReserve(g_SystemInfo.dwAllocationGranularity, 0, VirtualReserveFlags::WriteWatch);
 756     if (mem != nullptr)
 757     {
 758         GCToOSInterface::VirtualRelease(mem, g_SystemInfo.dwAllocationGranularity);
 759         return true;
 760     }
 761
 762     return false;
 763 }
 764
 765 // Reset the write tracking state for the specified virtual memory range.
 766 // Parameters:
 767 //  address - starting virtual address
 768 //  size    - size of the virtual memory range
 769 void GCToOSInterface::ResetWriteWatch(void* address, size_t size)
 770 {
 771     ::ResetWriteWatch(address, size);
 772 }
 773
 774 // Retrieve addresses of the pages that are written to in a region of virtual memory
 775 // Parameters:
 776 //  resetState         - true indicates to reset the write tracking state
 777 //  address            - starting virtual address
 778 //  size               - size of the virtual memory range
 779 //  pageAddresses      - buffer that receives an array of page addresses in the memory region
 780 //  pageAddressesCount - on input, size of the lpAddresses array, in array elements
 781 //                       on output, the number of page addresses that are returned in the array.
 782 // Return:
 783 //  true if it has succeeded, false if it has failed
 784 bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size, void** pageAddresses, uintptr_t* pageAddressesCount)
 785 {
 786     uint32_t flags = resetState ? 1 : 0;
 787     ULONG granularity;
 788
 789     bool success = ::GetWriteWatch(flags, address, size, pageAddresses, (ULONG_PTR*)pageAddressesCount, &granularity) == 0;
 790     if (success)
 791     {
 792         assert(granularity == OS_PAGE_SIZE);
 793     }
 794
 795     return success;
 796 }
 797
 798 // Get size of the largest cache on the processor die
 799 // Parameters:
 800 //  trueSize - true to return true cache size, false to return scaled up size based on
 801 //             the processor architecture
 802 // Return:
 803 //  Size of the cache
 804 size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
 805 {
 806     static size_t maxSize;
 807     static size_t maxTrueSize;
 808
 809     if (maxSize)
 810     {
 811         // maxSize and maxTrueSize cached
 812         if (trueSize)
 813         {
 814             return maxTrueSize;
 815         }
 816         else
 817         {
 818             return maxSize;
 819         }
 820     }
 821
 822 #ifdef _X86_
 823     int dwBuffer[4];
 824
 825     __cpuid(dwBuffer, 0);
 826
 827     int maxCpuId = dwBuffer[0];
 828
 829     if (dwBuffer[1] == 'uneG')
 830     {
 831         if (dwBuffer[3] == 'Ieni')
 832         {
 833             if (dwBuffer[2] == 'letn')
 834             {
 835                 maxTrueSize = GetLogicalProcessorCacheSizeFromOS(); //use OS API for cache enumeration on LH and above
 836 #ifdef BIT64
 837                 if (maxCpuId >= 2)
 838                 {
 839                     // If we're running on a Prescott or greater core, EM64T tests
 840                     // show that starting with a gen0 larger than LLC improves performance.
 841                     // Thus, start with a gen0 size that is larger than the cache.  The value of
 842                     // 3 is a reasonable tradeoff between workingset and performance.
 843                     maxSize = maxTrueSize * 3;
 844                 }
 845                 else
 846 #endif
 847                 {
 848                     maxSize = maxTrueSize;
 849                 }
 850             }
 851         }
 852     }
 853
 854     if (dwBuffer[1] == 'htuA') {
 855         if (dwBuffer[3] == 'itne') {
 856             if (dwBuffer[2] == 'DMAc') {
 857                 __cpuid(dwBuffer, 0x80000000);
 858                 if (dwBuffer[0] >= 0x80000006)
 859                 {
 860                     __cpuid(dwBuffer, 0x80000006);
 861
 862                     DWORD dwL2CacheBits = dwBuffer[2];
 863                     DWORD dwL3CacheBits = dwBuffer[3];
 864
 865                     maxTrueSize = (size_t)((dwL2CacheBits >> 16) * 1024);    // L2 cache size in ECX bits 31-16
 866
 867                     __cpuid(dwBuffer, 0x1);
 868                     DWORD dwBaseFamily = (dwBuffer[0] & (0xF << 8)) >> 8;
 869                     DWORD dwExtFamily  = (dwBuffer[0] & (0xFF << 20)) >> 20;
 870                     DWORD dwFamily = dwBaseFamily >= 0xF ? dwBaseFamily + dwExtFamily : dwBaseFamily;
 871
 872                     if (dwFamily >= 0x10)
 873                     {
 874                         BOOL bSkipAMDL3 = FALSE;
 875
 876                         if (dwFamily == 0x10)   // are we running on a Barcelona (Family 10h) processor?
 877                         {
 878                             // check model
 879                             DWORD dwBaseModel = (dwBuffer[0] & (0xF << 4)) >> 4 ;
 880                             DWORD dwExtModel  = (dwBuffer[0] & (0xF << 16)) >> 16;
 881                             DWORD dwModel = dwBaseFamily >= 0xF ? (dwExtModel << 4) | dwBaseModel : dwBaseModel;
 882
 883                             switch (dwModel)
 884                             {
 885                                 case 0x2:
 886                                     // 65nm parts do not benefit from larger Gen0
 887                                     bSkipAMDL3 = TRUE;
 888                                     break;
 889
 890                                 case 0x4:
 891                                 default:
 892                                     bSkipAMDL3 = FALSE;
 893                             }
 894                         }
 895
 896                         if (!bSkipAMDL3)
 897                         {
 898                             // 45nm Greyhound parts (and future parts based on newer northbridge) benefit
 899                             // from increased gen0 size, taking L3 into account
 900                             __cpuid(dwBuffer, 0x80000008);
 901                             DWORD dwNumberOfCores = (dwBuffer[2] & (0xFF)) + 1;     // NC is in ECX bits 7-0
 902
 903                             DWORD dwL3CacheSize = (size_t)((dwL3CacheBits >> 18) * 512 * 1024);  // L3 size in EDX bits 31-18 * 512KB
 904                             // L3 is shared between cores
 905                             dwL3CacheSize = dwL3CacheSize / dwNumberOfCores;
 906                             maxTrueSize += dwL3CacheSize;       // due to exclusive caches, add L3 size (possibly zero) to L2
 907                                                                 // L1 is too small to worry about, so ignore it
 908                         }
 909                     }
 910
 911
 912                     maxSize = maxTrueSize;
 913                 }
 914             }
 915         }
 916     }
 917
 918 #else
 919     maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache
 920 #endif
 921
 922 #if defined(_ARM64_)
 923     // Bigger gen0 size helps arm64 targets
 924     maxSize = maxTrueSize * 3;
 925 #endif
 926
 927     //    printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
 928     if (trueSize)
 929         return maxTrueSize;
 930     else
 931         return maxSize;
 932 }
 933
 934 // Sets the calling thread's affinity to only run on the processor specified
 935 // Parameters:
 936 //  procNo - The requested processor for the calling thread.
 937 // Return:
 938 //  true if setting the affinity was successful, false otherwise.
 939 bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
 940 {
 941     GroupProcNo groupProcNo(procNo);
 942
 943     if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
 944     {
 945         GROUP_AFFINITY ga;
 946         ga.Group = (WORD)groupProcNo.GetGroup();
 947         ga.Reserved[0] = 0; // reserve must be filled with zero
 948         ga.Reserved[1] = 0; // otherwise call may fail
 949         ga.Reserved[2] = 0;
 950         ga.Mask = (size_t)1 << groupProcNo.GetProcIndex();
 951         return !!SetThreadGroupAffinity(GetCurrentThread(), &ga, nullptr);
 952     }
 953     else
 954     {
 955         return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
 956     }
 957 }
 958
 959 // Boosts the calling thread's thread priority to a level higher than the default
 960 // for new threads.
 961 // Parameters:
 962 //  None.
 963 // Return:
 964 //  true if the priority boost was successful, false otherwise.
 965 bool GCToOSInterface::BoostThreadPriority()
 966 {
 967     return !!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
 968 }
 969
 970 // Set the set of processors enabled for GC threads for the current process based on config specified affinity mask and set
 971 // Parameters:
 972 //  configAffinityMask - mask specified by the GCHeapAffinitizeMask config
 973 //  configAffinitySet  - affinity set specified by the GCHeapAffinitizeRanges config
 974 // Return:
 975 //  set of enabled processors
 976 const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffinityMask, const AffinitySet* configAffinitySet)
 977 {
 978     // When the configAffinitySet is not empty, enforce the cpu groups
 979     if (CanEnableGCCPUGroups())
 980     {
 981         if (!configAffinitySet->IsEmpty())
 982         {
 983             // Update the process affinity set using the configured set
 984             for (size_t i = 0; i < MAX_SUPPORTED_CPUS; i++)
 985             {
 986                 if (g_processAffinitySet.Contains(i) && !configAffinitySet->Contains(i))
 987                 {
 988                     g_processAffinitySet.Remove(i);
 989                 }
 990             }
 991         }
 992     }
 993     else
 994     {
 995         if (configAffinityMask != 0)
 996         {
 997             // Update the process affinity set using the configured mask
 998             for (size_t i = 0; i < 8 * sizeof(uintptr_t); i++)
 999             {
1000                 if (g_processAffinitySet.Contains(i) && ((configAffinityMask & ((uintptr_t)1 << i)) == 0))
1001                 {
1002                     g_processAffinitySet.Remove(i);
1003                 }
1004             }
1005         }
1006     }
1007
1008     return &g_processAffinitySet;
1009 }
1010
1011 // Get number of processors assigned to the current process
1012 // Return:
1013 //  The number of processors
1014 uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
1015 {
1016     static int cCPUs = 0;
1017
1018     if (cCPUs != 0)
1019         return cCPUs;
1020
1021     int count;
1022
1023     if (CanEnableGCCPUGroups())
1024     {
1025         count = GCToOSInterface::GetTotalProcessorCount();
1026     }
1027     else
1028     {
1029         DWORD_PTR pmask, smask;
1030
1031         if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
1032         {
1033             count = 1;
1034         }
1035         else
1036         {
1037             count = 0;
1038             pmask &= smask;
1039
1040             while (pmask)
1041             {
1042                 pmask &= (pmask - 1);
1043                 count++;
1044             }
1045
1046             // GetProcessAffinityMask can return pmask=0 and smask=0 on systems with more
1047             // than 64 processors, which would leave us with a count of 0.  Since the GC
1048             // expects there to be at least one processor to run on (and thus at least one
1049             // heap), we'll return 64 here if count is 0, since there are likely a ton of
1050             // processors available in that case.  The GC also cannot (currently) handle
1051             // the case where there are more than 64 processors, so we will return a
1052             // maximum of 64 here.
1053             if (count == 0 || count > 64)
1054                 count = 64;
1055         }
1056     }
1057
1058     cCPUs = count;
1059
1060     return count;
1061 }
1062
1063 // Return the size of the user-mode portion of the virtual address space of this process.
1064 // Return:
1065 //  non zero if it has succeeded, 0 if it has failed
1066 size_t GCToOSInterface::GetVirtualMemoryLimit()
1067 {
1068     MEMORYSTATUSEX memStatus;
1069     GetProcessMemoryLoad(&memStatus);
1070     assert(memStatus.ullAvailVirtual != 0);
1071     return (size_t)memStatus.ullAvailVirtual;
1072 }
1073
1074 // Get the physical memory that this process can use.
1075 // Return:
1076 //  non zero if it has succeeded, 0 if it has failed
1077 // Remarks:
1078 //  If a process runs with a restricted memory limit, it returns the limit. If there's no limit
1079 //  specified, it returns amount of actual physical memory.
1080 uint64_t GCToOSInterface::GetPhysicalMemoryLimit(bool* is_restricted)
1081 {
1082     if (is_restricted)
1083         *is_restricted = false;
1084
1085     size_t restricted_limit = GetRestrictedPhysicalMemoryLimit();
1086     if (restricted_limit != 0)
1087     {
1088         if (is_restricted && !g_UseRestrictedVirtualMemory)
1089             *is_restricted = true;
1090
1091         return restricted_limit;
1092     }
1093
1094     MEMORYSTATUSEX memStatus;
1095     GetProcessMemoryLoad(&memStatus);
1096     assert(memStatus.ullTotalPhys != 0);
1097     return memStatus.ullTotalPhys;
1098 }
1099
1100 // Get memory status
1101 // Parameters:
1102 //  memory_load - A number between 0 and 100 that specifies the approximate percentage of physical memory
1103 //      that is in use (0 indicates no memory use and 100 indicates full memory use).
1104 //  available_physical - The amount of physical memory currently available, in bytes.
1105 //  available_page_file - The maximum amount of memory the current process can commit, in bytes.
1106 void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available_physical, uint64_t* available_page_file)
1107 {
1108     uint64_t restricted_limit = GetRestrictedPhysicalMemoryLimit();
1109     if (restricted_limit != 0)
1110     {
1111         size_t workingSetSize;
1112         BOOL status = FALSE;
1113         if (!g_UseRestrictedVirtualMemory)
1114         {
1115             PROCESS_MEMORY_COUNTERS pmc;
1116             status = GCGetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
1117             workingSetSize = pmc.WorkingSetSize;
1118         }
1119
1120         if(status)
1121         {
1122             if (memory_load)
1123                 *memory_load = (uint32_t)((float)workingSetSize * 100.0 / (float)restricted_limit);
1124             if (available_physical)
1125             {
1126                 if(workingSetSize > restricted_limit)
1127                     *available_physical = 0;
1128                 else
1129                     *available_physical = restricted_limit - workingSetSize;
1130             }
1131             // Available page file doesn't mean much when physical memory is restricted since
1132             // we don't know how much of it is available to this process so we are not going to
1133             // bother to make another OS call for it.
1134             if (available_page_file)
1135                 *available_page_file = 0;
1136
1137             return;
1138         }
1139     }
1140
1141     MEMORYSTATUSEX ms;
1142     ::GetProcessMemoryLoad(&ms);
1143
1144     if (g_UseRestrictedVirtualMemory)
1145     {
1146         _ASSERTE (ms.ullTotalVirtual == restricted_limit);
1147         if (memory_load != NULL)
1148             *memory_load = (uint32_t)((float)(ms.ullTotalVirtual - ms.ullAvailVirtual) * 100.0 / (float)ms.ullTotalVirtual);
1149         if (available_physical != NULL)
1150             *available_physical = ms.ullTotalVirtual;
1151
1152         // Available page file isn't helpful when we are restricted by virtual memory
1153         // since the amount of memory we can reserve is less than the amount of
1154         // memory we can commit.
1155         if (available_page_file != NULL)
1156             *available_page_file = 0;
1157     }
1158     else
1159     {
1160         if (memory_load != NULL)
1161             *memory_load = ms.dwMemoryLoad;
1162         if (available_physical != NULL)
1163             *available_physical = ms.ullAvailPhys;
1164         if (available_page_file != NULL)
1165             *available_page_file = ms.ullAvailPageFile;
1166     }
1167 }
1168
1169 // Get a high precision performance counter
1170 // Return:
1171 //  The counter value
1172 int64_t GCToOSInterface::QueryPerformanceCounter()
1173 {
1174     LARGE_INTEGER ts;
1175     if (!::QueryPerformanceCounter(&ts))
1176     {
1177         assert(false && "Failed to query performance counter");
1178     }
1179
1180     return ts.QuadPart;
1181 }
1182
1183 // Get a frequency of the high precision performance counter
1184 // Return:
1185 //  The counter frequency
1186 int64_t GCToOSInterface::QueryPerformanceFrequency()
1187 {
1188     LARGE_INTEGER ts;
1189     if (!::QueryPerformanceFrequency(&ts))
1190     {
1191         assert(false && "Failed to query performance counter");
1192     }
1193
1194     return ts.QuadPart;
1195 }
1196
1197 // Get a time stamp with a low precision
1198 // Return:
1199 //  Time stamp in milliseconds
1200 uint32_t GCToOSInterface::GetLowPrecisionTimeStamp()
1201 {
1202     return ::GetTickCount();
1203 }
1204
1205 // Gets the total number of processors on the machine, not taking
1206 // into account current process affinity.
1207 // Return:
1208 //  Number of processors on the machine
1209 uint32_t GCToOSInterface::GetTotalProcessorCount()
1210 {
1211     if (CanEnableGCCPUGroups())
1212     {
1213         return g_nProcessors;
1214     }
1215     else
1216     {
1217         return g_SystemInfo.dwNumberOfProcessors;
1218     }
1219 }
1220
1221 bool GCToOSInterface::CanEnableGCNumaAware()
1222 {
1223     return g_fEnableGCNumaAware;
1224 }
1225
1226 bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
1227 {
1228     GroupProcNo groupProcNo(proc_no);
1229
1230     PROCESSOR_NUMBER procNumber;
1231     procNumber.Group    = groupProcNo.GetGroup();
1232     procNumber.Number   = (BYTE)groupProcNo.GetProcIndex();
1233     procNumber.Reserved = 0;
1234
1235     assert(g_fEnableGCNumaAware);
1236     return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
1237 }
1238
1239 // Get processor number and optionally its NUMA node number for the specified heap number
1240 // Parameters:
1241 //  heap_number - heap number to get the result for
1242 //  proc_no     - set to the selected processor number
1243 //  node_no     - set to the NUMA node of the selected processor or to NUMA_NODE_UNDEFINED
1244 // Return:
1245 //  true if it succeeded
1246 bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_no, uint16_t* node_no)
1247 {
1248     bool success = false;
1249
1250     if (CanEnableGCCPUGroups())
1251     {
1252         uint16_t gn, gpn;
1253         GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
1254
1255         *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
1256
1257         if (GCToOSInterface::CanEnableGCNumaAware())
1258         {
1259             if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
1260             {
1261                 *node_no = NUMA_NODE_UNDEFINED;
1262             }
1263         }
1264         else
1265         {   // no numa setting, each cpu group is treated as a node
1266             *node_no = gn;
1267         }
1268
1269         success = true;
1270     }
1271     else
1272     {
1273         int bit_number = 0;
1274         uint8_t proc_number = 0;
1275         for (uintptr_t mask = 1; mask != 0; mask <<= 1)
1276         {
1277             if (g_processAffinitySet.Contains(proc_number))
1278             {
1279                 if (bit_number == heap_number)
1280                 {
1281                     *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
1282
1283                     if (GCToOSInterface::CanEnableGCNumaAware())
1284                     {
1285                         if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
1286                         {
1287                             *node_no = NUMA_NODE_UNDEFINED;
1288                         }
1289                     }
1290
1291                     success = true;
1292                     break;
1293                 }
1294                 bit_number++;
1295             }
1296             proc_number++;
1297         }
1298     }
1299
1300     return success;
1301 }
1302
1303 // Parse the confing string describing affinitization ranges and update the passed in affinitySet accordingly
1304 // Parameters:
1305 //  config_string - string describing the affinitization range, platform specific
1306 //  start_index  - the range start index extracted from the config_string
1307 //  end_index    - the range end index extracted from the config_string, equal to the start_index if only an index and not a range was passed in
1308 // Return:
1309 //  true if the configString was successfully parsed, false if it was not correct
1310 bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_string, size_t* start_index, size_t* end_index)
1311 {
1312     assert(g_fEnableGCCPUGroups);
1313
1314     char* number_end;
1315     size_t group_number = strtoul(*config_string, &number_end, 10);
1316
1317     if ((number_end == *config_string) || (*number_end != ':'))
1318     {
1319         // No number or no colon after the number found, invalid format
1320         return false;
1321     }
1322
1323     if (group_number >= g_nGroups)
1324     {
1325         // Group number out of range
1326         return false;
1327     }
1328
1329     *config_string = number_end + 1;
1330
1331     size_t start, end;
1332     if (!ParseIndexOrRange(config_string, &start, &end))
1333     {
1334         return false;
1335     }
1336
1337     uint16_t group_processor_count = g_CPUGroupInfoArray[group_number].nr_active;
1338     if ((start >= group_processor_count) || (end >= group_processor_count))
1339     {
1340         // Invalid CPU index values or range
1341         return false;
1342     }
1343
1344     uint16_t group_begin = g_CPUGroupInfoArray[group_number].begin;
1345
1346     *start_index = group_begin + start;
1347     *end_index = group_begin + end;
1348
1349     return true;
1350 }
1351
1352 // Parameters of the GC thread stub
1353 struct GCThreadStubParam
1354 {
1355     GCThreadFunction GCThreadFunction;
1356     void* GCThreadParam;
1357 };
1358
1359 // GC thread stub to convert GC thread function to an OS specific thread function
1360 static DWORD GCThreadStub(void* param)
1361 {
1362     GCThreadStubParam *stubParam = (GCThreadStubParam*)param;
1363     GCThreadFunction function = stubParam->GCThreadFunction;
1364     void* threadParam = stubParam->GCThreadParam;
1365
1366     delete stubParam;
1367
1368     function(threadParam);
1369
1370     return 0;
1371 }
1372
1373 // Initialize the critical section
1374 void CLRCriticalSection::Initialize()
1375 {
1376     ::InitializeCriticalSection(&m_cs);
1377 }
1378
1379 // Destroy the critical section
1380 void CLRCriticalSection::Destroy()
1381 {
1382     ::DeleteCriticalSection(&m_cs);
1383 }
1384
1385 // Enter the critical section. Blocks until the section can be entered.
1386 void CLRCriticalSection::Enter()
1387 {
1388     ::EnterCriticalSection(&m_cs);
1389 }
1390
1391 // Leave the critical section
1392 void CLRCriticalSection::Leave()
1393 {
1394     ::LeaveCriticalSection(&m_cs);
1395 }
1396
1397 // WindowsEvent is an implementation of GCEvent that forwards
1398 // directly to Win32 APIs.
1399 class GCEvent::Impl
1400 {
1401 private:
1402     HANDLE m_hEvent;
1403
1404 public:
1405     Impl() : m_hEvent(INVALID_HANDLE_VALUE) {}
1406
1407     bool IsValid() const
1408     {
1409         return m_hEvent != INVALID_HANDLE_VALUE;
1410     }
1411
1412     void Set()
1413     {
1414         assert(IsValid());
1415         BOOL result = SetEvent(m_hEvent);
1416         assert(result && "SetEvent failed");
1417     }
1418
1419     void Reset()
1420     {
1421         assert(IsValid());
1422         BOOL result = ResetEvent(m_hEvent);
1423         assert(result && "ResetEvent failed");
1424     }
1425
1426     uint32_t Wait(uint32_t timeout, bool alertable)
1427     {
1428         UNREFERENCED_PARAMETER(alertable);
1429         assert(IsValid());
1430
1431         return WaitForSingleObject(m_hEvent, timeout);
1432     }
1433
1434     void CloseEvent()
1435     {
1436         assert(IsValid());
1437         BOOL result = CloseHandle(m_hEvent);
1438         assert(result && "CloseHandle failed");
1439         m_hEvent = INVALID_HANDLE_VALUE;
1440     }
1441
1442     bool CreateAutoEvent(bool initialState)
1443     {
1444         m_hEvent = CreateEvent(nullptr, false, initialState, nullptr);
1445         return IsValid();
1446     }
1447
1448     bool CreateManualEvent(bool initialState)
1449     {
1450         m_hEvent = CreateEvent(nullptr, true, initialState, nullptr);
1451         return IsValid();
1452     }
1453 };
1454
1455 GCEvent::GCEvent()
1456   : m_impl(nullptr)
1457 {
1458 }
1459
1460 void GCEvent::CloseEvent()
1461 {
1462     assert(m_impl != nullptr);
1463     m_impl->CloseEvent();
1464 }
1465
1466 void GCEvent::Set()
1467 {
1468     assert(m_impl != nullptr);
1469     m_impl->Set();
1470 }
1471
1472 void GCEvent::Reset()
1473 {
1474     assert(m_impl != nullptr);
1475     m_impl->Reset();
1476 }
1477
1478 uint32_t GCEvent::Wait(uint32_t timeout, bool alertable)
1479 {
1480     assert(m_impl != nullptr);
1481     return m_impl->Wait(timeout, alertable);
1482 }
1483
1484 bool GCEvent::CreateAutoEventNoThrow(bool initialState)
1485 {
1486     // [DESKTOP TODO] The difference between events and OS events is
1487     // whether or not the hosting API is made aware of them. When (if)
1488     // we implement hosting support for Local GC, we will need to be
1489     // aware of the host here.
1490     return CreateOSAutoEventNoThrow(initialState);
1491 }
1492
1493 bool GCEvent::CreateManualEventNoThrow(bool initialState)
1494 {
1495     // [DESKTOP TODO] The difference between events and OS events is
1496     // whether or not the hosting API is made aware of them. When (if)
1497     // we implement hosting support for Local GC, we will need to be
1498     // aware of the host here.
1499     return CreateOSManualEventNoThrow(initialState);
1500 }
1501
1502 bool GCEvent::CreateOSAutoEventNoThrow(bool initialState)
1503 {
1504     assert(m_impl == nullptr);
1505     std::unique_ptr<GCEvent::Impl> event(new (std::nothrow) GCEvent::Impl());
1506     if (!event)
1507     {
1508         return false;
1509     }
1510
1511     if (!event->CreateAutoEvent(initialState))
1512     {
1513         return false;
1514     }
1515
1516     m_impl = event.release();
1517     return true;
1518 }
1519
1520 bool GCEvent::CreateOSManualEventNoThrow(bool initialState)
1521 {
1522     assert(m_impl == nullptr);
1523     std::unique_ptr<GCEvent::Impl> event(new (std::nothrow) GCEvent::Impl());
1524     if (!event)
1525     {
1526         return false;
1527     }
1528
1529     if (!event->CreateManualEvent(initialState))
1530     {
1531         return false;
1532     }
1533
1534     m_impl = event.release();
1535     return true;
1536 }