#endif // TARGET_UNIX
}
+#ifdef TARGET_WINDOWS
+
+// This function checks to see if GetLogicalProcessorInformation API is supported.
+// On success, this function allocates a SLPI array, sets nEntries to number
+// of elements in the SLPI array and returns a pointer to the SLPI array after filling it with information.
+//
+// Note: If successful, IsGLPISupported allocates memory for the SLPI array and expects the caller to
+// free the memory once the caller is done using the information in the SLPI array.
+//
+// If the API is not supported or any failure, returns NULL
+//
+SYSTEM_LOGICAL_PROCESSOR_INFORMATION *IsGLPISupported( PDWORD nEntries )
+{
+ DWORD cbslpi = 0;
+ DWORD dwNumElements = 0;
+ SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = NULL;
+
+ // We setup the first call to GetLogicalProcessorInformation to fail so that we can obtain
+ // the size of the buffer required to allocate for the SLPI array that is returned
+
+ if (!GetLogicalProcessorInformation(pslpi, &cbslpi) &&
+ GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+ {
+ // If we fail with anything other than an ERROR_INSUFFICIENT_BUFFER here, we punt with failure.
+ return NULL;
+ }
+
+ _ASSERTE(cbslpi);
+
+ // compute the number of SLPI entries required to hold the information returned from GLPI
+
+ dwNumElements = cbslpi / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+
+ // allocate a buffer in the free heap to hold an array of SLPI entries from GLPI, number of elements in the array is dwNumElements
+
+ pslpi = new (nothrow) SYSTEM_LOGICAL_PROCESSOR_INFORMATION[ dwNumElements ];
+
+ if(pslpi == NULL)
+ {
+ // the memory allocation failed
+ return NULL;
+ }
+
+ // Make call to GetLogicalProcessorInformation. Returns array of SLPI structures
+
+ if (!GetLogicalProcessorInformation(pslpi, &cbslpi))
+ {
+ // GetLogicalProcessorInformation failed
+ delete[] pslpi ; //Allocation was fine but the API call itself failed and so we are releasing the memory before the return NULL.
+ return NULL ;
+ }
+
+ // GetLogicalProcessorInformation successful, set nEntries to number of entries in the SLPI array
+ *nEntries = dwNumElements;
+
+ return pslpi; // return pointer to SLPI array
+}
+
+// This function returns the size of highest level cache on the physical chip. If it cannot
+// determine the cachesize this function returns 0.
+size_t GetLogicalProcessorCacheSizeFromOS()
+{
+ size_t cache_size = 0;
+ DWORD nEntries = 0;
+
+ // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. Returns NULL
+ // if API not present or on failure.
+
+ SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = IsGLPISupported(&nEntries) ;
+
+ if (pslpi == NULL)
+ {
+ // GetLogicalProcessorInformation not supported or failed.
+ goto Exit;
+ }
+
+ // Crack the information. Iterate through all the SLPI array entries for all processors in system.
+ // Will return the greatest of all the processor cache sizes or zero
+ {
+ size_t last_cache_size = 0;
+
+ for (DWORD i=0; i < nEntries; i++)
+ {
+ if (pslpi[i].Relationship == RelationCache)
+ {
+ last_cache_size = max(last_cache_size, pslpi[i].Cache.Size);
+ }
+ }
+ cache_size = last_cache_size;
+ }
+
+Exit:
+ if(pslpi)
+ delete[] pslpi; // release the memory allocated for the SLPI array.
+
+ return cache_size;
+}
+
+#endif // TARGET_WINDOWS
+
// Get size of the largest cache on the processor die
// Parameters:
// trueSize - true to return true cache size, false to return scaled up size based on
{
LIMITED_METHOD_CONTRACT;
- return ::GetCacheSizePerLogicalCpu(trueSize);
+ static volatile size_t s_maxSize;
+ static volatile size_t s_maxTrueSize;
+
+ size_t size = trueSize ? s_maxTrueSize : s_maxSize;
+ if (size != 0)
+ return size;
+
+ size_t maxSize, maxTrueSize;
+
+ maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache
+
+#if defined(TARGET_ARM64)
+ // Bigger gen0 size helps arm64 targets
+ maxSize = maxTrueSize * 3;
+#endif
+
+ s_maxSize = maxSize;
+ s_maxTrueSize = maxTrueSize;
+
+ // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
+ return trueSize ? maxTrueSize : maxSize;
}
// Sets the calling thread's affinity to only run on the processor specified
return true;
}
-#ifndef CROSSGEN_COMPILE
-
-//-----------------------------------------------------------------------------
-#ifndef TARGET_UNIX
-
-// This function checks to see if GetLogicalProcessorInformation API is supported.
-// On success, this function allocates a SLPI array, sets nEntries to number
-// of elements in the SLPI array and returns a pointer to the SLPI array after filling it with information.
-//
-// Note: If successful, IsGLPISupported allocates memory for the SLPI array and expects the caller to
-// free the memory once the caller is done using the information in the SLPI array.
-//
-// If the API is not supported or any failure, returns NULL
-//
-SYSTEM_LOGICAL_PROCESSOR_INFORMATION *IsGLPISupported( PDWORD nEntries )
-{
- DWORD cbslpi = 0;
- DWORD dwNumElements = 0;
- SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = NULL;
-
- // We setup the first call to GetLogicalProcessorInformation to fail so that we can obtain
- // the size of the buffer required to allocate for the SLPI array that is returned
-
- if (!GetLogicalProcessorInformation(pslpi, &cbslpi) &&
- GetLastError() != ERROR_INSUFFICIENT_BUFFER)
- {
- // If we fail with anything other than an ERROR_INSUFFICIENT_BUFFER here, we punt with failure.
- return NULL;
- }
-
- _ASSERTE(cbslpi);
-
- // compute the number of SLPI entries required to hold the information returned from GLPI
-
- dwNumElements = cbslpi / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-
- // allocate a buffer in the free heap to hold an array of SLPI entries from GLPI, number of elements in the array is dwNumElements
-
- pslpi = new (nothrow) SYSTEM_LOGICAL_PROCESSOR_INFORMATION[ dwNumElements ];
-
- if(pslpi == NULL)
- {
- // the memory allocation failed
- return NULL;
- }
-
- // Make call to GetLogicalProcessorInformation. Returns array of SLPI structures
-
- if (!GetLogicalProcessorInformation(pslpi, &cbslpi))
- {
- // GetLogicalProcessorInformation failed
- delete[] pslpi ; //Allocation was fine but the API call itself failed and so we are releasing the memory before the return NULL.
- return NULL ;
- }
-
- // GetLogicalProcessorInformation successful, set nEntries to number of entries in the SLPI array
- *nEntries = dwNumElements;
-
- return pslpi; // return pointer to SLPI array
-
-}//IsGLPISupported
-
-// This function returns the size of highest level cache on the physical chip. If it cannot
-// determine the cachesize this function returns 0.
-size_t GetLogicalProcessorCacheSizeFromOS()
-{
- size_t cache_size = 0;
- DWORD nEntries = 0;
-
- // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. Returns NULL
- // if API not present or on failure.
-
- SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = IsGLPISupported(&nEntries) ;
-
- if (pslpi == NULL)
- {
- // GetLogicalProcessorInformation not supported or failed.
- goto Exit;
- }
-
- // Crack the information. Iterate through all the SLPI array entries for all processors in system.
- // Will return the greatest of all the processor cache sizes or zero
- {
- size_t last_cache_size = 0;
-
- for (DWORD i=0; i < nEntries; i++)
- {
- if (pslpi[i].Relationship == RelationCache)
- {
- last_cache_size = max(last_cache_size, pslpi[i].Cache.Size);
- }
- }
- cache_size = last_cache_size;
- }
-Exit:
-
- if(pslpi)
- delete[] pslpi; // release the memory allocated for the SLPI array.
-
- return cache_size;
-}
-
-#endif // !TARGET_UNIX
-
-// This function returns the number of logical processors on a given physical chip. If it cannot
-// determine the number of logical cpus, or the machine is not populated uniformly with the same
-// type of processors, this function returns 0.
-
-DWORD GetLogicalCpuCountFromOS()
-{
- // No CONTRACT possible because GetLogicalCpuCount uses SEH
-
- STATIC_CONTRACT_THROWS;
- STATIC_CONTRACT_GC_NOTRIGGER;
-
- static DWORD val = 0;
- DWORD retVal = 0;
-
-#ifdef TARGET_UNIX
- retVal = PAL_GetLogicalCpuCountFromOS();
-#else // TARGET_UNIX
-
- DWORD nEntries = 0;
-
- DWORD prevcount = 0;
- DWORD count = 1;
-
- // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. Returns NULL
- // if API not present or on failure.
- SYSTEM_LOGICAL_PROCESSOR_INFORMATION *pslpi = IsGLPISupported(&nEntries) ;
-
- if (pslpi == NULL)
- {
- // GetLogicalProcessorInformation no supported
- goto lDone;
- }
-
- for (DWORD j = 0; j < nEntries; j++)
- {
- if (pslpi[j].Relationship == RelationProcessorCore)
- {
- // LTP_PC_SMT indicates HT or SMT
- if (pslpi[j].ProcessorCore.Flags == LTP_PC_SMT)
- {
- SIZE_T pmask = pslpi[j].ProcessorMask;
-
- // Count the processors in the mask
- //
- // These are not the fastest bit counters. There may be processor intrinsics
- // (which would be best), but there are variants faster than these:
- // See http://en.wikipedia.org/wiki/Hamming_weight.
- // This is the naive implementation.
-#if !HOST_64BIT
- count = (pmask & 0x55555555) + ((pmask >> 1) & 0x55555555);
- count = (count & 0x33333333) + ((count >> 2) & 0x33333333);
- count = (count & 0x0F0F0F0F) + ((count >> 4) & 0x0F0F0F0F);
- count = (count & 0x00FF00FF) + ((count >> 8) & 0x00FF00FF);
- count = (count & 0x0000FFFF) + ((count >> 16)& 0x0000FFFF);
-#else
- pmask = (pmask & 0x5555555555555555ull) + ((pmask >> 1) & 0x5555555555555555ull);
- pmask = (pmask & 0x3333333333333333ull) + ((pmask >> 2) & 0x3333333333333333ull);
- pmask = (pmask & 0x0f0f0f0f0f0f0f0full) + ((pmask >> 4) & 0x0f0f0f0f0f0f0f0full);
- pmask = (pmask & 0x00ff00ff00ff00ffull) + ((pmask >> 8) & 0x00ff00ff00ff00ffull);
- pmask = (pmask & 0x0000ffff0000ffffull) + ((pmask >> 16) & 0x0000ffff0000ffffull);
- pmask = (pmask & 0x00000000ffffffffull) + ((pmask >> 32) & 0x00000000ffffffffull);
- count = static_cast<DWORD>(pmask);
-#endif // !HOST_64BIT else
- assert (count > 0);
-
- if (prevcount)
- {
- if (count != prevcount)
- {
- retVal = 1; // masks are not symmetric
- goto lDone;
- }
- }
-
- prevcount = count;
- }
- }
- }
-
- retVal = count;
-
-lDone:
-
- if(pslpi)
- {
- delete[] pslpi; // release the memory allocated for the SLPI array
- }
-#endif // TARGET_UNIX
-
- return retVal;
-}
-
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-
-#define CACHE_WAY_BITS 0xFFC00000 // number of cache WAYS-Associativity is returned in EBX[31:22] (10 bits) using cpuid function 4
-#define CACHE_PARTITION_BITS 0x003FF000 // number of cache Physical Partitions is returned in EBX[21:12] (10 bits) using cpuid function 4
-#define CACHE_LINESIZE_BITS 0x00000FFF // Linesize returned in EBX[11:0] (12 bits) using cpuid function 4
-
-// these are defined in src\VM\AMD64\asmhelpers.asm / cgenx86.cpp
-extern "C" DWORD __stdcall getcpuid(DWORD arg1, unsigned char result[16]);
-extern "C" DWORD __stdcall getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16]);
-
-// The following function uses a deterministic mechanism for enumerating/calculating the details of the cache hierarychy at runtime
-// by using deterministic cache parameter leafs on Prescott and higher processors.
-// If successful, this function returns the cache size in bytes of the highest level on-die cache. Returns 0 on failure.
-
-size_t GetIntelDeterministicCacheEnum()
-{
- LIMITED_METHOD_CONTRACT;
- size_t retVal = 0;
- unsigned char buffer[16];
- size_t buflen = ARRAYSIZE(buffer);
-
- DWORD maxCpuid = getextcpuid(0,0,buffer);
- DWORD dwBuffer[4];
- memcpy(dwBuffer, buffer, buflen);
-
- if( (maxCpuid > 3) && (maxCpuid < 0x80000000) ) // Deterministic Cache Enum is Supported
- {
- DWORD dwCacheWays, dwCachePartitions, dwLineSize, dwSets;
- DWORD retEAX = 0;
- DWORD loopECX = 0;
- size_t maxSize = 0;
- size_t curSize = 0;
-
- // Make First call to getextcpuid with loopECX=0. loopECX provides an index indicating which level to return information about.
- // The second parameter is input EAX=4, to specify we want deterministic cache parameter leaf information.
- // getextcpuid with EAX=4 should be executed with loopECX = 0,1, ... until retEAX [4:0] contains 00000b, indicating no more
- // cache levels are supported.
-
- getextcpuid(loopECX, 4, buffer);
- memcpy(dwBuffer, buffer, buflen);
- retEAX = dwBuffer[0]; // get EAX
-
- int i = 0;
- while(retEAX & 0x1f) // Crack cache enums and loop while EAX > 0
- {
-
- dwCacheWays = (dwBuffer[1] & CACHE_WAY_BITS) >> 22;
- dwCachePartitions = (dwBuffer[1] & CACHE_PARTITION_BITS) >> 12;
- dwLineSize = dwBuffer[1] & CACHE_LINESIZE_BITS;
- dwSets = dwBuffer[2]; // ECX
-
- curSize = (dwCacheWays+1)*(dwCachePartitions+1)*(dwLineSize+1)*(dwSets+1);
-
- if (maxSize < curSize)
- maxSize = curSize;
-
- loopECX++;
- getextcpuid(loopECX, 4, buffer);
- memcpy(dwBuffer, buffer, buflen);
- retEAX = dwBuffer[0] ; // get EAX[4:0];
- i++;
- if (i > 16) { // prevent infinite looping
- return 0;
- }
- }
- retVal = maxSize;
- }
- return retVal ;
-}
-
-// The following function uses CPUID function 2 with descriptor values to determine the cache size. This requires a-priori
-// knowledge of the descriptor values. This works on gallatin and prior processors (already released processors).
-// If successful, this function returns the cache size in bytes of the highest level on-die cache. Returns 0 on failure.
-
-size_t GetIntelDescriptorValuesCache()
-{
- LIMITED_METHOD_CONTRACT;
- size_t size = 0;
- size_t maxSize = 0;
- unsigned char buffer[16];
-
- getextcpuid(0,2, buffer); // call CPUID with EAX function 2H to obtain cache descriptor values
-
- for (int i = buffer[0]; --i >= 0; )
- {
- int j;
- for (j = 3; j < 16; j += 4)
- {
- // if the information in a register is marked invalid, set to null descriptors
- if (buffer[j] & 0x80)
- {
- buffer[j-3] = 0;
- buffer[j-2] = 0;
- buffer[j-1] = 0;
- buffer[j-0] = 0;
- }
- }
-
- for (j = 1; j < 16; j++)
- {
- switch (buffer[j]) // need to add descriptor values for 8M and 12M when they become known
- {
- case 0x41:
- case 0x79:
- size = 128*1024;
- break;
-
- case 0x42:
- case 0x7A:
- case 0x82:
- size = 256*1024;
- break;
-
- case 0x22:
- case 0x43:
- case 0x7B:
- case 0x83:
- case 0x86:
- size = 512*1024;
- break;
-
- case 0x23:
- case 0x44:
- case 0x7C:
- case 0x84:
- case 0x87:
- size = 1024*1024;
- break;
-
- case 0x25:
- case 0x45:
- case 0x85:
- size = 2*1024*1024;
- break;
-
- case 0x29:
- size = 4*1024*1024;
- break;
- }
- if (maxSize < size)
- maxSize = size;
- }
-
- if (i > 0)
- getextcpuid(0,2, buffer);
- }
- return maxSize;
-}
-
-
-
-#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
- // processors per physical processor (using cpuid function 1)
-#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
- // initial APIC ID for the processor this code is running on.
- // Default value = 0xff if HT is not supported
-
-// This function uses CPUID function 1 to return the number of logical processors on a given physical chip.
-// It returns the number of logicals processors on a physical chip.
-
-DWORD GetLogicalCpuCountFallback()
-{
- BYTE LogicalNum = 0;
- BYTE PhysicalNum = 0;
- DWORD lProcCounter = 0;
- unsigned char buffer[16];
-
- DWORD* dwBuffer = (DWORD*)buffer;
- DWORD retVal = 1;
-
- getextcpuid(0,1, buffer); //call CPUID with EAX=1
-
- if (dwBuffer[3] & (1<<28)) // edx:bit 28 is HT bit
- {
- PhysicalNum = (BYTE) g_SystemInfo.dwNumberOfProcessors ; // total # of processors
- LogicalNum = (BYTE) ((dwBuffer[1] & NUM_LOGICAL_BITS) >> 16); // # of logical per physical
-
- if(LogicalNum > 1)
- {
-#ifdef FEATURE_CORESYSTEM
- // CoreSystem doesn't expose GetProcessAffinityMask or SetProcessAffinityMask or anything
- // functionally equivalent. Just assume 1:1 mapping if we get here (in reality we shouldn't since
- // all CoreSystems support GetLogicalProcessorInformation so GetLogicalCpuCountFromOS should have
- // taken care of everything.
- goto fDone;
-#else // FEATURE_CORESYSTEM
- HANDLE hCurrentProcessHandle;
- DWORD_PTR dwProcessAffinity;
- DWORD_PTR dwSystemAffinity;
- DWORD_PTR dwAffinityMask;
-
- // Calculate the appropriate shifts and mask based on the
- // number of logical processors.
-
- BYTE i = 1, PHY_ID_MASK = 0xFF, PHY_ID_SHIFT = 0;
- while (i < LogicalNum)
- {
- i *= 2;
- PHY_ID_MASK <<= 1;
- PHY_ID_SHIFT++;
- }
- hCurrentProcessHandle = GetCurrentProcess();
-
- GetProcessAffinityMask(hCurrentProcessHandle, &dwProcessAffinity, &dwSystemAffinity);
-
- // Check if available process affinity mask is equal to the available system affinity mask
- // If the masks are equal, then all the processors the OS utilizes are available to the
- // application.
-
- if (dwProcessAffinity != dwSystemAffinity)
- {
- retVal = 0;
- goto fDone;
- }
-
- dwAffinityMask = 1;
-
- // loop over all processors, running APIC ID retrieval code starting
- // with the first one by setting process affinity.
- while (dwAffinityMask != 0 && dwAffinityMask <= dwProcessAffinity)
- {
- // Check if this CPU is available
- if (dwAffinityMask & dwProcessAffinity)
- {
- if (SetProcessAffinityMask(hCurrentProcessHandle, dwAffinityMask))
- {
- BYTE APIC_ID, LOG_ID, PHY_ID;
- __SwitchToThread(0, CALLER_LIMITS_SPINNING); // Give OS time to switch CPU
-
- getextcpuid(0,1, buffer); //call cpuid with EAX=1
-
- APIC_ID = (dwBuffer[1] & INITIAL_APIC_ID_BITS) >> 24;
- LOG_ID = APIC_ID & ~PHY_ID_MASK;
- PHY_ID = APIC_ID >> PHY_ID_SHIFT;
- if (LOG_ID != 0)
- lProcCounter++;
- }
- }
- dwAffinityMask = dwAffinityMask << 1;
- }
- // Reset the processor affinity
-
- SetProcessAffinityMask(hCurrentProcessHandle, dwProcessAffinity);
-
- // Check if HT is enabled on all the processors
- if(lProcCounter > 0 && (lProcCounter == (DWORD)(PhysicalNum / LogicalNum)))
- {
- retVal = lProcCounter;
- goto fDone;
- }
-#endif // FEATURE_CORESYSTEM
- }
- }
-fDone:
-
- return retVal;
-}
-
-#endif // TARGET_X86 || TARGET_AMD64
-
-#if defined (TARGET_X86) || defined (TARGET_AMD64)
-static size_t GetCacheSizeFromCpuId()
-{
- STATIC_CONTRACT_NOTHROW;
- STATIC_CONTRACT_GC_NOTRIGGER;
-
- // Can't return from a PAL_TRY. Instead, have it write to its parameter.
- struct Param : DefaultCatchFilterParam {
- size_t maxSize;
- } param;
- param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
- param.maxSize = 0;
-
- PAL_TRY(Param *, pParam, ¶m)
- {
- size_t& maxSize = pParam->maxSize;
-
- unsigned char buffer[16];
- DWORD* dwBuffer = (DWORD*)buffer;
-
- DWORD maxCpuId = getcpuid(0, buffer);
-
- if (memcmp(buffer + 4, "GenuineIntel", 12) == 0)
- {
- /*
- //The following lines are commented because the OS API on Windows 2003 SP1 is not returning the Cache Relation information on x86.
- //Once the OS API (LH and above) is updated with this information, we should start using the OS API to get the cache enumeration by
- //uncommenting the lines below.
-
- maxSize = GetLogicalProcessorCacheSizeFromOS(); //use OS API for cache enumeration on LH and above
- */
- maxSize = 0;
- if (maxCpuId >= 2) // cpuid support for cache size determination is available
- {
- maxSize = GetIntelDeterministicCacheEnum(); // try to use use deterministic cache size enumeration
- if (!maxSize)
- { // deterministic enumeration failed, fallback to legacy enumeration using descriptor values
- maxSize = GetIntelDescriptorValuesCache();
- }
- }
-
- // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
- // multi-core processor, but we never call into those two functions since we don't halve the
- // gen0size when it's prescott and above processor. We keep the old version here for earlier
- // generation system(Northwood based), perf data suggests on those systems, halve gen0 size
- // still boost the performance(ex:Biztalk boosts about 17%). So on earlier systems(Northwood)
- // based, we still go ahead and halve gen0 size. The logic in GetLogicalCpuCountFromOS()
- // and GetLogicalCpuCountFallback() works fine for those earlier generation systems.
- // If it's a Prescott and above processor or Multi-core, perf data suggests not to halve gen0
- // size at all gives us overall better performance.
- // This is going to be fixed with a new version in orcas time frame.
- if (maxCpuId >= 2 && !((maxCpuId > 3) && (maxCpuId < 0x80000000)))
- {
- DWORD logicalProcessorCount = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
-
- if (!logicalProcessorCount)
- {
- logicalProcessorCount = GetLogicalCpuCountFallback(); // OS API failed, Fallback to HT enumeration using CPUID
- }
-
- if (logicalProcessorCount)
- {
- maxSize = maxSize / logicalProcessorCount;
- }
- }
- }
- else if (memcmp(buffer + 4, "AuthenticAMD", 12) == 0)
- {
- if (getcpuid(0x80000000, buffer) >= 0x80000006)
- {
- getcpuid(0x80000006, buffer);
-
- DWORD dwL2CacheBits = dwBuffer[2];
- DWORD dwL3CacheBits = dwBuffer[3];
-
- maxSize = (size_t)((dwL2CacheBits >> 16) * 1024); // L2 cache size in ECX bits 31-16
-
- getcpuid(0x1, buffer);
- DWORD dwBaseFamily = (dwBuffer[0] & (0xF << 8)) >> 8;
- DWORD dwExtFamily = (dwBuffer[0] & (0xFF << 20)) >> 20;
- DWORD dwFamily = dwBaseFamily >= 0xF ? dwBaseFamily + dwExtFamily : dwBaseFamily;
-
- if (dwFamily >= 0x10)
- {
- BOOL bSkipAMDL3 = FALSE;
-
- if (dwFamily == 0x10) // are we running on a Barcelona (Family 10h) processor?
- {
- // check model
- DWORD dwBaseModel = (dwBuffer[0] & (0xF << 4)) >> 4 ;
- DWORD dwExtModel = (dwBuffer[0] & (0xF << 16)) >> 16;
- DWORD dwModel = dwBaseFamily >= 0xF ? (dwExtModel << 4) | dwBaseModel : dwBaseModel;
-
- switch (dwModel)
- {
- case 0x2:
- // 65nm parts do not benefit from larger Gen0
- bSkipAMDL3 = TRUE;
- break;
-
- case 0x4:
- default:
- bSkipAMDL3 = FALSE;
- }
- }
-
- if (!bSkipAMDL3)
- {
- // 45nm Greyhound parts (and future parts based on newer northbridge) benefit
- // from increased gen0 size, taking L3 into account
- getcpuid(0x80000008, buffer);
- DWORD dwNumberOfCores = (dwBuffer[2] & (0xFF)) + 1; // NC is in ECX bits 7-0
-
- DWORD dwL3CacheSize = (size_t)((dwL3CacheBits >> 18) * 512 * 1024); // L3 size in EDX bits 31-18 * 512KB
- // L3 is shared between cores
- dwL3CacheSize = dwL3CacheSize / dwNumberOfCores;
- maxSize += dwL3CacheSize; // due to exclusive caches, add L3 size (possibly zero) to L2
- // L1 is too small to worry about, so ignore it
- }
- }
- }
- }
- }
- PAL_EXCEPT_FILTER(DefaultCatchFilter)
- {
- }
- PAL_ENDTRY
-
- return param.maxSize;
-}
-#endif // TARGET_X86
-
-// fix this if/when AMD does multicore or SMT
-size_t GetCacheSizePerLogicalCpu(BOOL bTrueSize)
-{
- // No CONTRACT possible because GetCacheSizePerLogicalCpu uses SEH
-
- STATIC_CONTRACT_NOTHROW;
- STATIC_CONTRACT_GC_NOTRIGGER;
-
- static volatile size_t s_maxSize;
- static volatile size_t s_maxTrueSize;
-
- size_t size = bTrueSize ? s_maxTrueSize : s_maxSize;
- if (size != 0)
- return size;
-
- size_t maxSize = 0;
- size_t maxTrueSize = 0;
-
- // For x86, always get from cpuid.
-#if !defined (TARGET_X86)
- maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache
-#endif
-
-#if defined (TARGET_X86) || defined(TARGET_AMD64)
- if (maxSize == 0)
- {
- maxSize = maxTrueSize = GetCacheSizeFromCpuId();
- }
-#elif defined(TARGET_ARM64)
- // Bigger gen0 size helps arm64 targets
- maxSize = maxTrueSize * 3;
-#endif
-
- s_maxSize = maxSize;
- s_maxTrueSize = maxTrueSize;
-
- // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize);
- return bTrueSize ? maxTrueSize : maxSize;
-}
-#endif // CROSSGEN_COMPILE
-
LPVOID
CLRMapViewOfFile(
IN HANDLE hFileMappingObject,