// performance data seems to indicate halving the size results
// in optimal perf. Ask for adjusted gen0 size.
gen0size = max(GCToOSInterface::GetLargestOnDieCacheSize(FALSE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
-#if (defined(_TARGET_AMD64_))
+
// if gen0 size is too large given the available memory, reduce it.
// Get true cache size, as we don't want to reduce below this.
size_t trueSize = max(GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
break;
}
}
-#endif //_TARGET_AMD64_
-
#else //SERVER_GC
gen0size = max((4*GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/5),(256*1024));
#endif //SERVER_GC
bool ReadMemoryValueFromFile(const char* filename, size_t* val)
{
- bool result = false;
- char *line = nullptr;
- size_t lineLen = 0;
- char* endptr = nullptr;
- size_t num = 0, l, multiplier;
-
- if (val == nullptr)
- return false;
-
- FILE* file = fopen(filename, "r");
- if (file == nullptr)
- goto done;
-
- if (getline(&line, &lineLen, file) == -1)
- goto done;
-
- errno = 0;
- num = strtoull(line, &endptr, 0);
- if (errno != 0)
- goto done;
-
- multiplier = 1;
- switch(*endptr)
- {
- case 'g':
- case 'G': multiplier = 1024;
- case 'm':
- case 'M': multiplier = multiplier*1024;
- case 'k':
- case 'K': multiplier = multiplier*1024;
- }
-
- *val = num * multiplier;
- result = true;
- if (*val/multiplier != num)
- result = false;
- done:
- if (file)
- fclose(file);
- free(line);
- return result;
+ return ::ReadMemoryValueFromFile(filename, val);
}
long long ReadCpuCGroupValue(const char* subsystemFilename){
return HAVE_SCHED_GETCPU;
}
+bool
+ReadMemoryValueFromFile(const char* filename, size_t* val)
+{
+ bool result = false;
+ char *line = nullptr;
+ size_t lineLen = 0;
+ char* endptr = nullptr;
+ size_t num = 0, l, multiplier;
+
+ if (val == nullptr)
+ return false;
+
+ FILE* file = fopen(filename, "r");
+ if (file == nullptr)
+ goto done;
+
+ if (getline(&line, &lineLen, file) == -1)
+ goto done;
+
+ errno = 0;
+ num = strtoull(line, &endptr, 0);
+ if (errno != 0)
+ goto done;
+
+ multiplier = 1;
+ switch(*endptr)
+ {
+ case 'g':
+ case 'G': multiplier = 1024;
+ case 'm':
+ case 'M': multiplier = multiplier*1024;
+ case 'k':
+ case 'K': multiplier = multiplier*1024;
+ }
+
+ *val = num * multiplier;
+ result = true;
+ if (*val/multiplier != num)
+ result = false;
+done:
+ if (file)
+ fclose(file);
+ free(line);
+ return result;
+}
+
size_t
PALAPI
PAL_GetLogicalProcessorCacheSizeFromOS()
cacheSize = max(cacheSize, sysconf(_SC_LEVEL4_CACHE_SIZE));
#endif
+#if defined(_ARM64_)
+ if(cacheSize == 0)
+ {
+ size_t size;
+
+ if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index0/size", &size))
+ cacheSize = max(cacheSize, size);
+ if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index1/size", &size))
+ cacheSize = max(cacheSize, size);
+ if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index2/size", &size))
+ cacheSize = max(cacheSize, size);
+ if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index3/size", &size))
+ cacheSize = max(cacheSize, size);
+ if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index4/size", &size))
+ cacheSize = max(cacheSize, size);
+ }
+
+ if(cacheSize == 0)
+ {
+ // It is currently expected to be missing cache size info
+ //
+ // _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
+ //
+ // /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems.
+ // Arm64 patch is in Linux kernel tip.
+ //
+ // midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
+ // but without an exhaustive list of ARM64 processors any decode of midr_el1
+ // Would likely be incomplete
+
+ // Published information on ARM64 architectures is limited.
+ // If we use recent high core count chips as a guide for state of the art, we find
+ // total L3 cache to be 1-2MB/core. As always, there are exceptions.
+
+ // Estimate cache size based on CPU count
+ // Assume lower core count are lighter weight parts which are likely to have smaller caches
+ // Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs
+ DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS();
+
+ cacheSize = logicalCPUs*min(1536, max(256, logicalCPUs*128))*1024;
+ }
+#endif
+
return cacheSize;
}
STATIC_CONTRACT_NOTHROW;
STATIC_CONTRACT_GC_NOTRIGGER;
-#if defined(_TARGET_AMD64_) || defined (_TARGET_X86_)
-
static size_t maxSize;
static size_t maxTrueSize;
}
}
+#if defined(_TARGET_AMD64_) || defined (_TARGET_X86_)
DefaultCatchFilterParam param;
param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
{
}
PAL_ENDTRY
+#else
+ maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache
+#endif
+
+#if defined(_TARGET_ARM64_)
+ // Bigger gen0 size helps arm64 targets
+ maxSize = maxTrueSize * 3;
+#endif
// printf("GetLargestOnDieCacheSize returns %d, adjusted size %d\n", maxSize, maxTrueSize);
if (bTrueSize)
return maxTrueSize;
else
return maxSize;
-
-#else
- size_t cache_size = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache
- return cache_size;
-
-#endif
}
//---------------------------------------------------------------------