From 7c08d791ee4fabf96d96b66dec803602e621057c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 19 May 2016 09:09:00 -0700 Subject: [PATCH] Check the HTT bit before counting logical threads Skip counting logical threads for Intel processors if the HTT bit is 0 which indicates there is only a single logical processor. * sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting logical threads if the HTT bit is 0. * sysdeps/x86/cpu-features.h (bit_cpu_HTT): New. (index_cpu_HTT): Likewise. (reg_HTT): Likewise. --- ChangeLog | 8 +++ sysdeps/x86/cacheinfo.c | 158 +++++++++++++++++++++++---------------------- sysdeps/x86/cpu-features.h | 3 + 3 files changed, 93 insertions(+), 76 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8a4918c..8adf828 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2016-05-19 H.J. Lu + * sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting + logical threads if the HTT bit is 0. + * sysdeps/x86/cpu-features.h (bit_cpu_HTT): New. + (index_cpu_HTT): Likewise. + (reg_HTT): Likewise. + +2016-05-19 H.J. Lu + [BZ #20115] * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (__memset): Remove alignments on jump targets. diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index 8408624..1f46d9d 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -506,99 +506,105 @@ init_cacheinfo (void) shared = core; } - /* Figure out the number of logical threads that share the - highest cache level. */ - if (max_cpuid >= 4) + /* A value of 0 for the HTT bit indicates there is only a single + logical processor. */ + if (HAS_CPU_FEATURE (HTT)) { - unsigned int family = GLRO(dl_x86_cpu_features).family; - unsigned int model = GLRO(dl_x86_cpu_features).model; + /* Figure out the number of logical threads that share the + highest cache level. */ + if (max_cpuid >= 4) + { + unsigned int family = GLRO(dl_x86_cpu_features).family; + unsigned int model = GLRO(dl_x86_cpu_features).model; - int i = 0; + int i = 0; - /* Query until desired cache level is enumerated. */ - do - { - __cpuid_count (4, i++, eax, ebx, ecx, edx); - - /* There seems to be a bug in at least some Pentium Ds - which sometimes fail to iterate all cache parameters. - Do not loop indefinitely here, stop in this case and - assume there is no such information. */ - if ((eax & 0x1f) == 0) - goto intel_bug_no_cache_info; - } - while (((eax >> 5) & 0x7) != level); + /* Query until desired cache level is enumerated. */ + do + { + __cpuid_count (4, i++, eax, ebx, ecx, edx); + + /* There seems to be a bug in at least some Pentium Ds + which sometimes fail to iterate all cache parameters. + Do not loop indefinitely here, stop in this case and + assume there is no such information. */ + if ((eax & 0x1f) == 0) + goto intel_bug_no_cache_info; + } + while (((eax >> 5) & 0x7) != level); - /* Check if cache is inclusive of lower cache levels. */ - inclusive_cache = (edx & 0x2) != 0; + /* Check if cache is inclusive of lower cache levels. */ + inclusive_cache = (edx & 0x2) != 0; - threads = (eax >> 14) & 0x3ff; + threads = (eax >> 14) & 0x3ff; - /* If max_cpuid >= 11, THREADS is the maximum number of - addressable IDs for logical processors sharing the - cache, instead of the maximum number of threads - sharing the cache. */ - if (threads && max_cpuid >= 11) - { - /* Find the number of logical processors shipped in - one core and apply count mask. */ - i = 0; - while (1) + /* If max_cpuid >= 11, THREADS is the maximum number of + addressable IDs for logical processors sharing the + cache, instead of the maximum number of threads + sharing the cache. */ + if (threads && max_cpuid >= 11) { - __cpuid_count (11, i++, eax, ebx, ecx, edx); - - int shipped = ebx & 0xff; - int type = ecx & 0xff0; - if (shipped == 0 || type == 0) - break; - else if (type == 0x200) + /* Find the number of logical processors shipped in + one core and apply count mask. */ + i = 0; + while (1) { - int count_mask; - - /* Compute count mask. */ - asm ("bsr %1, %0" - : "=r" (count_mask) : "g" (threads)); - count_mask = ~(-1 << (count_mask + 1)); - threads = (shipped - 1) & count_mask; + __cpuid_count (11, i++, eax, ebx, ecx, edx); + + int shipped = ebx & 0xff; + int type = ecx & 0xff0; + if (shipped == 0 || type == 0) + break; + else if (type == 0x200) + { + int count_mask; + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads)); + count_mask = ~(-1 << (count_mask + 1)); + threads = (shipped - 1) & count_mask; + break; + } + } + } + threads += 1; + if (threads > 2 && level == 2 && family == 6) + { + switch (model) + { + case 0x57: + /* Knights Landing has L2 cache shared by 2 cores. */ + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Silvermont has L2 cache shared by 2 cores. */ + threads = 2; + break; + default: break; } } } - threads += 1; - if (threads > 2 && level == 2 && family == 6) + else { - switch (model) - { - case 0x57: - /* Knights Landing has L2 cache shared by 2 cores. */ - case 0x37: - case 0x4a: - case 0x4d: - case 0x5a: - case 0x5d: - /* Silvermont has L2 cache shared by 2 cores. */ - threads = 2; - break; - default: - break; - } +intel_bug_no_cache_info: + /* Assume that all logical threads share the highest cache + level. */ + + threads + = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx + >> 16) & 0xff); } - } - else - { - intel_bug_no_cache_info: - /* Assume that all logical threads share the highest cache level. */ - threads - = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx - >> 16) & 0xff); + /* Cap usage of highest cache level to the number of supported + threads. */ + if (shared > 0 && threads > 0) + shared /= threads; } - /* Cap usage of highest cache level to the number of supported - threads. */ - if (shared > 0 && threads > 0) - shared /= threads; - /* Account for non-inclusive L2 and L3 caches. */ if (level == 3 && !inclusive_cache) shared += core; diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index 9529d61..2bd9371 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -51,6 +51,7 @@ #define bit_cpu_POPCOUNT (1 << 23) #define bit_cpu_FMA (1 << 12) #define bit_cpu_FMA4 (1 << 16) +#define bit_cpu_HTT (1 << 28) /* COMMON_CPUID_INDEX_7. */ #define bit_cpu_ERMS (1 << 9) @@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001 # define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1 # define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1 +# define index_cpu_HTT COMMON_CPUID_INDEX_1 # define reg_CX8 edx # define reg_CMOV edx @@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define reg_FMA4 ecx # define reg_POPCOUNT ecx # define reg_OSXSAVE ecx +# define reg_HTT edx # define index_arch_Fast_Rep_String FEATURE_INDEX_1 # define index_arch_Fast_Copy_Backward FEATURE_INDEX_1 -- 2.7.4