From 59ca535576d31554c68c22ac3314ef0842018341 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 9 Jun 2021 09:24:45 -0700 Subject: [PATCH] util: Use maximum number of CPUs for determining cache topology MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This prevents problems when some CPUs are offline. In a four CPU system, if CPUs 1 and 2 are offline, the cache topology code would only examine CPUs 0 and 1... giving incorrect information. The types are changed to int16_t so that the offset of num_L3_caches does not change. This triggered a STATIC_ASSERT failure: STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t)); I'm assuming there's some assembly code or something that depends on this offset, and I don't feel like messing with it. Reviewed-by: Marek Olšák Part-of: --- src/util/u_cpu_detect.c | 10 ++++++++-- src/util/u_cpu_detect.h | 18 +++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c index 6925b09..e0ed1a4 100644 --- a/src/util/u_cpu_detect.c +++ b/src/util/u_cpu_detect.c @@ -468,13 +468,18 @@ get_cpu_topology(void) * * Querying the APIC ID can only be done by pinning the current thread * to each core. The original affinity mask is saved. + * + * Loop over all possible CPUs even though some may be offline. */ - for (unsigned i = 0; i < util_cpu_caps.nr_cpus && i < UTIL_MAX_CPUS; + for (unsigned i = 0; i < util_cpu_caps.max_cpus && i < UTIL_MAX_CPUS; i++) { uint32_t cpu_bit = 1u << (i % 32); mask[i / 32] = cpu_bit; + /* The assumption is that trying to bind the thread to a CPU that is + * offline will fail. + */ if (util_set_current_thread_affinity(mask, !saved ? saved_mask : NULL, util_cpu_caps.num_cpu_mask_bits)) { @@ -535,7 +540,7 @@ get_cpu_topology(void) fprintf(stderr, "CPU <-> L3 cache mapping:\n"); for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) { fprintf(stderr, " - L3 %u mask = ", i); - for (int j = util_cpu_caps.nr_cpus - 1; j >= 0; j -= 32) + for (int j = util_cpu_caps.max_cpus - 1; j >= 0; j -= 32) fprintf(stderr, "%08x ", util_cpu_caps.L3_affinity_mask[i][j / 32]); fprintf(stderr, "\n"); } @@ -621,6 +626,7 @@ util_cpu_detect_once(void) util_cpu_caps.nr_cpus = MAX2(1, available_cpus); total_cpus = MAX2(total_cpus, util_cpu_caps.nr_cpus); + util_cpu_caps.max_cpus = total_cpus; util_cpu_caps.num_cpu_mask_bits = align(total_cpus, 32); /* Make the fallback cacheline size nonzero so that it can be diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h index 73271ef..3e78445 100644 --- a/src/util/u_cpu_detect.h +++ b/src/util/u_cpu_detect.h @@ -56,7 +56,23 @@ enum cpu_family { typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32]; struct util_cpu_caps_t { - int nr_cpus; + /** + * Number of CPUs available to the process. + * + * This will be less than or equal to \c max_cpus. This is the number of + * CPUs that are online and available to the process. + */ + int16_t nr_cpus; + + /** + * Maximum number of CPUs that can be online in the system. + * + * This will be greater than or equal to \c nr_cpus. This is the number of + * CPUs installed in the system. \c nr_cpus will be less if some CPUs are + * offline. + */ + int16_t max_cpus; + enum cpu_family family; /* Feature flags */ -- 2.7.4