From c1e53367dab15e41814cff4e37df8ec4ac8fb9d7 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 15 Apr 2021 17:39:34 +0530 Subject: [PATCH] powerpc/smp: Cache CPU to chip lookup On systems with large CPUs per node, even with the filtered matching of related CPUs, there can be large number of calls to cpu_to_chip_id for the same CPU. For example with 4096 vCPU, 1 node QEMU configuration, with 4 threads per core, system could be see upto 1024 calls to cpu_to_chip_id() for the same CPU. On a given system, cpu_to_chip_id() for a given CPU would always return the same. Hence cache the result in a lookup table for use in subsequent calls. Since all CPUs sharing the same core will belong to the same chip, the lookup_table has an entry for one CPU per core. chip_id_lookup_table is not being freed and would be used on subsequent CPU online post CPU offline. Reported-by: Daniel Henrique Barboza Suggested-by: Michael Ellerman Signed-off-by: Srikar Dronamraju Tested-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210415120934.232271-4-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/prom.c | 19 +++++++++++++++---- arch/powerpc/kernel/smp.c | 21 +++++++++++++++++++-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 47081a9..03b3d010 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); +extern int *chip_id_lookup_table; #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a8b2d6b..fbe9dee 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -65,6 +65,8 @@ #define DBG(fmt...) #endif +int *chip_id_lookup_table; + #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; @@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id); int cpu_to_chip_id(int cpu) { struct device_node *np; + int ret = -1, idx; + + idx = cpu / threads_per_core; + if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) + return chip_id_lookup_table[idx]; np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; + if (np) { + ret = of_get_ibm_chip_id(np); + of_node_put(np); + + if (chip_id_lookup_table) + chip_id_lookup_table[idx] = ret; + } - of_node_put(np); - return of_get_ibm_chip_id(np); + return ret; } EXPORT_SYMBOL(cpu_to_chip_id); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f8576a2..ad3e974 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1073,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_smallcore_mask(boot_cpuid)); } + if (cpu_to_chip_id(boot_cpuid) != -1) { + int idx = num_possible_cpus() / threads_per_core; + + /* + * All threads of a core will all belong to the same core, + * chip_id_lookup_table will have one entry per core. + * Assumption: if boot_cpuid doesn't have a chip-id, then no + * other CPUs, will also not have chip-id. + */ + chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL); + if (chip_id_lookup_table) + memset(chip_id_lookup_table, -1, sizeof(int) * idx); + } + if (smp_ops && smp_ops->probe) smp_ops->probe(); } @@ -1468,8 +1482,8 @@ static void add_cpu_to_masks(int cpu) { struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; int first_thread = cpu_first_thread_sibling(cpu); - int chip_id = cpu_to_chip_id(cpu); cpumask_var_t mask; + int chip_id = -1; bool ret; int i; @@ -1492,7 +1506,10 @@ static void add_cpu_to_masks(int cpu) if (has_coregroup_support()) update_coregroup_mask(cpu, &mask); - if (chip_id == -1 || !ret) { + if (chip_id_lookup_table && ret) + chip_id = cpu_to_chip_id(cpu); + + if (chip_id == -1) { cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); goto out; } -- 2.7.4