x86/apic: Reduce cache line misses in __x2apic_send_IPI_mask()

author Eric Dumazet <edumazet@google.com>

Thu, 7 Oct 2021 14:35:56 +0000 (07:35 -0700)

committer Peter Zijlstra <peterz@infradead.org>

Fri, 29 Oct 2021 08:02:17 +0000 (10:02 +0200)
author Eric Dumazet <edumazet@google.com>
Thu, 7 Oct 2021 14:35:56 +0000 (07:35 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 29 Oct 2021 08:02:17 +0000 (10:02 +0200)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c

index f4da9bb..e696e22 100644 (file)
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -15,9 +15,15 @@ struct cluster_mask {
         struct cpumask  mask;
  };
  
-static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+/*
+ * __x2apic_send_IPI_mask() may need to read x86_cpu_to_logical_apicid
+ * for every online CPU in sequence. A per-CPU variable would cost one
+ * cache line miss per CPU, so the values are kept in a flat array.
+ */
+static u32 *x86_cpu_to_logical_apicid __read_mostly;
+
  static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
-static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
+static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
  static struct cluster_mask *cluster_hotplug_mask;
  
  static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -27,7 +33,7 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
  
  static void x2apic_send_IPI(int cpu, int vector)
  {
-       u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+       u32 dest = x86_cpu_to_logical_apicid[cpu];
  
         /* x2apic MSRs are special and need a special fence: */
         weak_wrmsr_fence();
@@ -58,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
  
                 dest = 0;
                 for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
-                       dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
+                       dest |= x86_cpu_to_logical_apicid[clustercpu];
  
                 if (!dest)
                         continue;
@@ -94,7 +100,7 @@ static void x2apic_send_IPI_all(int vector)
  
  static u32 x2apic_calc_apicid(unsigned int cpu)
  {
-       return per_cpu(x86_cpu_to_logical_apicid, cpu);
+       return x86_cpu_to_logical_apicid[cpu];
  }
  
  static void init_x2apic_ldr(void)
@@ -103,7 +109,7 @@ static void init_x2apic_ldr(void)
         u32 cluster, apicid = apic_read(APIC_LDR);
         unsigned int cpu;
  
-       this_cpu_write(x86_cpu_to_logical_apicid, apicid);
+       x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
  
         if (cmsk)
                 goto update;
@@ -166,12 +172,21 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
  
  static int x2apic_cluster_probe(void)
  {
+       u32 slots;
+
         if (!x2apic_mode)
                 return 0;
  
+       slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
+       x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
+       if (!x86_cpu_to_logical_apicid)
+               return 0;
+
         if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
                               x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
                 pr_err("Failed to register X2APIC_PREPARE\n");
+               kfree(x86_cpu_to_logical_apicid);
+               x86_cpu_to_logical_apicid = NULL;
                 return 0;
         }
         init_x2apic_ldr();
author	Eric Dumazet <edumazet@google.com>
	Thu, 7 Oct 2021 14:35:56 +0000 (07:35 -0700)
committer	Peter Zijlstra <peterz@infradead.org>
	Fri, 29 Oct 2021 08:02:17 +0000 (10:02 +0200)