x86: Put hot per CPU variables into a struct
author: Thomas Gleixner <tglx@linutronix.de>
Thu, 15 Sep 2022 11:11:01 +0000 (13:11 +0200)
committer: Peter Zijlstra <peterz@infradead.org>
Mon, 17 Oct 2022 14:41:03 +0000 (16:41 +0200)
The layout of per-cpu variables is at the mercy of the compiler. This
can lead to random performance fluctuations from build to build.

Create a structure to hold some of the hottest per-cpu variables,
starting with current_task.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.179707194@infradead.org
arch/x86/include/asm/current.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c

index 3e204e6..63c42ac 100644 (file)
@@ -3,16 +3,29 @@
 #define _ASM_X86_CURRENT_H
 
 #include <linux/compiler.h>
-#include <asm/percpu.h>
 
 #ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+#include <asm/percpu.h>
+
 struct task_struct;
 
-DECLARE_PER_CPU(struct task_struct *, current_task);
+struct pcpu_hot {
+       union {
+               struct {
+                       struct task_struct      *current_task;
+               };
+               u8      pad[64];
+       };
+};
+static_assert(sizeof(struct pcpu_hot) == 64);
+
+DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
 
 static __always_inline struct task_struct *get_current(void)
 {
-       return this_cpu_read_stable(current_task);
+       return this_cpu_read_stable(pcpu_hot.current_task);
 }
 
 #define current get_current()
index 8e87318..5207153 100644 (file)
@@ -2012,18 +2012,16 @@ static __init int setup_clearcpuid(char *arg)
 }
 __setup("clearcpuid=", setup_clearcpuid);
 
+DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
+       .current_task   = &init_task,
+};
+EXPORT_PER_CPU_SYMBOL(pcpu_hot);
+
 #ifdef CONFIG_X86_64
 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
                     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
 EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 
-/*
- * The following percpu variables are hot.  Align current_task to
- * cacheline size such that they fall in the same cacheline.
- */
-DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
-       &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(void *, hardirq_stack_ptr);
 DEFINE_PER_CPU(bool, hardirq_stack_inuse);
@@ -2083,8 +2081,6 @@ void syscall_init(void)
 
 #else  /* CONFIG_X86_64 */
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
index 2f314b1..807da45 100644 (file)
@@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        if (prev->gs | next->gs)
                loadsegment(gs, next->gs);
 
-       this_cpu_write(current_task, next_p);
+       raw_cpu_write(pcpu_hot.current_task, next_p);
 
        switch_fpu_finish();
 
index 6b3418b..c4f6cac 100644 (file)
@@ -617,7 +617,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /*
         * Switch the PDA and FPU contexts.
         */
-       this_cpu_write(current_task, next_p);
+       raw_cpu_write(pcpu_hot.current_task, next_p);
        this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
        switch_fpu_finish();
index ce8728d..05f3157 100644 (file)
@@ -1046,7 +1046,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
        /* Just in case we booted with a single CPU. */
        alternatives_enable_smp();
 
-       per_cpu(current_task, cpu) = idle;
+       per_cpu(pcpu_hot.current_task, cpu) = idle;
        cpu_init_stack_canary(cpu, idle);
 
        /* Initialize the interrupt stack(s) */