percpu: Wire up cmpxchg128
author     Peter Zijlstra <peterz@infradead.org>
           Wed, 31 May 2023 13:08:39 +0000 (15:08 +0200)
committer  Peter Zijlstra <peterz@infradead.org>
           Mon, 5 Jun 2023 07:36:37 +0000 (09:36 +0200)
In order to replace cmpxchg_double() with the newly minted
cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg().
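
A minimal usage sketch (illustration only, not part of this patch; the
per-CPU variable and helper below are hypothetical):

        #include <linux/percpu.h>
        #include <linux/types.h>

        /* 128-bit per-CPU slot; keep it naturally aligned for cmpxchg16b. */
        static DEFINE_PER_CPU_ALIGNED(u128, demo_slot);

        /* Like cmpxchg(), this_cpu_cmpxchg128() returns the previous value. */
        static bool demo_update(u128 old, u128 new)
        {
                return this_cpu_cmpxchg128(demo_slot, old, new) == old;
        }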

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230531132323.654945124@infradead.org
arch/arm64/include/asm/percpu.h
arch/s390/include/asm/percpu.h
arch/x86/include/asm/percpu.h
arch/x86/lib/Makefile
arch/x86/lib/cmpxchg16b_emu.S
arch/x86/lib/cmpxchg8b_emu.S
include/asm-generic/percpu.h

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index b9ba19d..2549829 100644
@@ -140,6 +140,10 @@ PERCPU_RET_OP(add, add, ldadd)
  * re-enabling preemption for preemptible kernels, but doing that in a way
  * which builds inside a module would mean messing directly with the preempt
  * count. If you do this, peterz and tglx will hunt you down.
+ *
+ * Not to mention it'll break the actual preemption model for missing a
+ * preemption point when TIF_NEED_RESCHED gets set while preemption is
+ * disabled.
  */
 #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)          \
 ({                                                                     \
@@ -240,6 +244,22 @@ PERCPU_RET_OP(add, add, ldadd)
 #define this_cpu_cmpxchg_8(pcp, o, n)  \
        _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
 
+#define this_cpu_cmpxchg64(pcp, o, n)  this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, o, n)                                 \
+({                                                                     \
+       typedef typeof(pcp) pcp_op_T__;                                 \
+       u128 old__, new__, ret__;                                       \
+       pcp_op_T__ *ptr__;                                              \
+       old__ = o;                                                      \
+       new__ = n;                                                      \
+       preempt_disable_notrace();                                      \
+       ptr__ = raw_cpu_ptr(&(pcp));                                    \
+       ret__ = cmpxchg128_local((void *)ptr__, old__, new__);          \
+       preempt_enable_notrace();                                       \
+       ret__;                                                          \
+})
+
 #ifdef __KVM_NVHE_HYPERVISOR__
 extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
 #define __per_cpu_offset
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 081837b..5603402 100644
 #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
 
+#define this_cpu_cmpxchg64(pcp, o, n)  this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, oval, nval)                           \
+({                                                                     \
+       typedef typeof(pcp) pcp_op_T__;                                 \
+       u128 old__, new__, ret__;                                       \
+       pcp_op_T__ *ptr__;                                              \
+       old__ = oval;                                                   \
+       new__ = nval;                                                   \
+       preempt_disable_notrace();                                      \
+       ptr__ = raw_cpu_ptr(&(pcp));                                    \
+       ret__ = cmpxchg128((void *)ptr__, old__, new__);                \
+       preempt_enable_notrace();                                       \
+       ret__;                                                          \
+})
+
 #define arch_this_cpu_xchg(pcp, nval)                                  \
 ({                                                                     \
        typeof(pcp) *ptr__;                                             \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 13c0d63..4232fb2 100644
@@ -210,6 +210,67 @@ do {                                                                       \
        (typeof(_var))(unsigned long) pco_old__;                        \
 })
 
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval)            \
+({                                                                     \
+       union {                                                         \
+               u64 var;                                                \
+               struct {                                                \
+                       u32 low, high;                                  \
+               };                                                      \
+       } old__, new__;                                                 \
+                                                                       \
+       old__.var = _oval;                                              \
+       new__.var = _nval;                                              \
+                                                                       \
+       asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+                             "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+                 : [var] "+m" (_var),                                  \
+                   "+a" (old__.low),                                   \
+                   "+d" (old__.high)                                   \
+                 : "b" (new__.low),                                    \
+                   "c" (new__.high)                                    \
+                 : "memory", "esi");                                   \
+                                                                       \
+       old__.var;                                                      \
+})
+
+#define raw_cpu_cmpxchg64(pcp, oval, nval)     percpu_cmpxchg64_op(8,         , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval)    percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+#endif
+
+#ifdef CONFIG_X86_64
+#define raw_cpu_cmpxchg64(pcp, oval, nval)     percpu_cmpxchg_op(8,         , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval)    percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+
+#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval)           \
+({                                                                     \
+       union {                                                         \
+               u128 var;                                               \
+               struct {                                                \
+                       u64 low, high;                                  \
+               };                                                      \
+       } old__, new__;                                                 \
+                                                                       \
+       old__.var = _oval;                                              \
+       new__.var = _nval;                                              \
+                                                                       \
+       asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+                             "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+                 : [var] "+m" (_var),                                  \
+                   "+a" (old__.low),                                   \
+                   "+d" (old__.high)                                   \
+                 : "b" (new__.low),                                    \
+                   "c" (new__.high)                                    \
+                 : "memory", "rsi");                                   \
+                                                                       \
+       old__.var;                                                      \
+})
+
+#define raw_cpu_cmpxchg128(pcp, oval, nval)    percpu_cmpxchg128_op(16,         , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval)   percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+#endif
+
 /*
  * this_cpu_read() makes gcc load the percpu variable every time it is
  * accessed while this_cpu_read_stable() allows the value to be cached.
@@ -341,12 +402,13 @@ do {                                                                      \
        bool __ret;                                                     \
        typeof(pcp1) __o1 = (o1), __n1 = (n1);                          \
        typeof(pcp2) __o2 = (o2), __n2 = (n2);                          \
-       alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
-                      "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
-                      X86_FEATURE_CX16,                                \
-                      ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),           \
-                                  "+m" (pcp2), "+d" (__o2)),           \
-                      "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");     \
+       asm volatile (ALTERNATIVE("leaq %P1, %%rsi; call this_cpu_cmpxchg16b_emu", \
+                                 "cmpxchg16b " __percpu_arg(1), X86_FEATURE_CX16) \
+                            "setz %0"                                  \
+                            : "=a" (__ret), "+m" (pcp1)                \
+                            : "b" (__n1), "c" (__n2),                  \
+                              "a" (__o1), "d" (__o2)                   \
+                            : "memory", "rsi");                        \
        __ret;                                                          \
 })
 
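For reference, a sketch of how a this_cpu_cmpxchg_double() user could move
over to the 128-bit primitive on a 64-bit kernel (the union and its field
names are made up for illustration, they are not taken from this patch):

        union demo_pair {
                struct {
                        void *ptr;              /* low 64 bits */
                        unsigned long seq;      /* high 64 bits */
                };
                u128 full;
        };

        static DEFINE_PER_CPU_ALIGNED(union demo_pair, demo_pair);

        static bool demo_swap(union demo_pair old, union demo_pair new)
        {
                /*
                 * Old interface, two adjacent per-CPU words, success as bool:
                 *
                 *      this_cpu_cmpxchg_double(demo_pair.ptr, demo_pair.seq,
                 *                              old.ptr, old.seq,
                 *                              new.ptr, new.seq);
                 *
                 * New interface: one 128-bit word, previous value returned.
                 */
                return this_cpu_cmpxchg128(demo_pair.full, old.full, new.full) == old.full;
        }
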
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 01932af..ea3a28e 100644
@@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y)
         lib-y += strstr_32.o
         lib-y += string_32.o
         lib-y += memmove_32.o
+        lib-y += cmpxchg8b_emu.o
 ifneq ($(CONFIG_X86_CMPXCHG64),y)
-        lib-y += cmpxchg8b_emu.o atomic64_386_32.o
+        lib-y += atomic64_386_32.o
 endif
 else
         obj-y += iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 33c70c0..6962df3 100644
@@ -1,47 +1,54 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <linux/linkage.h>
 #include <asm/percpu.h>
+#include <asm/processor-flags.h>
 
 .text
 
 /*
+ * Emulate 'cmpxchg16b %gs:(%rsi)'
+ *
  * Inputs:
  * %rsi : memory location to compare
  * %rax : low 64 bits of old value
  * %rdx : high 64 bits of old value
  * %rbx : low 64 bits of new value
  * %rcx : high 64 bits of new value
- * %al  : Operation successful
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
  */
 SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 
-#
-# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
-# via the ZF.  Caller will access %al to get result.
-#
-# Note that this is only useful for a cpuops operation.  Meaning that we
-# do *not* have a fully atomic operation but just an operation that is
-# *atomic* on a single cpu (as provided by the this_cpu_xx class of
-# macros).
-#
        pushfq
        cli
 
-       cmpq PER_CPU_VAR((%rsi)), %rax
-       jne .Lnot_same
-       cmpq PER_CPU_VAR(8(%rsi)), %rdx
-       jne .Lnot_same
+       /* if (*ptr == old) */
+       cmpq    PER_CPU_VAR(0(%rsi)), %rax
+       jne     .Lnot_same
+       cmpq    PER_CPU_VAR(8(%rsi)), %rdx
+       jne     .Lnot_same
 
-       movq %rbx, PER_CPU_VAR((%rsi))
-       movq %rcx, PER_CPU_VAR(8(%rsi))
+       /* *ptr = new */
+       movq    %rbx, PER_CPU_VAR(0(%rsi))
+       movq    %rcx, PER_CPU_VAR(8(%rsi))
+
+       /* set ZF in EFLAGS to indicate success */
+       orl     $X86_EFLAGS_ZF, (%rsp)
 
        popfq
-       mov $1, %al
        RET
 
 .Lnot_same:
+       /* *ptr != old */
+
+       /* old = *ptr */
+       movq    PER_CPU_VAR(0(%rsi)), %rax
+       movq    PER_CPU_VAR(8(%rsi)), %rdx
+
+       /* clear ZF in EFLAGS to indicate failure */
+       andl    $(~X86_EFLAGS_ZF), (%rsp)
+
        popfq
-       xor %al,%al
        RET
 
 SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
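
In C terms, the emulation above is roughly the following sketch (illustration
only; the real implementation is the assembly, which additionally reports
success via ZF, and disabling interrupts makes it atomic on the local CPU
only, hence the "not safe against NMIs" note):

        static u128 this_cpu_cmpxchg16b_emu_sketch(u128 __percpu *ptr, u128 old, u128 new)
        {
                unsigned long flags;
                u128 *p, cur;

                local_irq_save(flags);          /* pushfq; cli */
                p = raw_cpu_ptr(ptr);           /* %gs-relative address */
                cur = *p;
                if (cur == old)
                        *p = new;               /* store %rbx:%rcx */
                local_irq_restore(flags);       /* popfq */

                return cur;                     /* old value back in %rdx:%rax */
        }
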
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 6a912d5..4980525 100644
@@ -2,10 +2,16 @@
 
 #include <linux/linkage.h>
 #include <asm/export.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
 
 .text
 
+#ifndef CONFIG_X86_CMPXCHG64
+
 /*
+ * Emulate 'cmpxchg8b (%esi)' on UP
+ *
  * Inputs:
  * %esi : memory location to compare
  * %eax : low 32 bits of old value
  */
 SYM_FUNC_START(cmpxchg8b_emu)
 
-#
-# Emulate 'cmpxchg8b (%esi)' on UP except we don't
-# set the whole ZF thing (caller will just compare
-# eax:edx with the expected value)
-#
        pushfl
        cli
 
-       cmpl  (%esi), %eax
-       jne .Lnot_same
-       cmpl 4(%esi), %edx
-       jne .Lhalf_same
+       cmpl    0(%esi), %eax
+       jne     .Lnot_same
+       cmpl    4(%esi), %edx
+       jne     .Lnot_same
+
+       movl    %ebx, 0(%esi)
+       movl    %ecx, 4(%esi)
 
-       movl %ebx,  (%esi)
-       movl %ecx, 4(%esi)
+       orl     $X86_EFLAGS_ZF, (%esp)
 
        popfl
        RET
 
 .Lnot_same:
-       movl  (%esi), %eax
-.Lhalf_same:
-       movl 4(%esi), %edx
+       movl    0(%esi), %eax
+       movl    4(%esi), %edx
+
+       andl    $(~X86_EFLAGS_ZF), (%esp)
 
        popfl
        RET
 
 SYM_FUNC_END(cmpxchg8b_emu)
 EXPORT_SYMBOL(cmpxchg8b_emu)
+
+#endif
+
+#ifndef CONFIG_UML
+
+SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
+
+       pushfl
+       cli
+
+       cmpl    PER_CPU_VAR(0(%esi)), %eax
+       jne     .Lnot_same2
+       cmpl    PER_CPU_VAR(4(%esi)), %edx
+       jne     .Lnot_same2
+
+       movl    %ebx, PER_CPU_VAR(0(%esi))
+       movl    %ecx, PER_CPU_VAR(4(%esi))
+
+       orl     $X86_EFLAGS_ZF, (%esp)
+
+       popfl
+       RET
+
+.Lnot_same2:
+       movl    PER_CPU_VAR(0(%esi)), %eax
+       movl    PER_CPU_VAR(4(%esi)), %edx
+
+       andl    $(~X86_EFLAGS_ZF), (%esp)
+
+       popfl
+       RET
+
+SYM_FUNC_END(this_cpu_cmpxchg8b_emu)
+
+#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 96af32c..5c66e44 100644
@@ -350,6 +350,25 @@ do {                                                                       \
 #endif
 #endif
 
+#ifndef raw_cpu_try_cmpxchg64
+#ifdef raw_cpu_cmpxchg64
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \
+       __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg64)
+#else
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \
+       raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg128
+#ifdef raw_cpu_cmpxchg128
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+       __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg128)
+#else
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+       raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+
 #ifndef raw_cpu_cmpxchg_1
 #define raw_cpu_cmpxchg_1(pcp, oval, nval) \
        raw_cpu_generic_cmpxchg(pcp, oval, nval)
@@ -367,6 +386,15 @@ do {                                                                       \
        raw_cpu_generic_cmpxchg(pcp, oval, nval)
 #endif
 
+#ifndef raw_cpu_cmpxchg64
+#define raw_cpu_cmpxchg64(pcp, oval, nval) \
+       raw_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
+#ifndef raw_cpu_cmpxchg128
+#define raw_cpu_cmpxchg128(pcp, oval, nval) \
+       raw_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
+
 #ifndef raw_cpu_cmpxchg_double_1
 #define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
        raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
@@ -512,6 +540,25 @@ do {                                                                       \
 #endif
 #endif
 
+#ifndef this_cpu_try_cmpxchg64
+#ifdef this_cpu_cmpxchg64
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) \
+       __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg64)
+#else
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) \
+       this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg128
+#ifdef this_cpu_cmpxchg128
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+       __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg128)
+#else
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+       this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+
 #ifndef this_cpu_cmpxchg_1
 #define this_cpu_cmpxchg_1(pcp, oval, nval) \
        this_cpu_generic_cmpxchg(pcp, oval, nval)
@@ -529,6 +576,15 @@ do {                                                                       \
        this_cpu_generic_cmpxchg(pcp, oval, nval)
 #endif
 
+#ifndef this_cpu_cmpxchg64
+#define this_cpu_cmpxchg64(pcp, oval, nval) \
+       this_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
+#ifndef this_cpu_cmpxchg128
+#define this_cpu_cmpxchg128(pcp, oval, nval) \
+       this_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
+
 #ifndef this_cpu_cmpxchg_double_1
 #define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
        this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
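
A sketch of how the new try_cmpxchg fallbacks are meant to be used (the
per-CPU variable is hypothetical): on failure, the try_cmpxchg flavour writes
the value it actually found back through the old-value pointer, so a retry
loop gets the fresh value without an extra read.

        static DEFINE_PER_CPU_ALIGNED(u128, demo_state);

        static void demo_set_low_bit(void)
        {
                u128 old = 0, new;

                do {
                        new = old | 1;
                } while (!this_cpu_try_cmpxchg128(demo_state, &old, new));
        }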