powerpc/qspinlock: powerpc qspinlock implementation
author    Nicholas Piggin <npiggin@gmail.com>
          Mon, 28 Nov 2022 03:11:13 +0000 (13:11 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
          Fri, 2 Dec 2022 06:48:02 +0000 (17:48 +1100)
Add a powerpc-specific implementation of queued spinlocks. This is the
build framework with a very simple (non-queued) spinlock implementation
to begin with. Later changes add queueing, then other features and
optimisations one at a time. Doing it this way makes it easier to see
how the queued spinlocks are built up, and makes performance and
correctness bisects more useful.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Drop paravirt.h & processor.h changes to fix 32-bit build]
[mpe: Fix 32-bit build of qspinlock.o & disallow GENERIC_LOCKBREAK per Nick]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/CONLLQB6DCJU.2ZPOS7T6S5GRR@bobo
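
As context for the diff below: this first cut is a single atomic word per
lock, an inline trylock fastpath (one cmpxchg with acquire semantics), and an
out-of-line slowpath that simply spins on trylock until it succeeds. The
userspace sketch below models that shape with C11 atomics; the names
(qspinlock_model, model_*) are illustrative only, and this is neither the
kernel's atomic_t API nor the eventual queued behaviour.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Userspace analogue only: one atomic word, inline trylock
	 * fastpath, out-of-line spinning slowpath.
	 */
	struct qspinlock_model {
		atomic_int val;		/* 0 = unlocked, 1 = locked */
	};

	static bool model_trylock(struct qspinlock_model *lock)
	{
		int expected = 0;

		/* cmpxchg with acquire ordering, like atomic_cmpxchg_acquire() */
		return atomic_compare_exchange_strong_explicit(&lock->val,
				&expected, 1,
				memory_order_acquire, memory_order_relaxed);
	}

	static void model_lock_slowpath(struct qspinlock_model *lock)
	{
		/* The initial slowpath just keeps retrying the trylock. */
		while (!model_trylock(lock))
			;	/* cpu_relax() in the kernel version */
	}

	static void model_lock(struct qspinlock_model *lock)
	{
		if (!model_trylock(lock))
			model_lock_slowpath(lock);
	}

	static void model_unlock(struct qspinlock_model *lock)
	{
		/* Plain release store, like atomic_set_release() */
		atomic_store_explicit(&lock->val, 0, memory_order_release);
	}

	int main(void)
	{
		struct qspinlock_model lock = { .val = 0 };

		model_lock(&lock);
		printf("locked: %d\n", atomic_load(&lock.val));
		model_unlock(&lock);
		printf("unlocked: %d\n", atomic_load(&lock.val));
		return 0;
	}

Later patches in the series replace the spinning slowpath with real queueing,
while the trylock fastpath and release-store unlock keep this shape.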
arch/powerpc/Kconfig
arch/powerpc/include/asm/qspinlock.h
arch/powerpc/include/asm/qspinlock_paravirt.h [deleted file]
arch/powerpc/include/asm/qspinlock_types.h [new file with mode: 0644]
arch/powerpc/include/asm/spinlock.h
arch/powerpc/include/asm/spinlock_types.h
arch/powerpc/lib/Makefile
arch/powerpc/lib/qspinlock.c [new file with mode: 0644]

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27..7fbdf22 100644
@@ -96,7 +96,7 @@ config LOCKDEP_SUPPORT
 config GENERIC_LOCKBREAK
        bool
        default y
-       depends on SMP && PREEMPTION
+       depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS
 
 config GENERIC_HWEIGHT
        bool
@@ -154,7 +154,6 @@ config PPC
        select ARCH_USE_CMPXCHG_LOCKREF         if PPC64
        select ARCH_USE_MEMTEST
        select ARCH_USE_QUEUED_RWLOCKS          if PPC_QUEUED_SPINLOCKS
-       select ARCH_USE_QUEUED_SPINLOCKS        if PPC_QUEUED_SPINLOCKS
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select ARCH_WANT_IPC_PARSE_VERSION
        select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index b676c4f..5e62573 100644
@@ -2,83 +2,55 @@
 #ifndef _ASM_POWERPC_QSPINLOCK_H
 #define _ASM_POWERPC_QSPINLOCK_H
 
-#include <asm-generic/qspinlock_types.h>
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <asm/qspinlock_types.h>
 #include <asm/paravirt.h>
 
-#define _Q_PENDING_LOOPS       (1 << 9) /* not tuned */
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-
-static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
-       if (!is_shared_processor())
-               native_queued_spin_lock_slowpath(lock, val);
-       else
-               __pv_queued_spin_lock_slowpath(lock, val);
+       return atomic_read(&lock->val);
 }
 
-#define queued_spin_unlock queued_spin_unlock
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-       if (!is_shared_processor())
-               smp_store_release(&lock->locked, 0);
-       else
-               __pv_queued_spin_unlock(lock);
+       return !atomic_read(&lock.val);
 }
 
-#else
-extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-#endif
-
-static __always_inline void queued_spin_lock(struct qspinlock *lock)
+static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 {
-       u32 val = 0;
-
-       if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
-               return;
-
-       queued_spin_lock_slowpath(lock, val);
+       return 0;
 }
-#define queued_spin_lock queued_spin_lock
 
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define SPIN_THRESHOLD (1<<15) /* not tuned */
-
-static __always_inline void pv_wait(u8 *ptr, u8 val)
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-       if (*ptr != val)
-               return;
-       yield_to_any();
-       /*
-        * We could pass in a CPU here if waiting in the queue and yield to
-        * the previous CPU in the queue.
-        */
+       return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0;
 }
 
-static __always_inline void pv_kick(int cpu)
+void queued_spin_lock_slowpath(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-       prod_cpu(cpu);
+       if (!queued_spin_trylock(lock))
+               queued_spin_lock_slowpath(lock);
 }
 
-extern void __pv_init_lock_hash(void);
-
-static inline void pv_spinlocks_init(void)
+static inline void queued_spin_unlock(struct qspinlock *lock)
 {
-       __pv_init_lock_hash();
+       atomic_set_release(&lock->val, 0);
 }
 
-#endif
-
-/*
- * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait,
- * which was found to have performance problems if implemented with
- * the preferred spin_begin()/spin_end() SMT priority pattern. Use the
- * generic version instead.
- */
+#define arch_spin_is_locked(l)         queued_spin_is_locked(l)
+#define arch_spin_is_contended(l)      queued_spin_is_contended(l)
+#define arch_spin_value_unlocked(l)    queued_spin_value_unlocked(l)
+#define arch_spin_lock(l)              queued_spin_lock(l)
+#define arch_spin_trylock(l)           queued_spin_trylock(l)
+#define arch_spin_unlock(l)            queued_spin_unlock(l)
 
-#include <asm-generic/qspinlock.h>
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void);
+#else
+static inline void pv_spinlocks_init(void) { }
+#endif
 
 #endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h
deleted file mode 100644
index 6b60e77..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
-#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
-
-EXPORT_SYMBOL(__pv_queued_spin_unlock);
-
-#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
new file mode 100644 (file)
index 0000000..59606bc
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H
+#define _ASM_POWERPC_QSPINLOCK_TYPES_H
+
+#include <linux/types.h>
+
+typedef struct qspinlock {
+       atomic_t val;
+} arch_spinlock_t;
+
+#define        __ARCH_SPIN_LOCK_UNLOCKED       { .val = ATOMIC_INIT(0) }
+
+#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
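
With the two headers above, a lock is one atomic word that can be statically
initialised with __ARCH_SPIN_LOCK_UNLOCKED, and the arch_spin_* wrappers in
qspinlock.h make the common-case lock a single atomic_cmpxchg_acquire() and
the unlock a single atomic_set_release(). A hypothetical caller-side fragment
(not part of the patch; demo_lock and demo() are made-up names) would look
like:

	/* Hypothetical example only, not in the patch. */
	static arch_spinlock_t demo_lock = __ARCH_SPIN_LOCK_UNLOCKED;

	static void demo(void)
	{
		arch_spin_lock(&demo_lock);	/* atomic_cmpxchg_acquire(0 -> 1), else slowpath */
		/* critical section */
		arch_spin_unlock(&demo_lock);	/* atomic_set_release(&val, 0) */
	}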
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index bd75872..7dafca8 100644
@@ -13,7 +13,7 @@
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()       smp_mb()
 
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#ifndef CONFIG_PPC_QUEUED_SPINLOCKS
 static inline void pv_spinlocks_init(void) { }
 #endif
 
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index d5f8a74..40b0144 100644
@@ -7,7 +7,7 @@
 #endif
 
 #ifdef CONFIG_PPC_QUEUED_SPINLOCKS
-#include <asm-generic/qspinlock_types.h>
+#include <asm/qspinlock_types.h>
 #include <asm-generic/qrwlock_types.h>
 #else
 #include <asm/simple_spinlock_types.h>
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8560c91..4de71cb 100644
@@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 obj64-y        += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
           memcpy_64.o copy_mc_64.o
 
-ifndef CONFIG_PPC_QUEUED_SPINLOCKS
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP)      += qspinlock.o
+else
 obj64-$(CONFIG_SMP)    += locks.o
 endif
 
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644 (file)
index 0000000..1c669b5
--- /dev/null
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/export.h>
+#include <linux/processor.h>
+#include <asm/qspinlock.h>
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+       while (!queued_spin_trylock(lock))
+               cpu_relax();
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void)
+{
+}
+#endif
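
Note that kernel code does not call queued_spin_lock() directly; it goes
through the generic spinlock_t/raw_spinlock_t API, which resolves to the
routines above when CONFIG_PPC_QUEUED_SPINLOCKS=y. A minimal, hypothetical
module sketch follows (demo_lock, demo_counter and the qspinlock_demo_*
names are made up for illustration):

	/*
	 * Illustrative module sketch, not part of the patch: exercises the
	 * new lock through the normal spinlock_t API, which lowers to
	 * queued_spin_lock()/queued_spin_unlock() with queued spinlocks
	 * enabled.
	 */
	#include <linux/module.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);
	static unsigned long demo_counter;

	static int __init qspinlock_demo_init(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);	/* trylock fastpath, else slowpath */
		demo_counter++;
		spin_unlock_irqrestore(&demo_lock, flags);

		pr_info("qspinlock demo: counter=%lu\n", demo_counter);
		return 0;
	}

	static void __exit qspinlock_demo_exit(void)
	{
	}

	module_init(qspinlock_demo_init);
	module_exit(qspinlock_demo_exit);
	MODULE_LICENSE("GPL");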