From b3a73b7db2b6cb3b2e5bfda5518a0e92230ef673 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:18 +1000
Subject: [PATCH] powerpc/qspinlock: convert atomic operations to assembly

This uses more optimal ll/sc style access patterns (rather than
cmpxchg), and also sets the EH=1 lock hint on those operations which
acquire ownership of the lock.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-4-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h       | 24 +++++++--
 arch/powerpc/include/asm/qspinlock_types.h |  4 +-
 arch/powerpc/lib/qspinlock.c               | 82 +++++++++++++++++-------
 3 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 713f662..c16e1f0 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -2,29 +2,43 @@
 #ifndef _ASM_POWERPC_QSPINLOCK_H
 #define _ASM_POWERPC_QSPINLOCK_H
 
-#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <asm/qspinlock_types.h>
 #include <asm/paravirt.h>
 
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
-	return atomic_read(&lock->val);
+	return READ_ONCE(lock->val);
 }
 
 static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-	return !atomic_read(&lock.val);
+	return !lock.val;
 }
 
 static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 {
-	return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK);
+	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
 }
 
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0;
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1,%3	# queued_spin_trylock			\n"
+"	cmpwi	0,%0,0							\n"
+"	bne-	2f							\n"
+"	stwcx.	%2,0,%1							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (_Q_LOCKED_VAL),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	return likely(prev == 0);
 }
 
 void queued_spin_lock_slowpath(struct qspinlock *lock);
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index fe87181..b9a5a52 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct qspinlock {
 	union {
-		atomic_t val;
+		u32 val;
 
 #ifdef __LITTLE_ENDIAN
 	struct {
@@ -23,7 +23,7 @@
 	};
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } }
 
 /*
  * Bitfields in the lock word:
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 8650462..645d9af 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-#include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
@@ -22,12 +21,12 @@ struct qnodes {
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
-static inline int encode_tail_cpu(int cpu)
+static inline u32 encode_tail_cpu(int cpu)
 {
 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
 }
 
-static inline int decode_tail_cpu(int val)
+static inline int decode_tail_cpu(u32 val)
 {
 	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
 }
@@ -39,26 +38,34 @@ static inline int decode_tail_cpu(int val)
  * This is used by the head of the queue to acquire the lock and clean up
  * its tail if it was the last one queued.
  */
-static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail)
+static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail)
 {
-	int val = atomic_read(&lock->val);
-
-	BUG_ON(val & _Q_LOCKED_VAL);
-
-	/* If we're the last queued, must clean up the tail. */
-	if ((val & _Q_TAIL_CPU_MASK) == tail) {
-		if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val)
-			return val;
-		/* Another waiter must have enqueued */
-		val = atomic_read(&lock->val);
-		BUG_ON(val & _Q_LOCKED_VAL);
-	}
-
-	/* We must be the owner, just set the lock bit and acquire */
-	atomic_or(_Q_LOCKED_VAL, &lock->val);
-	__atomic_acquire_fence();
-
-	return val;
+	u32 newval = _Q_LOCKED_VAL;
+	u32 prev, tmp;
+
+	asm volatile(
+"1:	lwarx	%0,0,%2,%6	# set_locked_clean_tail			\n"
+	/* Test whether the lock tail == tail */
+"	and	%1,%0,%5						\n"
+"	cmpw	0,%1,%3							\n"
+	/* Merge the new locked value */
+"	or	%1,%1,%4						\n"
+"	bne	2f							\n"
+	/* If the lock tail matched, then clear it, otherwise leave it. */
+"	andc	%1,%1,%5						\n"
+"2:	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"3:									\n"
+	: "=&r" (prev), "=&r" (tmp)
+	: "r" (&lock->val), "r"(tail), "r" (newval),
+	  "r" (_Q_TAIL_CPU_MASK),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	BUG_ON(prev & _Q_LOCKED_VAL);
+
+	return prev;
 }
 
 /*
@@ -68,20 +75,25 @@ static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tai
  * acquire barrier in get_tail_qnode() when the next CPU finds this tail
  * value.
  */
-static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail)
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
 {
-	for (;;) {
-		int val = atomic_read(&lock->val);
-		int newval = (val & ~_Q_TAIL_CPU_MASK) | tail;
-		int old;
-
-		old = atomic_cmpxchg_release(&lock->val, val, newval);
-		if (old == val)
-			return old;
-	}
+	u32 prev, tmp;
+
+	asm volatile(
+"\t"	PPC_RELEASE_BARRIER "						\n"
+"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
+"	andc	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev), "=&r"(tmp)
+	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+	: "cr0", "memory");
+
+	return prev;
 }
 
-static struct qnode *get_tail_qnode(struct qspinlock *lock, int val)
+static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = decode_tail_cpu(val);
 	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
@@ -109,7 +121,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 {
 	struct qnodes *qnodesp;
 	struct qnode *next, *node;
-	int val, old, tail;
+	u32 val, old, tail;
 	int idx;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -156,7 +168,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 
 	/* We're at the head of the waitqueue, wait for the lock. */
 	for (;;) {
-		val = atomic_read(&lock->val);
+		val = READ_ONCE(lock->val);
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
-- 
2.7.4
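
Note on the trylock conversion: the pattern is the classic powerpc
load-reserve/store-conditional (larx/stcx.) loop, with two refinements
visible in the patch: the extra EH=1 operand on lwarx hints to the core
that the reservation is being taken in order to acquire a lock, and the
acquire barrier is executed only on the success path. A minimal
stand-alone sketch of the same pattern follows; it is illustrative
only, assuming a 64-bit toolchain (the 4-operand lwarx form is not
valid on 32-bit, which is why the kernel passes
"i" (IS_ENABLED(CONFIG_PPC64)) for the hint), with a plain lwsync
standing in for PPC_ACQUIRE_BARRIER and the hypothetical name
llsc_trylock:

/* Illustrative sketch, not the kernel code: ll/sc trylock with EH=1. */
static inline int llsc_trylock(unsigned int *lock)
{
	unsigned int prev;

	asm volatile(
"1:	lwarx	%0,0,%1,1	\n"	/* load-reserve lock word, EH=1 hint */
"	cmpwi	0,%0,0		\n"	/* nonzero means already held */
"	bne-	2f		\n"	/* fail without storing */
"	stwcx.	%2,0,%1		\n"	/* conditionally store locked value */
"	bne-	1b		\n"	/* reservation lost: retry */
"	lwsync			\n"	/* acquire barrier, success path only */
"2:				\n"
	: "=&r" (prev)
	: "r" (lock), "r" (1U)
	: "cr0", "memory");

	return prev == 0;	/* nonzero prev means the trylock failed */
}

stwcx. failing (cr0.eq clear) means another CPU stole the reservation,
so the sequence loops back to the load-reserve; a failed trylock takes
the early exit and never executes the barrier, matching the kernel asm.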
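
The tail-publish conversion has the same larx/stcx. shape but the
opposite ordering: the release barrier runs before the loop so the
qnode initialisation is visible before the new tail, and there is no EH
hint because publishing a tail does not acquire ownership of the lock,
which is exactly the distinction the commit message draws. A sketch
under the same caveats (llsc_publish_tail is a hypothetical name, plain
lwsync stands in for PPC_RELEASE_BARRIER, and the tail mask is taken as
a parameter rather than _Q_TAIL_CPU_MASK):

/* Illustrative sketch, not the kernel code: release-ordered tail swap. */
static inline unsigned int llsc_publish_tail(unsigned int *lock,
					     unsigned int tail,
					     unsigned int tail_mask)
{
	unsigned int prev, tmp;

	asm volatile(
"	lwsync			\n"	/* release: order prior stores first */
"1:	lwarx	%0,0,%2		\n"	/* load-reserve old word, no EH hint */
"	andc	%1,%0,%4	\n"	/* clear the previous tail field */
"	or	%1,%1,%3	\n"	/* merge in our encoded tail */
"	stwcx.	%1,0,%2		\n"	/* conditionally store new word */
"	bne-	1b		\n"	/* reservation lost: retry */
	: "=&r" (prev), "=&r" (tmp)
	: "r" (lock), "r" (tail), "r" (tail_mask)
	: "cr0", "memory");

	return prev;	/* old value: caller inspects the previous tail */
}

Returning the pre-swap value preserves the semantics of the cmpxchg
loop it replaces: the caller still learns the previous tail so it can
link behind its predecessor in the MCS queue.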