powerpc/qspinlock: add mcs queueing for contended waiters

author Nicholas Piggin <npiggin@gmail.com>

Sat, 26 Nov 2022 09:59:16 +0000 (19:59 +1000)

committer Michael Ellerman <mpe@ellerman.id.au>

Fri, 2 Dec 2022 06:48:49 +0000 (17:48 +1100)
author Nicholas Piggin <npiggin@gmail.com>
Sat, 26 Nov 2022 09:59:16 +0000 (19:59 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Fri, 2 Dec 2022 06:48:49 +0000 (17:48 +1100)
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h

index 5e62573..6946dba 100644 (file)
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -19,12 +19,12 @@ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
  
  static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
  {
-       return 0;
+       return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK);
  }
  
  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
  {
-       return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0;
+       return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0;
  }
  
  void queued_spin_lock_slowpath(struct qspinlock *lock);
@@ -37,7 +37,11 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
  
  static inline void queued_spin_unlock(struct qspinlock *lock)
  {
-       atomic_set_release(&lock->val, 0);
+       for (;;) {
+               int val = atomic_read(&lock->val);
+               if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
+                       return;
+       }
  }
  
  #define arch_spin_is_locked(l)         queued_spin_is_locked(l)
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h

index 59606bc..20a36df 100644 (file)
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -10,4 +10,27 @@ typedef struct qspinlock {
  
  #define        __ARCH_SPIN_LOCK_UNLOCKED       { .val = ATOMIC_INIT(0) }
  
+/*
+ * Bitfields in the lock word:
+ *
+ *     0: locked bit
+ *  1-16: unused bits
+ * 17-31: tail cpu (+1)
+ */
+#define        _Q_SET_MASK(type)       (((1U << _Q_ ## type ## _BITS) - 1)\
+                                     << _Q_ ## type ## _OFFSET)
+/* 0x00000001 */
+#define _Q_LOCKED_OFFSET       0
+#define _Q_LOCKED_BITS         1
+#define _Q_LOCKED_VAL          (1U << _Q_LOCKED_OFFSET)
+
+/* 0xfffe0000 */
+#define _Q_TAIL_CPU_OFFSET     17
+#define _Q_TAIL_CPU_BITS       15
+#define _Q_TAIL_CPU_MASK       _Q_SET_MASK(TAIL_CPU)
+
+#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
  #endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c

index 1c669b5..8650462 100644 (file)
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -1,12 +1,193 @@
  // SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
  #include <linux/export.h>
-#include <linux/processor.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
  #include <asm/qspinlock.h>
  
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+#define MAX_NODES      4
+
+struct qnode {
+       struct qnode    *next;
+       struct qspinlock *lock;
+       u8              locked; /* 1 if lock acquired */
+};
+
+struct qnodes {
+       int             count;
+       struct qnode nodes[MAX_NODES];
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+
+static inline int encode_tail_cpu(int cpu)
+{
+       return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+}
+
+static inline int decode_tail_cpu(int val)
+{
+       return (val >> _Q_TAIL_CPU_OFFSET) - 1;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * @tail then clear it, otherwise leave it unchanged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ */
+static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail)
+{
+       int val = atomic_read(&lock->val);
+
+       BUG_ON(val & _Q_LOCKED_VAL);
+
+       /* If we're the last queued, must clean up the tail. */
+       if ((val & _Q_TAIL_CPU_MASK) == tail) {
+               if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val)
+                       return val;
+               /* Another waiter must have enqueued */
+               val = atomic_read(&lock->val);
+               BUG_ON(val & _Q_LOCKED_VAL);
+       }
+
+       /* We must be the owner, just set the lock bit and acquire */
+       atomic_or(_Q_LOCKED_VAL, &lock->val);
+       __atomic_acquire_fence();
+
+       return val;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing the node; it pairs with
+ * the acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ */
+static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail)
+{
+       for (;;) {
+               int val = atomic_read(&lock->val);
+               int newval = (val & ~_Q_TAIL_CPU_MASK) | tail;
+               int old;
+
+               old = atomic_cmpxchg_release(&lock->val, val, newval);
+               if (old == val)
+                       return old;
+       }
+}
+
+static struct qnode *get_tail_qnode(struct qspinlock *lock, int val)
+{
+       int cpu = decode_tail_cpu(val);
+       struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+       int idx;
+
+       /*
+        * After publishing the new tail and finding a previous tail in the
+        * previous val (which is the control dependency), this barrier
+        * orders the release barrier in publish_tail_cpu performed by the
+        * last CPU, with subsequently looking at its qnode structures
+        * after the barrier.
+        */
+       smp_acquire__after_ctrl_dep();
+
+       for (idx = 0; idx < MAX_NODES; idx++) {
+               struct qnode *qnode = &qnodesp->nodes[idx];
+               if (qnode->lock == lock)
+                       return qnode;
+       }
+
+       BUG();
+}
+
+static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
  {
-       while (!queued_spin_trylock(lock))
+       struct qnodes *qnodesp;
+       struct qnode *next, *node;
+       int val, old, tail;
+       int idx;
+
+       BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+       qnodesp = this_cpu_ptr(&qnodes);
+       if (unlikely(qnodesp->count >= MAX_NODES)) {
+               while (!queued_spin_trylock(lock))
+                       cpu_relax();
+               return;
+       }
+
+       idx = qnodesp->count++;
+       /*
+        * Ensure that we increment qnodesp->count before initialising
+        * the actual node. If the compiler is kind enough to reorder these
+        * stores, then an IRQ could overwrite our assignments.
+        */
+       barrier();
+       node = &qnodesp->nodes[idx];
+       node->next = NULL;
+       node->lock = lock;
+       node->locked = 0;
+
+       tail = encode_tail_cpu(smp_processor_id());
+
+       old = publish_tail_cpu(lock, tail);
+
+       /*
+        * If there was a previous node; link it and wait until reaching the
+        * head of the waitqueue.
+        */
+       if (old & _Q_TAIL_CPU_MASK) {
+               struct qnode *prev = get_tail_qnode(lock, old);
+
+               /* Link @node into the waitqueue. */
+               WRITE_ONCE(prev->next, node);
+
+               /* Wait for mcs node lock to be released */
+               while (!node->locked)
+                       cpu_relax();
+
+               smp_rmb(); /* acquire barrier for the mcs lock */
+       }
+
+       /* We're at the head of the waitqueue, wait for the lock. */
+       for (;;) {
+               val = atomic_read(&lock->val);
+               if (!(val & _Q_LOCKED_VAL))
+                       break;
+
+               cpu_relax();
+       }
+
+       /* If we're the last queued, must clean up the tail. */
+       old = set_locked_clean_tail(lock, tail);
+       if ((old & _Q_TAIL_CPU_MASK) == tail)
+               goto release; /* We were the tail, no next. */
+
+       /* There is a next, must wait for node->next != NULL (MCS protocol) */
+       while (!(next = READ_ONCE(node->next)))
                 cpu_relax();
+
+       /*
+        * Unlock the next mcs waiter node. Release barrier is not required
+        * here because the acquirer is only accessing the lock word, and
+        * the acquire barrier we took the lock with orders that update vs
+        * this store to locked. The corresponding barrier is the smp_rmb()
+        * acquire barrier for mcs lock, above.
+        */
+       WRITE_ONCE(next->locked, 1);
+
+release:
+       qnodesp->count--; /* release the node */
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+       queued_spin_lock_mcs_queue(lock);
  }
  EXPORT_SYMBOL(queued_spin_lock_slowpath);
author	Nicholas Piggin <npiggin@gmail.com>
	Sat, 26 Nov 2022 09:59:16 +0000 (19:59 +1000)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Fri, 2 Dec 2022 06:48:49 +0000 (17:48 +1100)
arch/powerpc/include/asm/qspinlock.h		patch \| blob \| history
arch/powerpc/include/asm/qspinlock_types.h		patch \| blob \| history
arch/powerpc/lib/qspinlock.c		patch \| blob \| history