ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
author	Vineet Gupta <vgupta@synopsys.com>
Tue, 14 Jul 2015 14:20:18 +0000 (19:50 +0530)
committer	Vineet Gupta <vgupta@synopsys.com>
Tue, 4 Aug 2015 03:56:34 +0000 (09:26 +0530)
This is to work around the llock/scond livelock

HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
coherency transactions in the SCU. The exclusive line state keeps rotating
among contending cores, leading to a never-ending cycle. So break the cycle
by deferring the retry of a failed exclusive access (SCOND). The actual
delay needed is a function of the number of contending cores as well as the
unrelated coherency traffic from other cores. To keep the code simple, start
off with a small delay of 1, which suffices in most cases, and double the
delay on every further failure. Eventually the delay is long enough for the
coherency pipeline to drain, and a subsequent exclusive access succeeds.
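
For illustration only, the policy boils down to the following user-space C
model (a sketch, not the kernel code: the __atomic builtins stand in for the
LLOCK/SCOND pair, and atomic_add_with_backoff() is an invented name; the real
implementation is the inline asm added by this patch):

  #include <stdbool.h>

  /* A failed compare-exchange here plays the role of a failed SCOND. */
  static void atomic_add_with_backoff(unsigned int *ctr, unsigned int i)
  {
          unsigned int delay = 1;

          for (;;) {
                  unsigned int old = __atomic_load_n(ctr, __ATOMIC_RELAXED);

                  if (__atomic_compare_exchange_n(ctr, &old, old + i, false,
                                                  __ATOMIC_RELAXED,
                                                  __ATOMIC_RELAXED))
                          return;                 /* "SCOND" succeeded */

                  /* failed: busy-wait 'delay' iterations before retrying */
                  for (unsigned int tmp = delay; tmp; tmp--)
                          __asm__ __volatile__("" ::: "memory");

                  /* then double the delay, restarting at 1 on overflow */
                  delay *= 2;
                  if (!delay)
                          delay = 1;
          }
  }

The SCOND_FAIL_RETRY_* asm fragments below express the same policy.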

Link: http://lkml.kernel.org/r/1438612568-28265-1-git-send-email-vgupta@synopsys.com
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
arch/arc/Kconfig
arch/arc/include/asm/atomic.h
arch/arc/include/asm/spinlock.h
arch/arc/kernel/setup.c

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a5fccdf..bd4670d 100644
@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
        default y
        depends on !ARC_CANT_LLSC
 
+config ARC_STAR_9000923308
+       bool "Workaround for llock/scond livelock"
+       default y
+       depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
+
 config ARC_HAS_SWAPE
        bool "Insn: SWAPE (endian-swap)"
        default y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 3dd36c1..629dfd0 100644
 
 #define atomic_set(v, i) (((v)->counter) = (i))
 
+#ifdef CONFIG_ARC_STAR_9000923308
+
+#define SCOND_FAIL_RETRY_VAR_DEF                                               \
+       unsigned int delay = 1, tmp;                                            \
+
+#define SCOND_FAIL_RETRY_ASM                                                   \
+       "       bz      4f                      \n"                             \
+       "   ; --- scond fail delay ---          \n"                             \
+       "       mov     %[tmp], %[delay]        \n"     /* tmp = delay */       \
+       "2:     brne.d  %[tmp], 0, 2b           \n"     /* while (tmp != 0) */  \
+       "       sub     %[tmp], %[tmp], 1       \n"     /* tmp-- */             \
+       "       asl.f   %[delay], %[delay], 1   \n"     /* delay *= 2 */        \
+       "       mov.z   %[delay], 1             \n"     /* handle overflow */   \
+       "       b       1b                      \n"     /* start over */        \
+       "4: ; --- success ---                   \n"                             \
+
+#define SCOND_FAIL_RETRY_VARS                                                  \
+         ,[delay] "+&r" (delay),[tmp] "=&r"    (tmp)                           \
+
+#else  /* !CONFIG_ARC_STAR_9000923308 */
+
+#define SCOND_FAIL_RETRY_VAR_DEF
+
+#define SCOND_FAIL_RETRY_ASM                                                   \
+       "       bnz     1b                      \n"                             \
+
+#define SCOND_FAIL_RETRY_VARS
+
+#endif
+
 #define ATOMIC_OP(op, c_op, asm_op)                                    \
 static inline void atomic_##op(int i, atomic_t *v)                     \
 {                                                                      \
-       unsigned int val;                                               \
+       unsigned int val;                                               \
+       SCOND_FAIL_RETRY_VAR_DEF                                        \
                                                                        \
        __asm__ __volatile__(                                           \
        "1:     llock   %[val], [%[ctr]]                \n"             \
        "       " #asm_op " %[val], %[val], %[i]        \n"             \
        "       scond   %[val], [%[ctr]]                \n"             \
-       "       bnz     1b                              \n"             \
+       "                                               \n"             \
+       SCOND_FAIL_RETRY_ASM                                            \
+                                                                       \
        : [val] "=&r"   (val) /* Early clobber to prevent reg reuse */  \
+         SCOND_FAIL_RETRY_VARS                                         \
        : [ctr] "r"     (&v->counter), /* Not "m": llock only supports reg direct addr mode */  \
          [i]   "ir"    (i)                                             \
        : "cc");                                                        \
@@ -42,7 +76,8 @@ static inline void atomic_##op(int i, atomic_t *v)                    \
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
 static inline int atomic_##op##_return(int i, atomic_t *v)             \
 {                                                                      \
-       unsigned int val;                                               \
+       unsigned int val;                                               \
+       SCOND_FAIL_RETRY_VAR_DEF                                        \
                                                                        \
        /*                                                              \
         * Explicit full memory barrier needed before/after as          \
@@ -54,8 +89,11 @@ static inline int atomic_##op##_return(int i, atomic_t *v)           \
        "1:     llock   %[val], [%[ctr]]                \n"             \
        "       " #asm_op " %[val], %[val], %[i]        \n"             \
        "       scond   %[val], [%[ctr]]                \n"             \
-       "       bnz     1b                              \n"             \
+       "                                               \n"             \
+       SCOND_FAIL_RETRY_ASM                                            \
+                                                                       \
        : [val] "=&r"   (val)                                           \
+         SCOND_FAIL_RETRY_VARS                                         \
        : [ctr] "r"     (&v->counter),                                  \
          [i]   "ir"    (i)                                             \
        : "cc");                                                        \
@@ -142,6 +180,9 @@ ATOMIC_OP(and, &=, and)
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
 
 /**
  * __atomic_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 9fd5a02..2a84525 100644
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
+/*
+ * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+ */
+#ifndef CONFIG_ARC_STAR_9000923308
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
        unsigned int val;
@@ -233,6 +238,294 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
        smp_mb();
 }
 
+#else  /* CONFIG_ARC_STAR_9000923308 */
+
+/*
+ * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+ * coherency transactions in the SCU. The exclusive line state keeps rotating
+ * among contending cores, leading to a never-ending cycle. So break the cycle
+ * by deferring the retry of a failed exclusive access (SCOND). The actual
+ * delay needed is a function of the number of contending cores as well as
+ * the unrelated coherency traffic from other cores. To keep the code simple,
+ * start off with a small delay of 1, which suffices in most cases, and double
+ * the delay on every further failure. Eventually the delay is long enough for
+ * the coherency pipeline to drain, and a subsequent exclusive access succeeds.
+ */
+
+#define SCOND_FAIL_RETRY_VAR_DEF                                               \
+       unsigned int delay, tmp;                                                \
+
+#define SCOND_FAIL_RETRY_ASM                                                   \
+       "   ; --- scond fail delay ---          \n"                             \
+       "       mov     %[tmp], %[delay]        \n"     /* tmp = delay */       \
+       "2:     brne.d  %[tmp], 0, 2b           \n"     /* while (tmp != 0) */  \
+       "       sub     %[tmp], %[tmp], 1       \n"     /* tmp-- */             \
+       "       asl.f   %[delay], %[delay], 1   \n"     /* delay *= 2 */        \
+       "       mov.z   %[delay], 1             \n"     /* handle overflow */   \
+       "       b       1b                      \n"     /* start over */        \
+       "                                       \n"                             \
+       "4: ; --- done ---                      \n"                             \
+
+#define SCOND_FAIL_RETRY_VARS                                                  \
+         ,[delay] "=&r" (delay), [tmp] "=&r"   (tmp)                           \
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+       unsigned int val;
+       SCOND_FAIL_RETRY_VAR_DEF;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "0:     mov     %[delay], 1             \n"
+       "1:     llock   %[val], [%[slock]]      \n"
+       "       breq    %[val], %[LOCKED], 1b   \n"     /* spin while LOCKED */
+       "       scond   %[LOCKED], [%[slock]]   \n"     /* acquire */
+       "       bz      4f                      \n"     /* done */
+       "                                       \n"
+       SCOND_FAIL_RETRY_ASM
+
+       : [val]         "=&r"   (val)
+         SCOND_FAIL_RETRY_VARS
+       : [slock]       "r"     (&(lock->slock)),
+         [LOCKED]      "r"     (__ARCH_SPIN_LOCK_LOCKED__)
+       : "memory", "cc");
+
+       smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+       unsigned int val, got_it = 0;
+       SCOND_FAIL_RETRY_VAR_DEF;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "0:     mov     %[delay], 1             \n"
+       "1:     llock   %[val], [%[slock]]      \n"
+       "       breq    %[val], %[LOCKED], 4f   \n"     /* already LOCKED, just bail */
+       "       scond   %[LOCKED], [%[slock]]   \n"     /* acquire */
+       "       bz.d    4f                      \n"
+       "       mov.z   %[got_it], 1            \n"     /* got it */
+       "                                       \n"
+       SCOND_FAIL_RETRY_ASM
+
+       : [val]         "=&r"   (val),
+         [got_it]      "+&r"   (got_it)
+         SCOND_FAIL_RETRY_VARS
+       : [slock]       "r"     (&(lock->slock)),
+         [LOCKED]      "r"     (__ARCH_SPIN_LOCK_LOCKED__)
+       : "memory", "cc");
+
+       smp_mb();
+
+       return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+       smp_mb();
+
+       lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+       smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+       unsigned int val;
+       SCOND_FAIL_RETRY_VAR_DEF;
+
+       smp_mb();
+
+       /*
+        * zero means writer holds the lock exclusively, deny Reader.
+        * Otherwise grant lock to first/subseq reader
+        *
+        *      if (rw->counter > 0) {
+        *              rw->counter--;
+        *              ret = 1;
+        *      }
+        */
+
+       __asm__ __volatile__(
+       "0:     mov     %[delay], 1             \n"
+       "1:     llock   %[val], [%[rwlock]]     \n"
+       "       brls    %[val], %[WR_LOCKED], 1b\n"     /* <= 0: spin while write locked */
+       "       sub     %[val], %[val], 1       \n"     /* reader lock */
+       "       scond   %[val], [%[rwlock]]     \n"
+       "       bz      4f                      \n"     /* done */
+       "                                       \n"
+       SCOND_FAIL_RETRY_ASM
+
+       : [val]         "=&r"   (val)
+         SCOND_FAIL_RETRY_VARS
+       : [rwlock]      "r"     (&(rw->counter)),
+         [WR_LOCKED]   "ir"    (0)
+       : "memory", "cc");
+
+       smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+       unsigned int val, got_it = 0;
+       SCOND_FAIL_RETRY_VAR_DEF;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "0:     mov     %[delay], 1             \n"
+       "1:     llock   %[val], [%[rwlock]]     \n"
+       "       brls    %[val], %[WR_LOCKED], 4f\n"     /* <= 0: already write locked, bail */
+       "       sub     %[val], %[val], 1       \n"     /* counter-- */
+       "       scond   %[val], [%[rwlock]]     \n"
+       "       bz.d    4f                      \n"
+       "       mov.z   %[got_it], 1            \n"     /* got it */
+       "                                       \n"
+       SCOND_FAIL_RETRY_ASM
+
+       : [val]         "=&r"   (val),
+         [got_it]      "+&r"   (got_it)
+         SCOND_FAIL_RETRY_VARS
+       : [rwlock]      "r"     (&(rw->counter)),
+         [WR_LOCKED]   "ir"    (0)
+       : "memory", "cc");
+
+       smp_mb();
+
+       return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+       unsigned int val;
+       SCOND_FAIL_RETRY_VAR_DEF;
+
+       smp_mb();
+
+       /*
+        * If reader(s) hold the lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+        * deny the writer; otherwise, if unlocked, grant it to the writer.
+        * Hence the claim that Linux rwlocks are unfair to writers:
+        * they can be starved for an indefinite time by readers.
+        *
+        *      if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+        *              rw->counter = 0;
+        *              ret = 1;
+        *      }
+        */
+
+       __asm__ __volatile__(
+       "0:     mov     %[delay], 1             \n"
+       "1:     llock   %[val], [%[rwlock]]     \n"
+       "       brne    %[val], %[UNLOCKED], 1b \n"     /* while !UNLOCKED spin */
+       "       mov     %[val], %[WR_LOCKED]    \n"
+       "       scond   %[val], [%[rwlock]]     \n"
+       "       bz      4f                      \n"
+       "                                       \n"
+       SCOND_FAIL_RETRY_ASM
+
+       : [val]         "=&r"   (val)
+         SCOND_FAIL_RETRY_VARS
+       : [rwlock]      "r"     (&(rw->counter)),
+         [UNLOCKED]    "ir"    (__ARCH_RW_LOCK_UNLOCKED__),
+         [WR_LOCKED]   "ir"    (0)
+       : "memory", "cc");
+
+       smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+       unsigned int val, got_it = 0;
+       SCOND_FAIL_RETRY_VAR_DEF;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "0:     mov     %[delay], 1             \n"
+       "1:     llock   %[val], [%[rwlock]]     \n"
+       "       brne    %[val], %[UNLOCKED], 4f \n"     /* !UNLOCKED, bail */
+       "       mov     %[val], %[WR_LOCKED]    \n"
+       "       scond   %[val], [%[rwlock]]     \n"
+       "       bz.d    4f                      \n"
+       "       mov.z   %[got_it], 1            \n"     /* got it */
+       "                                       \n"
+       SCOND_FAIL_RETRY_ASM
+
+       : [val]         "=&r"   (val),
+         [got_it]      "+&r"   (got_it)
+         SCOND_FAIL_RETRY_VARS
+       : [rwlock]      "r"     (&(rw->counter)),
+         [UNLOCKED]    "ir"    (__ARCH_RW_LOCK_UNLOCKED__),
+         [WR_LOCKED]   "ir"    (0)
+       : "memory", "cc");
+
+       smp_mb();
+
+       return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+       unsigned int val;
+
+       smp_mb();
+
+       /*
+        * rw->counter++;
+        */
+       __asm__ __volatile__(
+       "1:     llock   %[val], [%[rwlock]]     \n"
+       "       add     %[val], %[val], 1       \n"
+       "       scond   %[val], [%[rwlock]]     \n"
+       "       bnz     1b                      \n"
+       "                                       \n"
+       : [val]         "=&r"   (val)
+       : [rwlock]      "r"     (&(rw->counter))
+       : "memory", "cc");
+
+       smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+       unsigned int val;
+
+       smp_mb();
+
+       /*
+        * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+        */
+       __asm__ __volatile__(
+       "1:     llock   %[val], [%[rwlock]]     \n"
+       "       scond   %[UNLOCKED], [%[rwlock]]\n"
+       "       bnz     1b                      \n"
+       "                                       \n"
+       : [val]         "=&r"   (val)
+       : [rwlock]      "r"     (&(rw->counter)),
+         [UNLOCKED]    "r"     (__ARCH_RW_LOCK_UNLOCKED__)
+       : "memory", "cc");
+
+       smp_mb();
+}
+
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
+
+#endif /* CONFIG_ARC_STAR_9000923308 */
+
 #else  /* !CONFIG_ARC_HAS_LLSC */
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index f2f771b..cabde9d 100644
@@ -336,6 +336,10 @@ static void arc_chk_core_config(void)
                pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
        else if (!cpu->extn.fpu_dp && fpu_enabled)
                panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+       if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+           !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+               panic("llock/scond livelock workaround missing\n");
 }
 
 /*