csky: atomic: Add conditional atomic operations' optimization
authorGuo Ren <guoren@linux.alibaba.com>
Wed, 13 Apr 2022 07:27:52 +0000 (15:27 +0800)
committerGuo Ren <guoren@linux.alibaba.com>
Mon, 25 Apr 2022 05:51:43 +0000 (13:51 +0800)
Add conditional atomic operations' optimization:
 - arch_atomic_fetch_add_unless
 - arch_atomic_inc_unless_negative
 - arch_atomic_dec_unless_positive
 - arch_atomic_dec_if_positive

Comments by Boqun:

FWIW, you probably need to check whether a barrier instruction inside
an lr/sc loop is a good thing. IIUC, the execution time of a barrier
instruction is determined by the status of the store buffers and invalidate
queues (and probably other state), so it may increase the execution
time of the lr/sc loop and make it unlikely to succeed. But this really
depends on how the arch executes these instructions.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
arch/csky/include/asm/atomic.h

index 56c9dc8..60406ef 100644 (file)
@@ -100,6 +100,101 @@ ATOMIC_OPS(xor)
 
 #undef ATOMIC_FETCH_OP
 
+/*
+ * arch_atomic_fetch_add_unless - atomically add @a to @v unless the current
+ * value equals @u; returns the value observed before the (possible) add.
+ *
+ * ldex.w/stex.w form an LL/SC loop.  cmpne/bf exits early to 2: when the
+ * loaded value equals @u, skipping both the store and the trailing
+ * FULL_FENCE (a failed conditional atomic needs no ordering).  stex.w
+ * writes its success flag back into %1, so "bez %1, 1b" retries the loop
+ * when the exclusive store lost its reservation.  RELEASE_FENCE /
+ * FULL_FENCE are the barrier macros defined earlier in this file.
+ */
+static __always_inline int
+arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+       int prev, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%3)        \n"
+               "       cmpne           %0, %4          \n"
+               "       bf              2f              \n"
+               "       mov             %1, %0          \n"
+               "       add             %1, %2          \n"
+               "       stex.w          %1, (%3)        \n"
+               "       bez             %1, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (prev), "=&r" (tmp)
+               : "r" (a), "r" (&v->counter), "r" (u)
+               : "memory");
+
+       return prev;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+
+static __always_inline bool
+arch_atomic_inc_unless_negative(atomic_t *v)
+{
+       int rc, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%2)        \n"
+               "       movi            %1, 0           \n"
+               "       blz             %0, 2f          \n"
+               "       movi            %1, 1           \n"
+               "       addi            %0, 1           \n"
+               "       stex.w          %0, (%2)        \n"
+               "       bez             %0, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (tmp), "=&r" (rc)
+               : "r" (&v->counter)
+               : "memory");
+
+       return tmp ? true : false;
+
+}
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+
+static __always_inline bool
+arch_atomic_dec_unless_positive(atomic_t *v)
+{
+       int rc, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%2)        \n"
+               "       movi            %1, 0           \n"
+               "       bhz             %0, 2f          \n"
+               "       movi            %1, 1           \n"
+               "       subi            %0, 1           \n"
+               "       stex.w          %0, (%2)        \n"
+               "       bez             %0, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (tmp), "=&r" (rc)
+               : "r" (&v->counter)
+               : "memory");
+
+       return tmp ? true : false;
+}
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+
+/*
+ * arch_atomic_dec_if_positive - decrement @v only if the result would be
+ * non-negative.  Returns the decremented value on success; a negative
+ * return means no decrement was performed.
+ *
+ * %0 (dec) holds the loaded value and is never clobbered after the load,
+ * so "dec - 1" is the new value on the success path and a negative number
+ * on the "blz" early exit.  %1 (tmp) carries dec-1 into stex.w, after
+ * which it holds the store's success flag driving the retry ("bez").
+ * The early exit skips FULL_FENCE, as a failed conditional atomic needs
+ * no ordering.
+ */
+static __always_inline int
+arch_atomic_dec_if_positive(atomic_t *v)
+{
+       int dec, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%2)        \n"
+               "       subi            %1, %0, 1       \n"
+               "       blz             %1, 2f          \n"
+               "       stex.w          %1, (%2)        \n"
+               "       bez             %1, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (dec), "=&r" (tmp)
+               : "r" (&v->counter)
+               : "memory");
+
+       return dec - 1;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+
 #define ATOMIC_OP()                                                    \
 static __always_inline                                                 \
 int arch_atomic_xchg_relaxed(atomic_t *v, int n)                       \