arm64: futex: Bound number of LDXR/STXR loops in FUTEX_WAKE_OP
author Will Deacon <will.deacon@arm.com>
Mon, 8 Apr 2019 13:23:17 +0000 (14:23 +0100)
committer Will Deacon <will.deacon@arm.com>
Fri, 26 Apr 2019 12:57:43 +0000 (13:57 +0100)
Our futex implementation makes use of LDXR/STXR loops to perform atomic
updates to user memory from atomic context. This can lead to latency
problems if we end up spinning around the LL/SC sequence at the expense
of doing something useful.

Rework our futex atomic operations so that we return -EAGAIN if we fail
to update the futex word after 128 attempts. The core futex code will
reschedule if necessary and we'll try again later.
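
As a rough illustration of the new behaviour, here is a minimal
userspace sketch of a bounded atomic update built from compiler
builtins (bounded_futex_add and its shape are invented for
illustration; the real change is the inline asm in the diff below):

    #include <errno.h>
    #include <stdint.h>

    #define FUTEX_MAX_LOOPS 128

    /* Retry a weak CAS (an LDXR/STXR pair on arm64 without LSE atomics)
     * a bounded number of times, then give up with -EAGAIN so that the
     * caller can reschedule instead of spinning indefinitely. */
    static int bounded_futex_add(uint32_t *uaddr, uint32_t oparg,
                                 uint32_t *oldval)
    {
            unsigned int loops = FUTEX_MAX_LOOPS;
            uint32_t old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

            while (loops--) {
                    /* On failure, 'old' is refreshed with the current
                     * value, so the new sum is recomputed each time. */
                    if (__atomic_compare_exchange_n(uaddr, &old, old + oparg,
                                                    1, __ATOMIC_SEQ_CST,
                                                    __ATOMIC_RELAXED)) {
                            *oldval = old;
                            return 0;
                    }
            }
            return -EAGAIN; /* caller reschedules and retries later */
    }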

Cc: <stable@kernel.org>
Fixes: 6170a97460db ("arm64: Atomic operations")
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/futex.h

index e1d95f0..2d78ea6 100644
 
 #include <asm/errno.h>
 
+#define FUTEX_MAX_LOOPS        128 /* What's the largest number you can think of? */
+
 #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)                \
 do {                                                                   \
+       unsigned int loops = FUTEX_MAX_LOOPS;                           \
+                                                                       \
        uaccess_enable();                                               \
        asm volatile(                                                   \
 "      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w1, %2\n"                                              \
        insn "\n"                                                       \
 "2:    stlxr   %w0, %w3, %2\n"                                         \
-"      cbnz    %w0, 1b\n"                                              \
-"      dmb     ish\n"                                                  \
+"      cbz     %w0, 3f\n"                                              \
+"      sub     %w4, %w4, %w0\n"                                        \
+"      cbnz    %w4, 1b\n"                                              \
+"      mov     %w0, %w7\n"                                             \
 "3:\n"                                                                 \
+"      dmb     ish\n"                                                  \
 "      .pushsection .fixup,\"ax\"\n"                                   \
 "      .align  2\n"                                                    \
-"4:    mov     %w0, %w5\n"                                             \
+"4:    mov     %w0, %w6\n"                                             \
 "      b       3b\n"                                                   \
 "      .popsection\n"                                                  \
        _ASM_EXTABLE(1b, 4b)                                            \
        _ASM_EXTABLE(2b, 4b)                                            \
-       : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)       \
-       : "r" (oparg), "Ir" (-EFAULT)                                   \
+       : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp),      \
+         "+r" (loops)                                                  \
+       : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN)                   \
        : "memory");                                                    \
        uaccess_disable();                                              \
 } while (0)
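
In the macro above, STLXR writes its status flag (0 on success, 1 on
failure to gain exclusive access) to %w0, so "sub %w4, %w4, %w0" only
decrements the loop counter when the store-exclusive actually failed,
and the "dmb ish" now sits after the 3: label so it runs on the
success, -EAGAIN and fault paths alike. A C-style rendition of the new
control flow (futex_op_flow is invented; ldxr()/stlxr() are stand-in
stubs for the exclusive-access instructions, which can fail under
contention):

    #include <errno.h>
    #include <stdint.h>

    /* Stand-in stubs: on real hardware these are the LDXR/STLXR
     * instructions, and stlxr() may return 1 under contention. */
    static uint32_t ldxr(uint32_t *p)              { return *p; }
    static int      stlxr(uint32_t *p, uint32_t v) { *p = v; return 0; }

    static int futex_op_flow(uint32_t *uaddr, uint32_t oparg)
    {
            unsigned int loops = 128;       /* FUTEX_MAX_LOOPS */
            int ret = 0, status;
            uint32_t oldval, tmp;

            do {
                    oldval = ldxr(uaddr);           /* 1: ldxr  %w1, %2   */
                    tmp = oldval + oparg;           /* insn (FUTEX_OP_ADD) */
                    status = stlxr(uaddr, tmp);     /* 2: stlxr %w0, %w3  */
                    if (status == 0)
                            break;                  /* cbz  %w0, 3f       */
                    loops -= status;                /* sub  %w4, %w4, %w0 */
            } while (loops);                        /* cbnz %w4, 1b       */

            if (status)
                    ret = -EAGAIN;                  /* mov  %w0, %w7      */
            /* 3: dmb ish -- runs whether or not the update succeeded */
            (void)oldval;   /* the real macro returns this via *oval */
            return ret;
    }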
@@ -57,23 +65,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
 
        switch (op) {
        case FUTEX_OP_SET:
-               __futex_atomic_op("mov  %w3, %w4",
+               __futex_atomic_op("mov  %w3, %w5",
                                  ret, oldval, uaddr, tmp, oparg);
                break;
        case FUTEX_OP_ADD:
-               __futex_atomic_op("add  %w3, %w1, %w4",
+               __futex_atomic_op("add  %w3, %w1, %w5",
                                  ret, oldval, uaddr, tmp, oparg);
                break;
        case FUTEX_OP_OR:
-               __futex_atomic_op("orr  %w3, %w1, %w4",
+               __futex_atomic_op("orr  %w3, %w1, %w5",
                                  ret, oldval, uaddr, tmp, oparg);
                break;
        case FUTEX_OP_ANDN:
-               __futex_atomic_op("and  %w3, %w1, %w4",
+               __futex_atomic_op("and  %w3, %w1, %w5",
                                  ret, oldval, uaddr, tmp, ~oparg);
                break;
        case FUTEX_OP_XOR:
-               __futex_atomic_op("eor  %w3, %w1, %w4",
+               __futex_atomic_op("eor  %w3, %w1, %w5",
                                  ret, oldval, uaddr, tmp, oparg);
                break;
        default:
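
The only change in the switch is the renumbering of %w4 to %w5: adding
"+r" (loops) as output operand 4 shifts every input operand up by one.
For illustration, the FUTEX_OP_ADD case now binds as:

    /* Operand bindings after the change (FUTEX_OP_ADD shown):
     *   outputs: %0 ret, %1 oldval, %2 *uaddr, %3 tmp, %4 loops
     *   inputs:  %5 oparg, %6 -EFAULT, %7 -EAGAIN
     */
    __futex_atomic_op("add  %w3, %w1, %w5",
                      ret, oldval, uaddr, tmp, oparg);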
@@ -93,6 +101,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
                              u32 oldval, u32 newval)
 {
        int ret = 0;
+       unsigned int loops = FUTEX_MAX_LOOPS;
        u32 val, tmp;
        u32 __user *uaddr;
 
@@ -104,20 +113,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
 "      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
-"      sub     %w3, %w1, %w4\n"
-"      cbnz    %w3, 3f\n"
-"2:    stlxr   %w3, %w5, %2\n"
-"      cbnz    %w3, 1b\n"
-"      dmb     ish\n"
+"      sub     %w3, %w1, %w5\n"
+"      cbnz    %w3, 4f\n"
+"2:    stlxr   %w3, %w6, %2\n"
+"      cbz     %w3, 3f\n"
+"      sub     %w4, %w4, %w3\n"
+"      cbnz    %w4, 1b\n"
+"      mov     %w0, %w8\n"
 "3:\n"
+"      dmb     ish\n"
+"4:\n"
 "      .pushsection .fixup,\"ax\"\n"
-"4:    mov     %w0, %w6\n"
-"      b       3b\n"
+"5:    mov     %w0, %w7\n"
+"      b       4b\n"
 "      .popsection\n"
-       _ASM_EXTABLE(1b, 4b)
-       _ASM_EXTABLE(2b, 4b)
-       : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
-       : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
+       _ASM_EXTABLE(1b, 5b)
+       _ASM_EXTABLE(2b, 5b)
+       : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
+       : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
        : "memory");
        uaccess_disable();
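
In the cmpxchg sequence, the extra 4: label lets a failed comparison
skip both the store and the barrier, exactly as before; only the
exhausted-loops path is new. On the caller side, "the core futex code
will reschedule if necessary" amounts to a retry loop along these
lines (a simplified sketch, not the exact kernel/futex.c code):

    /* Simplified sketch of a core-futex caller handling the new
     * -EAGAIN return; the real handling lives in kernel/futex.c. */
    for (;;) {
            ret = futex_atomic_op_inuser(op, uaddr);
            if (ret != -EAGAIN)
                    break;          /* done: success or a real error */
            cond_resched();         /* give the scheduler a chance, retry */
    }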