riscv: fix race when vmap stack overflows
Author:     Jisheng Zhang <jszhang@kernel.org>
AuthorDate: Sun, 30 Oct 2022 12:45:17 +0000 (20:45 +0800)
Commit:     Palmer Dabbelt <palmer@rivosinc.com>
CommitDate: Wed, 30 Nov 2022 02:16:55 +0000 (18:16 -0800)
Currently, when a vmap stack overflow is detected, riscv first switches
to the so-called shadow stack, then uses that shadow stack to call
get_overflow_stack() to pick up the per-CPU overflow stack. However,
the shadow stack is a single global buffer, so there's a race if two or
more harts overflow and use it at the same time.

To solve this race, we introduce the spin_shadow_stack variable, which
is atomically swapped between its own address and 0: when the variable
is non-zero, the shadow stack is in use; when it is zero, the shadow
stack is free to claim.
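
In C terms, the lock/unlock pairing is roughly the following (an
illustrative sketch only; the real lock side must run in assembly
because at the point of overflow SP is the only usable scratch
register, which is also why the lock's own address serves as the
"locked" value):

    /* lock: claim the shadow stack, spinning while another hart owns it */
    while (xchg_acquire(&spin_shadow_stack,
                        (unsigned long)&spin_shadow_stack))
            ;

    /* unlock: done with the shadow stack, let the next hart proceed */
    smp_store_release(&spin_shadow_stack, 0);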

Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Suggested-by: Guo Ren <guoren@kernel.org>
Reviewed-by: Guo Ren <guoren@kernel.org>
Link: https://lore.kernel.org/r/20221030124517.2370-1-jszhang@kernel.org
[Palmer: Add AQ to the swap, and also some comments.]
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/asm.h
arch/riscv/kernel/entry.S
arch/riscv/kernel/traps.c

diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 618d7c5..e15a1c9 100644
@@ -23,6 +23,7 @@
 #define REG_L          __REG_SEL(ld, lw)
 #define REG_S          __REG_SEL(sd, sw)
 #define REG_SC         __REG_SEL(sc.d, sc.w)
+#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq)
 #define REG_ASM                __REG_SEL(.dword, .word)
 #define SZREG          __REG_SEL(8, 4)
 #define LGREG          __REG_SEL(3, 2)
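
As with REG_L and REG_S above, __REG_SEL() picks the XLEN-sized
variant, so REG_AMOSWAP_AQ assembles to amoswap.d.aq on 64-bit kernels
and to amoswap.w.aq on 32-bit ones.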
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 98f5026..5fdb6ba 100644
@@ -387,6 +387,19 @@ handle_syscall_trace_exit:
 
 #ifdef CONFIG_VMAP_STACK
 handle_kernel_stack_overflow:
+       /*
+        * Takes the pseudo-spinlock for the shadow stack, in case multiple
+        * harts are concurrently overflowing their kernel stacks.  We could
+        * store any value here, but since we're overflowing the kernel stack
+        * already we only have SP to use as a scratch register.  So we just
+        * swap in the address of the spinlock, as that's definitely non-zero.
+        *
+        * Pairs with an smp_store_release() in handle_bad_stack().
+        */
+1:     la sp, spin_shadow_stack
+       REG_AMOSWAP_AQ sp, sp, (sp)
+       bnez sp, 1b
+
        la sp, shadow_stack
        addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
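
Note that SP is only pointed at the shadow stack once the
pseudo-spinlock is held; since stacks grow downward, SP is set to the
end of the shadow_stack buffer.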
 
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index bb6a450..be54cce 100644
@@ -213,11 +213,29 @@ asmlinkage unsigned long get_overflow_stack(void)
                OVERFLOW_STACK_SIZE;
 }
 
+/*
+ * A pseudo-spinlock to protect the shadow stack from being used by multiple
+ * harts concurrently.  This isn't a real spinlock because the lock side must
+ * be taken without a valid stack and with only a single register; it's only
+ * taken while in the process of panicking anyway, so the performance and
+ * error checking a proper spinlock gives us don't matter.
+ */
+unsigned long spin_shadow_stack;
+
 asmlinkage void handle_bad_stack(struct pt_regs *regs)
 {
        unsigned long tsk_stk = (unsigned long)current->stack;
        unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
 
+       /*
+        * We're done with the shadow stack by this point, as we're on the
+        * overflow stack.  Tell any other concurrent overflowing harts that
+        * they can proceed with panicking by releasing the pseudo-spinlock.
+        *
+        * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
+        */
+       smp_store_release(&spin_shadow_stack, 0);
+
        console_verbose();
 
        pr_emerg("Insufficient stack space to handle exception!\n");