From 82caf7aba107dbc0e70c330786bed9961a098ab0 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 3 Apr 2023 08:44:39 +0200 Subject: [PATCH] s390/kdump: rework invocation of DAT-off code Calling kdump kernel is a two-step process that involves invocation of the purgatory code: first time - to verify the new kernel checksum and second time - to call the new kernel itself. The purgatory code operates on real addresses and does not expect any memory protection. Therefore, before the purgatory code is entered the DAT mode is always turned off. However, it is only restored upon return from the new kernel checksum verification. In case the purgatory was called to start the new kernel and failed the control is returned to the old kernel, but the DAT mode continues staying off. The new kernel start failure is unlikely and leads to the disabled wait state anyway. Still that poses a risk, since the kernel code in general is not DAT-off safe and even calling the disabled_wait() function might crash. Introduce call_nodat() macro that allows entering DAT-off mode, calling an arbitrary function and restoring DAT mode back on. Switch all invocations of DAT-off code to that macro and avoid the above described scenario altogether. Name the call_nodat() macro in small letters after the already existing call_on_stack() and put it to the same header file. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens [hca@linux.ibm.com: some small modifications to call_nodat() macro] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 47 ++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/machine_kexec.c | 28 +++++++++++------------ 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 25e833c..1966422 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -189,4 +189,51 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task, (rettype)r2; \ }) +/* + * Use call_nodat() to call a function with DAT disabled. + * Proper sign and zero extension of function arguments is done. + * Usage: + * + * rc = call_nodat(nr, rettype, fn, t1, a1, t2, a2, ...) + * + * - nr specifies the number of function arguments of fn. + * - fn is the function to be called, where fn is a physical address. + * - rettype is the return type of fn. + * - t1, a1, ... are pairs, where t1 must match the type of the first + * argument of fn, t2 the second, etc. a1 is the corresponding + * first function argument (not name), etc. + * + * fn() is called with standard C function call ABI, with the exception + * that no useful stackframe or stackpointer is passed via register 15. + * Therefore the called function must not use r15 to access the stack. + */ +#define call_nodat(nr, rettype, fn, ...) \ +({ \ + rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = (fn); \ + psw_t psw_enter, psw_leave; \ + CALL_LARGS_##nr(__VA_ARGS__); \ + CALL_REGS_##nr; \ + \ + CALL_TYPECHECK_##nr(__VA_ARGS__); \ + psw_enter.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; \ + psw_enter.addr = (unsigned long)__fn; \ + asm volatile( \ + " epsw 0,1\n" \ + " risbg 1,0,0,31,32\n" \ + " larl 7,1f\n" \ + " stg 1,%[psw_leave]\n" \ + " stg 7,8+%[psw_leave]\n" \ + " la 7,%[psw_leave]\n" \ + " lra 7,0(7)\n" \ + " larl 1,0f\n" \ + " lra 14,0(1)\n" \ + " lpswe %[psw_enter]\n" \ + "0: lpswe 0(7)\n" \ + "1:\n" \ + : CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave) \ + : [psw_enter] "Q" (psw_enter) \ + : "7", CALL_CLOBBER_##nr); \ + (rettype)r2; \ +}) + #endif /* _ASM_S390_STACKTRACE_H */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index fb88767..f5d8abf 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -30,6 +30,7 @@ #include typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long); +typedef int (*purgatory_t)(int); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; @@ -40,11 +41,14 @@ extern const unsigned long long relocate_kernel_len; * Reset the system, copy boot CPU registers to absolute zero, * and jump to the kdump image */ -static void __do_machine_kdump(void *image) +static void __do_machine_kdump(void *data) { - int (*start_kdump)(int); + struct kimage *image = data; + purgatory_t purgatory; unsigned long prefix; + purgatory = (purgatory_t)image->start; + /* store_status() saved the prefix register to lowcore */ prefix = (unsigned long) S390_lowcore.prefixreg_save_area; @@ -59,11 +63,9 @@ static void __do_machine_kdump(void *image) memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); - __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); - start_kdump = (void *)((struct kimage *) image)->start; - start_kdump(1); + call_nodat(1, int, purgatory, int, 1); - /* Die if start_kdump returns */ + /* Die if kdump returns */ disabled_wait(); } @@ -112,13 +114,9 @@ static noinline void __machine_kdump(void *image) static int do_start_kdump(struct kimage *image) { - int (*start_kdump)(int) = (void *)image->start; - int rc; + purgatory_t purgatory = (purgatory_t)image->start; - __arch_local_irq_stnsm(0xfb); /* disable DAT */ - rc = start_kdump(0); - __arch_local_irq_stosm(0x04); /* enable DAT */ - return rc; + return call_nodat(1, int, purgatory, int, 0); } #endif /* CONFIG_CRASH_DUMP */ @@ -258,8 +256,10 @@ static void __do_machine_kexec(void *data) diag308_subcode |= DIAG308_FLAG_EI; s390_reset_system(); - __arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */ - (*(relocate_kernel_t)data_mover)(entry, image->start, diag308_subcode); + call_nodat(3, void, (relocate_kernel_t)data_mover, + unsigned long, entry, + unsigned long, image->start, + unsigned long, diag308_subcode); /* Die if kexec returns */ disabled_wait(); -- 2.7.4