s390/kdump: rework invocation of DAT-off code
author Alexander Gordeev <agordeev@linux.ibm.com>
Mon, 3 Apr 2023 06:44:39 +0000 (08:44 +0200)
committer Vasily Gorbik <gor@linux.ibm.com>
Wed, 19 Apr 2023 15:24:16 +0000 (17:24 +0200)
Calling the kdump kernel is a two-step process that involves
invoking the purgatory code twice: the first time to verify
the new kernel checksum and the second time to call the new
kernel itself.

The purgatory code operates on real addresses and does not
expect any memory protection. Therefore, before the purgatory
code is entered the DAT mode is always turned off. However,
it is only restored upon return from the new kernel checksum
verification. If the purgatory was called to start the
new kernel and failed, control is returned to the old
kernel, but DAT mode remains off.

A failure to start the new kernel is unlikely and leads to
a disabled wait state anyway. Still, that poses a risk, since
kernel code in general is not DAT-off safe and even
calling the disabled_wait() function might crash.

Introduce call_nodat() macro that allows entering DAT-off
mode, calling an arbitrary function and restoring DAT mode
back on. Switch all invocations of DAT-off code to that
macro and avoid the above described scenario altogether.

Name the call_nodat() macro in lowercase, following the
already existing call_on_stack() macro, and put it in the
same header file.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
[hca@linux.ibm.com: some small modifications to call_nodat() macro]
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
arch/s390/include/asm/stacktrace.h
arch/s390/kernel/machine_kexec.c

index 25e833c..1966422 100644 (file)
@@ -189,4 +189,51 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
        (rettype)r2;                                                    \
 })
 
+/*
+ * Use call_nodat() to call a function with DAT disabled.
+ * Proper sign and zero extension of function arguments is done.
+ * Usage: rc = call_nodat(nr, rettype, fn, t1, a1, t2, a2, ...)
+ *
+ * - nr specifies the number of function arguments of fn.
+ * - rettype is the return type of fn.
+ * - fn is the function to be called, where fn is a physical address.
+ * - t1, a1, ... are pairs, where t1 must match the type of the first
+ *   argument of fn, t2 the second, etc. a1 is the value passed as the
+ *   first function argument (the value, not a parameter name), etc.
+ *
+ * The current PSW mask and a resume address are saved in psw_leave, fn
+ * is entered via a PSW with PSW_MASK_DAT cleared, and psw_leave is loaded
+ * when fn returns, restoring the previous DAT setting. fn() uses the C
+ * function call ABI, except that no useful stackframe or stackpointer is
+ * passed via r15. Therefore fn must not use r15 to access the stack.
+ */
+#define call_nodat(nr, rettype, fn, ...)                               \
+({                                                                     \
+       rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = (fn);            \
+       psw_t psw_enter, psw_leave;                                     \
+       CALL_LARGS_##nr(__VA_ARGS__);                                   \
+       CALL_REGS_##nr;                                                 \
+                                                                       \
+       CALL_TYPECHECK_##nr(__VA_ARGS__);                               \
+       psw_enter.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;               \
+       psw_enter.addr = (unsigned long)__fn;                           \
+       asm volatile(                                                   \
+               "       epsw    0,1\n"                                  \
+               "       risbg   1,0,0,31,32\n"                          \
+               "       larl    7,1f\n"                                 \
+               "       stg     1,%[psw_leave]\n"                       \
+               "       stg     7,8+%[psw_leave]\n"                     \
+               "       la      7,%[psw_leave]\n"                       \
+               "       lra     7,0(7)\n"                               \
+               "       larl    1,0f\n"                                 \
+               "       lra     14,0(1)\n"                              \
+               "       lpswe   %[psw_enter]\n"                         \
+               "0:     lpswe   0(7)\n"                                 \
+               "1:\n"                                                  \
+               : CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave)           \
+               : [psw_enter] "Q" (psw_enter)                           \
+               : "7", CALL_CLOBBER_##nr);                              \
+       (rettype)r2;                                                    \
+})
+
 #endif /* _ASM_S390_STACKTRACE_H */
index fb88767..f5d8abf 100644 (file)
@@ -30,6 +30,7 @@
 #include <asm/sclp.h>
 
 typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);
+typedef int (*purgatory_t)(int);
 
 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
@@ -40,11 +41,14 @@ extern const unsigned long long relocate_kernel_len;
  * Reset the system, copy boot CPU registers to absolute zero,
  * and jump to the kdump image
  */
-static void __do_machine_kdump(void *image)
+static void __do_machine_kdump(void *data)
 {
-       int (*start_kdump)(int);
+       struct kimage *image = data;
+       purgatory_t purgatory;
        unsigned long prefix;
 
+       purgatory = (purgatory_t)image->start;
+
        /* store_status() saved the prefix register to lowcore */
        prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
 
@@ -59,11 +63,9 @@ static void __do_machine_kdump(void *image)
        memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
               phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
 
-       __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
-       start_kdump = (void *)((struct kimage *) image)->start;
-       start_kdump(1);
+       call_nodat(1, int, purgatory, int, 1);
 
-       /* Die if start_kdump returns */
+       /* Die if kdump returns */
        disabled_wait();
 }
 
@@ -112,13 +114,9 @@ static noinline void __machine_kdump(void *image)
 
 static int do_start_kdump(struct kimage *image)
 {
-       int (*start_kdump)(int) = (void *)image->start;
-       int rc;
+       purgatory_t purgatory = (purgatory_t)image->start;
 
-       __arch_local_irq_stnsm(0xfb); /* disable DAT */
-       rc = start_kdump(0);
-       __arch_local_irq_stosm(0x04); /* enable DAT */
-       return rc;
+       return call_nodat(1, int, purgatory, int, 0);
 }
 
 #endif /* CONFIG_CRASH_DUMP */
@@ -258,8 +256,10 @@ static void __do_machine_kexec(void *data)
                diag308_subcode |= DIAG308_FLAG_EI;
        s390_reset_system();
 
-       __arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
-       (*(relocate_kernel_t)data_mover)(entry, image->start, diag308_subcode);
+       call_nodat(3, void, (relocate_kernel_t)data_mover,
+                  unsigned long, entry,
+                  unsigned long, image->start,
+                  unsigned long, diag308_subcode);
 
        /* Die if kexec returns */
        disabled_wait();