s390: restore address space when returning to user space
author Heiko Carstens <heiko.carstens@de.ibm.com>
Fri, 17 Feb 2017 07:13:28 +0000 (08:13 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Thu, 23 Feb 2017 09:06:38 +0000 (10:06 +0100)
Unbalanced set_fs usages (e.g. early exit from a function and a
forgotten set_fs(USER_DS) call) may lead to a situation where the
secondary asce is the kernel space asce when returning to user
space. This would allow user space to modify kernel space at will.

This would only be possible with the above mentioned kernel bug,
however we can detect this and fix the secondary asce before returning
to user space.

Therefore introduce a new CIF_ASCE_SECONDARY flag which is set and
cleared within set_fs. When returning to user space check if
CIF_ASCE_SECONDARY is set, which would indicate a bug. If it is set print
a message to the console, fixup the secondary asce, and then return to user space.

This is similar to what is being discussed for x86 and arm:
"[RFC] syscalls: Restore address limit after a syscall".

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/processor.h
arch/s390/include/asm/uaccess.h
arch/s390/kernel/entry.S
arch/s390/kernel/entry.h
arch/s390/kernel/process.c

index 3c8fae0..c57c5c2 100644 (file)
 
 #define CIF_MCCK_PENDING       0       /* machine check handling is pending */
 #define CIF_ASCE_PRIMARY       1       /* primary asce needs fixup / uaccess */
-#define CIF_NOHZ_DELAY         2       /* delay HZ disable for a tick */
-#define CIF_FPU                        3       /* restore FPU registers */
-#define CIF_IGNORE_IRQ         4       /* ignore interrupt (for udelay) */
-#define CIF_ENABLED_WAIT       5       /* in enabled wait state */
+#define CIF_ASCE_SECONDARY     2       /* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY         3       /* delay HZ disable for a tick */
+#define CIF_FPU                        4       /* restore FPU registers */
+#define CIF_IGNORE_IRQ         5       /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT       6       /* in enabled wait state */
 
 #define _CIF_MCCK_PENDING      _BITUL(CIF_MCCK_PENDING)
 #define _CIF_ASCE_PRIMARY      _BITUL(CIF_ASCE_PRIMARY)
+#define _CIF_ASCE_SECONDARY    _BITUL(CIF_ASCE_SECONDARY)
 #define _CIF_NOHZ_DELAY                _BITUL(CIF_NOHZ_DELAY)
 #define _CIF_FPU               _BITUL(CIF_FPU)
 #define _CIF_IGNORE_IRQ                _BITUL(CIF_IGNORE_IRQ)
@@ -200,10 +202,12 @@ struct stack_frame {
 struct task_struct;
 struct mm_struct;
 struct seq_file;
+struct pt_regs;
 
 typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
 void dump_trace(dump_trace_func_t func, void *data,
                struct task_struct *task, unsigned long sp);
+void show_registers(struct pt_regs *regs);
 
 void show_cacheinfo(struct seq_file *m);
 
index b2988fc..136932f 100644 (file)
@@ -14,6 +14,7 @@
  */
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <asm/processor.h>
 #include <asm/ctl_reg.h>
 
 #define VERIFY_READ     0
 
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
-
-#define set_fs(x)                                                      \
-do {                                                                   \
-       unsigned long __pto;                                            \
-       current->thread.mm_segment = (x);                               \
-       __pto = current->thread.mm_segment.ar4 ?                        \
-               S390_lowcore.user_asce : S390_lowcore.kernel_asce;      \
-       __ctl_load(__pto, 7, 7);                                        \
-} while (0)
-
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
+static inline void set_fs(mm_segment_t fs) /* load control register 7 with the kernel or user asce, as selected by fs */
+{
+       current->thread.mm_segment = fs; /* remember the segment in the current task so get_fs() returns it */
+       if (segment_eq(fs, KERNEL_DS)) {
+               set_cpu_flag(CIF_ASCE_SECONDARY); /* flag is part of _CIF_WORK: the exit path to user space checks it */
+               __ctl_load(S390_lowcore.kernel_asce, 7, 7); /* control register 7 now holds the kernel asce */
+       } else {
+               clear_cpu_flag(CIF_ASCE_SECONDARY); /* balanced again: no fixup needed on return to user space */
+               __ctl_load(S390_lowcore.user_asce, 7, 7); /* control register 7 now holds the user asce */
+       }
+}
+
 static inline int __range_ok(unsigned long addr, unsigned long size)
 {
        return 1;
index ae7d1a2..dff2152 100644 (file)
@@ -50,7 +50,8 @@ _TIF_WORK     = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
                   _TIF_UPROBE)
 _TIF_TRACE     = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
                   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | _CIF_FPU)
+_CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
+                  _CIF_ASCE_SECONDARY | _CIF_FPU)
 _PIF_WORK      = (_PIF_PER_TRAP)
 
 #define BASED(name) name-cleanup_critical(%r13)
@@ -339,8 +340,8 @@ ENTRY(system_call)
        jo      .Lsysc_notify_resume
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lsysc_vxrs
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
-       jo      .Lsysc_asce_primary
+       TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+       jnz     .Lsysc_asce
        j       .Lsysc_return           # beware of critical section cleanup
 
 #
@@ -358,12 +359,15 @@ ENTRY(system_call)
        jg      s390_handle_mcck        # TIF bit will be cleared by handler
 
 #
-# _CIF_ASCE_PRIMARY is set, load user space asce
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
 #
-.Lsysc_asce_primary:
+.Lsysc_asce:
        ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       j       .Lsysc_return
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
+       jz      .Lsysc_return
+       larl    %r14,.Lsysc_return
+       jg      set_fs_fixup
 
 #
 # CIF_FPU is set, restore floating-point controls and floating-point registers.
@@ -661,8 +665,8 @@ ENTRY(io_int_handler)
        jo      .Lio_notify_resume
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lio_vxrs
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
-       jo      .Lio_asce_primary
+       TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+       jnz     .Lio_asce
        j       .Lio_return             # beware of critical section cleanup
 
 #
@@ -675,12 +679,15 @@ ENTRY(io_int_handler)
        j       .Lio_return
 
 #
-# _CIF_ASCE_PRIMARY is set, load user space asce
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
 #
-.Lio_asce_primary:
+.Lio_asce:
        ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       j       .Lio_return
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
+       jz      .Lio_return
+       larl    %r14,.Lio_return
+       jg      set_fs_fixup
 
 #
 # CIF_FPU is set, restore floating-point controls and floating-point registers.
index e79f030..33f9018 100644 (file)
@@ -80,5 +80,6 @@ long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
 void verify_facilities(void);
+void set_fs_fixup(void);
 
 #endif /* _ENTRY_H */
index c5b86b4..a49dc2b 100644 (file)
@@ -234,3 +234,16 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
        ret = PAGE_ALIGN(mm->brk + brk_rnd());
        return (ret > mm->brk) ? ret : mm->brk;
 }
+
+void set_fs_fixup(void) /* reached from the entry.S exit paths when CIF_ASCE_SECONDARY is still set */
+{
+       struct pt_regs *regs = current_pt_regs();
+       static bool warned; /* limits the diagnostic output below to the first occurrence */
+
+       set_fs(USER_DS); /* always repair, even when the warning was already printed */
+       if (warned)
+               return;
+       WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code); /* int_code is printed to help locate the offending path */
+       show_registers(regs);
+       warned = true;
+}