x86: Rewrite ret_from_fork() in C
authorBrian Gerst <brgerst@gmail.com>
Fri, 23 Jun 2023 22:55:29 +0000 (18:55 -0400)
committerPeter Zijlstra <peterz@infradead.org>
Mon, 10 Jul 2023 07:52:25 +0000 (09:52 +0200)
When kCFI is enabled, special handling is needed for the indirect call
to the kernel thread function.  Rewrite the ret_from_fork() function in
C so that the compiler can properly handle the indirect call.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Link: https://lkml.kernel.org/r/20230623225529.34590-3-brgerst@gmail.com
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/include/asm/switch_to.h
arch/x86/kernel/process.c

index e56123f..6e6af42 100644 (file)
@@ -727,36 +727,22 @@ SYM_CODE_END(__switch_to_asm)
  * edi: kernel thread arg
  */
 .pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
+       movl    %esp, %edx      /* regs */
+
        /* return address for the stack unwinder */
        pushl   $.Lsyscall_32_done
 
        FRAME_BEGIN
-       pushl   %eax
-       call    schedule_tail
+       /* prev already in EAX */
+       movl    %ebx, %ecx      /* fn */
+       pushl   %edi            /* fn_arg */
+       call    ret_from_fork
        addl    $4, %esp
        FRAME_END
 
-       testl   %ebx, %ebx
-       jnz     1f              /* kernel threads are uncommon */
-
-2:
-       /* When we fork, we trace the syscall return in the child, too. */
-       leal    4(%esp), %eax
-       call    syscall_exit_to_user_mode
        RET
-
-       /* kernel thread */
-1:     movl    %edi, %eax
-       CALL_NOSPEC ebx
-       /*
-        * A kernel thread is allowed to return here after successfully
-        * calling kernel_execve().  Exit to userspace to complete the execve()
-        * syscall.
-        */
-       movl    $0, PT_EAX(%esp)
-       jmp     2b
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 .popsection
 
 SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
index f31e286..91f6818 100644 (file)
@@ -284,36 +284,19 @@ SYM_FUNC_END(__switch_to_asm)
  * r12: kernel thread arg
  */
 .pushsection .text, "ax"
-       __FUNC_ALIGN
-SYM_CODE_START_NOALIGN(ret_from_fork)
-       UNWIND_HINT_END_OF_STACK
+SYM_CODE_START(ret_from_fork_asm)
+       UNWIND_HINT_REGS
        ANNOTATE_NOENDBR // copy_thread
        CALL_DEPTH_ACCOUNT
-       movq    %rax, %rdi
-       call    schedule_tail                   /* rdi: 'prev' task parameter */
 
-       testq   %rbx, %rbx                      /* from kernel_thread? */
-       jnz     1f                              /* kernel threads are uncommon */
+       movq    %rax, %rdi              /* prev */
+       movq    %rsp, %rsi              /* regs */
+       movq    %rbx, %rdx              /* fn */
+       movq    %r12, %rcx              /* fn_arg */
+       call    ret_from_fork
 
-2:
-       UNWIND_HINT_REGS
-       movq    %rsp, %rdi
-       call    syscall_exit_to_user_mode       /* returns with IRQs disabled */
        jmp     swapgs_restore_regs_and_return_to_usermode
-
-1:
-       /* kernel thread */
-       UNWIND_HINT_END_OF_STACK
-       movq    %r12, %rdi
-       CALL_NOSPEC rbx
-       /*
-        * A kernel thread is allowed to return here after successfully
-        * calling kernel_execve().  Exit to userspace to complete the execve()
-        * syscall.
-        */
-       movq    $0, RAX(%rsp)
-       jmp     2b
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 .popsection
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
index 5c91305..f42dbf1 100644 (file)
@@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
 __visible struct task_struct *__switch_to(struct task_struct *prev,
                                          struct task_struct *next);
 
-asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_fork_asm(void);
+__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+                            int (*fn)(void *), void *fn_arg);
 
 /*
  * This is the structure pointed to by thread.sp for an inactive task.  The
index ff9b80a..72015db 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/static_call.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/entry-common.h>
 #include <asm/cpu.h>
 #include <asm/apic.h>
 #include <linux/uaccess.h>
@@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
                return do_set_thread_area_64(p, ARCH_SET_FS, tls);
 }
 
+__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+                                    int (*fn)(void *), void *fn_arg)
+{
+       schedule_tail(prev);
+
+       /* Is this a kernel thread? */
+       if (unlikely(fn)) {
+               fn(fn_arg);
+               /*
+                * A kernel thread is allowed to return here after successfully
+                * calling kernel_execve().  Exit to userspace to complete the
+                * execve() syscall.
+                */
+               regs->ax = 0;
+       }
+
+       syscall_exit_to_user_mode(regs);
+}
+
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 {
        unsigned long clone_flags = args->flags;
@@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
        frame = &fork_frame->frame;
 
        frame->bp = encode_frame_pointer(childregs);
-       frame->ret_addr = (unsigned long) ret_from_fork;
+       frame->ret_addr = (unsigned long) ret_from_fork_asm;
        p->thread.sp = (unsigned long) fork_frame;
        p->thread.io_bitmap = NULL;
        p->thread.iopl_warn = 0;