From de77c3a5b95c95a4915142071643d94e3e1ada35 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Jan 2022 12:18:12 -0600 Subject: [PATCH] exit: Move force_uaccess back into do_exit With kernel threads on architectures that still have set_fs/get_fs running as KERNEL_DS moving force_uaccess_begin does not appear safe. Calling force_uaccess_begin is a noop on anything people care about. Update the comment to explain why this code while looking like an obvious candidate for moving to make_task_dead probably needs to remain in do_exit until set_fs/get_fs are entirely removed from the kernel. Fixes: 05ea0424f0e2 ("exit: Move oops specific logic from do_exit into make_task_dead") Suggested-by: Al Viro Link: https://lkml.kernel.org/r/YdUxGKRcSiDy8jGg@zeniv-ca.linux.org.uk Signed-off-by: "Eric W. Biederman" --- kernel/exit.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index db4eeb7f..fc0726c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -737,6 +737,20 @@ void __noreturn do_exit(long code) WARN_ON(blk_needs_flush_plug(tsk)); + /* + * If do_dead is called because this processes oopsed, it's possible + * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before + * continuing. Amongst other possible reasons, this is to prevent + * mm_release()->clear_child_tid() from writing to a user-controlled + * kernel address. + * + * On uptodate architectures force_uaccess_begin is a noop. On + * architectures that still have set_fs/get_fs in addition to handling + * oopses handles kernel threads that run as set_fs(KERNEL_DS) by + * default. + */ + force_uaccess_begin(); + profile_task_exit(tsk); kcov_task_exit(tsk); @@ -862,15 +876,6 @@ void __noreturn make_task_dead(int signr) if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - /* - * If make_task_dead is called because this processes oopsed, it's possible - * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before - * continuing. Amongst other possible reasons, this is to prevent - * mm_release()->clear_child_tid() from writing to a user-controlled - * kernel address. - */ - force_uaccess_begin(); - if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", current->comm, task_pid_nr(current), -- 2.7.4