seccomp: release filter after task is fully dead
author Christian Brauner <christian.brauner@ubuntu.com>
Sun, 31 May 2020 11:50:29 +0000 (13:50 +0200)
committer Kees Cook <keescook@chromium.org>
Fri, 10 Jul 2020 23:01:51 +0000 (16:01 -0700)
The seccomp filter used to be released in free_task(), which is called
asynchronously via call_rcu() and assorted mechanisms. Since we need
to inform tasks waiting on the seccomp notifier when a filter goes empty,
we will notify them as soon as a task has been marked fully dead in
release_task(). To not split seccomp cleanup into two parts, move
filter release out of free_task() and into release_task() after we've
unhashed struct task from struct pid, exited signals, and unlinked it
from the thread group's thread list. We'll put the empty filter
notification infrastructure into it in a follow-up patch.

This also renames put_seccomp_filter() to seccomp_filter_release(), which
is a more descriptive name for what we're doing here, especially once
we've added the empty filter notification mechanism in there.

We're also NULL-ing the task's filter tree entrypoint which seems
cleaner than leaving a dangling pointer in there. Note that this shouldn't
need any memory barriers since we're calling this when the task is in
release_task() which means it's EXIT_DEAD. So it can't modify its seccomp
filters anymore. You can also see this from the point where we're calling
seccomp_filter_release(). It's after __exit_signal() and at this point,
tsk->sighand will already have been NULLed which is required for
thread-sync and filter installation alike.

Cc: Tycho Andersen <tycho@tycho.ws>
Cc: Kees Cook <keescook@chromium.org>
Cc: Matt Denton <mpdenton@google.com>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Jann Horn <jannh@google.com>
Cc: Chris Palmer <palmer@google.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Robert Sesek <rsesek@google.com>
Cc: Jeffrey Vander Stoep <jeffv@google.com>
Cc: Linux Containers <containers@lists.linux-foundation.org>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20200531115031.391515-2-christian.brauner@ubuntu.com
Signed-off-by: Kees Cook <keescook@chromium.org>
include/linux/seccomp.h
kernel/exit.c
kernel/fork.c
kernel/seccomp.c

index 2ec2720..babcd6c 100644 (file)
@@ -84,10 +84,10 @@ static inline int seccomp_mode(struct seccomp *s)
 #endif /* CONFIG_SECCOMP */
 
 #ifdef CONFIG_SECCOMP_FILTER
-extern void put_seccomp_filter(struct task_struct *tsk);
+extern void seccomp_filter_release(struct task_struct *tsk);
 extern void get_seccomp_filter(struct task_struct *tsk);
 #else  /* CONFIG_SECCOMP_FILTER */
-static inline void put_seccomp_filter(struct task_struct *tsk)
+static inline void seccomp_filter_release(struct task_struct *tsk)
 {
        return;
 }
index 727150f..00d77e5 100644 (file)
@@ -217,6 +217,7 @@ repeat:
        }
 
        write_unlock_irq(&tasklist_lock);
+       seccomp_filter_release(p);
        proc_flush_pid(thread_pid);
        put_pid(thread_pid);
        release_thread(p);
index 142b236..c51a9cd 100644 (file)
@@ -473,7 +473,6 @@ void free_task(struct task_struct *tsk)
 #endif
        rt_mutex_debug_task_free(tsk);
        ftrace_graph_exit_task(tsk);
-       put_seccomp_filter(tsk);
        arch_release_task_struct(tsk);
        if (tsk->flags & PF_KTHREAD)
                free_kthread_struct(tsk);
index d4dd334..0ca6d52 100644 (file)
@@ -368,6 +368,42 @@ static inline pid_t seccomp_can_sync_threads(void)
        return 0;
 }
 
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+       if (filter) {
+               bpf_prog_destroy(filter->prog);
+               kfree(filter);
+       }
+}
+
+static void __put_seccomp_filter(struct seccomp_filter *orig)
+{
+       /* Clean up single-reference branches iteratively. */
+       while (orig && refcount_dec_and_test(&orig->refs)) {
+               struct seccomp_filter *freeme = orig;
+               orig = orig->prev;
+               seccomp_filter_free(freeme);
+       }
+}
+
+/**
+ * seccomp_filter_release - Detach the task from its filter tree
+ *                         and drop its reference count during
+ *                         exit.
+ *
+ * This function should only be called when the task is exiting as
+ * it detaches it from its filter tree. As such, READ_ONCE() and
+ * barriers are not needed here, as would normally be needed.
+ */
+void seccomp_filter_release(struct task_struct *tsk)
+{
+       struct seccomp_filter *orig = tsk->seccomp.filter;
+
+       /* Detach task from its filter tree. */
+       tsk->seccomp.filter = NULL;
+       __put_seccomp_filter(orig);
+}
+
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
@@ -397,7 +433,7 @@ static inline void seccomp_sync_threads(unsigned long flags)
                 * current's path will hold a reference.  (This also
                 * allows a put before the assignment.)
                 */
-               put_seccomp_filter(thread);
+               __put_seccomp_filter(thread->seccomp.filter);
                smp_store_release(&thread->seccomp.filter,
                                  caller->seccomp.filter);
                atomic_set(&thread->seccomp.filter_count,
@@ -571,30 +607,6 @@ void get_seccomp_filter(struct task_struct *tsk)
        __get_seccomp_filter(orig);
 }
 
-static inline void seccomp_filter_free(struct seccomp_filter *filter)
-{
-       if (filter) {
-               bpf_prog_destroy(filter->prog);
-               kfree(filter);
-       }
-}
-
-static void __put_seccomp_filter(struct seccomp_filter *orig)
-{
-       /* Clean up single-reference branches iteratively. */
-       while (orig && refcount_dec_and_test(&orig->refs)) {
-               struct seccomp_filter *freeme = orig;
-               orig = orig->prev;
-               seccomp_filter_free(freeme);
-       }
-}
-
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
-{
-       __put_seccomp_filter(tsk->seccomp.filter);
-}
-
 static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
 {
        clear_siginfo(info);