perf: Add support for event removal on exec
author Marco Elver <elver@google.com>
Thu, 8 Apr 2021 10:35:59 +0000 (12:35 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 16 Apr 2021 14:32:41 +0000 (16:32 +0200)
Adds bit perf_event_attr::remove_on_exec, to support removing an event
from a task on exec.

This option supports the case where an event is supposed to be
process-wide only, and should not propagate beyond exec, to limit
monitoring to the original process image only.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210408103605.1676875-5-elver@google.com
include/uapi/linux/perf_event.h
kernel/events/core.c

index 813efb6..8c5b9f5 100644 (file)
@@ -390,7 +390,8 @@ struct perf_event_attr {
                                text_poke      :  1, /* include text poke events */
                                build_id       :  1, /* use build id in mmap2 events */
                                inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
-                               __reserved_1   : 28;
+                               remove_on_exec :  1, /* event is removed from task on exec */
+                               __reserved_1   : 27;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
index 3e3c00f..e4a584b 100644 (file)
@@ -4248,6 +4248,57 @@ out:
                put_ctx(clone_ctx);
 }
 
+static void perf_remove_from_owner(struct perf_event *event);
+static void perf_event_exit_event(struct perf_event *event,
+                                 struct perf_event_context *ctx);
+
+/*
+ * Removes all events from the current task that have been marked
+ * remove-on-exec, and feeds their values back to parent events.
+ */
+static void perf_event_remove_on_exec(int ctxn)
+{
+       struct perf_event_context *ctx, *clone_ctx = NULL;
+       struct perf_event *event, *next;
+       unsigned long flags;
+       bool modified = false;
+
+       ctx = perf_pin_task_context(current, ctxn);
+       if (!ctx)
+               return;
+
+       mutex_lock(&ctx->mutex);
+
+       if (WARN_ON_ONCE(ctx->task != current))
+               goto unlock;
+
+       list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
+               if (!event->attr.remove_on_exec)
+                       continue;
+
+               if (!is_kernel_event(event))
+                       perf_remove_from_owner(event);
+
+               modified = true;
+
+               perf_event_exit_event(event, ctx);
+       }
+
+unlock:
+       /* Drop the pin on every path, including the bail-out above. */
+       raw_spin_lock_irqsave(&ctx->lock, flags);
+       if (modified)
+               clone_ctx = unclone_ctx(ctx);
+       --ctx->pin_count;
+       raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+       mutex_unlock(&ctx->mutex);
+
+       put_ctx(ctx);
+       if (clone_ctx)
+               put_ctx(clone_ctx);
+}
+
 struct perf_read_data {
        struct perf_event *event;
        bool group;
@@ -7560,18 +7611,18 @@ void perf_event_exec(void)
        struct perf_event_context *ctx;
        int ctxn;
 
-       rcu_read_lock();
        for_each_task_context_nr(ctxn) {
-               ctx = current->perf_event_ctxp[ctxn];
-               if (!ctx)
-                       continue;
-
                perf_event_enable_on_exec(ctxn);
+               perf_event_remove_on_exec(ctxn);
 
-               perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
-                                  true);
+               rcu_read_lock();
+               ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+               if (ctx) {
+                       perf_iterate_ctx(ctx, perf_event_addr_filters_exec,
+                                        NULL, true);
+               }
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
 }
 
 struct remote_output {
@@ -11656,6 +11707,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (!attr->inherit && attr->inherit_thread)
                return -EINVAL;
 
+       if (attr->remove_on_exec && attr->enable_on_exec)
+               return -EINVAL;
+
 out:
        return ret;