Revert "perf: Rework perf_event_exit_event()"
authorSasha Levin <sashal@kernel.org>
Thu, 25 Nov 2021 00:18:11 +0000 (19:18 -0500)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 26 Nov 2021 09:39:22 +0000 (10:39 +0100)
This reverts commit 94902ee2996a7f71471138093495df452dab87b6 which is
upstream commit ef54c1a476aef7eef26fe13ea10dc090952c00f8.

Reverting for now due to issues that need to get fixed upstream.

Signed-off-by: Sasha Levin <sashal@kernel.org>
include/linux/perf_event.h
kernel/events/core.c

index c095e71..ce14fb2 100644 (file)
@@ -607,7 +607,6 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK_DATA  0x08
 #define PERF_ATTACH_ITRACE     0x10
 #define PERF_ATTACH_SCHED_CB   0x20
-#define PERF_ATTACH_CHILD      0x40
 
 struct perf_cgroup;
 struct perf_buffer;
index 9084177..639b99a 100644 (file)
@@ -2209,26 +2209,6 @@ out:
        perf_event__header_size(leader);
 }
 
-static void sync_child_event(struct perf_event *child_event);
-
-static void perf_child_detach(struct perf_event *event)
-{
-       struct perf_event *parent_event = event->parent;
-
-       if (!(event->attach_state & PERF_ATTACH_CHILD))
-               return;
-
-       event->attach_state &= ~PERF_ATTACH_CHILD;
-
-       if (WARN_ON_ONCE(!parent_event))
-               return;
-
-       lockdep_assert_held(&parent_event->child_mutex);
-
-       sync_child_event(event);
-       list_del_init(&event->child_list);
-}
-
 static bool is_orphaned_event(struct perf_event *event)
 {
        return event->state == PERF_EVENT_STATE_DEAD;
@@ -2336,7 +2316,6 @@ group_sched_out(struct perf_event *group_event,
 }
 
 #define DETACH_GROUP   0x01UL
-#define DETACH_CHILD   0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2360,8 +2339,6 @@ __perf_remove_from_context(struct perf_event *event,
        event_sched_out(event, cpuctx, ctx);
        if (flags & DETACH_GROUP)
                perf_group_detach(event);
-       if (flags & DETACH_CHILD)
-               perf_child_detach(event);
        list_del_event(event, ctx);
 
        if (!ctx->nr_events && ctx->is_active) {
@@ -2390,21 +2367,25 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
 
        lockdep_assert_held(&ctx->mutex);
 
+       event_function_call(event, __perf_remove_from_context, (void *)flags);
+
        /*
-        * Because of perf_event_exit_task(), perf_remove_from_context() ought
-        * to work in the face of TASK_TOMBSTONE, unlike every other
-        * event_function_call() user.
+        * The above event_function_call() can NO-OP when it hits
+        * TASK_TOMBSTONE. In that case we must already have been detached
+        * from the context (by perf_event_exit_event()) but the grouping
+        * might still be in-tact.
         */
-       raw_spin_lock_irq(&ctx->lock);
-       if (!ctx->is_active) {
-               __perf_remove_from_context(event, __get_cpu_context(ctx),
-                                          ctx, (void *)flags);
+       WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+       if ((flags & DETACH_GROUP) &&
+           (event->attach_state & PERF_ATTACH_GROUP)) {
+               /*
+                * Since in that case we cannot possibly be scheduled, simply
+                * detach now.
+                */
+               raw_spin_lock_irq(&ctx->lock);
+               perf_group_detach(event);
                raw_spin_unlock_irq(&ctx->lock);
-               return;
        }
-       raw_spin_unlock_irq(&ctx->lock);
-
-       event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
@@ -12296,17 +12277,14 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
-static void sync_child_event(struct perf_event *child_event)
+static void sync_child_event(struct perf_event *child_event,
+                              struct task_struct *child)
 {
        struct perf_event *parent_event = child_event->parent;
        u64 child_val;
 
-       if (child_event->attr.inherit_stat) {
-               struct task_struct *task = child_event->ctx->task;
-
-               if (task && task != TASK_TOMBSTONE)
-                       perf_event_read_event(child_event, task);
-       }
+       if (child_event->attr.inherit_stat)
+               perf_event_read_event(child_event, child);
 
        child_val = perf_event_count(child_event);
 
@@ -12321,53 +12299,60 @@ static void sync_child_event(struct perf_event *child_event)
 }
 
 static void
-perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+perf_event_exit_event(struct perf_event *child_event,
+                     struct perf_event_context *child_ctx,
+                     struct task_struct *child)
 {
-       struct perf_event *parent_event = event->parent;
-       unsigned long detach_flags = 0;
-
-       if (parent_event) {
-               /*
-                * Do not destroy the 'original' grouping; because of the
-                * context switch optimization the original events could've
-                * ended up in a random child task.
-                *
-                * If we were to destroy the original group, all group related
-                * operations would cease to function properly after this
-                * random child dies.
-                *
-                * Do destroy all inherited groups, we don't care about those
-                * and being thorough is better.
-                */
-               detach_flags = DETACH_GROUP | DETACH_CHILD;
-               mutex_lock(&parent_event->child_mutex);
-       }
+       struct perf_event *parent_event = child_event->parent;
 
-       perf_remove_from_context(event, detach_flags);
+       /*
+        * Do not destroy the 'original' grouping; because of the context
+        * switch optimization the original events could've ended up in a
+        * random child task.
+        *
+        * If we were to destroy the original group, all group related
+        * operations would cease to function properly after this random
+        * child dies.
+        *
+        * Do destroy all inherited groups, we don't care about those
+        * and being thorough is better.
+        */
+       raw_spin_lock_irq(&child_ctx->lock);
+       WARN_ON_ONCE(child_ctx->is_active);
 
-       raw_spin_lock_irq(&ctx->lock);
-       if (event->state > PERF_EVENT_STATE_EXIT)
-               perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
-       raw_spin_unlock_irq(&ctx->lock);
+       if (parent_event)
+               perf_group_detach(child_event);
+       list_del_event(child_event, child_ctx);
+       perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        /*
-        * Child events can be freed.
+        * Parent events are governed by their filedesc, retain them.
         */
-       if (parent_event) {
-               mutex_unlock(&parent_event->child_mutex);
-               /*
-                * Kick perf_poll() for is_event_hup();
-                */
-               perf_event_wakeup(parent_event);
-               free_event(event);
-               put_event(parent_event);
+       if (!parent_event) {
+               perf_event_wakeup(child_event);
                return;
        }
+       /*
+        * Child events can be cleaned up.
+        */
+
+       sync_child_event(child_event, child);
 
        /*
-        * Parent events are governed by their filedesc, retain them.
+        * Remove this event from the parent's list
+        */
+       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+       mutex_lock(&parent_event->child_mutex);
+       list_del_init(&child_event->child_list);
+       mutex_unlock(&parent_event->child_mutex);
+
+       /*
+        * Kick perf_poll() for is_event_hup().
         */
-       perf_event_wakeup(event);
+       perf_event_wakeup(parent_event);
+       free_event(child_event);
+       put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
@@ -12424,7 +12409,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
        perf_event_task(child, child_ctx, 0);
 
        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-               perf_event_exit_event(child_event, child_ctx);
+               perf_event_exit_event(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
 
@@ -12684,7 +12669,6 @@ inherit_event(struct perf_event *parent_event,
         */
        raw_spin_lock_irqsave(&child_ctx->lock, flags);
        add_event_to_ctx(child_event, child_ctx);
-       child_event->attach_state |= PERF_ATTACH_CHILD;
        raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
        /*