perf/core: Fix event schedule order
author	Peter Zijlstra <peterz@infradead.org>
Mon, 13 Nov 2017 13:28:30 +0000 (14:28 +0100)
committer	Ingo Molnar <mingo@kernel.org>
Mon, 12 Mar 2018 14:28:49 +0000 (15:28 +0100)
Scheduling in events with cpu=-1 before events with cpu=# changes
semantics and is undesirable in that it would prioritize these events.

Given that groups->index is assigned across all groups, we actually have
an inter-group ordering, meaning we can merge-sort two groups, which is
just what we need to preserve semantics.
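
As an illustration of the approach, here is a minimal user-space sketch of
such a merge. It is not the kernel code: the struct ev type, visit_merge()
and the sample data are made up for the demo; they only mirror the shape of
visit_groups_merge() in the patch below, which walks the cpu == -1 and
cpu == N subtrees and always picks the event with the smaller group_index.

	/*
	 * Illustrative sketch only -- not the kernel implementation.
	 * Both input lists are already ordered by group_index, as the
	 * cpu == -1 and cpu == N subtrees are; picking the smaller index
	 * at each step yields one stream in global insertion order.
	 */
	#include <stdio.h>
	#include <stddef.h>

	struct ev {
		int cpu;			/* -1 for "any CPU", or a specific CPU */
		unsigned long group_index;	/* global insertion order */
	};

	/* Visit both lists in group_index order, like visit_groups_merge(). */
	static void visit_merge(const struct ev *a, size_t na,
				const struct ev *b, size_t nb,
				void (*func)(const struct ev *))
	{
		size_t i = 0, j = 0;

		while (i < na || j < nb) {
			const struct ev *pick;

			if (i < na && (j >= nb || a[i].group_index < b[j].group_index))
				pick = &a[i++];
			else
				pick = &b[j++];

			func(pick);
		}
	}

	static void print_ev(const struct ev *e)
	{
		printf("cpu=%2d group_index=%lu\n", e->cpu, e->group_index);
	}

	int main(void)
	{
		/* Two subtrees, each already sorted by group_index. */
		const struct ev any_cpu[] = { { -1, 1 }, { -1, 4 }, { -1, 6 } };
		const struct ev cpu3[]    = { {  3, 2 }, {  3, 3 }, {  3, 5 } };

		visit_merge(any_cpu, 3, cpu3, 3, print_ev);
		return 0;
	}

With this sample data the callback sees group_index 1 through 6 in order:
the two subtrees are interleaved rather than one being scheduled wholesale
before the other, which is the ordering the patch preserves.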

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Dmitri Prokhorov <Dmitry.Prohorov@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Valery Cherepennikov <valery.cherepennikov@intel.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/events/core.c

index 22165b0..2d8c020 100644
@@ -1609,6 +1609,21 @@ perf_event_groups_first(struct perf_event_groups *groups, int cpu)
 }
 
 /*
+ * Like rb_entry_next_safe() for the @cpu subtree.
+ */
+static struct perf_event *
+perf_event_groups_next(struct perf_event *event)
+{
+       struct perf_event *next;
+
+       next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
+       if (next && next->cpu == event->cpu)
+               return next;
+
+       return NULL;
+}
+
+/*
  * Rotate the @cpu subtree.
  *
  * Re-insert the leftmost event at the tail of the subtree.
@@ -2354,22 +2369,6 @@ static int group_can_go_on(struct perf_event *event,
        return can_add_hw;
 }
 
-static int
-flexible_group_sched_in(struct perf_event *event,
-                       struct perf_event_context *ctx,
-                       struct perf_cpu_context *cpuctx,
-                       int *can_add_hw)
-{
-       if (event->state <= PERF_EVENT_STATE_OFF || !event_filter_match(event))
-               return 0;
-
-       if (group_can_go_on(event, cpuctx, *can_add_hw))
-               if (group_sched_in(event, cpuctx, ctx))
-                       *can_add_hw = 0;
-
-       return 1;
-}
-
 static void add_event_to_ctx(struct perf_event *event,
                               struct perf_event_context *ctx)
 {
@@ -3185,52 +3184,112 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
        ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
 }
 
-static void
-ctx_pinned_sched_in(struct perf_event_context *ctx,
-                   struct perf_cpu_context *cpuctx)
+static int visit_groups_merge(struct perf_event_groups *groups, int cpu,
+                             int (*func)(struct perf_event *, void *), void *data)
 {
-       int sw = -1, cpu = smp_processor_id();
-       struct perf_event *event;
-       int can_add_hw;
+       struct perf_event **evt, *evt1, *evt2;
+       int ret;
 
-       perf_event_groups_for_each_cpu(event, sw,
-                       &ctx->pinned_groups, group_node) {
-               can_add_hw = 1;
-               if (flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw)) {
-                       if (event->state == PERF_EVENT_STATE_INACTIVE)
-                               perf_event_set_state(event,
-                                               PERF_EVENT_STATE_ERROR);
+       evt1 = perf_event_groups_first(groups, -1);
+       evt2 = perf_event_groups_first(groups, cpu);
+
+       while (evt1 || evt2) {
+               if (evt1 && evt2) {
+                       if (evt1->group_index < evt2->group_index)
+                               evt = &evt1;
+                       else
+                               evt = &evt2;
+               } else if (evt1) {
+                       evt = &evt1;
+               } else {
+                       evt = &evt2;
                }
+
+               ret = func(*evt, data);
+               if (ret)
+                       return ret;
+
+               *evt = perf_event_groups_next(*evt);
        }
 
-       perf_event_groups_for_each_cpu(event, cpu,
-                       &ctx->pinned_groups, group_node) {
-               can_add_hw = 1;
-               if (flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw)) {
-                       if (event->state == PERF_EVENT_STATE_INACTIVE)
-                               perf_event_set_state(event,
-                                               PERF_EVENT_STATE_ERROR);
-               }
+       return 0;
+}
+
+struct sched_in_data {
+       struct perf_event_context *ctx;
+       struct perf_cpu_context *cpuctx;
+       int can_add_hw;
+};
+
+static int pinned_sched_in(struct perf_event *event, void *data)
+{
+       struct sched_in_data *sid = data;
+
+       if (event->state <= PERF_EVENT_STATE_OFF)
+               return 0;
+
+       if (!event_filter_match(event))
+               return 0;
+
+       if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw))
+               group_sched_in(event, sid->cpuctx, sid->ctx);
+
+       /*
+        * If this pinned group hasn't been scheduled,
+        * put it in error state.
+        */
+       if (event->state == PERF_EVENT_STATE_INACTIVE)
+               perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+
+       return 0;
+}
+
+static int flexible_sched_in(struct perf_event *event, void *data)
+{
+       struct sched_in_data *sid = data;
+
+       if (event->state <= PERF_EVENT_STATE_OFF)
+               return 0;
+
+       if (!event_filter_match(event))
+               return 0;
+
+       if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
+               if (group_sched_in(event, sid->cpuctx, sid->ctx))
+                       sid->can_add_hw = 0;
        }
+
+       return 0;
 }
 
 static void
-ctx_flexible_sched_in(struct perf_event_context *ctx,
-                     struct perf_cpu_context *cpuctx)
+ctx_pinned_sched_in(struct perf_event_context *ctx,
+                   struct perf_cpu_context *cpuctx)
 {
-       int sw = -1, cpu = smp_processor_id();
-       struct perf_event *event;
-       int can_add_hw = 1;
+       struct sched_in_data sid = {
+               .ctx = ctx,
+               .cpuctx = cpuctx,
+               .can_add_hw = 1,
+       };
 
-       perf_event_groups_for_each_cpu(event, sw,
-                       &ctx->flexible_groups, group_node)
-               flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw);
+       visit_groups_merge(&ctx->pinned_groups,
+                          smp_processor_id(),
+                          pinned_sched_in, &sid);
+}
 
-       can_add_hw = 1;
-       perf_event_groups_for_each_cpu(event, cpu,
-                       &ctx->flexible_groups, group_node)
-               flexible_group_sched_in(event, ctx, cpuctx, &can_add_hw);
+static void
+ctx_flexible_sched_in(struct perf_event_context *ctx,
+                     struct perf_cpu_context *cpuctx)
+{
+       struct sched_in_data sid = {
+               .ctx = ctx,
+               .cpuctx = cpuctx,
+               .can_add_hw = 1,
+       };
 
+       visit_groups_merge(&ctx->flexible_groups,
+                          smp_processor_id(),
+                          flexible_sched_in, &sid);
 }
 
 static void