perf: Add a pmu capability for "exclusive" events
author     Alexander Shishkin <alexander.shishkin@linux.intel.com>
           Fri, 30 Jan 2015 10:31:06 +0000 (12:31 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 2 Apr 2015 15:14:12 +0000 (17:14 +0200)
Usually, pmus that do, for example, instruction tracing can only ever
have one event per task per cpu (or per perf_event_context). For such
pmus it makes sense to disallow creating conflicting events early on,
so as to provide consistent behavior for the user.

This patch adds a pmu capability that indicates such constraint on event
creation.
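
For illustration, a driver for such a pmu advertises the capability in
its struct pmu before registration. A minimal sketch follows; the
"my_trace" name and the event_init callback are made up for this
example and stand in for a real driver's:

	static struct pmu my_trace_pmu = {
		.capabilities	= PERF_PMU_CAP_EXCLUSIVE,
		.event_init	= my_trace_event_init,
		/* ->add/->del/->start/->stop/->read omitted */
	};

	/* perf_pmu_register() initializes pmu::exclusive_cnt to 0 */
	err = perf_pmu_register(&my_trace_pmu, "my_trace", -1);

With the capability set, the core rejects conflicting events with
-EBUSY; the driver itself needs no further checks.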

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/perf_event.h
kernel/events/core.c

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 13a1eb3..f936a1e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -176,6 +176,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_NMI                    0x02
 #define PERF_PMU_CAP_AUX_NO_SG                 0x04
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF          0x08
+#define PERF_PMU_CAP_EXCLUSIVE                 0x10
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -196,6 +197,7 @@ struct pmu {
 
        int * __percpu                  pmu_disable_count;
        struct perf_cpu_context * __percpu pmu_cpu_context;
+       atomic_t                        exclusive_cnt; /* < 0: cpu; > 0: tsk */
        int                             task_ctx_nr;
        int                             hrtimer_interval_ms;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index da51128..6d9fdae 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3459,6 +3459,91 @@ static void unaccount_event(struct perf_event *event)
        unaccount_event_cpu(event, event->cpu);
 }
 
+/*
+ * The following implement mutual exclusion of events on "exclusive" pmus
+ * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
+ * at a time, so we disallow creating events that might conflict, namely:
+ *
+ *  1) cpu-wide events in the presence of per-task events,
+ *  2) per-task events in the presence of cpu-wide events,
+ *  3) two matching events on the same context.
+ *
+ * The former two cases are handled in the allocation path (perf_event_alloc(),
+ * __free_event()), the latter -- before the first perf_install_in_context().
+ */
+static int exclusive_event_init(struct perf_event *event)
+{
+       struct pmu *pmu = event->pmu;
+
+       if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+               return 0;
+
+       /*
+        * Prevent co-existence of per-task and cpu-wide events on the
+        * same exclusive pmu.
+        *
+        * Negative pmu::exclusive_cnt means there are cpu-wide
+        * events on this "exclusive" pmu, positive means there are
+        * per-task events.
+        *
+        * Since this is called in perf_event_alloc() path, event::ctx
+        * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
+        * to mean "per-task event", because unlike other attach states it
+        * never gets cleared.
+        */
+       if (event->attach_state & PERF_ATTACH_TASK) {
+               if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
+                       return -EBUSY;
+       } else {
+               if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
+                       return -EBUSY;
+       }
+
+       return 0;
+}
+
+static void exclusive_event_destroy(struct perf_event *event)
+{
+       struct pmu *pmu = event->pmu;
+
+       if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+               return;
+
+       /* see comment in exclusive_event_init() */
+       if (event->attach_state & PERF_ATTACH_TASK)
+               atomic_dec(&pmu->exclusive_cnt);
+       else
+               atomic_inc(&pmu->exclusive_cnt);
+}
+
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+       if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+           (e1->cpu == e2->cpu ||
+            e1->cpu == -1 ||
+            e2->cpu == -1))
+               return true;
+       return false;
+}
+
+/* Called under the same ctx::mutex as perf_install_in_context() */
+static bool exclusive_event_installable(struct perf_event *event,
+                                       struct perf_event_context *ctx)
+{
+       struct perf_event *iter_event;
+       struct pmu *pmu = event->pmu;
+
+       if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+               return true;
+
+       list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+               if (exclusive_event_match(iter_event, event))
+                       return false;
+       }
+
+       return true;
+}
+
 static void __free_event(struct perf_event *event)
 {
        if (!event->parent) {
@@ -3472,8 +3557,10 @@ static void __free_event(struct perf_event *event)
        if (event->ctx)
                put_ctx(event->ctx);
 
-       if (event->pmu)
+       if (event->pmu) {
+               exclusive_event_destroy(event);
                module_put(event->pmu->module);
+       }
 
        call_rcu(&event->rcu_head, free_event_rcu);
 }
@@ -7150,6 +7237,7 @@ got_cpu_context:
                pmu->event_idx = perf_event_idx_default;
 
        list_add_rcu(&pmu->entry, &pmus);
+       atomic_set(&pmu->exclusive_cnt, 0);
        ret = 0;
 unlock:
        mutex_unlock(&pmus_lock);
@@ -7405,16 +7493,23 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                goto err_ns;
        }
 
+       err = exclusive_event_init(event);
+       if (err)
+               goto err_pmu;
+
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
                        err = get_callchain_buffers();
                        if (err)
-                               goto err_pmu;
+                               goto err_per_task;
                }
        }
 
        return event;
 
+err_per_task:
+       exclusive_event_destroy(event);
+
 err_pmu:
        if (event->destroy)
                event->destroy(event);
@@ -7819,6 +7914,11 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_alloc;
        }
 
+       if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
+               err = -EBUSY;
+               goto err_context;
+       }
+
        if (task) {
                put_task_struct(task);
                task = NULL;
@@ -7941,6 +8041,13 @@ SYSCALL_DEFINE5(perf_event_open,
                get_ctx(ctx);
        }
 
+       if (!exclusive_event_installable(event, ctx)) {
+               err = -EBUSY;
+               mutex_unlock(&ctx->mutex);
+               fput(event_file);
+               goto err_context;
+       }
+
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);
 
@@ -8032,6 +8139,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
+       if (!exclusive_event_installable(event, ctx)) {
+               mutex_unlock(&ctx->mutex);
+               perf_unpin_context(ctx);
+               put_ctx(ctx);
+               err = -EBUSY;
+               goto err_free;
+       }
+
        perf_install_in_context(ctx, event, cpu);
        perf_unpin_context(ctx);
        mutex_unlock(&ctx->mutex);
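
The cpu-wide vs per-task exclusion above hinges on the sign of
pmu::exclusive_cnt: exclusive_event_init() uses
atomic_inc_unless_negative() to claim the counter for per-task events
and atomic_dec_unless_positive() to claim it for cpu-wide events, so
the two kinds can never be accounted simultaneously. Below is a
minimal userspace C model of that counting scheme, for illustration
only -- it emulates the two kernel atomics with C11 compare-and-swap
loops and is not kernel code:

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_int exclusive_cnt; /* < 0: cpu-wide; > 0: per-task */

	/* Emulates atomic_inc_unless_negative(): claim a per-task slot. */
	static bool claim_task_slot(void)
	{
		int v = atomic_load(&exclusive_cnt);

		while (v >= 0) {
			/* on failure, v is reloaded and the sign re-checked */
			if (atomic_compare_exchange_weak(&exclusive_cnt, &v, v + 1))
				return true;
		}
		return false;	/* cpu-wide events exist: -EBUSY */
	}

	/* Emulates atomic_dec_unless_positive(): claim a cpu-wide slot. */
	static bool claim_cpu_slot(void)
	{
		int v = atomic_load(&exclusive_cnt);

		while (v <= 0) {
			if (atomic_compare_exchange_weak(&exclusive_cnt, &v, v - 1))
				return true;
		}
		return false;	/* per-task events exist: -EBUSY */
	}

Releasing a slot, as exclusive_event_destroy() does, is the simple
inverse: a plain atomic_dec() for per-task events and atomic_inc() for
cpu-wide ones, moving the counter back toward zero.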