perf: Fix race between event install and jump_labels

author Peter Zijlstra <peterz@infradead.org>

Wed, 24 Feb 2016 17:45:45 +0000 (18:45 +0100)

committer Ingo Molnar <mingo@kernel.org>

Thu, 25 Feb 2016 07:42:34 +0000 (08:42 +0100)
author Peter Zijlstra <peterz@infradead.org>
Wed, 24 Feb 2016 17:45:45 +0000 (18:45 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 25 Feb 2016 07:42:34 +0000 (08:42 +0100)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

index 3915661..f5c5a3f 100644 (file)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -906,7 +906,7 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
         }
  }
  
-extern struct static_key_deferred perf_sched_events;
+extern struct static_key_false perf_sched_events;
  
  static __always_inline bool
  perf_sw_migrate_enabled(void)
@@ -925,7 +925,7 @@ static inline void perf_event_task_migrate(struct task_struct *task)
  static inline void perf_event_task_sched_in(struct task_struct *prev,
                                             struct task_struct *task)
  {
-       if (static_key_false(&perf_sched_events.key))
+       if (static_branch_unlikely(&perf_sched_events))
                 __perf_event_task_sched_in(prev, task);
  
         if (perf_sw_migrate_enabled() && task->sched_migrated) {
@@ -942,7 +942,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
  {
         perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
  
-       if (static_key_false(&perf_sched_events.key))
+       if (static_branch_unlikely(&perf_sched_events))
                 __perf_event_task_sched_out(prev, next);
  }
  
diff --git a/kernel/events/core.c b/kernel/events/core.c

index 92d6999..ea064ca 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -321,7 +321,13 @@ enum event_type_t {
   * perf_sched_events : >0 events exist
   * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
   */
-struct static_key_deferred perf_sched_events __read_mostly;
+
+static void perf_sched_delayed(struct work_struct *work);
+DEFINE_STATIC_KEY_FALSE(perf_sched_events);
+static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
+static DEFINE_MUTEX(perf_sched_mutex);
+static atomic_t perf_sched_count;
+
  static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
  static DEFINE_PER_CPU(int, perf_sched_cb_usages);
  
@@ -3536,12 +3542,22 @@ static void unaccount_event(struct perf_event *event)
         if (has_branch_stack(event))
                 dec = true;
  
-       if (dec)
-               static_key_slow_dec_deferred(&perf_sched_events);
+       if (dec) {
+               if (!atomic_add_unless(&perf_sched_count, -1, 1))
+                       schedule_delayed_work(&perf_sched_work, HZ);
+       }
  
         unaccount_event_cpu(event, event->cpu);
  }
  
+static void perf_sched_delayed(struct work_struct *work)
+{
+       mutex_lock(&perf_sched_mutex);
+       if (atomic_dec_and_test(&perf_sched_count))
+               static_branch_disable(&perf_sched_events);
+       mutex_unlock(&perf_sched_mutex);
+}
+
  /*
   * The following implement mutual exclusion of events on "exclusive" pmus
   * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
@@ -7780,8 +7796,28 @@ static void account_event(struct perf_event *event)
         if (is_cgroup_event(event))
                 inc = true;
  
-       if (inc)
-               static_key_slow_inc(&perf_sched_events.key);
+       if (inc) {
+               if (atomic_inc_not_zero(&perf_sched_count))
+                       goto enabled;
+
+               mutex_lock(&perf_sched_mutex);
+               if (!atomic_read(&perf_sched_count)) {
+                       static_branch_enable(&perf_sched_events);
+                       /*
+                        * Guarantee that all CPUs observe the key change and
+                        * call the perf scheduling hooks before proceeding to
+                        * install events that need them.
+                        */
+                       synchronize_sched();
+               }
+               /*
+                * Now that we have waited for synchronize_sched(), allow
+                * further increments to bypass the mutex.
+                */
+               atomic_inc(&perf_sched_count);
+               mutex_unlock(&perf_sched_mutex);
+       }
+enabled:
  
         account_event_cpu(event, event->cpu);
  }
@@ -9344,9 +9380,6 @@ void __init perf_event_init(void)
         ret = init_hw_breakpoint();
         WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
  
-       /* do not patch jump label more than once per second */
-       jump_label_rate_limit(&perf_sched_events, HZ);
-
         /*
          * Build time assertion that we keep the data_head at the intended
          * location.  IOW, validation we got the __reserved[] size right.
author	Peter Zijlstra <peterz@infradead.org>
	Wed, 24 Feb 2016 17:45:45 +0000 (18:45 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 25 Feb 2016 07:42:34 +0000 (08:42 +0100)
include/linux/perf_event.h		patch \| blob \| history
kernel/events/core.c		patch \| blob \| history