perf/x86/intel: Implement cross-HT corruption bug workaround

author Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>

Mon, 17 Nov 2014 19:06:58 +0000 (20:06 +0100)

committer Ingo Molnar <mingo@kernel.org>

Thu, 2 Apr 2015 15:33:12 +0000 (17:33 +0200)
author Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Mon, 17 Nov 2014 19:06:58 +0000 (20:06 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 2 Apr 2015 15:33:12 +0000 (17:33 +0200)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c

index 7175540..b8b7a12 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -779,7 +779,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
         struct event_constraint *c;
         unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
         struct perf_event *e;
-       int i, wmin, wmax, num = 0;
+       int i, wmin, wmax, unsched = 0;
         struct hw_perf_event *hwc;
  
         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
@@ -822,14 +822,20 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
  
         /* slow path */
         if (i != n)
-               num = perf_assign_events(cpuc->event_list, n, wmin,
-                                        wmax, assign);
+               unsched = perf_assign_events(cpuc->event_list, n, wmin,
+                                            wmax, assign);
  
         /*
-        * Mark the event as committed, so we do not put_constraint()
-        * in case new events are added and fail scheduling.
+        * In case of success (unsched = 0), mark events as committed,
+        * so we do not put_constraint() in case new events are added
+        * and fail to be scheduled
+        *
+        * We invoke the lower level commit callback to lock the resource
+        *
+        * We do not need to do all of this in case we are called to
+        * validate an event group (assign == NULL)
          */
-       if (!num && assign) {
+       if (!unsched && assign) {
                 for (i = 0; i < n; i++) {
                         e = cpuc->event_list[i];
                         e->hw.flags |= PERF_X86_EVENT_COMMITTED;
@@ -837,11 +843,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                                 x86_pmu.commit_scheduling(cpuc, e, assign[i]);
                 }
         }
-       /*
-        * scheduling failed or is just a simulation,
-        * free resources if necessary
-        */
-       if (!assign || num) {
+
+       if (!assign || unsched) {
+
                 for (i = 0; i < n; i++) {
                         e = cpuc->event_list[i];
                         /*
@@ -851,6 +855,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                         if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
                                 continue;
  
+                       /*
+                        * release events that failed scheduling
+                        */
                         if (x86_pmu.put_event_constraints)
                                 x86_pmu.put_event_constraints(cpuc, e);
                 }
@@ -859,7 +866,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
         if (x86_pmu.stop_scheduling)
                 x86_pmu.stop_scheduling(cpuc);
  
-       return num ? -EINVAL : 0;
+       return unsched ? -EINVAL : 0;
  }
  
  /*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h

index f31f90e..236afee 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -72,6 +72,7 @@ struct event_constraint {
  #define PERF_X86_EVENT_PEBS_LD_HSW     0x10 /* haswell style datala, load */
  #define PERF_X86_EVENT_PEBS_NA_HSW     0x20 /* haswell style datala, unknown */
  #define PERF_X86_EVENT_EXCL            0x40 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC         0x80 /* dynamic alloc'd constraint */
  #define PERF_X86_EVENT_RDPMC_ALLOWED   0x40 /* grant rdpmc permission */
  
  
@@ -133,6 +134,7 @@ enum intel_excl_state_type {
  struct intel_excl_states {
         enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
         enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+       bool sched_started; /* true if scheduling has started */
  };
  
  struct intel_excl_cntrs {
@@ -296,6 +298,10 @@ struct cpu_hw_events {
  #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)    \
         EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
  
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n)        \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+                          HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
  #define INTEL_PLD_CONSTRAINT(c, n)     \
         __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                            HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c

index 7f54000..91cc774 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1845,7 +1845,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
  }
  
  static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                             struct perf_event *event)
  {
         struct event_constraint *c;
@@ -1866,6 +1866,254 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
  }
  
  static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl, *xlo;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid; /* sibling thread */
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+       /*
+        * no exclusion needed
+        */
+       if (!excl_cntrs)
+               return;
+
+       xlo = &excl_cntrs->states[o_tid];
+       xl = &excl_cntrs->states[tid];
+
+       xl->sched_started = true;
+
+       /*
+        * lock shared state until we are done scheduling
+        * in stop_event_scheduling()
+        * makes scheduling appear as a transaction
+        */
+       WARN_ON_ONCE(!irqs_disabled());
+       raw_spin_lock(&excl_cntrs->lock);
+
+       /*
+        * save initial state of sibling thread
+        */
+       memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl, *xlo;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid; /* sibling thread */
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+       /*
+        * no exclusion needed
+        */
+       if (!excl_cntrs)
+               return;
+
+       xlo = &excl_cntrs->states[o_tid];
+       xl = &excl_cntrs->states[tid];
+
+       /*
+        * make new sibling thread state visible
+        */
+       memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+
+       xl->sched_started = false;
+       /*
+        * release shared state lock (acquired in intel_start_scheduling())
+        */
+       raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+                          int idx, struct event_constraint *c)
+{
+       struct event_constraint *cx;
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl, *xlo;
+       int is_excl, i;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid; /* alternate */
+
+       /*
+        * validating a group does not require
+        * enforcing cross-thread  exclusion
+        */
+       if (cpuc->is_fake)
+               return c;
+
+       /*
+        * event requires exclusive counter access
+        * across HT threads
+        */
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+       /*
+        * xl = state of current HT
+        * xlo = state of sibling HT
+        */
+       xl = &excl_cntrs->states[tid];
+       xlo = &excl_cntrs->states[o_tid];
+
+       cx = c;
+
+       /*
+        * because we modify the constraint, we need
+        * to make a copy. Static constraints come
+        * from static const tables.
+        *
+        * only needed when constraint has not yet
+        * been cloned (marked dynamic)
+        */
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+
+               /* sanity check */
+               if (idx < 0)
+                       return &emptyconstraint;
+
+               /*
+                * grab pre-allocated constraint entry
+                */
+               cx = &cpuc->constraint_list[idx];
+
+               /*
+                * initialize dynamic constraint
+                * with static constraint
+                */
+               memcpy(cx, c, sizeof(*cx));
+
+               /*
+                * mark constraint as dynamic, so we
+                * can free it later on
+                */
+               cx->flags |= PERF_X86_EVENT_DYNAMIC;
+       }
+
+       /*
+        * From here on, the constraint is dynamic.
+        * Either it was just allocated above, or it
+        * was allocated during a earlier invocation
+        * of this function
+        */
+
+       /*
+        * Modify static constraint with current dynamic
+        * state of thread
+        *
+        * EXCLUSIVE: sibling counter measuring exclusive event
+        * SHARED   : sibling counter measuring non-exclusive event
+        * UNUSED   : sibling counter unused
+        */
+       for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+               /*
+                * exclusive event in sibling counter
+                * our corresponding counter cannot be used
+                * regardless of our event
+                */
+               if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+                       __clear_bit(i, cx->idxmsk);
+               /*
+                * if measuring an exclusive event, sibling
+                * measuring non-exclusive, then counter cannot
+                * be used
+                */
+               if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+                       __clear_bit(i, cx->idxmsk);
+       }
+
+       /*
+        * recompute actual bit weight for scheduling algorithm
+        */
+       cx->weight = hweight64(cx->idxmsk64);
+
+       /*
+        * if we return an empty mask, then switch
+        * back to static empty constraint to avoid
+        * the cost of freeing later on
+        */
+       if (cx->weight == 0)
+               cx = &emptyconstraint;
+
+       return cx;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                           struct perf_event *event)
+{
+       struct event_constraint *c = event->hw.constraint;
+
+       /*
+        * first time only
+        * - static constraint: no change across incremental scheduling calls
+        * - dynamic constraint: handled by intel_get_excl_constraints()
+        */
+       if (!c)
+               c = __intel_get_event_constraints(cpuc, idx, event);
+
+       if (cpuc->excl_cntrs)
+               return intel_get_excl_constraints(cpuc, event, idx, c);
+
+       return c;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+               struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xlo, *xl;
+       unsigned long flags = 0; /* keep compiler happy */
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+
+       WARN_ON_ONCE(!excl_cntrs);
+
+       if (!excl_cntrs)
+               return;
+
+       xl = &excl_cntrs->states[tid];
+       xlo = &excl_cntrs->states[o_tid];
+
+       /*
+        * put_constraint may be called from x86_schedule_events()
+        * which already has the lock held so here make locking
+        * conditional
+        */
+       if (!xl->sched_started)
+               raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+
+       /*
+        * if event was actually assigned, then mark the
+        * counter state as unused now
+        */
+       if (hwc->idx >= 0)
+               xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+       if (!xl->sched_started)
+               raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+}
+
+static void
  intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
                                         struct perf_event *event)
  {
@@ -1883,7 +2131,57 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
  static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
                                         struct perf_event *event)
  {
+       struct event_constraint *c = event->hw.constraint;
+
         intel_put_shared_regs_event_constraints(cpuc, event);
+
+       /*
+        * is PMU has exclusive counter restrictions, then
+        * all events are subject to and must call the
+        * put_excl_constraints() routine
+        */
+       if (c && cpuc->excl_cntrs)
+               intel_put_excl_constraints(cpuc, event);
+
+       /* cleanup dynamic constraint */
+       if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
+               event->hw.constraint = NULL;
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
+                                   struct perf_event *event, int cntr)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct event_constraint *c = event->hw.constraint;
+       struct intel_excl_states *xlo, *xl;
+       int tid = cpuc->excl_thread_id;
+       int o_tid = 1 - tid;
+       int is_excl;
+
+       if (cpuc->is_fake || !c)
+               return;
+
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+               return;
+
+       WARN_ON_ONCE(!excl_cntrs);
+
+       if (!excl_cntrs)
+               return;
+
+       xl = &excl_cntrs->states[tid];
+       xlo = &excl_cntrs->states[o_tid];
+
+       WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
+
+       if (cntr >= 0) {
+               if (is_excl)
+                       xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+               else
+                       xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+       }
  }
  
  static void intel_pebs_aliases_core2(struct perf_event *event)
@@ -2349,6 +2647,13 @@ static void intel_pmu_cpu_dying(int cpu)
                 cpuc->constraint_list = NULL;
         }
  
+       c = cpuc->excl_cntrs;
+       if (c) {
+               if (c->core_id == -1 || --c->refcnt == 0)
+                       kfree(c);
+               cpuc->excl_cntrs = NULL;
+       }
+
         fini_debug_store_on_cpu(cpu);
  }
author	Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
	Mon, 17 Nov 2014 19:06:58 +0000 (20:06 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 2 Apr 2015 15:33:12 +0000 (17:33 +0200)
arch/x86/kernel/cpu/perf_event.c		patch \| blob \| history
arch/x86/kernel/cpu/perf_event.h		patch \| blob \| history
arch/x86/kernel/cpu/perf_event_intel.c		patch \| blob \| history