perf/x86/intel: Fix PEBS memory access info encoding for ADL

author Kan Liang <kan.liang@linux.intel.com>

Wed, 29 Jun 2022 15:08:39 +0000 (08:08 -0700)

committer Peter Zijlstra <peterz@infradead.org>

Mon, 4 Jul 2022 07:23:09 +0000 (09:23 +0200)
author Kan Liang <kan.liang@linux.intel.com>
Wed, 29 Jun 2022 15:08:39 +0000 (08:08 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Mon, 4 Jul 2022 07:23:09 +0000 (09:23 +0200)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c

index 45024ab..07d4a5f 100644 (file)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
  {
         struct event_constraint *c;
  
+       c = intel_get_event_constraints(cpuc, idx, event);
+
         /*
          * :ppp means to do reduced skid PEBS,
          * which is available on PMC0 and fixed counter 0.
@@ -4153,8 +4155,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                 return &counter0_constraint;
         }
  
-       c = intel_get_event_constraints(cpuc, idx, event);
-
         return c;
  }
  
@@ -6242,6 +6242,7 @@ __init int intel_pmu_init(void)
                 x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
                 x86_pmu.lbr_pt_coexist = true;
                 intel_pmu_pebs_data_source_skl(false);
+               x86_pmu.pebs_latency_data = adl_latency_data_small;
                 x86_pmu.num_topdown_events = 8;
                 x86_pmu.update_topdown_event = adl_update_topdown_event;
                 x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c

index 376cc3d..de84385 100644 (file)
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -171,6 +171,49 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
         return dse.val;
  }
  
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+       /*
+        * TLB access (flags are OR-ed into *val; callers pick the status bit)
+        * 0 = did not miss 2nd level TLB
+        * 1 = missed 2nd level TLB
+        */
+       if (tlb)
+               *val |= P(TLB, MISS) | P(TLB, L2);
+       else
+               *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+       /* locked prefix; nothing is added to *val when lock is false */
+       if (lock)
+               *val |= P(LOCK, LOCKED);
+}
+
+/* Retrieve the latency data for e-core of ADL; warns once on a hybrid_big PMU */
+u64 adl_latency_data_small(struct perf_event *event, u64 status)
+{
+       union intel_x86_pebs_dse dse;
+       u64 val;
+
+       WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
+
+       dse.val = status;
+
+       val = pebs_data_source[dse.ld_dse];
+
+       /*
+        * For the atom core on ADL, bit 4: lock, bit 5: TLB access, i.e. the
+        * reverse of the field names: ld_locked is the tlb arg, ld_stlb_miss the lock.
+        */
+       pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
+
+       if (dse.ld_data_blk)
+               val |= P(BLK, DATA);
+       else
+               val |= P(BLK, NA);
+
+       return val;
+}
+
  static u64 load_latency_data(u64 status)
  {
         union intel_x86_pebs_dse dse;
@@ -190,21 +233,8 @@ static u64 load_latency_data(u64 status)
                 val |= P(TLB, NA) | P(LOCK, NA);
                 return val;
         }
-       /*
-        * bit 4: TLB access
-        * 0 = did not miss 2nd level TLB
-        * 1 = missed 2nd level TLB
-        */
-       if (dse.ld_stlb_miss)
-               val |= P(TLB, MISS) | P(TLB, L2);
-       else
-               val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
  
-       /*
-        * bit 5: locked prefix
-        */
-       if (dse.ld_locked)
-               val |= P(LOCK, LOCKED);
+       pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
  
         /*
          * Ice Lake and earlier models do not support block infos.
@@ -245,21 +275,7 @@ static u64 store_latency_data(u64 status)
          */
         val = pebs_data_source[dse.st_lat_dse];
  
-       /*
-        * bit 4: TLB access
-        * 0 = did not miss 2nd level TLB
-        * 1 = missed 2nd level TLB
-        */
-       if (dse.st_lat_stlb_miss)
-               val |= P(TLB, MISS) | P(TLB, L2);
-       else
-               val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
-
-       /*
-        * bit 5: locked prefix
-        */
-       if (dse.st_lat_locked)
-               val |= P(LOCK, LOCKED);
+       pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
  
         val |= P(BLK, NA);
  
@@ -781,8 +797,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
  
  struct event_constraint intel_grt_pebs_event_constraints[] = {
         /* Allow all events as PEBS with no flags */
-       INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
-       INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
+       INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
+       INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
         EVENT_CONSTRAINT_END
  };
  
@@ -1446,6 +1462,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
                 val = load_latency_data(aux);
         else if (fl & PERF_X86_EVENT_PEBS_STLAT)
                 val = store_latency_data(aux);
+       else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
+               val = x86_pmu.pebs_latency_data(event, aux);
         else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
                 val = precise_datala_hsw(event, aux);
         else if (fst)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h

index 1ca6200..2d11445 100644 (file)
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -84,6 +84,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
  #define PERF_X86_EVENT_TOPDOWN         0x04000 /* Count Topdown slots/metrics events */
  #define PERF_X86_EVENT_PEBS_STLAT      0x08000 /* st+stlat data address sampling */
  #define PERF_X86_EVENT_AMD_BRS         0x10000 /* AMD Branch Sampling */
+#define PERF_X86_EVENT_PEBS_LAT_HYBRID 0x20000 /* ld and st lat for hybrid */
  
  static inline bool is_topdown_count(struct perf_event *event)
  {
@@ -461,6 +462,10 @@ struct cpu_hw_events {
         __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
  
+#define INTEL_HYBRID_LAT_CONSTRAINT(c, n)      \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
+
  /* Event constraint, but match on all event flags too. */
  #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
         EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -826,6 +831,7 @@ struct x86_pmu {
         void            (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
         struct event_constraint *pebs_constraints;
         void            (*pebs_aliases)(struct perf_event *event);
+       u64             (*pebs_latency_data)(struct perf_event *event, u64 status);
         unsigned long   large_pebs_flags;
         u64             rtm_abort_event;
  
@@ -1393,6 +1399,8 @@ void intel_pmu_disable_bts(void);
  
  int intel_pmu_drain_bts_buffer(void);
  
+u64 adl_latency_data_small(struct perf_event *event, u64 status);
+
  extern struct event_constraint intel_core2_pebs_event_constraints[];
  
  extern struct event_constraint intel_atom_pebs_event_constraints[];
author	Kan Liang <kan.liang@linux.intel.com>
	Wed, 29 Jun 2022 15:08:39 +0000 (08:08 -0700)
committer	Peter Zijlstra <peterz@infradead.org>
	Mon, 4 Jul 2022 07:23:09 +0000 (09:23 +0200)
arch/x86/events/intel/core.c		patch \| blob \| history
arch/x86/events/intel/ds.c		patch \| blob \| history
arch/x86/events/perf_event.h		patch \| blob \| history