perf/x86/intel: Hybrid PMU support for perf capabilities
author: Kan Liang <kan.liang@linux.intel.com>
Mon, 12 Apr 2021 14:30:44 +0000 (07:30 -0700)
committer: Peter Zijlstra <peterz@infradead.org>
Mon, 19 Apr 2021 18:03:24 +0000 (20:03 +0200)
Some platforms, e.g. Alder Lake, have a hybrid architecture. Although most
PMU capabilities are the same, there are still some unique PMU
capabilities for different hybrid PMUs. Perf should register a dedicated
pmu for each hybrid PMU.

Add a new struct x86_hybrid_pmu, which saves the dedicated pmu and
capabilities for each hybrid PMU.

The architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates the
architecture features which are available on all hybrid PMUs. The
architecture features are stored in the global x86_pmu.intel_cap.

For Alder Lake, the model-specific features are perf metrics and
PEBS-via-PT. The corresponding bits of the global x86_pmu.intel_cap
should be 0 for these two features. Perf should not use the global
intel_cap to check the features on a hybrid system.
Add a dedicated intel_cap in the x86_hybrid_pmu to store the
model-specific capabilities. Use the dedicated intel_cap to replace
the global intel_cap for these two features. The dedicated intel_cap
will be set in the following "Add Alder Lake Hybrid support" patch.

Add is_hybrid() to distinguish a hybrid system. ADL may have an
alternative configuration. With that configuration, the
X86_FEATURE_HYBRID_CPU is not set. Perf cannot rely on the feature bit.
Add a new static_key_false, perf_is_hybrid, to indicate a hybrid system.
It will be assigned in the following "Add Alder Lake Hybrid support"
patch as well.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1618237865-33448-5-git-send-email-kan.liang@linux.intel.com
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/msr-index.h

index a49a8bd..7fc2001 100644 (file)
@@ -54,6 +54,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 
 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);
 
 /*
  * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
@@ -1105,8 +1106,9 @@ static void del_nr_metric_event(struct cpu_hw_events *cpuc,
 static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
                         int max_count, int n)
 {
+       union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap);
 
-       if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+       if (intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
                return -EINVAL;
 
        if (n >= max_count + cpuc->n_metric)
@@ -1581,6 +1583,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 static void x86_pmu_del(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap);
        int i;
 
        /*
@@ -1620,7 +1623,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        }
        cpuc->event_constraint[i-1] = NULL;
        --cpuc->n_events;
-       if (x86_pmu.intel_cap.perf_metrics)
+       if (intel_cap.perf_metrics)
                del_nr_metric_event(cpuc, event);
 
        perf_event_update_userpage(event);
index f116c63..dc9e2fb 100644 (file)
@@ -3646,6 +3646,12 @@ static inline bool is_mem_loads_aux_event(struct perf_event *event)
        return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
 }
 
+static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
+{
+       union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap);
+
+       return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
+}
 
 static int intel_pmu_hw_config(struct perf_event *event)
 {
@@ -3712,7 +3718,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
         * with a slots event as group leader. When the slots event
         * is used in a metrics group, it too cannot support sampling.
         */
-       if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
+       if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
                if (event->attr.config1 || event->attr.config2)
                        return -EINVAL;
 
@@ -4219,8 +4225,16 @@ static void intel_pmu_cpu_starting(int cpu)
        if (x86_pmu.version > 1)
                flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
 
-       /* Disable perf metrics if any added CPU doesn't support it. */
-       if (x86_pmu.intel_cap.perf_metrics) {
+       /*
+        * Disable perf metrics if any added CPU doesn't support it.
+        *
+        * Turn off the check for a hybrid architecture, because the
+        * architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates
+        * the architecture features. The perf metrics is a model-specific
+        * feature for now. The corresponding bit should always be 0 on
+        * a hybrid platform, e.g., Alder Lake.
+        */
+       if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
                union perf_capabilities perf_cap;
 
                rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
@@ -5770,7 +5784,7 @@ __init int intel_pmu_init(void)
                pr_cont("full-width counters, ");
        }
 
-       if (x86_pmu.intel_cap.perf_metrics)
+       if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
                x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
 
        return 0;
index 1bfea8c..9328aa1 100644 (file)
@@ -2205,7 +2205,7 @@ void __init intel_ds_init(void)
                        }
                        pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
 
-                       if (x86_pmu.intel_cap.pebs_output_pt_available) {
+                       if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
                                pr_cont("PEBS-via-PT, ");
                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
                        }
index da947d3..85910e2 100644 (file)
@@ -631,6 +631,29 @@ enum {
        x86_lbr_exclusive_max,
 };
 
+struct x86_hybrid_pmu {
+       struct pmu                      pmu;
+       union perf_capabilities         intel_cap;
+};
+
+static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
+{
+       return container_of(pmu, struct x86_hybrid_pmu, pmu);
+}
+
+extern struct static_key_false perf_is_hybrid;
+#define is_hybrid()            static_branch_unlikely(&perf_is_hybrid)
+
+#define hybrid(_pmu, _field)                           \
+(*({                                                   \
+       typeof(&x86_pmu._field) __Fp = &x86_pmu._field; \
+                                                       \
+       if (is_hybrid() && (_pmu))                      \
+               __Fp = &hybrid_pmu(_pmu)->_field;       \
+                                                       \
+       __Fp;                                           \
+}))
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -817,6 +840,16 @@ struct x86_pmu {
        int (*check_period) (struct perf_event *event, u64 period);
 
        int (*aux_output_match) (struct perf_event *event);
+
+       /*
+        * Hybrid support
+        *
+        * Most PMU capabilities are the same among different hybrid PMUs.
+        * The global x86_pmu saves the architecture capabilities, which
+        * are available for all PMUs. The hybrid_pmu only includes the
+        * unique capabilities.
+        */
+       struct x86_hybrid_pmu           *hybrid_pmu;
 };
 
 struct x86_perf_task_context_opt {
index 546d6ec..163f5d2 100644 (file)
 #define MSR_PEBS_DATA_CFG              0x000003f2
 #define MSR_IA32_DS_AREA               0x00000600
 #define MSR_IA32_PERF_CAPABILITIES     0x00000345
+#define PERF_CAP_METRICS_IDX           15
+#define PERF_CAP_PT_IDX                        16
+
 #define MSR_PEBS_LD_LAT_THRESHOLD      0x000003f6
 
 #define MSR_IA32_RTIT_CTL              0x00000570