perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
author Kan Liang <kan.liang@linux.intel.com>
Mon, 12 Apr 2021 14:31:01 +0000 (07:31 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Mon, 19 Apr 2021 18:03:29 +0000 (20:03 +0200)
Currently, hardware events and hardware cache events have special perf
types, PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE. The two types don't
pass the PMU type in the user interface. For a hybrid system, the perf
subsystem doesn't know which PMU the events belong to. The first capable
PMU will always be assigned to the events. The events never get a chance
to run on the other capable PMUs.

Extend the two types to become PMU aware types. The PMU type ID is
stored at attr.config[63:32].

Add a new PMU capability, PERF_PMU_CAP_EXTENDED_HW_TYPE, to indicate a
PMU which supports the extended PERF_TYPE_HARDWARE and
PERF_TYPE_HW_CACHE.

The PMU type is only required when searching for a specific PMU. The
PMU-specific code is only interested in the 'real' config value, which
is stored in the low 32 bits of event->attr.config. Update
event->attr.config in the generic code, so the PMU-specific code doesn't
need to calculate it separately.

If a user specifies a PMU type, but the PMU doesn't support the extended
type, error out.

If an event cannot be initialized in a PMU specified by a user, error
out immediately. Perf should not try to open it on other PMUs.

The new PMU capability is only set for the X86 hybrid PMUs for now.
Other architectures, e.g., ARM, may need it as well. Support on ARM
may be implemented separately later.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1618237865-33448-22-git-send-email-kan.liang@linux.intel.com
arch/x86/events/core.c
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/events/core.c

index 4f6595e..3fe66b7 100644 (file)
@@ -2173,6 +2173,7 @@ static int __init init_hw_perf_events(void)
                        hybrid_pmu->pmu.type = -1;
                        hybrid_pmu->pmu.attr_update = x86_pmu.attr_update;
                        hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
+                       hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE;
 
                        err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name,
                                                (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
index 61b3851..a763928 100644 (file)
@@ -260,15 +260,16 @@ struct perf_event;
 /**
  * pmu::capabilities flags
  */
-#define PERF_PMU_CAP_NO_INTERRUPT              0x01
-#define PERF_PMU_CAP_NO_NMI                    0x02
-#define PERF_PMU_CAP_AUX_NO_SG                 0x04
-#define PERF_PMU_CAP_EXTENDED_REGS             0x08
-#define PERF_PMU_CAP_EXCLUSIVE                 0x10
-#define PERF_PMU_CAP_ITRACE                    0x20
-#define PERF_PMU_CAP_HETEROGENEOUS_CPUS                0x40
-#define PERF_PMU_CAP_NO_EXCLUDE                        0x80
-#define PERF_PMU_CAP_AUX_OUTPUT                        0x100
+#define PERF_PMU_CAP_NO_INTERRUPT              0x0001
+#define PERF_PMU_CAP_NO_NMI                    0x0002
+#define PERF_PMU_CAP_AUX_NO_SG                 0x0004
+#define PERF_PMU_CAP_EXTENDED_REGS             0x0008
+#define PERF_PMU_CAP_EXCLUSIVE                 0x0010
+#define PERF_PMU_CAP_ITRACE                    0x0020
+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS                0x0040
+#define PERF_PMU_CAP_NO_EXCLUDE                        0x0080
+#define PERF_PMU_CAP_AUX_OUTPUT                        0x0100
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE          0x0200
 
 struct perf_output_handle;
 
index 0b58970..e54e639 100644 (file)
@@ -38,6 +38,21 @@ enum perf_type_id {
 };
 
 /*
+ * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ * PERF_TYPE_HARDWARE:                 0xEEEEEEEE000000AA
+ *                                     AA: hardware event ID
+ *                                     EEEEEEEE: PMU type ID
+ * PERF_TYPE_HW_CACHE:                 0xEEEEEEEE00DDCCBB
+ *                                     BB: hardware cache ID
+ *                                     CC: hardware cache op ID
+ *                                     DD: hardware cache op result ID
+ *                                     EEEEEEEE: PMU type ID
+ * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ */
+#define PERF_PMU_TYPE_SHIFT            32
+#define PERF_HW_EVENT_MASK             0xffffffff
+
+/*
  * Generalized performance event event_id types, used by the
  * attr.event_id parameter of the sys_perf_event_open()
  * syscall:
index 6f0723c..928b166 100644 (file)
@@ -11220,6 +11220,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 
 static struct pmu *perf_init_event(struct perf_event *event)
 {
+       bool extended_type = false;
        int idx, type, ret;
        struct pmu *pmu;
 
@@ -11238,16 +11239,27 @@ static struct pmu *perf_init_event(struct perf_event *event)
         * are often aliases for PERF_TYPE_RAW.
         */
        type = event->attr.type;
-       if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
-               type = PERF_TYPE_RAW;
+       if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
+               type = event->attr.config >> PERF_PMU_TYPE_SHIFT;
+               if (!type) {
+                       type = PERF_TYPE_RAW;
+               } else {
+                       extended_type = true;
+                       event->attr.config &= PERF_HW_EVENT_MASK;
+               }
+       }
 
 again:
        rcu_read_lock();
        pmu = idr_find(&pmu_idr, type);
        rcu_read_unlock();
        if (pmu) {
+               if (event->attr.type != type && type != PERF_TYPE_RAW &&
+                   !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE))
+                       goto fail;
+
                ret = perf_try_init_event(pmu, event);
-               if (ret == -ENOENT && event->attr.type != type) {
+               if (ret == -ENOENT && event->attr.type != type && !extended_type) {
                        type = event->attr.type;
                        goto again;
                }
@@ -11268,6 +11280,7 @@ again:
                        goto unlock;
                }
        }
+fail:
        pmu = ERR_PTR(-ENOENT);
 unlock:
        srcu_read_unlock(&pmus_srcu, idx);