perf script: Add synthesized Intel PT power and ptwrite events
author Adrian Hunter <adrian.hunter@intel.com>
Fri, 30 Jun 2017 08:36:42 +0000 (11:36 +0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 30 Jun 2017 14:40:20 +0000 (11:40 -0300)
Add definitions for synthesized Intel PT events for power and ptwrite.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1498811802-2301-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-script.c
tools/perf/util/event.h

index e87b480bbdd00a355fb56a00f8516b45fa6d34b1..b458a0cc3544eef7b62d1eccc5ec3086bfa6a00c 100644 (file)
@@ -1203,10 +1203,122 @@ static void print_sample_bpf_output(struct perf_sample *sample)
                       (char *)(sample->raw_data));
 }
 
-static void print_sample_synth(struct perf_sample *sample __maybe_unused,
+/*
+ * Pad the output with spaces up to 'spacing' columns when 'len' characters
+ * have already been printed, so that subsequent fields line up.  No-op if
+ * nothing was printed (len <= 0) or the field is already wide enough.
+ */
+static void print_sample_spacing(int len, int spacing)
+{
+       if (len > 0 && len < spacing)
+               printf("%*s", spacing - len, "");
+}
+
+/* Intel PT synthesized-event fields are padded to a fixed 34-column width */
+static void print_sample_pt_spacing(int len)
+{
+       print_sample_spacing(len, 34);
+}
+
+/*
+ * Print a synthesized Intel PT PTWRITE sample: the IP flag and the
+ * little-endian 64-bit instruction payload, padded to a fixed width.
+ */
+static void print_sample_synth_ptwrite(struct perf_sample *sample)
+{
+       struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
+       int len;
+
+       /* Silently drop samples whose raw data is too small for the record */
+       if (perf_sample__bad_synth_size(sample, *data))
+               return;
+
+       len = printf(" IP: %u payload: %#" PRIx64 " ",
+                    data->ip, le64_to_cpu(data->payload));
+       print_sample_pt_spacing(len);
+}
+
+/* Print a synthesized Intel PT MWAIT sample: the instruction's hints and extensions */
+static void print_sample_synth_mwait(struct perf_sample *sample)
+{
+       struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
+       int len;
+
+       /* Silently drop samples whose raw data is too small for the record */
+       if (perf_sample__bad_synth_size(sample, *data))
+               return;
+
+       len = printf(" hints: %#x extensions: %#x ",
+                    data->hints, data->extensions);
+       print_sample_pt_spacing(len);
+}
+
+/* Print a synthesized Intel PT PWRE (power entry) sample: hw flag, C-state and sub C-state */
+static void print_sample_synth_pwre(struct perf_sample *sample)
+{
+       struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
+       int len;
+
+       /* Silently drop samples whose raw data is too small for the record */
+       if (perf_sample__bad_synth_size(sample, *data))
+               return;
+
+       len = printf(" hw: %u cstate: %u sub-cstate: %u ",
+                    data->hw, data->cstate, data->subcstate);
+       print_sample_pt_spacing(len);
+}
+
+/* Print a synthesized Intel PT EXSTOP (execution stopped) sample: just the IP flag */
+static void print_sample_synth_exstop(struct perf_sample *sample)
+{
+       struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
+       int len;
+
+       /* Silently drop samples whose raw data is too small for the record */
+       if (perf_sample__bad_synth_size(sample, *data))
+               return;
+
+       len = printf(" IP: %u ", data->ip);
+       print_sample_pt_spacing(len);
+}
+
+/* Print a synthesized Intel PT PWRX (power exit) sample: deepest/last C-state and wake reason */
+static void print_sample_synth_pwrx(struct perf_sample *sample)
+{
+       struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
+       int len;
+
+       /* Silently drop samples whose raw data is too small for the record */
+       if (perf_sample__bad_synth_size(sample, *data))
+               return;
+
+       len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ",
+                    data->deepest_cstate, data->last_cstate,
+                    data->wake_reason);
+       print_sample_pt_spacing(len);
+}
+
+/*
+ * Print a synthesized Intel PT CBR (core-to-bus ratio) sample: the ratio,
+ * the effective frequency in MHz and, when the maximum non-turbo ratio is
+ * known, the ratio as a percentage of it.
+ */
+static void print_sample_synth_cbr(struct perf_sample *sample)
+{
+       struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
+       unsigned int percent, freq;
+       int len;
+
+       /* Silently drop samples whose raw data is too small for the record */
+       if (perf_sample__bad_synth_size(sample, *data))
+               return;
+
+       /* freq is little-endian; +500 rounds to the nearest MHz (presumably kHz input — confirm) */
+       freq = (le32_to_cpu(data->freq) + 500) / 1000;
+       len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq);
+       if (data->max_nonturbo) {
+               /* +5 before the /10 rounds the percentage to the nearest unit */
+               percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10;
+               len += printf("(%3u%%) ", percent);
+       }
+       print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth(struct perf_sample *sample,
                               struct perf_evsel *evsel)
 {
        switch (evsel->attr.config) {
+       case PERF_SYNTH_INTEL_PTWRITE:
+               print_sample_synth_ptwrite(sample);
+               break;
+       case PERF_SYNTH_INTEL_MWAIT:
+               print_sample_synth_mwait(sample);
+               break;
+       case PERF_SYNTH_INTEL_PWRE:
+               print_sample_synth_pwre(sample);
+               break;
+       case PERF_SYNTH_INTEL_EXSTOP:
+               print_sample_synth_exstop(sample);
+               break;
+       case PERF_SYNTH_INTEL_PWRX:
+               print_sample_synth_pwrx(sample);
+               break;
+       case PERF_SYNTH_INTEL_CBR:
+               print_sample_synth_cbr(sample);
+               break;
        default:
                break;
        }
index 855733c2adcf4fe0d94b144cb03d7e703439dcbe..9967c87af7a665c6a21d139c27832547ef04c790 100644 (file)
@@ -255,6 +255,124 @@ enum auxtrace_error_type {
 /* Attribute type for custom synthesized events */
 #define PERF_TYPE_SYNTH                (INT_MAX + 1U)
 
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+       PERF_SYNTH_INTEL_PTWRITE,       /* ptwrite instruction payload */
+       PERF_SYNTH_INTEL_MWAIT,         /* mwait hints and extensions */
+       PERF_SYNTH_INTEL_PWRE,          /* power entry: C-state entry */
+       PERF_SYNTH_INTEL_EXSTOP,        /* execution stopped */
+       PERF_SYNTH_INTEL_PWRX,          /* power exit: wake from C-state */
+       PERF_SYNTH_INTEL_CBR,           /* core-to-bus ratio: frequency change */
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data().  It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
+ */
+
+/* Synthesized PTWRITE event: flags plus the ptwrite instruction's payload */
+struct perf_synth_intel_ptwrite {
+       u32 padding;            /* 4 bytes so raw_data - 4 is 8-byte aligned */
+       union {
+               struct {
+                       u32     ip              :  1,
+                               reserved        : 31;
+               };
+               u32     flags;  /* all flag bits as a single word */
+       };
+       u64     payload;        /* stored little-endian, see le64_to_cpu() in the printer */
+};
+
+/* Synthesized MWAIT event: the mwait instruction's hints and extensions */
+struct perf_synth_intel_mwait {
+       u32 padding;            /* 4 bytes so raw_data - 4 is 8-byte aligned */
+       u32 reserved;
+       union {
+               struct {
+                       u64     hints           :  8,
+                               reserved1       : 24,
+                               extensions      :  2,
+                               reserved2       : 30;
+               };
+               u64     payload;        /* all bits as a single word */
+       };
+};
+
+/* Synthesized PWRE (power entry) event: hw flag plus C-state / sub C-state entered */
+struct perf_synth_intel_pwre {
+       u32 padding;            /* 4 bytes so raw_data - 4 is 8-byte aligned */
+       u32 reserved;
+       union {
+               struct {
+                       u64     reserved1       :  7,
+                               hw              :  1,
+                               subcstate       :  4,
+                               cstate          :  4,
+                               reserved2       : 48;
+               };
+               u64     payload;        /* all bits as a single word */
+       };
+};
+
+/* Synthesized EXSTOP (execution stopped) event: IP flag only */
+struct perf_synth_intel_exstop {
+       u32 padding;            /* 4 bytes so raw_data - 4 is 8-byte aligned */
+       union {
+               struct {
+                       u32     ip              :  1,
+                               reserved        : 31;
+               };
+               u32     flags;  /* all flag bits as a single word */
+       };
+};
+
+/* Synthesized PWRX (power exit) event: C-states involved and the wake reason */
+struct perf_synth_intel_pwrx {
+       u32 padding;            /* 4 bytes so raw_data - 4 is 8-byte aligned */
+       u32 reserved;
+       union {
+               struct {
+                       u64     deepest_cstate  :  4,
+                               last_cstate     :  4,
+                               wake_reason     :  4,
+                               reserved1       : 52;
+               };
+               u64     payload;        /* all bits as a single word */
+       };
+};
+
+/* Synthesized CBR (core-to-bus ratio) event: new ratio and frequency */
+struct perf_synth_intel_cbr {
+       u32 padding;            /* 4 bytes so raw_data - 4 is 8-byte aligned */
+       union {
+               struct {
+                       u32     cbr             :  8,
+                               reserved1       :  8,
+                               max_nonturbo    :  8,   /* max non-turbo ratio; 0 if unknown */
+                               reserved2       :  8;
+               };
+               u32     flags;  /* all bits as a single word */
+       };
+       u32 freq;               /* stored little-endian; printed as MHz after /1000 — presumably kHz */
+       u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+/* Return an 8-byte-aligned pointer to the synth record within a sample's raw data */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+       return sample->raw_data - 4;
+}
+
+/* Inverse of perf_sample__synth_ptr(): step past the 4 padding bytes to the raw data */
+static inline void *perf_synth__raw_data(void *p)
+{
+       return p + 4;
+}
+
+/* Raw size of a synth record 'd': its struct size minus the 4 padding bytes */
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+/* True if sample 's' has too little raw data to contain synth record 'd' */
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of