From 410136f5dd96b6013fe6d1011b523b1c247e1ccb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:49 +0100 Subject: [PATCH] tools/perf/stat: Add event unit and scale support This patch adds perf stat support for handling event units and scales as exported by the kernel. The kernel can export PMU events actual unit and scaling factor via sysfs: $ ls -1 /sys/devices/power/events/energy-* /sys/devices/power/events/energy-cores /sys/devices/power/events/energy-cores.scale /sys/devices/power/events/energy-cores.unit /sys/devices/power/events/energy-pkg /sys/devices/power/events/energy-pkg.scale /sys/devices/power/events/energy-pkg.unit $ cat /sys/devices/power/events/energy-cores.scale 2.3283064365386962890625e-10 $ cat cat /sys/devices/power/events/energy-cores.unit Joules This patch modifies the pmu event alias code to check for the presence of the .unit and .scale files to load the corresponding values. They are then used by perf stat transparently: # perf stat -a -e power/energy-pkg/,power/energy-cores/,cycles -I 1000 sleep 1000 # time counts unit events 1.000214717 3.07 Joules power/energy-pkg/ [100.00%] 1.000214717 0.53 Joules power/energy-cores/ 1.000214717 12965028 cycles [100.00%] 2.000749289 3.01 Joules power/energy-pkg/ 2.000749289 0.52 Joules power/energy-cores/ 2.000749289 15817043 cycles When the event does not have an explicit unit exported by the kernel, nothing is printed. In csv output mode, there will be an empty field. Special thanks to Jiri for providing the supporting code in the parser to trigger reading of the scale and unit files. Signed-off-by: Stephane Eranian Reviewed-by: Jiri Olsa Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: maria.n.dimakopoulou@gmail.com Cc: acme@redhat.com Link: http://lkml.kernel.org/r/1384275531-10892-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 114 +++++++++++++++++++++++++--------- tools/perf/util/evsel.c | 2 + tools/perf/util/evsel.h | 3 + tools/perf/util/parse-events.c | 28 ++++++--- tools/perf/util/pmu.c | 138 +++++++++++++++++++++++++++++++++++++++-- tools/perf/util/pmu.h | 3 +- 6 files changed, 244 insertions(+), 44 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee0d565..dab98b5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -138,6 +138,7 @@ static const char *post_cmd = NULL; static bool sync_run = false; static unsigned int interval = 0; static unsigned int initial_delay = 0; +static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static struct timespec ref_time; static struct cpu_map *aggr_map; @@ -461,17 +462,17 @@ static void print_interval(void) if (num_print_interval == 0 && !csv_output) { switch (aggr_mode) { case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts events\n"); + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; case AGGR_CORE: - fprintf(output, "# time core cpus counts events\n"); + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU counts events\n"); + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); break; case AGGR_GLOBAL: default: - fprintf(output, "# time counts events\n"); + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); } } @@ -516,6 +517,7 @@ static int __run_perf_stat(int argc, const char **argv) unsigned long long t0, t1; struct perf_evsel *counter; struct timespec ts; + size_t l; int status = 0; const bool forks = (argc > 0); @@ -565,6 +567,10 @@ static int __run_perf_stat(int argc, const char **argv) return -1; } counter->supported = true; + + l = strlen(counter->unit); + if (l > unit_width) + unit_width = l; } if (perf_evlist__apply_filters(evsel_list)) { @@ -704,14 +710,25 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; - const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; + const char *fmt_v, *fmt_n; char name[25]; + fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; + fmt_n = csv_output ? "%s" : "%-25s"; + aggr_printout(evsel, cpu, nr); scnprintf(name, sizeof(name), "%s%s", perf_evsel__name(evsel), csv_output ? "" : " (msec)"); - fprintf(output, fmt, msecs, csv_sep, name); + + fprintf(output, fmt_v, msecs, csv_sep); + + if (csv_output) + fprintf(output, "%s%s", evsel->unit, csv_sep); + else + fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); + + fprintf(output, fmt_n, name); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -908,21 +925,31 @@ static void print_ll_cache_misses(int cpu, static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0, total2; + double sc = evsel->scale; const char *fmt; - if (csv_output) - fmt = "%.0f%s%s"; - else if (big_num) - fmt = "%'18.0f%s%-25s"; - else - fmt = "%18.0f%s%-25s"; + if (csv_output) { + fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + } else { + if (big_num) + fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + } aggr_printout(evsel, cpu, nr); if (aggr_mode == AGGR_GLOBAL) cpu = 0; - fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel)); + fprintf(output, fmt, avg, csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + evsel->unit, csv_sep); + + fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -941,7 +968,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) if (total && avg) { ratio = total / avg; - fprintf(output, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(output, "\n"); + if (aggr_mode == AGGR_NONE) + fprintf(output, " "); + fprintf(output, " # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -1061,6 +1091,7 @@ static void print_aggr(char *prefix) { struct perf_evsel *counter; int cpu, cpu2, s, s2, id, nr; + double uval; u64 ena, run, val; if (!(aggr_map || aggr_get_id)) @@ -1087,11 +1118,17 @@ static void print_aggr(char *prefix) if (run == 0 || ena == 0) { aggr_printout(counter, id, nr); - fprintf(output, "%*s%s%*s", + fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, + csv_sep); + + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(output, "%*s", + csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) @@ -1101,11 +1138,12 @@ static void print_aggr(char *prefix) fputc('\n', output); continue; } + uval = val * counter->scale; if (nsec_counter(counter)) - nsec_printout(id, nr, counter, val); + nsec_printout(id, nr, counter, uval); else - abs_printout(id, nr, counter, val); + abs_printout(id, nr, counter, uval); if (!csv_output) { print_noise(counter, 1.0); @@ -1128,16 +1166,21 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; + double uval; if (prefix) fprintf(output, "%s", prefix); if (scaled == -1) { - fprintf(output, "%*s%s%*s", + fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, + csv_sep); + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + fprintf(output, "%*s", + csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) @@ -1147,10 +1190,12 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) return; } + uval = avg * counter->scale; + if (nsec_counter(counter)) - nsec_printout(-1, 0, counter, avg); + nsec_printout(-1, 0, counter, uval); else - abs_printout(-1, 0, counter, avg); + abs_printout(-1, 0, counter, uval); print_noise(counter, avg); @@ -1177,6 +1222,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) static void print_counter(struct perf_evsel *counter, char *prefix) { u64 ena, run, val; + double uval; int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { @@ -1188,14 +1234,20 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s%*s", + fprintf(output, "CPU%*d%s%*s%s", csv_output ? 0 : -4, perf_evsel__cpus(counter)->map[cpu], csv_sep, csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, - perf_evsel__name(counter)); + csv_sep); + + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(output, "%*s", + csv_output ? 0 : -25, + perf_evsel__name(counter)); if (counter->cgrp) fprintf(output, "%s%s", @@ -1205,10 +1257,12 @@ static void print_counter(struct perf_evsel *counter, char *prefix) continue; } + uval = val * counter->scale; + if (nsec_counter(counter)) - nsec_printout(cpu, 0, counter, val); + nsec_printout(cpu, 0, counter, uval); else - abs_printout(cpu, 0, counter, val); + abs_printout(cpu, 0, counter, uval); if (!csv_output) { print_noise(counter, 1.0); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 46dd4c2..dad6492 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -162,6 +162,8 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->idx = idx; evsel->attr = *attr; evsel->leader = evsel; + evsel->unit = ""; + evsel->scale = 1.0; INIT_LIST_HEAD(&evsel->node); hists__init(&evsel->hists); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1ea7c92..8120eeb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -68,6 +68,8 @@ struct perf_evsel { u32 ids; struct hists hists; char *name; + double scale; + const char *unit; struct event_format *tp_format; union { void *priv; @@ -138,6 +140,7 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *perf_evsel__name(struct perf_evsel *evsel); + const char *perf_evsel__group_name(struct perf_evsel *evsel); int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6de6f89..969cb8f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -269,9 +269,10 @@ const char *event_type(int type) -static int __add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, - char *name, struct cpu_map *cpus) +static struct perf_evsel * +__add_event(struct list_head *list, int *idx, + struct perf_event_attr *attr, + char *name, struct cpu_map *cpus) { struct perf_evsel *evsel; @@ -279,19 +280,19 @@ static int __add_event(struct list_head *list, int *idx, evsel = perf_evsel__new_idx(attr, (*idx)++); if (!evsel) - return -ENOMEM; + return NULL; evsel->cpus = cpus; if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); - return 0; + return evsel; } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name) { - return __add_event(list, idx, attr, name, NULL); + return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -633,6 +634,9 @@ int parse_events_add_pmu(struct list_head *list, int *idx, { struct perf_event_attr attr; struct perf_pmu *pmu; + struct perf_evsel *evsel; + char *unit; + double scale; pmu = perf_pmu__find(name); if (!pmu) @@ -640,7 +644,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); - if (perf_pmu__check_alias(pmu, head_config)) + if (perf_pmu__check_alias(pmu, head_config, &unit, &scale)) return -EINVAL; /* @@ -652,8 +656,14 @@ int parse_events_add_pmu(struct list_head *list, int *idx, if (perf_pmu__config(pmu, &attr, head_config)) return -EINVAL; - return __add_event(list, idx, &attr, pmu_event_name(head_config), - pmu->cpus); + evsel = __add_event(list, idx, &attr, pmu_event_name(head_config), + pmu->cpus); + if (evsel) { + evsel->unit = unit; + evsel->scale = scale; + } + + return evsel ? 0 : -ENOMEM; } int parse_events__modifier_group(struct list_head *list, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c232d8d..56fc10a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,19 +1,23 @@ #include #include -#include #include #include #include #include "fs.h" +#include #include "util.h" #include "pmu.h" #include "parse-events.h" #include "cpumap.h" +#define UNIT_MAX_LEN 31 /* max length for event unit name */ + struct perf_pmu_alias { char *name; struct list_head terms; struct list_head list; + char unit[UNIT_MAX_LEN+1]; + double scale; }; struct perf_pmu_format { @@ -94,7 +98,80 @@ static int pmu_format(const char *name, struct list_head *format) return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) +static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name) +{ + struct stat st; + ssize_t sret; + char scale[128]; + int fd, ret = -1; + char path[PATH_MAX]; + char *lc; + + snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + if (fstat(fd, &st) < 0) + goto error; + + sret = read(fd, scale, sizeof(scale)-1); + if (sret < 0) + goto error; + + scale[sret] = '\0'; + /* + * save current locale + */ + lc = setlocale(LC_NUMERIC, NULL); + + /* + * force to C locale to ensure kernel + * scale string is converted correctly. + * kernel uses default C locale. + */ + setlocale(LC_NUMERIC, "C"); + + alias->scale = strtod(scale, NULL); + + /* restore locale */ + setlocale(LC_NUMERIC, lc); + + ret = 0; +error: + close(fd); + return ret; +} + +static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + ssize_t sret; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, alias->unit, UNIT_MAX_LEN); + if (sret < 0) + goto error; + + close(fd); + + alias->unit[sret] = '\0'; + + return 0; +error: + close(fd); + alias->unit[0] = '\0'; + return -1; +} + +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) { struct perf_pmu_alias *alias; char buf[256]; @@ -110,6 +187,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) return -ENOMEM; INIT_LIST_HEAD(&alias->terms); + alias->scale = 1.0; + alias->unit[0] = '\0'; + ret = parse_events_terms(&alias->terms, buf); if (ret) { free(alias); @@ -117,7 +197,14 @@ static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) } alias->name = strdup(name); + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + list_add_tail(&alias->list, list); + return 0; } @@ -129,6 +216,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; + size_t len; int ret = 0; event_dir = opendir(dir); @@ -143,13 +231,24 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (!strcmp(name, ".") || !strcmp(name, "..")) continue; + /* + * skip .unit and .scale info files + * parsed in perf_pmu__new_alias() + */ + len = strlen(name); + if (len > 5 && !strcmp(name + len - 5, ".unit")) + continue; + if (len > 6 && !strcmp(name + len - 6, ".scale")) + continue; + snprintf(path, PATH_MAX, "%s/%s", dir, name); ret = -EINVAL; file = fopen(path, "r"); if (!file) break; - ret = perf_pmu__new_alias(head, name, file); + + ret = perf_pmu__new_alias(head, dir, name, file); fclose(file); } @@ -508,16 +607,42 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, return NULL; } + +static int check_unit_scale(struct perf_pmu_alias *alias, + char **unit, double *scale) +{ + /* + * Only one term in event definition can + * define unit and scale, fail if there's + * more than one. + */ + if ((*unit && alias->unit) || + (*scale && alias->scale)) + return -EINVAL; + + if (alias->unit) + *unit = alias->unit; + + if (alias->scale) + *scale = alias->scale; + + return 0; +} + /* * Find alias in the terms list and replace it with the terms * defined for the alias */ -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, + char **unit, double *scale) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *unit = NULL; + *scale = 0; + list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); if (!alias) @@ -525,6 +650,11 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) ret = pmu_alias_terms(alias, &term->list); if (ret) return ret; + + ret = check_unit_scale(alias, unit, scale); + if (ret) + return ret; + list_del(&term->list); free(term); } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 1179b26..9183380 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -28,7 +28,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms); -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, + char **unit, double *scale); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, struct list_head *head_terms); int perf_pmu_wrap(void); -- 2.7.4