perf stat: Merge event counts from all hybrid PMUs
author Zhengjun Xing <zhengjun.xing@linux.intel.com>
Fri, 22 Apr 2022 06:56:34 +0000 (14:56 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 22 Apr 2022 17:23:35 +0000 (14:23 -0300)
For hybrid events, by default stat aggregates and reports the event counts
per pmu.

  # ./perf stat -e cycles -a  sleep 1

   Performance counter stats for 'system wide':

      14,066,877,268      cpu_core/cycles/
       6,814,443,147      cpu_atom/cycles/

         1.002760625 seconds time elapsed

Sometimes, it's also useful to aggregate event counts from all PMUs.
Create a new option '--hybrid-merge' to enable that behavior and report
the counts without PMUs.

  # ./perf stat -e cycles -a --hybrid-merge  sleep 1

   Performance counter stats for 'system wide':

      20,732,982,512      cycles

         1.002776793 seconds time elapsed

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220422065635.767648-2-zhengjun.xing@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-stat.txt
tools/perf/builtin-stat.c
tools/perf/util/stat-display.c
tools/perf/util/stat.h

index c06c341..8d1cde0 100644 (file)
@@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
 2. Aliases, which are listed immediately after the Kernel PMU events
    by perf list, are used.
 
+--hybrid-merge::
+Merge the hybrid event counts from all PMUs.
+
+For hybrid events, by default, perf stat aggregates and reports the event
+counts per PMU. But sometimes, it's also useful to aggregate event counts
+from all PMUs. This option enables that behavior and reports the counts
+without PMUs.
+
+For non-hybrid events, this option has no effect.
+
 --smi-cost::
 Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
 
index dea34c8..5958bfd 100644 (file)
@@ -1258,6 +1258,8 @@ static struct option stat_options[] = {
        OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
                    "disable CPU count aggregation", AGGR_NONE),
        OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+       OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
+                   "Merge identical named hybrid events"),
        OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
                   "print counts with custom separator"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
index 46b3dd1..d9629a8 100644 (file)
@@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
        return perf_pmu__has_hybrid() && !is_uncore(evsel);
 }
 
+static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
+                        bool check)
+{
+       if (hybrid_uniquify(counter)) {
+               if (check)
+                       return config && config->hybrid_merge;
+               else
+                       return config && !config->hybrid_merge;
+       }
+
+       return false;
+}
+
 static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
                            void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
                                       bool first),
@@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
        if (counter->merged_stat)
                return false;
        cb(config, counter, data, true);
-       if (config->no_merge || hybrid_uniquify(counter))
+       if (config->no_merge || hybrid_merge(counter, config, false))
                uniquify_event_name(counter, config);
-       else if (counter->auto_merge_stats)
+       else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
                collect_all_aliases(config, counter, cb, data);
        return true;
 }
index e31c94d..b5aeb8e 100644 (file)
@@ -127,6 +127,7 @@ struct perf_stat_config {
        bool                     ru_display;
        bool                     big_num;
        bool                     no_merge;
+       bool                     hybrid_merge;
        bool                     walltime_run_table;
        bool                     all_kernel;
        bool                     all_user;