perf stat: Fix wrong per-thread runtime stat for interval mode
author Jin Yao <yao.jin@linux.intel.com>
Wed, 20 May 2020 04:27:33 +0000 (12:27 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 28 May 2020 13:03:27 +0000 (10:03 -0300)
  root@kbl-ppc:~# perf stat --per-thread -e cycles,instructions -I1000 --interval-count 2
       1.004171683             perf-3696              8,747,311      cycles
          ...
       1.004171683             perf-3696                691,730      instructions              #    0.08  insn per cycle
          ...
       2.006490373             perf-3696              1,749,936      cycles
          ...
       2.006490373             perf-3696              1,484,582      instructions              #    0.28  insn per cycle
          ...

Let's see interval 2.006490373

  perf-3696              1,749,936      cycles
  perf-3696              1,484,582      instructions              #    0.28  insn per cycle

insn per cycle = 1,484,582 / 1,749,936 = 0.85.

But the reported value is 0.28, which is not correct.

stat_config.stats[] records the per-thread runtime stat. But for
interval mode, it should be reset for each interval.

So now, with this patch,

  root@kbl-ppc:~# perf stat --per-thread -e cycles,instructions -I1000 --interval-count 2
       1.005818121             perf-8633              9,898,045      cycles
          ...
       1.005818121             perf-8633                693,298      instructions              #    0.07  insn per cycle
          ...
       2.007863743             perf-8633              1,551,619      cycles
          ...
       2.007863743             perf-8633              1,317,514      instructions              #    0.85  insn per cycle
          ...

Let's check interval 2.007863743.

insn per cycle = 1,317,514 / 1,551,619 = 0.85. It's correct.

This patch creates runtime_stat_reset, places it next to
runtime_stat_new/runtime_stat_delete and moves all runtime_stat
functions before process_interval.

Committer testing:

After the patch:

  # perf stat --per-thread -e cycles,instructions -I1000 --interval-count 2  |& grep sssd_nss-1130
     2.011309774  sssd_nss-1130   56,585  cycles
     2.011309774  sssd_nss-1130   13,121  instructions  # 0.23 insn per cycle
  # python
  >>> 13121.0 / 56585
  0.23188124061146947
  >>>

Fixes: 14e72a21c783 ("perf stat: Update or print per-thread stats")
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20200520042737.24160-2-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-stat.c

index 4deb2d4..b03f06b 100644 (file)
@@ -351,6 +351,46 @@ static void read_counters(struct timespec *rs)
        }
 }
 
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+       int i;
+
+       config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+       if (!config->stats)
+               return -1;
+
+       config->stats_num = nthreads;
+
+       for (i = 0; i < nthreads; i++)
+               runtime_stat__init(&config->stats[i]);
+
+       return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+       int i;
+
+       if (!config->stats)
+               return;
+
+       for (i = 0; i < config->stats_num; i++)
+               runtime_stat__exit(&config->stats[i]);
+
+       zfree(&config->stats);
+}
+
+static void runtime_stat_reset(struct perf_stat_config *config)
+{
+       int i;
+
+       if (!config->stats)
+               return;
+
+       for (i = 0; i < config->stats_num; i++)
+               perf_stat__reset_shadow_per_stat(&config->stats[i]);
+}
+
 static void process_interval(void)
 {
        struct timespec ts, rs;
@@ -359,6 +399,7 @@ static void process_interval(void)
        diff_timespec(&rs, &ts, &ref_time);
 
        perf_stat__reset_shadow_per_stat(&rt_stat);
+       runtime_stat_reset(&stat_config);
        read_counters(&rs);
 
        if (STAT_RECORD) {
@@ -1737,35 +1778,6 @@ int process_cpu_map_event(struct perf_session *session,
        return set_maps(st);
 }
 
-static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
-{
-       int i;
-
-       config->stats = calloc(nthreads, sizeof(struct runtime_stat));
-       if (!config->stats)
-               return -1;
-
-       config->stats_num = nthreads;
-
-       for (i = 0; i < nthreads; i++)
-               runtime_stat__init(&config->stats[i]);
-
-       return 0;
-}
-
-static void runtime_stat_delete(struct perf_stat_config *config)
-{
-       int i;
-
-       if (!config->stats)
-               return;
-
-       for (i = 0; i < config->stats_num; i++)
-               runtime_stat__exit(&config->stats[i]);
-
-       zfree(&config->stats);
-}
-
 static const char * const stat_report_usage[] = {
        "perf stat report [<options>]",
        NULL,