perf stat: Fix counting when initial delay configured
author Changbin Du <changbin.du@huawei.com>
Thu, 2 Mar 2023 03:11:44 +0000 (11:11 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 17 Mar 2023 07:50:24 +0000 (08:50 +0100)
[ Upstream commit 25f69c69bc3ca8c781a94473f28d443d745768e3 ]

When creating counters with an initial delay configured, the enable_on_exec
field is not set, so we need to enable the counters later. The problem
is that when a workload is specified, target__none() is true, so
enable_counters() skips enabling them. So we also need to check
stat_config.initial_delay.

In this change, we add a new field 'initial_delay' to struct target,
which can be shared by other subcommands, and define
target__enable_on_exec(), which returns whether enable_on_exec should be
set in the normal case.

Before this fix the event is not counted:

  $ ./perf stat -e instructions -D 100 sleep 2
  Events disabled
  Events enabled

   Performance counter stats for 'sleep 2':

       <not counted>      instructions

         1.901661124 seconds time elapsed

         0.001602000 seconds user
         0.000000000 seconds sys

After this fix it works:

  $ ./perf stat -e instructions -D 100 sleep 2
  Events disabled
  Events enabled

   Performance counter stats for 'sleep 2':

             404,214      instructions

         1.901743475 seconds time elapsed

         0.001617000 seconds user
         0.000000000 seconds sys

Fixes: c587e77e100fa40e ("perf stat: Do not delay the workload with --delay")
Signed-off-by: Changbin Du <changbin.du@huawei.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hui Wang <hw.huiwang@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230302031146.2801588-2-changbin.du@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
tools/perf/builtin-stat.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/target.h

index 978fdc6..f6427e3 100644 (file)
@@ -528,12 +528,7 @@ static int enable_counters(void)
                        return err;
        }
 
-       /*
-        * We need to enable counters only if:
-        * - we don't have tracee (attaching to task or cpu)
-        * - we have initial delay configured
-        */
-       if (!target__none(&target)) {
+       if (!target__enable_on_exec(&target)) {
                if (!all_counters_use_bpf)
                        evlist__enable(evsel_list);
        }
@@ -906,7 +901,7 @@ try_again_reset:
                        return err;
        }
 
-       if (stat_config.initial_delay) {
+       if (target.initial_delay) {
                pr_info(EVLIST_DISABLED_MSG);
        } else {
                err = enable_counters();
@@ -918,8 +913,8 @@ try_again_reset:
        if (forks)
                evlist__start_workload(evsel_list);
 
-       if (stat_config.initial_delay > 0) {
-               usleep(stat_config.initial_delay * USEC_PER_MSEC);
+       if (target.initial_delay > 0) {
+               usleep(target.initial_delay * USEC_PER_MSEC);
                err = enable_counters();
                if (err)
                        return -1;
@@ -1243,7 +1238,7 @@ static struct option stat_options[] = {
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
                     "aggregate counts per numa node", AGGR_NODE),
-       OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+       OPT_INTEGER('D', "delay", &target.initial_delay,
                    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
        OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
                        "Only print computed metrics. No raw values", enable_metric_only),
index 8ec8bb4..b63b3a3 100644 (file)
@@ -583,11 +583,7 @@ int create_perf_stat_counter(struct evsel *evsel,
        if (evsel__is_group_leader(evsel)) {
                attr->disabled = 1;
 
-               /*
-                * In case of initial_delay we enable tracee
-                * events manually.
-                */
-               if (target__none(target) && !config->initial_delay)
+               if (target__enable_on_exec(target))
                        attr->enable_on_exec = 1;
        }
 
index 35c940d..05c5125 100644 (file)
@@ -145,7 +145,6 @@ struct perf_stat_config {
        FILE                    *output;
        unsigned int             interval;
        unsigned int             timeout;
-       int                      initial_delay;
        unsigned int             unit_width;
        unsigned int             metric_only_len;
        int                      times;
index daec6cb..880f1af 100644 (file)
@@ -18,6 +18,7 @@ struct target {
        bool         per_thread;
        bool         use_bpf;
        bool         hybrid;
+       int          initial_delay;
        const char   *attr_map;
 };
 
@@ -72,6 +73,17 @@ static inline bool target__none(struct target *target)
        return !target__has_task(target) && !target__has_cpu(target);
 }
 
+static inline bool target__enable_on_exec(struct target *target)
+{
+       /*
+        * Normally enable_on_exec should be set if:
+        *  1) The tracee process is forked (not attaching to an existing task or cpu).
+        *  2) And initial_delay is not configured.
+        * Otherwise, we enable tracee events manually.
+        */
+       return target__none(target) && !target->initial_delay;
+}
+
 static inline bool target__has_per_thread(struct target *target)
 {
        return target->system_wide && target->per_thread;