perf offcpu: Track child processes
author Namhyung Kim <namhyung@kernel.org>
Thu, 11 Aug 2022 18:54:55 +0000 (11:54 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 11 Aug 2022 20:57:34 +0000 (17:57 -0300)
When the -p option is used or a workload is given, off-cpu profiling
needs to handle child processes as well.  The perf_event can inherit
those task events automatically.  We can add a new BPF program on the
task_newtask tracepoint to track child processes.

Before:
  $ sudo perf record --off-cpu -- perf bench sched messaging
  $ sudo perf report --stat | grep -A1 offcpu
  offcpu-time stats:
            SAMPLE events:        1

After:
  $ sudo perf record -a --off-cpu -- perf bench sched messaging
  $ sudo perf report --stat | grep -A1 offcpu
  offcpu-time stats:
            SAMPLE events:      856

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Blake Jones <blakejones@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20220811185456.194721-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/bpf_off_cpu.c
tools/perf/util/bpf_skel/off_cpu.bpf.c

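diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index f7ee0c7a53f06149640e1d5d952f6819d99d5532..c257813e674ef02fd9691f9a708b3c3217ba2022 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c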
@@ -17,6 +17,7 @@
 #include "bpf_skel/off_cpu.skel.h"
 
 #define MAX_STACKS  32
+#define MAX_PROC  4096
 /* we don't need actual timestamp, just want to put the samples at last */
 #define OFF_CPU_TIMESTAMP  (~0ull << 32)
 
@@ -164,10 +165,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 
                        ntasks++;
                }
+
+               if (ntasks < MAX_PROC)
+                       ntasks = MAX_PROC;
+
                bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
        } else if (target__has_task(target)) {
                ntasks = perf_thread_map__nr(evlist->core.threads);
                bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+       } else if (target__none(target)) {
+               bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
        }
 
        if (evlist__first(evlist)->cgrp) {
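diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index 143a8b7acf87a5b3eb6868ecd40b1e4746c2ce91..c4ba2bcf179f44c1069825a54e7c01b3c6ffcb9c 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c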
@@ -12,6 +12,9 @@
 #define TASK_INTERRUPTIBLE     0x0001
 #define TASK_UNINTERRUPTIBLE   0x0002
 
+/* create a new thread */
+#define CLONE_THREAD  0x10000
+
 #define MAX_STACKS   32
 #define MAX_ENTRIES  102400
 
@@ -220,6 +223,33 @@ next:
        return 0;
 }
 
+SEC("tp_btf/task_newtask")  /* attached to the task_newtask tracepoint to track child processes */
+int on_newtask(u64 *ctx)  /* reads ctx[0] as the new task and ctx[1] as its clone flags; always returns 0 */
+{
+       struct task_struct *task;
+       u64 clone_flags;
+       u32 pid;
+       u8 val = 1;  /* value stored in task_filter for a newly tracked tgid */
+
+       if (!uses_tgid)  /* nothing to do unless the uses_tgid flag is set */
+               return 0;
+
+       task = (struct task_struct *)bpf_get_current_task();  /* currently running task (presumably the parent doing the clone) */
+
+       pid = BPF_CORE_READ(task, tgid);
+       if (!bpf_map_lookup_elem(&task_filter, &pid))  /* current tgid is not in task_filter: ignore this event */
+               return 0;
+
+       task = (struct task_struct *)ctx[0];  /* from here on, task refers to the newly created task */
+       clone_flags = ctx[1];
+
+       pid = task->tgid;  /* tgid of the new task, read directly from the tracepoint argument */
+       if (!(clone_flags & CLONE_THREAD))  /* skip clones that only create a new thread */
+               bpf_map_update_elem(&task_filter, &pid, &val, BPF_NOEXIST);  /* BPF_NOEXIST: insert only if the key is not already present */
+
+       return 0;
+}
+
 SEC("tp_btf/sched_switch")
 int on_switch(u64 *ctx)
 {