This covers two different use cases. The first one is cgroup
filtering, given by the -G/--cgroup option, which restricts the
off-cpu profiling to tasks in the given cgroups only.

The other use case is cgroup sampling, which is enabled by the
--all-cgroups option and adds PERF_SAMPLE_CGROUP to the sample_type
so that the cgroup id of the task is set in the sample data.
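For the filtering case, the cgroups are given on the perf record
command line as usual (the cgroup name here is only a placeholder):

$ sudo perf record --off-cpu -G my-cgroup sleep 1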
Example output for the sampling case:
$ sudo perf record -a --off-cpu --all-cgroups sleep 1
$ sudo perf report --stdio -s comm,cgroup --call-graph=no
...
# Samples: 144 of event 'offcpu-time'
# Event count (approx.): 48452045427
#
# Children Self Command Cgroup
# ........ ........ ............... ..........................................
#
61.57% 5.60% Chrome_ChildIOT /user.slice/user-657345.slice/user@657345.service/app.slice/...
29.51% 7.38% Web Content /user.slice/user-657345.slice/user@657345.service/app.slice/...
17.48% 1.59% Chrome_IOThread /user.slice/user-657345.slice/user@657345.service/app.slice/...
16.48% 4.12% pipewire-pulse /user.slice/user-657345.slice/user@657345.service/session.slice/...
14.48% 2.07% perf /user.slice/user-657345.slice/user@657345.service/app.slice/...
14.30% 7.15% CompositorTileW /user.slice/user-657345.slice/user@657345.service/app.slice/...
13.33% 6.67% Timer /user.slice/user-657345.slice/user@657345.service/app.slice/...
...
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20220518224725.742882-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
static int record__config_off_cpu(struct record *rec)
{
- return off_cpu_prepare(rec->evlist, &rec->opts.target);
+ return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}
static bool record__kcore_readable(struct machine *machine)
#include "util/evlist.h"
#include "util/off_cpu.h"
#include "util/perf-hooks.h"
+#include "util/record.h"
#include "util/session.h"
#include "util/target.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
+#include "util/cgroup.h"
#include <bpf/bpf.h>
#include "bpf_skel/off_cpu.skel.h"
u32 tgid;
u32 stack_id;
u32 state;
+ u64 cgroup_id;
};
union off_cpu_data {
}
}
-int off_cpu_prepare(struct evlist *evlist, struct target *target)
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts)
{
int err, fd, i;
- int ncpus = 1, ntasks = 1;
+ int ncpus = 1, ntasks = 1, ncgrps = 1;
if (off_cpu_config(evlist) < 0) {
pr_err("Failed to config off-cpu BPF event\n");
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
}
+ if (evlist__first(evlist)->cgrp) {
+ ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+ bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
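+	/* --all-cgroups was given: collect the task's cgroup id in samples */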
+ if (opts->record_cgroup) {
+ skel->rodata->needs_cgroup = true;
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
set_max_rlimit();
check_sched_switch_args();
}
}
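+	/* set up the cgroup filter map with the ids of the given cgroups */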
+ if (evlist__first(evlist)->cgrp) {
+ struct evsel *evsel;
+ u8 val = 1;
+
+ skel->bss->has_cgroup = 1;
+ fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct cgroup *cgrp = evsel->cgrp;
+
+ if (cgrp == NULL)
+ continue;
+
+ if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+ pr_err("Failed to read cgroup id of %s\n",
+ cgrp->name);
+ goto out;
+ }
+
+ bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+ }
+ }
+
err = off_cpu_bpf__attach(skel);
if (err) {
pr_err("Failed to attach off-cpu BPF skeleton\n");
/* calculate sample callchain data array length */
n += len + 2;
}
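+		/* --all-cgroups: append the cgroup id saved in the BPF map key */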
+ if (sample_type & PERF_SAMPLE_CGROUP)
+ data.array[n++] = key.cgroup_id;
/* TODO: handle more sample types */
size = n * sizeof(u64);
__u32 tgid;
__u32 stack_id;
__u32 state;
+ __u64 cgroup_id;
};
struct {
__uint(max_entries, 1);
} task_filter SEC(".maps");
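+/* cgroup ids from -G/--cgroup; the value is just a presence flag */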
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
/* old kernel task_struct definition */
struct task_struct___old {
long state;
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
+int has_cgroup = 0;
const volatile bool has_prev_state = false;
+const volatile bool needs_cgroup = false;
+const volatile bool uses_cgroup_v1 = false;
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
return BPF_CORE_READ(t_old, state);
}
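+/*
+ * Get the cgroup id of a task: the perf_event subsystem cgroup on
+ * cgroup v1, or the default hierarchy cgroup on cgroup v2.
+ */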
+static inline __u64 get_cgroup_id(struct task_struct *t)
+{
+ struct cgroup *cgrp;
+
+ if (uses_cgroup_v1)
+ cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
+ else
+ cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
+
+ return BPF_CORE_READ(cgrp, kn, id);
+}
+
static inline int can_record(struct task_struct *t, int state)
{
/* kernel threads don't have user stack */
return 0;
}
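+	/* cgroup filtering: skip tasks not in one of the given cgroups */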
+ if (has_cgroup) {
+ __u8 *ok;
+ __u64 cgrp_id = get_cgroup_id(t);
+
+ ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
+ if (!ok)
+ return 0;
+ }
+
return 1;
}
.tgid = next->tgid,
.stack_id = pelem->stack_id,
.state = pelem->state,
+ .cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
};
__u64 delta = ts - pelem->timestamp;
__u64 *total;
struct evlist;
struct target;
struct perf_session;
+struct record_opts;
#define OFFCPU_EVENT "offcpu-time"
#ifdef HAVE_BPF_SKEL
-int off_cpu_prepare(struct evlist *evlist, struct target *target);
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts);
int off_cpu_write(struct perf_session *session);
#else
static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
- struct target *target __maybe_unused)
+ struct target *target __maybe_unused,
+ struct record_opts *opts __maybe_unused)
{
return -1;
}