perf lock contention: Show per-cpu rq_lock with address
author: Namhyung Kim <namhyung@kernel.org>
Mon, 13 Mar 2023 20:48:24 +0000 (13:48 -0700)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 14 Mar 2023 11:33:34 +0000 (08:33 -0300)
Using the BPF_PROG_RUN mechanism, we can run a raw_tp BPF program to
collect some semi-global locks like per-cpu locks.  Let's add runqueue
locks using bpf_per_cpu_ptr() helper.

  $ sudo ./perf lock con -abl -- sleep 1
   contended   total wait     max wait     avg wait            address   symbol

         248      3.25 ms     32.23 us     13.10 us   ffff8cc75cfd2940   siglock
          60    217.91 us      9.69 us      3.63 us   ffff8cc700061c00
           8     70.23 us     13.86 us      8.78 us   ffff8cc703629484
           4     56.32 us     35.81 us     14.08 us   ffff8cc78b66f778   mmap_lock
           4     16.70 us      5.18 us      4.18 us   ffff8cc7036a0684
           3      4.99 us      2.65 us      1.66 us   ffff8d053da30c80   rq_lock
           2      3.44 us      2.28 us      1.72 us   ffff8d053dcf0c80   rq_lock
           9      2.51 us       371 ns       278 ns   ffff8ccb92479440
           2      2.11 us      1.24 us      1.06 us   ffff8d053db30c80   rq_lock
           2      2.06 us      1.69 us      1.03 us   ffff8d053d970c80   rq_lock

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20230313204825.2665483-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/bpf_lock_contention.c
tools/perf/util/bpf_skel/lock_contention.bpf.c
tools/perf/util/bpf_skel/lock_data.h

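diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 51631af..235fc71 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c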
@@ -151,6 +151,8 @@ int lock_contention_prepare(struct lock_contention *con)
        skel->bss->needs_callstack = con->save_callstack;
        skel->bss->lock_owner = con->owner;
 
+       bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
+
        lock_contention_bpf__attach(skel);
        return 0;
 }
@@ -198,14 +200,26 @@ static const char *lock_contention_get_name(struct lock_contention *con,
        }
 
        if (con->aggr_mode == LOCK_AGGR_ADDR) {
+               int lock_fd = bpf_map__fd(skel->maps.lock_syms);
+
+               /* per-process locks set upper bits of the flags */
                if (flags & LCD_F_MMAP_LOCK)
                        return "mmap_lock";
                if (flags & LCD_F_SIGHAND_LOCK)
                        return "siglock";
+
+               /* global locks with symbols */
                sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap);
                if (sym)
-                       name = sym->name;
-               return name;
+                       return sym->name;
+
+               /* try semi-global locks collected separately */
+               if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr, &flags)) {
+                       if (flags == LOCK_CLASS_RQLOCK)
+                               return "rq_lock";
+               }
+
+               return "";
        }
 
        /* LOCK_AGGR_CALLER: skip lock internal functions */
@@ -258,6 +272,15 @@ int lock_contention_read(struct lock_contention *con)
                thread__set_comm(idle, "swapper", /*timestamp=*/0);
        }
 
+       if (con->aggr_mode == LOCK_AGGR_ADDR) {
+               DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+                       .flags = BPF_F_TEST_RUN_ON_CPU,
+               );
+               int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);
+
+               bpf_prog_test_run_opts(prog_fd, &opts);
+       }
+
        /* make sure it loads the kernel map */
        map__load(maps__first(machine->kmaps));
 
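diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index f76cde0..ed91609 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c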
@@ -10,6 +10,9 @@
 /* default buffer size */
 #define MAX_ENTRIES  10240
 
+/* for collect_lock_syms().  4096 was rejected by the verifier */
+#define MAX_CPUS  1024
+
 /* lock contention flags from include/trace/events/lock.h */
 #define LCB_F_SPIN     (1U << 0)
 #define LCB_F_READ     (1U << 1)
@@ -58,6 +61,13 @@ struct {
 
 struct {
        __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(key_size, sizeof(__u64));
+       __uint(value_size, sizeof(__u32));
+       __uint(max_entries, 16384);
+} lock_syms SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
@@ -384,4 +394,25 @@ int contention_end(u64 *ctx)
        return 0;
 }
 
+extern struct rq runqueues __ksym;
+
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(collect_lock_syms)
+{
+       __u64 lock_addr;
+       __u32 lock_flag;
+
+       for (int i = 0; i < MAX_CPUS; i++) {
+               struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
+
+               if (rq == NULL)
+                       break;
+
+               lock_addr = (__u64)&rq->__lock;
+               lock_flag = LOCK_CLASS_RQLOCK;
+               bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+       }
+       return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
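diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 5ed1a09..e59366f 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h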
@@ -36,4 +36,9 @@ enum lock_aggr_mode {
        LOCK_AGGR_CALLER,
 };
 
+enum lock_class_sym {
+       LOCK_CLASS_NONE,
+       LOCK_CLASS_RQLOCK,
+};
+
 #endif /* UTIL_BPF_SKEL_LOCK_DATA_H */