perf bpf skels: Stop using vmlinux.h generated from BTF, use subset of used structs...
authorArnaldo Carvalho de Melo <acme@redhat.com>
Thu, 4 May 2023 22:03:51 +0000 (19:03 -0300)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Fri, 5 May 2023 22:18:39 +0000 (19:18 -0300)
Linus reported a build break due to using a vmlinux without a BTF elf
section to generate the vmlinux.h header with bpftool for use in the BPF
tools in tools/perf/util/bpf_skel/*.bpf.c.

Instead add a vmlinux.h file with the structs needed with the fields the
tools need, marking the structs with __attribute__((preserve_access_index)),
so that libbpf's CO-RE code can fixup the struct field offsets.

In some cases the vmlinux.h file that was being generated by bpftool
from the kernel BTF information was not needed at all, just including
linux/bpf.h, sometimes linux/perf_event.h was enough as non-UAPI
types were not being used.

To keep the patch small, include those UAPI headers from the trimmed down
vmlinux.h file, that then provides the tools with just the structs and
the subset of its fields needed for them.

Testing it:

  # perf lock contention -b find / > /dev/null
  ^C contended   total wait     max wait     avg wait         type   caller

           7     53.59 us     10.86 us      7.66 us     rwlock:R   start_this_handle+0xa0
           2     30.35 us     21.99 us     15.17 us      rwsem:R   iterate_dir+0x52
           1      9.04 us      9.04 us      9.04 us     rwlock:W   start_this_handle+0x291
           1   8.73 us      8.73 us      8.73 us     spinlock   raw_spin_rq_lock_nested+0x1e
  #
  # perf lock contention -abl find / > /dev/null
  ^C contended   total wait     max wait     avg wait            address   symbol

           1    262.96 ms    262.96 ms    262.96 ms   ffff8e67502d0170    (mutex)
          12    244.24 us     39.91 us     20.35 us   ffff8e6af56f8070   mmap_lock (rwsem)
           7     30.28 us      6.85 us      4.33 us   ffff8e6c865f1d40   rq_lock (spinlock)
           3      7.42 us      4.03 us      2.47 us   ffff8e6c864b1d40   rq_lock (spinlock)
           2      3.72 us      2.19 us      1.86 us   ffff8e6c86571d40   rq_lock (spinlock)
           1      2.42 us      2.42 us      2.42 us   ffff8e6c86471d40   rq_lock (spinlock)
           4      2.11 us       559 ns       527 ns   ffffffff9a146c80   rcu_state (spinlock)
           3      1.45 us       818 ns       482 ns   ffff8e674ae8384c    (rwlock)
           1       870 ns       870 ns       870 ns   ffff8e68456ee060    (rwlock)
           1       663 ns       663 ns       663 ns   ffff8e6c864f1d40   rq_lock (spinlock)
           1       573 ns       573 ns       573 ns   ffff8e6c86531d40   rq_lock (spinlock)
           1       472 ns       472 ns       472 ns   ffff8e6c86431740    (spinlock)
           1       397 ns       397 ns       397 ns   ffff8e67413a4f04    (spinlock)
  #
  # perf test offcpu
  95: perf record offcpu profiling tests                              : Ok
  #
  # perf kwork latency --use-bpf
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
    Kwork Name                     | Cpu  | Avg delay     | Count     | Max delay     | Max delay start     | Max delay end       |
   --------------------------------------------------------------------------------------------------------------------------------
    (w)flush_memcg_stats_dwork     | 0000 |   1056.212 ms |         2 |   2112.345 ms |     550113.229573 s |     550115.341919 s |
    (w)toggle_allocation_gate      | 0000 |     10.144 ms |        62 |    416.389 ms |     550113.453518 s |     550113.869907 s |
    (w)0xffff8e6748e28080          | 0002 |      0.623 ms |         1 |      0.623 ms |     550110.989841 s |     550110.990464 s |
    (w)vmstat_shepherd             | 0000 |      0.586 ms |        10 |      2.828 ms |     550111.971536 s |     550111.974364 s |
    (w)vmstat_update               | 0007 |      0.363 ms |         5 |      1.634 ms |     550113.222520 s |     550113.224154 s |
    (w)vmstat_update               | 0000 |      0.324 ms |        10 |      2.827 ms |     550111.971526 s |     550111.974354 s |
    (w)0xffff8e674c5f4a58          | 0002 |      0.102 ms |         5 |      0.134 ms |     550110.989839 s |     550110.989972 s |
    (w)psi_avgs_work               | 0001 |      0.086 ms |         3 |      0.107 ms |     550114.957852 s |     550114.957959 s |
    (w)psi_avgs_work               | 0000 |      0.079 ms |         5 |      0.100 ms |     550118.605668 s |     550118.605768 s |
    (w)kfree_rcu_monitor           | 0006 |      0.079 ms |         1 |      0.079 ms |     550110.925821 s |     550110.925900 s |
    (w)psi_avgs_work               | 0004 |      0.079 ms |         1 |      0.079 ms |     550109.581835 s |     550109.581914 s |
    (w)psi_avgs_work               | 0001 |      0.078 ms |         1 |      0.078 ms |     550109.197809 s |     550109.197887 s |
    (w)psi_avgs_work               | 0002 |      0.077 ms |         5 |      0.086 ms |     550110.669819 s |     550110.669905 s |
  <SNIP>
  # strace -e bpf -o perf-stat-bpf-counters.output perf stat -e cycles --bpf-counters sleep 1

   Performance counter stats for 'sleep 1':

           6,197,983   cycles

         1.003922848 seconds time elapsed

         0.000000000 seconds user
         0.002032000 seconds sys

  # head -7 perf-stat-bpf-counters.output
  bpf(BPF_OBJ_GET, {pathname="/sys/fs/bpf/perf_attr_map", bpf_fd=0, file_flags=0}, 16) = 3
  bpf(BPF_OBJ_GET_INFO_BY_FD, {info={bpf_fd=3, info_len=88, info=0x7ffcead64990}}, 16) = 0
  bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=3, key=0x24129e0, value=0x7ffcead65a48, flags=BPF_ANY}, 32) = 0
  bpf(BPF_LINK_GET_FD_BY_ID, {link_id=1252}, 12) = -1 ENOENT (No such file or directory)
  bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffcead65780, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0,
+func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 116) = 4
  bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffcead65920, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0,
+func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0, fd_array=NULL}, 128) = 4
  bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 28) = 4
  #

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Song Liu <song@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Co-developed-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/lkml/ZFU1PJrn8YtHIqno@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Makefile.perf
tools/perf/util/bpf_skel/.gitignore
tools/perf/util/bpf_skel/vmlinux.h [new file with mode: 0644]

index 48aba18..61c33d1 100644 (file)
@@ -1063,25 +1063,7 @@ $(BPFTOOL): | $(SKEL_TMP_OUT)
        $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \
                OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
 
-VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)                           \
-                    $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)    \
-                    ../../vmlinux                                      \
-                    /sys/kernel/btf/vmlinux                            \
-                    /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-
-$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
-ifeq ($(VMLINUX_H),)
-       $(QUIET_GEN)$(BPFTOOL) btf dump file $< format c > $@ || \
-       (echo "Failure to generate vmlinux.h needed for the recommended BPF skeleton support." && \
-       echo "To disable this use the build option NO_BPF_SKEL=1." && \
-       echo "Alternatively point at a pre-generated vmlinux.h with VMLINUX_H=<path>." && \
-       false)
-else
-       $(Q)cp "$(VMLINUX_H)" $@
-endif
-
-$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT)
+$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT)
        $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \
          -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@
 
index cd01455..7a1c832 100644 (file)
@@ -1,4 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 .tmp
 *.skel.h
-vmlinux.h
diff --git a/tools/perf/util/bpf_skel/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux.h
new file mode 100644 (file)
index 0000000..449b1ea
--- /dev/null
@@ -0,0 +1,173 @@
+#ifndef __VMLINUX_H
+#define __VMLINUX_H
+
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <stdbool.h>
+
+// non-UAPI kernel data structures, used in the .bpf.c BPF tool component.
+
+// Just the fields used in these tools preserving the access index so that
+// libbpf can fixup offsets with the ones used in the kernel when loading the
+// BPF bytecode, if they differ from what is used here.
+
+typedef __u8 u8; /* kernel-style short names for the fixed-width __uN/__sN integer types */
+typedef __u32 u32;
+typedef __u64 u64;
+typedef __s64 s64;
+
+typedef int pid_t; /* process/thread id type, declared locally as a plain int */
+
+enum cgroup_subsys_id { /* Trimmed enum: only one enumerator is declared here. */
+       perf_event_cgrp_id  = 8, /* NOTE(review): hard-coded value; presumably has to match the target kernel's id for the perf_event cgroup subsystem - confirm */
+};
+
+enum { /* Softirq vector numbers, counting up from HI_SOFTIRQ = 0. */
+       HI_SOFTIRQ = 0,
+       TIMER_SOFTIRQ,
+       NET_TX_SOFTIRQ,
+       NET_RX_SOFTIRQ,
+       BLOCK_SOFTIRQ,
+       IRQ_POLL_SOFTIRQ,
+       TASKLET_SOFTIRQ,
+       SCHED_SOFTIRQ,
+       HRTIMER_SOFTIRQ,
+       RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
+
+       NR_SOFTIRQS /* one past RCU_SOFTIRQ, i.e. the number of vectors listed above (10) */
+};
+
+typedef struct { /* Trimmed 64-bit atomic type: a single s64 member. */
+       s64     counter;
+} __attribute__((preserve_access_index)) atomic64_t; /* attribute lets libbpf fix up member offsets when loading the BPF bytecode */
+
+typedef atomic64_t atomic_long_t; /* in this header atomic_long_t is simply the 64-bit variant */
+
+struct raw_spinlock { /* Trimmed declaration of a lock type; embedded by spinlock_t below. */
+       int rawlock; /* NOTE(review): looks like a placeholder member rather than one the tools read - confirm */
+} __attribute__((preserve_access_index));
+
+typedef struct raw_spinlock raw_spinlock_t; /* typedef name for the struct above */
+
+typedef struct { /* Trimmed spinlock type: wraps one struct raw_spinlock. */
+       struct raw_spinlock rlock;
+} __attribute__((preserve_access_index)) spinlock_t;
+
+struct sighand_struct { /* Trimmed view: only the siglock member is declared. */
+       spinlock_t siglock;
+} __attribute__((preserve_access_index)); /* offset of siglock is fixed up by libbpf if the kernel's differs */
+
+struct rw_semaphore { /* Trimmed view: only the owner member is declared. */
+       atomic_long_t owner; /* 64-bit atomic, per the atomic_long_t typedef in this header */
+} __attribute__((preserve_access_index));
+
+struct mutex { /* Trimmed view: only the owner member is declared (same shape as struct rw_semaphore here). */
+       atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
+struct kernfs_node { /* Trimmed view: only the 64-bit id member is declared. */
+       u64 id;
+} __attribute__((preserve_access_index)); /* reached through struct cgroup's kn pointer in this header */
+
+struct cgroup { /* Trimmed view: just the kernfs node pointer and the level. */
+       struct kernfs_node *kn;
+       int                level; /* NOTE(review): presumably the depth of this cgroup in its hierarchy - confirm */
+}  __attribute__((preserve_access_index));
+
+struct cgroup_subsys_state { /* Trimmed view: only the pointer to the owning struct cgroup is declared. */
+       struct cgroup *cgroup;
+} __attribute__((preserve_access_index));
+
+struct css_set { /* Trimmed view: per-subsystem state pointers plus the dfl_cgrp pointer. */
+       struct cgroup_subsys_state *subsys[13]; /* NOTE(review): 13 is a hard-coded length; presumably the kernel's subsystem count when written - confirm */
+       struct cgroup *dfl_cgrp;
+} __attribute__((preserve_access_index));
+
+struct mm_struct { /* Trimmed view: only the embedded mmap_lock rw_semaphore is declared. */
+       struct rw_semaphore mmap_lock;
+} __attribute__((preserve_access_index));
+
+struct task_struct { /* Trimmed view of the task descriptor: just the members listed below. */
+       unsigned int          flags;
+       struct mm_struct      *mm;
+       pid_t                 pid;
+       pid_t                 tgid;
+       char                  comm[16]; /* NOTE(review): 16 presumably mirrors the kernel's TASK_COMM_LEN - confirm */
+       struct sighand_struct *sighand;
+       struct css_set        *cgroups;
+} __attribute__((preserve_access_index)); /* offsets are fixed up by libbpf if they differ from the kernel's */
+
+struct trace_entry { /* Header embedded as the first member ("ent") of every trace_event_raw_* struct in this file. */
+       short unsigned int type;
+       unsigned char      flags;
+       unsigned char      preempt_count;
+       int                pid;
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_irq_handler_entry { /* Record layout: trace_entry header, then event-specific members. */
+       struct trace_entry ent;
+       int                irq;
+       u32                __data_loc_name; /* NOTE(review): presumably locates the name string inside __data - confirm */
+       char               __data[]; /* flexible array member */
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_irq_handler_exit { /* Record layout: trace_entry header, then irq and ret. */
+       struct trace_entry ent;
+       int                irq;
+       int                ret;
+       char               __data[]; /* flexible array member */
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_softirq { /* Record layout: trace_entry header, then the vector number. */
+       struct trace_entry ent;
+       unsigned int       vec; /* NOTE(review): presumably one of the *_SOFTIRQ values from the enum in this header - confirm */
+       char               __data[]; /* flexible array member */
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_execute_start { /* Record layout: trace_entry header, then two untyped pointers. */
+       struct trace_entry ent;
+       void               *work;
+       void               *function;
+       char               __data[]; /* flexible array member */
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_execute_end { /* Same members as the execute_start record declared in this header. */
+       struct trace_entry ent;
+       void               *work;
+       void               *function;
+       char              __data[]; /* flexible array member */
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_activate_work { /* Record layout: trace_entry header, then the work pointer only. */
+       struct trace_entry ent;
+       void               *work;
+       char               __data[]; /* flexible array member */
+} __attribute__((preserve_access_index));
+
+struct perf_sample_data { /* Trimmed view of the per-sample data reached via bpf_perf_event_data_kern.data. */
+       u64                      addr;
+       u64                      period;
+       union perf_sample_weight weight; /* union type is not declared in this header; expected from <linux/perf_event.h> */
+       u64                      txn;
+       union perf_mem_data_src  data_src; /* likewise expected from <linux/perf_event.h> */
+       u64                      ip;
+       struct {
+               u32              pid;
+               u32              tid;
+       } tid_entry;
+       u64                      time;
+       u64                      id;
+       struct {
+               u32              cpu;
+       } cpu_entry;
+       u64                      phys_addr;
+       u64                      data_page_size;
+       u64                      code_page_size;
+} __attribute__((__aligned__(64))) __attribute__((preserve_access_index)); /* 64-byte aligned; offsets fixed up by libbpf */
+
+struct bpf_perf_event_data_kern { /* Trimmed view: pointers to the sample data and to the event. */
+       struct perf_sample_data *data;
+       struct perf_event       *event; /* struct perf_event is not defined in this header; only the pointer is declared */
+} __attribute__((preserve_access_index));
+#endif // __VMLINUX_H