libbpf-tools: add ext4dist
author Wenbo Zhang <ethercflow@gmail.com>
Fri, 26 Mar 2021 06:01:07 +0000 (14:01 +0800)
committer yonghong-song <ys114321@gmail.com>
Thu, 1 Apr 2021 14:53:16 +0000 (07:53 -0700)
Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
libbpf-tools/.gitignore
libbpf-tools/Makefile
libbpf-tools/ext4dist.bpf.c [new file with mode: 0644]
libbpf-tools/ext4dist.c [new file with mode: 0644]
libbpf-tools/ext4dist.h [new file with mode: 0644]
libbpf-tools/trace_helpers.c
libbpf-tools/trace_helpers.h

index 00f921daec6bc66e892ee102cc97ee6f03762263..76bcc6e59981a427ad2b7ff6cef29d2d283fba9f 100644 (file)
@@ -9,6 +9,7 @@
 /cpufreq
 /drsnoop
 /execsnoop
+/ext4dist
 /filelife
 /funclatency
 /hardirqs
index 87df7e486d2526172269060bb43d8eee0d627b11..7423546c34d82368768ecec90670259ed78e9180 100644 (file)
@@ -26,6 +26,7 @@ APPS = \
        cpufreq \
        drsnoop \
        execsnoop \
+       ext4dist \
        filelife \
        funclatency \
        hardirqs \
diff --git a/libbpf-tools/ext4dist.bpf.c b/libbpf-tools/ext4dist.bpf.c
new file mode 100644 (file)
index 0000000..a68d934
--- /dev/null
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2021 Wenbo Zhang
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "bits.bpf.h"
+#include "ext4dist.h"
+
+/* Histogram unit selector: milliseconds when true, microseconds otherwise. */
+const volatile bool targ_ms = false;
+/* When non-zero, only threads whose tgid equals this value are timed. */
+const volatile pid_t targ_tgid = 0;
+
+/* Capacity of the starts map. */
+#define MAX_ENTRIES    10240
+
+/* One latency histogram per traced operation, indexed by enum ext4_fop_type. */
+struct hist hists[__MAX_FOP_TYPE];
+
+/*
+ * Entry timestamps (bpf_ktime_get_ns() values) keyed by the low 32 bits
+ * of bpf_get_current_pid_tgid().
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, MAX_ENTRIES);
+       __type(key, u32);
+       __type(value, u64);
+} starts SEC(".maps");
+
+/*
+ * Common entry handler: remember when the current thread entered a traced
+ * function. Nothing is stored when a target tgid is configured and the
+ * current tgid differs from it. Always returns 0.
+ */
+static int trace_entry(void)
+{
+       u64 pid_tgid = bpf_get_current_pid_tgid();
+       u32 cur_tgid = pid_tgid >> 32;
+       u32 tid = pid_tgid;
+       u64 now;
+
+       if (!targ_tgid || targ_tgid == cur_tgid) {
+               now = bpf_ktime_get_ns();
+               bpf_map_update_elem(&starts, &tid, &now, BPF_ANY);
+       }
+
+       return 0;
+}
+
+/*
+ * kprobe entry hooks for the four traced ext4 functions. Each one only
+ * records the entry timestamp via trace_entry().
+ */
+SEC("kprobe/ext4_file_read_iter")
+int BPF_KPROBE(kprobe1)
+{
+       return trace_entry();
+}
+
+SEC("kprobe/ext4_file_write_iter")
+int BPF_KPROBE(kprobe2)
+{
+       return trace_entry();
+}
+
+SEC("kprobe/ext4_file_open")
+int BPF_KPROBE(kprobe3)
+{
+       return trace_entry();
+}
+
+SEC("kprobe/ext4_sync_file")
+int BPF_KPROBE(kprobe4)
+{
+       return trace_entry();
+}
+
+/*
+ * fentry counterparts of the kprobe entry hooks: same four functions,
+ * same trace_entry() handler.
+ */
+SEC("fentry/ext4_file_read_iter")
+int BPF_PROG(fentry1)
+{
+       return trace_entry();
+}
+
+SEC("fentry/ext4_file_write_iter")
+int BPF_PROG(fentry2)
+{
+       return trace_entry();
+}
+
+SEC("fentry/ext4_file_open")
+int BPF_PROG(fentry3)
+{
+       return trace_entry();
+}
+
+SEC("fentry/ext4_sync_file")
+int BPF_PROG(fentry4)
+{
+       return trace_entry();
+}
+
+/*
+ * Common return handler: look up the entry timestamp stored for the
+ * current thread, convert the elapsed time to the configured unit, and
+ * bump the matching log2 bucket of the histogram for @type. The stored
+ * timestamp is removed afterwards. Always returns 0.
+ */
+static int trace_return(enum ext4_fop_type type)
+{
+       u64 *tsp, slot, ts = bpf_ktime_get_ns();
+       u64 id = bpf_get_current_pid_tgid();
+       u32 pid = id;
+       s64 delta;
+
+       tsp = bpf_map_lookup_elem(&starts, &pid);
+       /* no entry recorded for this thread: nothing to measure */
+       if (!tsp)
+               return 0;
+       delta = (s64)(ts - *tsp);
+       /* discard samples where the clock appears to have gone backwards */
+       if (delta < 0)
+               goto cleanup;
+
+       /* timestamps are in nanoseconds; scale to msecs or usecs */
+       if (targ_ms)
+               delta /= 1000000U;
+       else
+               delta /= 1000U;
+       slot = log2l(delta);
+       /* clamp so that out-of-range latencies land in the last bucket */
+       if (slot >= MAX_SLOTS)
+               slot = MAX_SLOTS - 1;
+       /* bounds check on the histogram index before the array access */
+       if (type >= __MAX_FOP_TYPE)
+               goto cleanup;
+       __sync_fetch_and_add(&hists[type].slots[slot], 1);
+
+cleanup:
+       bpf_map_delete_elem(&starts, &pid);
+       return 0;
+}
+
+/*
+ * kretprobe hooks: each one accounts the elapsed time to the histogram
+ * of the operation it is attached to.
+ */
+SEC("kretprobe/ext4_file_read_iter")
+int BPF_KRETPROBE(kretprobe1)
+{
+       return trace_return(READ_ITER);
+}
+
+SEC("kretprobe/ext4_file_write_iter")
+int BPF_KRETPROBE(kretprobe2)
+{
+       return trace_return(WRITE_ITER);
+}
+
+SEC("kretprobe/ext4_file_open")
+int BPF_KRETPROBE(kretprobe3)
+{
+       return trace_return(OPEN);
+}
+
+SEC("kretprobe/ext4_sync_file")
+int BPF_KRETPROBE(kretprobe4)
+{
+       return trace_return(FSYNC);
+}
+
+/*
+ * fexit counterparts of the kretprobe hooks: same four functions, same
+ * trace_return() handler.
+ */
+SEC("fexit/ext4_file_read_iter")
+int BPF_PROG(fexit1)
+{
+       return trace_return(READ_ITER);
+}
+
+SEC("fexit/ext4_file_write_iter")
+int BPF_PROG(fexit2)
+{
+       return trace_return(WRITE_ITER);
+}
+
+SEC("fexit/ext4_file_open")
+int BPF_PROG(fexit3)
+{
+       return trace_return(OPEN);
+}
+
+SEC("fexit/ext4_sync_file")
+int BPF_PROG(fexit4)
+{
+       return trace_return(FSYNC);
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/ext4dist.c b/libbpf-tools/ext4dist.c
new file mode 100644 (file)
index 0000000..2593e43
--- /dev/null
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2021 Wenbo Zhang
+//
+// Based on ext4dist(8) from BCC by Brendan Gregg.
+// 9-Feb-2021   Wenbo Zhang   Created this.
+#include <argp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "ext4dist.h"
+#include "ext4dist.skel.h"
+#include "trace_helpers.h"
+
+/* Command-line configuration, filled in by parse_arg(). */
+static struct env {
+       bool timestamp;         /* -T: print a timestamp with each report */
+       bool milliseconds;      /* -m: histogram in msecs instead of usecs */
+       pid_t pid;              /* -p: process to trace, 0 when not given */
+       time_t interval;        /* first positional: seconds between reports */
+       int times;              /* second positional: number of reports */
+       bool verbose;           /* -v: let libbpf debug messages through */
+} env = {
+       /* large defaults used when no positional arguments are given */
+       .interval = 99999999,
+       .times = 99999999,
+};
+
+/*
+ * Set by the SIGINT handler and polled by the main loop. sig_atomic_t is
+ * the object type the C standard allows a signal handler to write.
+ */
+static volatile sig_atomic_t exiting;
+
+/* Version, bug-report address and help text picked up by argp. */
+const char *argp_program_version = "ext4dist 0.1";
+const char *argp_program_bug_address =
+       "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Summarize ext4 operation latency.\n"
+"\n"
+"Usage: ext4dist [-h] [-T] [-m] [-p PID] [interval] [count]\n"
+"\n"
+"EXAMPLES:\n"
+"    ext4dist          # show operation latency as a histogram\n"
+"    ext4dist -p 181   # trace PID 181 only\n"
+"    ext4dist 1 10     # print 1 second summaries, 10 times\n"
+"    ext4dist -m 5     # 5s summaries, milliseconds\n";
+
+/* Options understood by parse_arg(); -h is accepted but hidden from --help. */
+static const struct argp_option opts[] = {
+       { "timestamp", 'T', NULL, 0, "Print timestamp" },
+       { "milliseconds", 'm', NULL, 0, "Millisecond histogram" },
+       { "pid", 'p', "PID", 0, "Process PID to trace" },
+       { "verbose", 'v', NULL, 0, "Verbose debug output" },
+       { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+       {},
+};
+
+/*
+ * Parse @arg as a strictly positive base-10 integer that fits in an int.
+ *
+ * Returns true and stores the value in *@out on success. Returns false on
+ * empty input, trailing characters, overflow, or a value <= 0.
+ */
+static bool parse_positive_int(const char *arg, int *out)
+{
+       char *end;
+       long val;
+
+       errno = 0;
+       val = strtol(arg, &end, 10);
+       if (errno || end == arg || *end != '\0')
+               return false;
+       /* the round trip through int rejects values that do not fit */
+       if (val <= 0 || val != (int)val)
+               return false;
+       *out = val;
+       return true;
+}
+
+/*
+ * argp callback: store options in env. The first positional argument is
+ * the report interval in seconds, the second the number of reports; both
+ * must be positive integers. Invalid input prints a message and the usage
+ * text through argp_usage().
+ *
+ * Returns 0 for handled keys and ARGP_ERR_UNKNOWN for all others.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       static int pos_args;
+       int val = 0;
+
+       switch (key) {
+       case 'h':
+               argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+               break;
+       case 'v':
+               env.verbose = true;
+               break;
+       case 'T':
+               env.timestamp = true;
+               break;
+       case 'm':
+               env.milliseconds = true;
+               break;
+       case 'p':
+               if (!parse_positive_int(arg, &val)) {
+                       fprintf(stderr, "invalid PID: %s\n", arg);
+                       argp_usage(state);
+               }
+               env.pid = val;
+               break;
+       case ARGP_KEY_ARG:
+               if (pos_args == 0) {
+                       if (!parse_positive_int(arg, &val)) {
+                               fprintf(stderr, "invalid interval: %s\n", arg);
+                               argp_usage(state);
+                       }
+                       env.interval = val;
+               } else if (pos_args == 1) {
+                       if (!parse_positive_int(arg, &val)) {
+                               fprintf(stderr, "invalid times: %s\n", arg);
+                               argp_usage(state);
+                       }
+                       env.times = val;
+               } else {
+                       fprintf(stderr,
+                               "unrecognized positional argument: %s\n", arg);
+                       argp_usage(state);
+               }
+               pos_args++;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+/*
+ * libbpf log callback: forward messages to stderr, dropping debug-level
+ * ones unless verbose output was requested. Returns what vfprintf()
+ * returns, or 0 for a suppressed message.
+ *
+ * File-local: it is only handed to libbpf_set_print() in this file.
+ */
+static int libbpf_print_fn(enum libbpf_print_level level,
+                          const char *format, va_list args)
+{
+       if (level == LIBBPF_DEBUG && !env.verbose)
+               return 0;
+       return vfprintf(stderr, format, args);
+}
+
+/* Signal handler: only raises the exit flag polled by the main loop. */
+static void sig_handler(int sig)
+{
+       (void)sig;
+       exiting = true;
+}
+
+/* Printable name of each traced operation, indexed by enum ext4_fop_type. */
+static const char *const fop_names[] = {
+       [READ_ITER] = "read_iter",
+       [WRITE_ITER] = "write_iter",
+       [OPEN] = "open",
+       [FSYNC] = "fsync",
+};
+
+/* All-zero histogram: used to reset the counters and to spot empty ones. */
+static struct hist zero;
+
+/*
+ * Print the histogram of every operation that recorded at least one
+ * sample. Each histogram is copied out of @bss and the original is reset
+ * to zero whether or not it is printed. Always returns 0.
+ */
+static int print_hists(struct ext4dist_bpf__bss *bss)
+{
+       const char *units;
+       enum ext4_fop_type op;
+
+       units = env.milliseconds ? "msecs" : "usecs";
+
+       for (op = READ_ITER; op < __MAX_FOP_TYPE; op++) {
+               struct hist snapshot = bss->hists[op];
+
+               bss->hists[op] = zero;
+               if (memcmp(&zero, &snapshot, sizeof(snapshot)) != 0) {
+                       printf("operation = '%s'\n", fop_names[op]);
+                       print_log2_hist(snapshot.slots, MAX_SLOTS, units);
+                       printf("\n");
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Decide whether to use the kprobe/kretprobe programs instead of the
+ * fentry/fexit ones.
+ *
+ * Returns true when ext4 is loaded as a module and its BTF file is not
+ * readable, false otherwise.
+ */
+static bool should_fallback(void)
+{
+       /*
+        * Check whether EXT4 is compiled into a kernel module and whether
+        * the kernel supports module BTF.
+        *
+        * If the kernel exposes BTF for the module we can use fentry to
+        * get better performance; otherwise we need to fall back to kprobe
+        * to stay compatible with older kernels.
+        *
+        * access() returns 0 on success, so a non-zero result means the
+        * module BTF is missing or unreadable.
+        */
+       return is_kernel_module("ext4") &&
+              access("/sys/kernel/btf/ext4", R_OK) != 0;
+}
+
+/*
+ * Entry point: parse the command line, open the BPF skeleton, keep either
+ * the fentry/fexit or the kprobe/kretprobe program set, load and attach
+ * it, then print the latency histograms once per interval until the
+ * requested number of reports is reached or SIGINT arrives.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int main(int argc, char **argv)
+{
+       static const struct argp argp = {
+               .options = opts,
+               .parser = parse_arg,
+               .doc = argp_program_doc,
+       };
+       struct ext4dist_bpf *skel;
+       struct tm *tm;
+       char ts[32];
+       time_t t;
+       int err;
+
+       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+       if (err)
+               return err;
+
+       libbpf_set_print(libbpf_print_fn);
+
+       err = bump_memlock_rlimit();
+       if (err) {
+               fprintf(stderr, "failed to increase rlimit: %d\n", err);
+               return 1;
+       }
+
+       skel = ext4dist_bpf__open();
+       if (!skel) {
+               fprintf(stderr, "failed to open BPF skeleton\n");
+               return 1;
+       }
+
+       /* initialize global data (filtering options) */
+       skel->rodata->targ_ms = env.milliseconds;
+       skel->rodata->targ_tgid = env.pid;
+
+       /* only one of the two program sets is loaded */
+       if (should_fallback()) {
+               bpf_program__set_autoload(skel->progs.fentry1, false);
+               bpf_program__set_autoload(skel->progs.fentry2, false);
+               bpf_program__set_autoload(skel->progs.fentry3, false);
+               bpf_program__set_autoload(skel->progs.fentry4, false);
+               bpf_program__set_autoload(skel->progs.fexit1, false);
+               bpf_program__set_autoload(skel->progs.fexit2, false);
+               bpf_program__set_autoload(skel->progs.fexit3, false);
+               bpf_program__set_autoload(skel->progs.fexit4, false);
+       } else {
+               bpf_program__set_autoload(skel->progs.kprobe1, false);
+               bpf_program__set_autoload(skel->progs.kprobe2, false);
+               bpf_program__set_autoload(skel->progs.kprobe3, false);
+               bpf_program__set_autoload(skel->progs.kprobe4, false);
+               bpf_program__set_autoload(skel->progs.kretprobe1, false);
+               bpf_program__set_autoload(skel->progs.kretprobe2, false);
+               bpf_program__set_autoload(skel->progs.kretprobe3, false);
+               bpf_program__set_autoload(skel->progs.kretprobe4, false);
+       }
+
+       err = ext4dist_bpf__load(skel);
+       if (err) {
+               fprintf(stderr, "failed to load BPF skeleton: %d\n", err);
+               goto cleanup;
+       }
+
+       err = ext4dist_bpf__attach(skel);
+       if (err) {
+               fprintf(stderr, "failed to attach BPF programs: %d\n", err);
+               goto cleanup;
+       }
+
+       signal(SIGINT, sig_handler);
+
+       printf("Tracing ext4 operation latency... Hit Ctrl-C to end.\n");
+
+       /* main: poll */
+       while (1) {
+               sleep(env.interval);
+               printf("\n");
+
+               if (env.timestamp) {
+                       time(&t);
+                       tm = localtime(&t);
+                       strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+                       printf("%-8s\n", ts);
+               }
+
+               err = print_hists(skel->bss);
+               if (err)
+                       break;
+
+               /* a final report is still printed after Ctrl-C */
+               if (exiting || --env.times == 0)
+                       break;
+       }
+
+cleanup:
+       ext4dist_bpf__destroy(skel);
+
+       return err != 0;
+}
diff --git a/libbpf-tools/ext4dist.h b/libbpf-tools/ext4dist.h
new file mode 100644 (file)
index 0000000..0137de6
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __EXT4DIST_H
+#define __EXT4DIST_H
+
+/* Traced ext4 file operations; the values index the per-operation histograms. */
+enum ext4_fop_type {
+       READ_ITER,
+       WRITE_ITER,
+       OPEN,
+       FSYNC,
+       __MAX_FOP_TYPE, /* number of operation types, not an operation */
+};
+
+/* Number of log2 buckets in one histogram. */
+#define MAX_SLOTS      27
+
+/* Log2 latency histogram: slots[i] counts the samples that fell into bucket i. */
+struct hist {
+       __u32 slots[MAX_SLOTS];
+};
+
+#endif /* __EXT4DIST_H */
index 53dad4f585aca648b1417611d93c26bd5879f7c3..1a8fafc9ec0398d218292b662701d48dca7c6762 100644 (file)
@@ -6,7 +6,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdbool.h>
 #include <sys/resource.h>
 #include <time.h>
 #include "trace_helpers.h"
@@ -373,3 +372,26 @@ int bump_memlock_rlimit(void)
 
        return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
 }
+
+/*
+ * Report whether a module called @name is listed in /proc/modules.
+ *
+ * Only the first whitespace-delimited field of each line is compared.
+ * Returns false when the file cannot be opened or no line matches.
+ */
+bool is_kernel_module(const char *name)
+{
+       bool at_line_start = true;
+       bool found = false;
+       char line[256];
+       char mod[64];
+       FILE *f;
+
+       f = fopen("/proc/modules", "r");
+       if (!f)
+               return false;
+
+       while (fgets(line, sizeof(line), f) != NULL) {
+               bool first_chunk = at_line_start;
+
+               /*
+                * A chunk without a newline means the line was longer than
+                * the buffer; the next read continues it and must not be
+                * parsed as a new module entry.
+                */
+               at_line_start = strchr(line, '\n') != NULL;
+               if (!first_chunk)
+                       continue;
+               /* separate, width-limited destination: no overlap, no overflow */
+               if (sscanf(line, "%63s", mod) != 1)
+                       continue;
+               if (!strcmp(mod, name)) {
+                       found = true;
+                       break;
+               }
+       }
+
+       fclose(f);
+       return found;
+}
index 67fdb3507dd06a28f30a07915af07887b92af09e..f4fbb849841f5a80706e68c47d7f7eba68cca1b8 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef __TRACE_HELPERS_H
 #define __TRACE_HELPERS_H
 
+#include <stdbool.h>
+
 #define NSEC_PER_SEC           1000000000ULL
 
 struct ksym {
@@ -39,4 +41,6 @@ void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base,
 unsigned long long get_ktime_ns(void);
 int bump_memlock_rlimit(void);
 
+bool is_kernel_module(const char *name);
+
 #endif /* __TRACE_HELPERS_H */