libbpf-tools: add fsdist
authorHengqi Chen <chenhengqi@outlook.com>
Wed, 19 May 2021 16:00:32 +0000 (00:00 +0800)
committeryonghong-song <ys114321@gmail.com>
Wed, 2 Jun 2021 15:02:26 +0000 (08:02 -0700)
fsdist is a multitool which shows filesystem latency.
Currently we support btrfs/ext4/nfs/xfs filesystems.
It behaves the same as its counterparts in BCC tools
named btrfsdist.py/ext4dist.py/nfsdist.py/xfsdist.py

Signed-off-by: Hengqi Chen <chenhengqi@outlook.com>
libbpf-tools/.gitignore
libbpf-tools/Makefile
libbpf-tools/fsdist.bpf.c [new file with mode: 0644]
libbpf-tools/fsdist.c [new file with mode: 0644]
libbpf-tools/fsdist.h [new file with mode: 0644]

index f1305ba526faab0c8f4919df3d4b9bfd9fa07610..b5ff0eff086bd4fc80e7eb778e9061cd1bf8eeec 100644 (file)
@@ -11,6 +11,7 @@
 /execsnoop
 /ext4dist
 /filelife
+/fsdist
 /funclatency
 /gethostlatency
 /hardirqs
index 3b43c3073e24e0e1c53a71f48c95bf3253b68d65..edf4852f1827a212d04b99ba49ca40637f1b9bf7 100644 (file)
@@ -28,6 +28,7 @@ APPS = \
        execsnoop \
        ext4dist \
        filelife \
+       fsdist \
        funclatency \
        gethostlatency \
        hardirqs \
diff --git a/libbpf-tools/fsdist.bpf.c b/libbpf-tools/fsdist.bpf.c
new file mode 100644 (file)
index 0000000..4321e3b
--- /dev/null
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Hengqi Chen */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bits.bpf.h"
+#include "fsdist.h"
+
+#define MAX_ENTRIES    10240
+
+const volatile pid_t target_pid = 0;
+const volatile bool in_ms = false;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, MAX_ENTRIES);
+       __type(key, __u32);
+       __type(value, __u64);
+} starts SEC(".maps");
+
+struct hist hists[MAX_OP] = {};
+
+static int probe_entry() /* record the entry timestamp of the current thread in the `starts` map */
+{
+       __u64 pid_tgid = bpf_get_current_pid_tgid();
+       __u32 pid = pid_tgid >> 32; /* upper 32 bits, compared against target_pid below */
+       __u32 tid = (__u32)pid_tgid; /* lower 32 bits, used as the map key */
+       __u64 ts;
+
+       if (target_pid && target_pid != pid) /* target_pid == 0 disables the filter */
+               return 0;
+
+       ts = bpf_ktime_get_ns();
+       bpf_map_update_elem(&starts, &tid, &ts, BPF_ANY); /* return value is ignored */
+       return 0;
+}
+
+static int probe_return(enum fs_file_op op) /* add the time elapsed since probe_entry() to the histogram hists[op] */
+{
+       __u64 pid_tgid = bpf_get_current_pid_tgid();
+       __u32 pid = pid_tgid >> 32; /* NOTE(review): not referenced anywhere else in this function */
+       __u32 tid = (__u32)pid_tgid; /* same key that probe_entry() stores under */
+       __u64 ts = bpf_ktime_get_ns();
+       __u64 *tsp, slot;
+       __s64 delta;
+
+       tsp = bpf_map_lookup_elem(&starts, &tid);
+       if (!tsp) /* no start time recorded for this thread, nothing to account */
+               return 0;
+
+       if (op >= MAX_OP) /* guards the hists[op] index used below */
+               goto cleanup;
+
+       delta = (__s64)(ts - *tsp);
+       if (delta < 0) /* drop the sample if the exit timestamp is earlier than the entry timestamp */
+               goto cleanup;
+
+       if (in_ms) /* delta is in nanoseconds: convert to milliseconds, otherwise to microseconds */
+               delta /= 1000000;
+       else
+               delta /= 1000;
+
+       slot = log2l(delta);
+       if (slot >= MAX_SLOTS) /* clamp out-of-range values into the last bucket */
+               slot = MAX_SLOTS - 1;
+       __sync_fetch_and_add(&hists[op].slots[slot], 1);
+
+cleanup:
+       bpf_map_delete_elem(&starts, &tid); /* reached on every path where a start time was found */
+       return 0;
+}
+
+SEC("kprobe/dummy_file_read")
+int BPF_KPROBE(file_read_entry)
+{
+       return probe_entry();
+}
+
+SEC("kretprobe/dummy_file_read")
+int BPF_KRETPROBE(file_read_exit)
+{
+       return probe_return(READ);
+}
+
+SEC("kprobe/dummy_file_write")
+int BPF_KPROBE(file_write_entry)
+{
+       return probe_entry();
+}
+
+SEC("kretprobe/dummy_file_write")
+int BPF_KRETPROBE(file_write_exit)
+{
+       return probe_return(WRITE);
+}
+
+SEC("kprobe/dummy_file_open")
+int BPF_KPROBE(file_open_entry)
+{
+       return probe_entry();
+}
+
+SEC("kretprobe/dummy_file_open")
+int BPF_KRETPROBE(file_open_exit)
+{
+       return probe_return(OPEN);
+}
+
+SEC("kprobe/dummy_file_sync")
+int BPF_KPROBE(file_sync_entry)
+{
+       return probe_entry();
+}
+
+SEC("kretprobe/dummy_file_sync")
+int BPF_KRETPROBE(file_sync_exit)
+{
+       return probe_return(FSYNC);
+}
+
+SEC("kprobe/dummy_getattr")
+int BPF_KPROBE(getattr_entry)
+{
+       return probe_entry();
+}
+
+SEC("kretprobe/dummy_getattr")
+int BPF_KRETPROBE(getattr_exit)
+{
+       return probe_return(GETATTR);
+}
+
+SEC("fentry/dummy_file_read")
+int BPF_PROG(file_read_fentry)
+{
+       return probe_entry();
+}
+
+SEC("fexit/dummy_file_read")
+int BPF_PROG(file_read_fexit)
+{
+       return probe_return(READ);
+}
+
+SEC("fentry/dummy_file_write")
+int BPF_PROG(file_write_fentry)
+{
+       return probe_entry();
+}
+
+SEC("fexit/dummy_file_write")
+int BPF_PROG(file_write_fexit)
+{
+       return probe_return(WRITE);
+}
+
+SEC("fentry/dummy_file_open")
+int BPF_PROG(file_open_fentry)
+{
+       return probe_entry();
+}
+
+SEC("fexit/dummy_file_open")
+int BPF_PROG(file_open_fexit)
+{
+       return probe_return(OPEN);
+}
+
+SEC("fentry/dummy_file_sync")
+int BPF_PROG(file_sync_fentry)
+{
+       return probe_entry();
+}
+
+SEC("fexit/dummy_file_sync")
+int BPF_PROG(file_sync_fexit)
+{
+       return probe_return(FSYNC);
+}
+
+SEC("fentry/dummy_getattr")
+int BPF_PROG(getattr_fentry)
+{
+       return probe_entry();
+}
+
+SEC("fexit/dummy_getattr")
+int BPF_PROG(getattr_fexit)
+{
+       return probe_return(GETATTR);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/libbpf-tools/fsdist.c b/libbpf-tools/fsdist.c
new file mode 100644 (file)
index 0000000..c8b6e24
--- /dev/null
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * fsdist  Summarize file system operations latency.
+ *
+ * Copyright (c) 2021 Hengqi Chen
+ * 20-May-2021   Hengqi Chen   Created this.
+ */
+#include <argp.h>
+#include <libgen.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "fsdist.h"
+#include "fsdist.skel.h"
+#include "trace_helpers.h"
+
+#define warn(...) fprintf(stderr, __VA_ARGS__)
+
+enum fs_type {
+       NONE,
+       BTRFS,
+       EXT4,
+       NFS,
+       XFS,
+};
+
+static struct fs_config {
+       const char *fs;
+       const char *op_funcs[MAX_OP];
+} fs_configs[] = {
+       [BTRFS] = { "btrfs", {
+               [READ] = "btrfs_file_read_iter",
+               [WRITE] = "btrfs_file_write_iter",
+               [OPEN] = "btrfs_file_open",
+               [FSYNC] = "btrfs_sync_file",
+               [GETATTR] = NULL, /* not supported */
+       }},
+       [EXT4] = { "ext4", {
+               [READ] = "ext4_file_read_iter",
+               [WRITE] = "ext4_file_write_iter",
+               [OPEN] = "ext4_file_open",
+               [FSYNC] = "ext4_sync_file",
+               [GETATTR] = "ext4_file_getattr",
+       }},
+       [NFS] = { "nfs", {
+               [READ] = "nfs_file_read",
+               [WRITE] = "nfs_file_write",
+               [OPEN] = "nfs_file_open",
+               [FSYNC] = "nfs_file_fsync",
+               [GETATTR] = "nfs_getattr",
+       }},
+       [XFS] = { "xfs", {
+               [READ] = "xfs_file_read_iter",
+               [WRITE] = "xfs_file_write_iter",
+               [OPEN] = "xfs_file_open",
+               [FSYNC] = "xfs_file_fsync",
+               [GETATTR] = NULL, /* not supported */
+       }},
+};
+
+static char *file_op_names[] = {
+       [READ] = "read",
+       [WRITE] = "write",
+       [OPEN] = "open",
+       [FSYNC] = "fsync",
+       [GETATTR] = "getattr",
+};
+
+static struct hist zero;
+static volatile sig_atomic_t exiting;
+
+/* options */
+static enum fs_type fs_type = NONE;
+static bool emit_timestamp = false;
+static bool timestamp_in_ms = false;
+static pid_t target_pid = 0;
+static int interval = 99999999;
+static int count = 99999999;
+static bool verbose = false;
+
+const char *argp_program_version = "fsdist 0.1";
+const char *argp_program_bug_address =
+       "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] = /* help text handed to argp through the .doc field in main() */
+"Summarize file system operations latency.\n"
+"\n"
+"Usage: fsdist [-h] [-t FS] [-T] [-m] [-p PID] [-v] [interval] [count]\n"
+"\n"
+"EXAMPLES:\n"
+"    fsdist -t ext4             # show ext4 operations latency as a histogram\n"
+"    fsdist -t nfs -p 1216      # trace nfs operations with PID 1216 only\n"
+"    fsdist -t xfs 1 10         # trace xfs operations, 1s summaries, 10 times\n"
+"    fsdist -t btrfs -m 5       # trace btrfs operation, 5s summaries, in ms\n";
+
+static const struct argp_option opts[] = {
+       { "timestamp", 'T', NULL, 0, "Print timestamp" },
+       { "milliseconds", 'm', NULL, 0, "Millisecond histogram" },
+       { "pid", 'p', "PID", 0, "Process ID to trace" },
+       { "type", 't', "Filesystem", 0, "Which filesystem to trace, [btrfs/ext4/nfs/xfs]" },
+       { "verbose", 'v', NULL, 0, "Verbose debug output" },
+       { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+       {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state) /* argp parser callback: stores each option in the file-level option variables; returns 0 or ARGP_ERR_UNKNOWN */
+{
+       static int pos_args; /* number of positional arguments consumed so far */
+
+       switch (key) {
+       case 'v':
+               verbose = true;
+               break;
+       case 'T':
+               emit_timestamp = true;
+               break;
+       case 'm':
+               timestamp_in_ms = true;
+               break;
+       case 't':
+               if (!strcmp(arg, "btrfs")) {
+                       fs_type = BTRFS;
+               } else if (!strcmp(arg, "ext4")) {
+                       fs_type = EXT4;
+               } else if (!strcmp(arg, "nfs")) {
+                       fs_type = NFS;
+               } else if (!strcmp(arg, "xfs")) {
+                       fs_type = XFS;
+               } else {
+                       warn("invalid filesystem\n");
+                       argp_usage(state);
+               }
+               break;
+       case 'p':
+               errno = 0;
+               target_pid = strtol(arg, NULL, 10);
+               if (errno || target_pid <= 0) { /* also rejects non-numeric input, which strtol() turns into 0 */
+                       warn("invalid PID: %s\n", arg);
+                       argp_usage(state);
+               }
+               break;
+       case 'h':
+               argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+               break;
+       case ARGP_KEY_ARG:
+               errno = 0;
+               if (pos_args == 0) { /* first positional argument: interval in seconds */
+                       interval = strtol(arg, NULL, 10);
+                       if (errno || interval <= 0) { /* 0 would make main() spin on sleep(0); non-numeric input parses as 0 */
+                               warn("invalid interval\n");
+                               argp_usage(state);
+                       }
+               } else if (pos_args == 1) { /* second positional argument: number of summaries */
+                       count = strtol(arg, NULL, 10);
+                       if (errno || count <= 0) { /* main() stops on --count == 0, which a value <= 0 never reaches */
+                               warn("invalid count\n");
+                               argp_usage(state);
+                       }
+               } else {
+                       warn("unrecognized positional argument: %s\n", arg);
+                       argp_usage(state);
+               }
+               pos_args++;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+static void alias_parse(char *prog) /* preselect fs_type when the program is invoked under a per-filesystem name */
+{
+       char *name = basename(prog); /* NOTE(review): with <libgen.h> this is presumably POSIX basename(), which may modify its argument - confirm that is fine for argv[0] */
+
+       if (!strcmp(name, "btrfsdist")) {
+               fs_type = BTRFS;
+       } else if (!strcmp(name, "ext4dist")) {
+               fs_type = EXT4;
+       } else if (!strcmp(name, "nfsdist")) {
+               fs_type = NFS;
+       } else if (!strcmp(name, "xfsdist")) {
+               fs_type = XFS;
+       } /* any other name leaves fs_type untouched */
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+                          const char *format, va_list args)
+{
+       if (level == LIBBPF_DEBUG && !verbose)
+               return 0;
+       return vfprintf(stderr, format, args);
+}
+
+static void sig_handler(int sig)
+{
+       exiting = 1;
+}
+
+static int print_hists(struct fsdist_bpf__bss *bss) /* print, then reset, the histogram of every operation that has samples; always returns 0 */
+{
+       const char *units = timestamp_in_ms ? "msecs" : "usecs";
+       enum fs_file_op op;
+
+       for (op = READ; op < MAX_OP; op++) {
+               struct hist hist = bss->hists[op]; /* local copy; this is what gets printed */
+
+               bss->hists[op] = zero; /* clear the counters in bss right after copying them */
+               if (!memcmp(&zero, &hist, sizeof(hist))) /* skip operations whose slots are all zero */
+                       continue;
+               printf("operation = '%s'\n", file_op_names[op]);
+               print_log2_hist(hist.slots, MAX_SLOTS, units);
+               printf("\n");
+       }
+       return 0;
+}
+
+static bool check_fentry() /* true only if fentry_exists() succeeds for every configured function of the selected filesystem */
+{
+       int i;
+       const char *fn_name, *module;
+       bool support_fentry = true;
+
+       for (i = 0; i < MAX_OP; i++) {
+               fn_name = fs_configs[fs_type].op_funcs[i];
+               module = fs_configs[fs_type].fs;
+               if (fn_name && !fentry_exists(fn_name, NULL) /* NULL entries (unsupported operations) are skipped */
+                   && !fentry_exists(fn_name, module)) { /* second lookup passes the filesystem name as the module */
+                       support_fentry = false;
+                       break;
+               }
+       }
+       return support_fentry;
+}
+
+static int fentry_set_attach_target(struct fsdist_bpf *obj)
+{
+       struct fs_config *cfg = &fs_configs[fs_type];
+       int err = 0;
+
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_read_fentry, 0, cfg->op_funcs[READ]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_read_fexit, 0, cfg->op_funcs[READ]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_write_fentry, 0, cfg->op_funcs[WRITE]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_write_fexit, 0, cfg->op_funcs[WRITE]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_open_fentry, 0, cfg->op_funcs[OPEN]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_open_fexit, 0, cfg->op_funcs[OPEN]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_sync_fentry, 0, cfg->op_funcs[FSYNC]);
+       err = err ?: bpf_program__set_attach_target(obj->progs.file_sync_fexit, 0, cfg->op_funcs[FSYNC]);
+       if (cfg->op_funcs[GETATTR]) {
+               err = err ?: bpf_program__set_attach_target(obj->progs.getattr_fentry, 0, cfg->op_funcs[GETATTR]);
+               err = err ?: bpf_program__set_attach_target(obj->progs.getattr_fexit, 0, cfg->op_funcs[GETATTR]);
+       } else {
+               bpf_program__set_autoload(obj->progs.getattr_fentry, false);
+               bpf_program__set_autoload(obj->progs.getattr_fexit, false);
+       }
+       return err;
+}
+
+static void disable_fentry(struct fsdist_bpf *obj)
+{
+       bpf_program__set_autoload(obj->progs.file_read_fentry, false);
+       bpf_program__set_autoload(obj->progs.file_read_fexit, false);
+       bpf_program__set_autoload(obj->progs.file_write_fentry, false);
+       bpf_program__set_autoload(obj->progs.file_write_fexit, false);
+       bpf_program__set_autoload(obj->progs.file_open_fentry, false);
+       bpf_program__set_autoload(obj->progs.file_open_fexit, false);
+       bpf_program__set_autoload(obj->progs.file_sync_fentry, false);
+       bpf_program__set_autoload(obj->progs.file_sync_fexit, false);
+       bpf_program__set_autoload(obj->progs.getattr_fentry, false);
+       bpf_program__set_autoload(obj->progs.getattr_fexit, false);
+}
+
+static void disable_kprobes(struct fsdist_bpf *obj)
+{
+       bpf_program__set_autoload(obj->progs.file_read_entry, false);
+       bpf_program__set_autoload(obj->progs.file_read_exit, false);
+       bpf_program__set_autoload(obj->progs.file_write_entry, false);
+       bpf_program__set_autoload(obj->progs.file_write_exit, false);
+       bpf_program__set_autoload(obj->progs.file_open_entry, false);
+       bpf_program__set_autoload(obj->progs.file_open_exit, false);
+       bpf_program__set_autoload(obj->progs.file_sync_entry, false);
+       bpf_program__set_autoload(obj->progs.file_sync_exit, false);
+       bpf_program__set_autoload(obj->progs.getattr_entry, false);
+       bpf_program__set_autoload(obj->progs.getattr_exit, false);
+}
+
+static int attach_kprobes(struct fsdist_bpf *obj)
+{
+       long err = 0;
+       struct fs_config *cfg = &fs_configs[fs_type];
+
+       /* READ */
+       obj->links.file_read_entry = bpf_program__attach_kprobe(obj->progs.file_read_entry, false, cfg->op_funcs[READ]);
+       err = libbpf_get_error(obj->links.file_read_entry);
+       if (err)
+               goto errout;
+       obj->links.file_read_exit = bpf_program__attach_kprobe(obj->progs.file_read_exit, true, cfg->op_funcs[READ]);
+       err = libbpf_get_error(obj->links.file_read_exit);
+       if (err)
+               goto errout;
+       /* WRITE */
+       obj->links.file_write_entry = bpf_program__attach_kprobe(obj->progs.file_write_entry, false, cfg->op_funcs[WRITE]);
+       err = libbpf_get_error(obj->links.file_write_entry);
+       if (err)
+               goto errout;
+       obj->links.file_write_exit = bpf_program__attach_kprobe(obj->progs.file_write_exit, true, cfg->op_funcs[WRITE]);
+       err = libbpf_get_error(obj->links.file_write_exit);
+       if (err)
+               goto errout;
+       /* OPEN */
+       obj->links.file_open_entry = bpf_program__attach_kprobe(obj->progs.file_open_entry, false, cfg->op_funcs[OPEN]);
+       err = libbpf_get_error(obj->links.file_open_entry);
+       if (err)
+               goto errout;
+       obj->links.file_open_exit = bpf_program__attach_kprobe(obj->progs.file_open_exit, true, cfg->op_funcs[OPEN]);
+       err = libbpf_get_error(obj->links.file_open_exit);
+       if (err)
+               goto errout;
+       /* FSYNC */
+       obj->links.file_sync_entry = bpf_program__attach_kprobe(obj->progs.file_sync_entry, false, cfg->op_funcs[FSYNC]);
+       err = libbpf_get_error(obj->links.file_sync_entry);
+       if (err)
+               goto errout;
+       obj->links.file_sync_exit = bpf_program__attach_kprobe(obj->progs.file_sync_exit, true, cfg->op_funcs[FSYNC]);
+       err = libbpf_get_error(obj->links.file_sync_exit);
+       if (err)
+               goto errout;
+       /* GETATTR */
+       if (!cfg->op_funcs[GETATTR])
+               return 0;
+       obj->links.getattr_entry = bpf_program__attach_kprobe(obj->progs.getattr_entry, false, cfg->op_funcs[GETATTR]);
+       err = libbpf_get_error(obj->links.getattr_entry);
+       if (err)
+               goto errout;
+       obj->links.getattr_exit = bpf_program__attach_kprobe(obj->progs.getattr_exit, true, cfg->op_funcs[GETATTR]);
+       err = libbpf_get_error(obj->links.getattr_exit);
+       if (err)
+               goto errout;
+       return 0;
+errout:
+       warn("failed to attach kprobe: %ld\n", err);
+       return err;
+}
+
+int main(int argc, char **argv) /* parse options, load and attach the BPF programs, then print histograms once per interval */
+{
+       static const struct argp argp = {
+               .options = opts,
+               .parser = parse_arg,
+               .doc = argp_program_doc,
+       };
+       struct fsdist_bpf *skel;
+       struct tm *tm;
+       char ts[32];
+       time_t t;
+       int err;
+       bool support_fentry;
+
+       alias_parse(argv[0]); /* runs before argp_parse(), so an explicit -t overrides the name-based choice */
+       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+       if (err)
+               return err;
+       if (fs_type == NONE) { /* neither the program name nor -t selected a filesystem */
+               warn("filesystem must be specified using -t option.\n");
+               return 1;
+       }
+
+       libbpf_set_print(libbpf_print_fn);
+
+       err = bump_memlock_rlimit();
+       if (err) {
+               warn("failed to increase rlimit: %d\n", err);
+               return 1;
+       }
+
+       skel = fsdist_bpf__open();
+       if (!skel) {
+               warn("failed to open BPF object\n");
+               return 1;
+       }
+
+       skel->rodata->target_pid = target_pid; /* copy the options into the BPF program's read-only data before loading */
+       skel->rodata->in_ms = timestamp_in_ms;
+
+       /*
+        * before load
+        * if fentry is supported, we set attach target and disable kprobes
+        * otherwise, we disable fentry and attach kprobes after loading
+        */
+       support_fentry = check_fentry();
+       if (support_fentry) {
+               err = fentry_set_attach_target(skel);
+               if (err) {
+                       warn("failed to set attach target: %d\n", err);
+                       goto cleanup;
+               }
+               disable_kprobes(skel);
+       } else {
+               disable_fentry(skel);
+       }
+
+       err = fsdist_bpf__load(skel);
+       if (err) {
+               warn("failed to load BPF object: %d\n", err);
+               goto cleanup;
+       }
+
+       /*
+        * after load
+        * if fentry is supported, let libbpf do auto load
+        * otherwise, we attach to kprobes manually
+        */
+       err = support_fentry ? fsdist_bpf__attach(skel) : attach_kprobes(skel);
+       if (err) {
+               warn("failed to attach BPF programs: %d\n", err);
+               goto cleanup;
+       }
+
+       signal(SIGINT, sig_handler); /* sig_handler() only sets the `exiting` flag checked in the loop below */
+
+       printf("Tracing %s operation latency... Hit Ctrl-C to end.\n",
+              fs_configs[fs_type].fs);
+
+       while (1) {
+               sleep(interval);
+               printf("\n");
+
+               if (emit_timestamp) {
+                       time(&t);
+                       tm = localtime(&t);
+                       strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+                       printf("%-8s\n", ts);
+               }
+
+               err = print_hists(skel->bss);
+               if (err)
+                       break;
+
+               if (exiting || --count == 0) /* stop after SIGINT or once `count` summaries have been printed */
+                       break;
+       }
+
+cleanup:
+       fsdist_bpf__destroy(skel);
+
+       return err != 0; /* exit status 1 if err is non-zero, 0 otherwise */
+}
diff --git a/libbpf-tools/fsdist.h b/libbpf-tools/fsdist.h
new file mode 100644 (file)
index 0000000..a4184fc
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __FSDIST_H
+#define __FSDIST_H
+
+enum fs_file_op {
+       READ,
+       WRITE,
+       OPEN,
+       FSYNC,
+       GETATTR,
+       MAX_OP,
+};
+
+#define MAX_SLOTS      32
+
+struct hist {
+       __u32 slots[MAX_SLOTS];
+};
+
+#endif /* __FSDIST_H */