libbpf-tools: migrate xfsslower to fsslower
author Hengqi Chen <chenhengqi@outlook.com>
Sat, 5 Jun 2021 02:44:46 +0000 (10:44 +0800)
committer yonghong-song <ys114321@gmail.com>
Thu, 10 Jun 2021 16:29:27 +0000 (09:29 -0700)
This commit migrates xfsslower to a generic fsslower which supports
tracing multiple file systems. It works the same way as the original
tool except that the users are supposed to specify which file systems
to trace using -t option.

sudo ./fsslower -t ext4 -m 1
Tracing ext4 operations slower than 1 ms... Hit Ctrl-C to end.
TIME     COMM             PID     T BYTES   OFF_KB   LAT(ms) FILENAME
10:36:07 code             6896    F LL_MAX  0           2.40 state.vscdb-journal
10:36:07 code             6896    F LL_MAX  0           1.74 state.vscdb-journal
10:36:07 code             6896    F LL_MAX  0           1.78 state.vscdb

Signed-off-by: Hengqi Chen <chenhengqi@outlook.com>
libbpf-tools/.gitignore
libbpf-tools/Makefile
libbpf-tools/fsslower.bpf.c [new file with mode: 0644]
libbpf-tools/fsslower.c [new file with mode: 0644]
libbpf-tools/fsslower.h [new file with mode: 0644]
libbpf-tools/xfsslower.bpf.c [deleted file]
libbpf-tools/xfsslower.c [deleted file]
libbpf-tools/xfsslower.h [deleted file]

index b5ff0eff086bd4fc80e7eb778e9061cd1bf8eeec..a345b351f22e53c289d7c944cda8689310bf8ed9 100644 (file)
@@ -4,18 +4,22 @@
 /biosnoop
 /biostacks
 /bitesize
+/btrfsslower
 /cachestat
 /cpudist
 /cpufreq
 /drsnoop
 /execsnoop
 /ext4dist
+/ext4slower
 /filelife
 /fsdist
+/fsslower
 /funclatency
 /gethostlatency
 /hardirqs
 /llcstat
+/nfsslower
 /numamove
 /offcputime
 /opensnoop
index fae9aa34e43dcca1f457f5aa7ba11a4af91c1f2b..1f2dddcf3200b23676d145a7d06e713a2490f590 100644 (file)
@@ -29,6 +29,7 @@ APPS = \
        ext4dist \
        filelife \
        fsdist \
+       fsslower \
        funclatency \
        gethostlatency \
        hardirqs \
@@ -46,9 +47,12 @@ APPS = \
        tcpconnect \
        tcpconnlat \
        vfsstat \
-       xfsslower \
        #
 
+FSSLOWER_ALIASES = btrfsslower ext4slower nfsslower xfsslower
+
+APP_ALIASES = $(FSSLOWER_ALIASES)
+
 COMMON_OBJ = \
        $(OUTPUT)/trace_helpers.o \
        $(OUTPUT)/syscall_helpers.o \
@@ -58,7 +62,7 @@ COMMON_OBJ = \
        #
 
 .PHONY: all
-all: $(APPS)
+all: $(APPS) $(APP_ALIASES)
 
 ifeq ($(V),1)
 Q =
@@ -72,7 +76,7 @@ endif
 .PHONY: clean
 clean:
        $(call msg,CLEAN)
-       $(Q)rm -rf $(OUTPUT) $(APPS)
+       $(Q)rm -rf $(OUTPUT) $(APPS) $(APP_ALIASES)
 
 $(OUTPUT) $(OUTPUT)/libbpf:
        $(call msg,MKDIR,$@)
@@ -106,10 +110,15 @@ $(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch]) | $(OUTPUT)/libbpf
                    INCLUDEDIR= LIBDIR= UAPIDIR=                              \
                    install
 
-install: $(APPS)
+$(FSSLOWER_ALIASES): fsslower
+       $(call msg,SYMLINK,$@)
+       $(Q)ln -s $^ $@
+
+install: $(APPS) $(APP_ALIASES)
        $(call msg, INSTALL libbpf-tools)
        $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
        $(Q)$(INSTALL) $(APPS) $(DESTDIR)$(prefix)/bin
+       $(Q)cp -a $(APP_ALIASES) $(DESTDIR)$(prefix)/bin
 
 # delete failed targets
 .DELETE_ON_ERROR:
diff --git a/libbpf-tools/fsslower.bpf.c b/libbpf-tools/fsslower.bpf.c
new file mode 100644 (file)
index 0000000..cd6b66c
--- /dev/null
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Wenbo Zhang */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "bits.bpf.h"
+#include "fsslower.h"
+
+#define MAX_ENTRIES    8192
+
+const volatile pid_t target_pid = 0;
+const volatile __u64 min_lat_ns = 0;
+
+/* Per-thread state recorded at function entry and consumed at function exit. */
+struct data {
+       __u64 ts;               /* bpf_ktime_get_ns() timestamp taken at entry */
+       loff_t start;           /* start offset passed to probe_entry() */
+       loff_t end;             /* end offset passed to probe_entry(); 0 for non-fsync ops */
+       struct file *fp;        /* file being operated on; used at exit to resolve the name */
+};
+
+/* In-flight operations, keyed by thread id (lower 32 bits of pid_tgid). */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, MAX_ENTRIES);
+       __type(key, __u32);
+       __type(value, struct data);
+} starts SEC(".maps");
+
+/* Perf event array used by probe_exit() to send struct event records to user space. */
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(__u32));
+} events SEC(".maps");
+
+/*
+ * Common entry handler: remember when the current thread started an
+ * operation on @fp, together with the offsets supplied by the caller.
+ * Nothing is recorded for a NULL file or for a process filtered out by
+ * target_pid. Always returns 0.
+ */
+static int probe_entry(struct file *fp, loff_t start, loff_t end)
+{
+       __u64 id = bpf_get_current_pid_tgid();
+       __u32 tgid = id >> 32;
+       __u32 tid = (__u32)id;
+       struct data info;
+
+       if (!fp || (target_pid && target_pid != tgid))
+               return 0;
+
+       info.fp = fp;
+       info.start = start;
+       info.end = end;
+       info.ts = bpf_ktime_get_ns();
+       /* keyed by thread id so concurrent threads of one process do not collide */
+       bpf_map_update_elem(&starts, &tid, &info, BPF_ANY);
+       return 0;
+}
+
+/*
+ * Common exit handler: look up the entry record of the current thread,
+ * compute the latency and, if it exceeds min_lat_ns, emit a struct event
+ * through the events perf buffer.
+ *
+ * @ctx:  program context, forwarded to bpf_perf_event_output()
+ * @op:   which file operation just returned
+ * @size: value reported as event.size for every op except FSYNC, where
+ *        the recorded (end - start) range is reported instead
+ *
+ * Always returns 0.
+ */
+static int probe_exit(void *ctx, enum fs_file_op op, ssize_t size)
+{
+       __u64 pid_tgid = bpf_get_current_pid_tgid();
+       __u32 pid = pid_tgid >> 32;
+       __u32 tid = (__u32)pid_tgid;
+       __u64 end_ns, delta_ns;
+       const __u8 *file_name;
+       struct data *datap;
+       struct event event = {};
+       struct dentry *dentry;
+       struct file *fp;
+
+       if (target_pid && target_pid != pid)
+               return 0;
+
+       datap = bpf_map_lookup_elem(&starts, &tid);
+       if (!datap)
+               return 0;       /* missed entry */
+
+       end_ns = bpf_ktime_get_ns();
+       delta_ns = end_ns - datap->ts;
+       if (delta_ns <= min_lat_ns)
+               goto cleanup;
+
+       event.delta_us = delta_ns / 1000;
+       event.end_ns = end_ns;
+       event.offset = datap->start;
+       if (op != FSYNC)
+               event.size = size;
+       else
+               event.size = datap->end - datap->start;
+       event.pid = pid;
+       event.op = op;
+       fp = datap->fp;
+       dentry = BPF_CORE_READ(fp, f_path.dentry);
+       file_name = BPF_CORE_READ(dentry, d_name.name);
+       bpf_probe_read_kernel_str(&event.file, sizeof(event.file), file_name);
+       bpf_get_current_comm(&event.task, sizeof(event.task));
+       bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
+
+cleanup:
+       /*
+        * Delete only after the last access through datap: once the element
+        * is removed its storage may be handed to another thread's entry
+        * record, so reading it afterwards could observe foreign data.
+        */
+       bpf_map_delete_elem(&starts, &tid);
+       return 0;
+}
+
+/*
+ * kprobe for the read handler. The "dummy" section name is a placeholder;
+ * attach_kprobes() in fsslower.c attaches to the real function by name.
+ */
+SEC("kprobe/dummy_file_read")
+int BPF_KPROBE(file_read_entry, struct kiocb *iocb)
+{
+       struct file *filp = BPF_CORE_READ(iocb, ki_filp);
+       loff_t pos = BPF_CORE_READ(iocb, ki_pos);
+
+       /* reads carry no end offset */
+       return probe_entry(filp, pos, 0);
+}
+
+/* kretprobe for the read handler: the probed function's return value is reported as the size. */
+SEC("kretprobe/dummy_file_read")
+int BPF_KRETPROBE(file_read_exit, ssize_t ret)
+{
+       return probe_exit(ctx, READ, ret);
+}
+
+/*
+ * kprobe for the write handler. The "dummy" section name is a placeholder;
+ * attach_kprobes() in fsslower.c attaches to the real function by name.
+ */
+SEC("kprobe/dummy_file_write")
+int BPF_KPROBE(file_write_entry, struct kiocb *iocb)
+{
+       struct file *filp = BPF_CORE_READ(iocb, ki_filp);
+       loff_t pos = BPF_CORE_READ(iocb, ki_pos);
+
+       /* writes carry no end offset */
+       return probe_entry(filp, pos, 0);
+}
+
+/* kretprobe for the write handler: the probed function's return value is reported as the size. */
+SEC("kretprobe/dummy_file_write")
+int BPF_KRETPROBE(file_write_exit, ssize_t ret)
+{
+       return probe_exit(ctx, WRITE, ret);
+}
+
+/* kprobe for the open handler: records the file with zero start/end offsets. */
+SEC("kprobe/dummy_file_open")
+int BPF_KPROBE(file_open_entry, struct inode *inode, struct file *file)
+{
+       return probe_entry(file, 0, 0);
+}
+
+/* kretprobe for the open handler: opens are reported with size 0. */
+SEC("kretprobe/dummy_file_open")
+int BPF_KRETPROBE(file_open_exit)
+{
+       return probe_exit(ctx, OPEN, 0);
+}
+
+/* kprobe for the fsync handler: records the [start, end] range passed to the probed function. */
+SEC("kprobe/dummy_file_sync")
+int BPF_KPROBE(file_sync_entry, struct file *file, loff_t start, loff_t end)
+{
+       return probe_entry(file, start, end);
+}
+
+/* kretprobe for the fsync handler: probe_exit() ignores the size argument for FSYNC. */
+SEC("kretprobe/dummy_file_sync")
+int BPF_KRETPROBE(file_sync_exit)
+{
+       return probe_exit(ctx, FSYNC, 0);
+}
+
+/*
+ * fentry variant of the read entry probe. The real attach target is set
+ * from user space by fentry_set_attach_target() before load.
+ */
+SEC("fentry/dummy_file_read")
+int BPF_PROG(file_read_fentry, struct kiocb *iocb)
+{
+       /* fentry arguments are dereferenced directly, no CO-RE read helper */
+       return probe_entry(iocb->ki_filp, iocb->ki_pos, 0);
+}
+
+/* fexit variant of the read exit probe: ret is forwarded as the size. */
+SEC("fexit/dummy_file_read")
+int BPF_PROG(file_read_fexit, struct kiocb *iocb, struct iov_iter *to, ssize_t ret)
+{
+       return probe_exit(ctx, READ, ret);
+}
+
+/*
+ * fentry variant of the write entry probe. The real attach target is set
+ * from user space by fentry_set_attach_target() before load.
+ */
+SEC("fentry/dummy_file_write")
+int BPF_PROG(file_write_fentry, struct kiocb *iocb)
+{
+       /* fentry arguments are dereferenced directly, no CO-RE read helper */
+       return probe_entry(iocb->ki_filp, iocb->ki_pos, 0);
+}
+
+/* fexit variant of the write exit probe: ret is forwarded as the size. */
+SEC("fexit/dummy_file_write")
+int BPF_PROG(file_write_fexit, struct kiocb *iocb, struct iov_iter *from, ssize_t ret)
+{
+       return probe_exit(ctx, WRITE, ret);
+}
+
+/* fentry variant of the open entry probe: records the file with zero offsets. */
+SEC("fentry/dummy_file_open")
+int BPF_PROG(file_open_fentry, struct inode *inode, struct file *file)
+{
+       return probe_entry(file, 0, 0);
+}
+
+/* fexit variant of the open exit probe: opens are reported with size 0. */
+SEC("fexit/dummy_file_open")
+int BPF_PROG(file_open_fexit)
+{
+       return probe_exit(ctx, OPEN, 0);
+}
+
+/* fentry variant of the fsync entry probe: records the [start, end] range. */
+SEC("fentry/dummy_file_sync")
+int BPF_PROG(file_sync_fentry, struct file *file, loff_t start, loff_t end)
+{
+       return probe_entry(file, start, end);
+}
+
+/* fexit variant of the fsync exit probe: probe_exit() ignores the size argument for FSYNC. */
+SEC("fexit/dummy_file_sync")
+int BPF_PROG(file_sync_fexit)
+{
+       return probe_exit(ctx, FSYNC, 0);
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/fsslower.c b/libbpf-tools/fsslower.c
new file mode 100644 (file)
index 0000000..b21cd7f
--- /dev/null
@@ -0,0 +1,464 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * fsslower  Trace file system operations slower than a threshold.
+ *
+ * Copyright (c) 2020 Wenbo Zhang
+ * Copyright (c) 2021 Hengqi Chen
+ *
+ * Based on xfsslower(8) from BCC by Brendan Gregg & Dina Goldshtein.
+ * 9-Mar-2020   Wenbo Zhang   Created this.
+ * 27-May-2021  Hengqi Chen   Migrated to fsslower.
+ */
+#include <argp.h>
+#include <errno.h>
+#include <libgen.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "fsslower.h"
+#include "fsslower.skel.h"
+#include "trace_helpers.h"
+
+#define PERF_BUFFER_PAGES      64
+#define PERF_POLL_TIMEOUT_MS   100
+
+#define warn(...) fprintf(stderr, __VA_ARGS__)
+
+/* Supported file systems; the values index fs_configs[]. NONE means "not selected yet". */
+enum fs_type {
+       NONE,
+       BTRFS,
+       EXT4,
+       NFS,
+       XFS,
+};
+
+/*
+ * Per-filesystem configuration, indexed by enum fs_type: the file system
+ * name (also passed to fentry_exists() as the module name) and, for each
+ * enum fs_file_op, the kernel function the probes are attached to.
+ */
+static struct fs_config {
+       const char *fs;
+       const char *op_funcs[MAX_OP];
+} fs_configs[] = {
+       [BTRFS] = { "btrfs", {
+               [READ] = "btrfs_file_read_iter",
+               [WRITE] = "btrfs_file_write_iter",
+               [OPEN] = "btrfs_file_open",
+               [FSYNC] = "btrfs_sync_file",
+       }},
+       [EXT4] = { "ext4", {
+               [READ] = "ext4_file_read_iter",
+               [WRITE] = "ext4_file_write_iter",
+               [OPEN] = "ext4_file_open",
+               [FSYNC] = "ext4_sync_file",
+       }},
+       [NFS] = { "nfs", {
+               [READ] = "nfs_file_read",
+               [WRITE] = "nfs_file_write",
+               [OPEN] = "nfs_file_open",
+               [FSYNC] = "nfs_file_fsync",
+       }},
+       [XFS] = { "xfs", {
+               [READ] = "xfs_file_read_iter",
+               [WRITE] = "xfs_file_write_iter",
+               [OPEN] = "xfs_file_open",
+               [FSYNC] = "xfs_file_fsync",
+       }},
+};
+
+/* One-letter code printed in the T/TYPE column for each enum fs_file_op. */
+static char file_op[] = {
+       [READ] = 'R',
+       [WRITE] = 'W',
+       [OPEN] = 'O',
+       [FSYNC] = 'F',
+};
+
+static volatile sig_atomic_t exiting;
+
+/* options */
+static enum fs_type fs_type = NONE;
+static pid_t target_pid = 0;
+static time_t duration = 0;
+static __u64 min_lat_ms = 10;
+static bool csv = false;
+static bool verbose = false;
+
+const char *argp_program_version = "fsslower 0.1";
+const char *argp_program_bug_address =
+       "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Trace file system operations slower than a threshold.\n"
+"\n"
+"Usage: fsslower [-h] [-t FS] [-p PID] [-m MIN] [-d DURATION] [-c]\n"
+"\n"
+"EXAMPLES:\n"
+"    fsslower -t ext4             # trace ext4 operations slower than 10 ms\n"
+"    fsslower -t nfs -p 1216      # trace nfs operations with PID 1216 only\n"
+"    fsslower -t xfs -c -d 1      # trace xfs operations for 1s with csv output\n";
+
+/* Command-line options; each key is handled by the matching case in parse_arg(). */
+static const struct argp_option opts[] = {
+       { "csv", 'c', NULL, 0, "Output as csv" },
+       { "duration", 'd', "DURATION", 0, "Total duration of trace in seconds" },
+       { "pid", 'p', "PID", 0, "Process ID to trace" },
+       { "min", 'm', "MIN", 0, "Min latency to trace, in ms (default 10)" },
+       { "type", 't', "Filesystem", 0, "Which filesystem to trace, [btrfs/ext4/nfs/xfs]" },
+       { "verbose", 'v', NULL, 0, "Verbose debug output" },
+       { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+       {},
+};
+
+/*
+ * argp callback: store one parsed option into the file-scope option
+ * variables. Numeric arguments must be complete base-10 numbers; invalid
+ * values print a warning and the usage message via argp_usage().
+ *
+ * Returns 0 for handled keys and ARGP_ERR_UNKNOWN otherwise.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       long long min_lat;
+       char *end;
+
+       switch (key) {
+       case 'v':
+               verbose = true;
+               break;
+       case 'c':
+               csv = true;
+               break;
+       case 'd':
+               errno = 0;
+               duration = strtol(arg, &end, 10);
+               if (errno || end == arg || *end != '\0' || duration <= 0) {
+                       warn("invalid DURATION: %s\n", arg);
+                       argp_usage(state);
+               }
+               break;
+       case 'm':
+               errno = 0;
+               /*
+                * Parse into a signed temporary: min_lat_ms is unsigned, so a
+                * negative input could never be detected after the assignment.
+                */
+               min_lat = strtoll(arg, &end, 10);
+               if (errno || end == arg || *end != '\0' || min_lat < 0) {
+                       warn("invalid latency (in ms): %s\n", arg);
+                       argp_usage(state);
+               }
+               min_lat_ms = min_lat;
+               break;
+       case 't':
+               if (!strcmp(arg, "btrfs")) {
+                       fs_type = BTRFS;
+               } else if (!strcmp(arg, "ext4")) {
+                       fs_type = EXT4;
+               } else if (!strcmp(arg, "nfs")) {
+                       fs_type = NFS;
+               } else if (!strcmp(arg, "xfs")) {
+                       fs_type = XFS;
+               } else {
+                       warn("invalid filesystem\n");
+                       argp_usage(state);
+               }
+               break;
+       case 'p':
+               errno = 0;
+               target_pid = strtol(arg, &end, 10);
+               if (errno || end == arg || *end != '\0' || target_pid <= 0) {
+                       warn("invalid PID: %s\n", arg);
+                       argp_usage(state);
+               }
+               break;
+       case 'h':
+               argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+/*
+ * Select the file system from the name the program was invoked as
+ * (e.g. through the ext4slower symlink). Unknown names leave fs_type
+ * untouched.
+ */
+static void alias_parse(char *prog)
+{
+       static const struct {
+               const char *alias;
+               enum fs_type type;
+       } aliases[] = {
+               { "btrfsslower", BTRFS },
+               { "ext4slower", EXT4 },
+               { "nfsslower", NFS },
+               { "xfsslower", XFS },
+       };
+       const char *name = basename(prog);
+       size_t i;
+
+       for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
+               if (strcmp(name, aliases[i].alias) == 0) {
+                       fs_type = aliases[i].type;
+                       return;
+               }
+       }
+}
+
+/* libbpf log callback: forward to stderr, dropping debug messages unless -v was given. */
+static int libbpf_print_fn(enum libbpf_print_level level,
+                          const char *format, va_list args)
+{
+       if (verbose || level != LIBBPF_DEBUG)
+               return vfprintf(stderr, format, args);
+       return 0;
+}
+
+/* Signal handler (installed for SIGINT in main()): only sets the exiting flag. */
+static void sig_handler(int sig)
+{
+       exiting = 1;
+}
+
+/*
+ * Report whether every configured function of the selected file system
+ * can be found by fentry_exists(), either without a module name or with
+ * the file system name as the module.
+ */
+static bool check_fentry()
+{
+       const struct fs_config *cfg = &fs_configs[fs_type];
+       int op;
+
+       for (op = 0; op < MAX_OP; op++) {
+               const char *fn = cfg->op_funcs[op];
+
+               if (!fn)
+                       continue;
+               if (fentry_exists(fn, NULL) || fentry_exists(fn, cfg->fs))
+                       continue;
+               /* one missing target is enough to fall back to kprobes */
+               return false;
+       }
+       return true;
+}
+
+/*
+ * Point each fentry/fexit program at the kernel function configured for
+ * its operation. Stops at, and returns, the first error; 0 on success.
+ */
+static int fentry_set_attach_target(struct fsslower_bpf *obj)
+{
+       const struct fs_config *cfg = &fs_configs[fs_type];
+       struct {
+               struct bpf_program *prog;
+               enum fs_file_op op;
+       } targets[] = {
+               { obj->progs.file_read_fentry, READ },
+               { obj->progs.file_read_fexit, READ },
+               { obj->progs.file_write_fentry, WRITE },
+               { obj->progs.file_write_fexit, WRITE },
+               { obj->progs.file_open_fentry, OPEN },
+               { obj->progs.file_open_fexit, OPEN },
+               { obj->progs.file_sync_fentry, FSYNC },
+               { obj->progs.file_sync_fexit, FSYNC },
+       };
+       size_t i;
+       int err;
+
+       for (i = 0; i < sizeof(targets) / sizeof(targets[0]); i++) {
+               err = bpf_program__set_attach_target(targets[i].prog, 0,
+                                                    cfg->op_funcs[targets[i].op]);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+/* Exclude all fentry/fexit programs from loading (used on the kprobe fallback path). */
+static void disable_fentry(struct fsslower_bpf *obj)
+{
+       struct bpf_program *progs[] = {
+               obj->progs.file_read_fentry,
+               obj->progs.file_read_fexit,
+               obj->progs.file_write_fentry,
+               obj->progs.file_write_fexit,
+               obj->progs.file_open_fentry,
+               obj->progs.file_open_fexit,
+               obj->progs.file_sync_fentry,
+               obj->progs.file_sync_fexit,
+       };
+       size_t i;
+
+       for (i = 0; i < sizeof(progs) / sizeof(progs[0]); i++)
+               bpf_program__set_autoload(progs[i], false);
+}
+
+/* Exclude all kprobe/kretprobe programs from loading (used when fentry is available). */
+static void disable_kprobes(struct fsslower_bpf *obj)
+{
+       struct bpf_program *progs[] = {
+               obj->progs.file_read_entry,
+               obj->progs.file_read_exit,
+               obj->progs.file_write_entry,
+               obj->progs.file_write_exit,
+               obj->progs.file_open_entry,
+               obj->progs.file_open_exit,
+               obj->progs.file_sync_entry,
+               obj->progs.file_sync_exit,
+       };
+       size_t i;
+
+       for (i = 0; i < sizeof(progs) / sizeof(progs[0]); i++)
+               bpf_program__set_autoload(progs[i], false);
+}
+
+/*
+ * Attach the kprobe/kretprobe pair of every operation to the kernel
+ * function configured for the selected file system, storing each link in
+ * the skeleton. Stops at the first failure, prints it and returns the
+ * error; returns 0 when all eight probes are attached.
+ */
+static int attach_kprobes(struct fsslower_bpf *obj)
+{
+       const struct fs_config *cfg = &fs_configs[fs_type];
+       struct {
+               struct bpf_link **link;
+               struct bpf_program *prog;
+               bool retprobe;
+               enum fs_file_op op;
+       } probes[] = {
+               { &obj->links.file_read_entry, obj->progs.file_read_entry, false, READ },
+               { &obj->links.file_read_exit, obj->progs.file_read_exit, true, READ },
+               { &obj->links.file_write_entry, obj->progs.file_write_entry, false, WRITE },
+               { &obj->links.file_write_exit, obj->progs.file_write_exit, true, WRITE },
+               { &obj->links.file_open_entry, obj->progs.file_open_entry, false, OPEN },
+               { &obj->links.file_open_exit, obj->progs.file_open_exit, true, OPEN },
+               { &obj->links.file_sync_entry, obj->progs.file_sync_entry, false, FSYNC },
+               { &obj->links.file_sync_exit, obj->progs.file_sync_exit, true, FSYNC },
+       };
+       long err;
+       size_t i;
+
+       for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++) {
+               *probes[i].link = bpf_program__attach_kprobe(probes[i].prog,
+                                                            probes[i].retprobe,
+                                                            cfg->op_funcs[probes[i].op]);
+               err = libbpf_get_error(*probes[i].link);
+               if (err) {
+                       warn("failed to attach kprobe: %ld\n", err);
+                       return err;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Print the column header. CSV mode prints only the CSV header line;
+ * otherwise a one-line summary of what is traced precedes the header.
+ */
+static void print_headers()
+{
+       const char *name = fs_configs[fs_type].fs;
+
+       if (csv) {
+               printf("ENDTIME_ns,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE\n");
+               return;
+       }
+
+       if (!min_lat_ms)
+               printf("Tracing %s operations", name);
+       else
+               printf("Tracing %s operations slower than %llu ms", name, min_lat_ms);
+
+       if (!duration)
+               printf("... Hit Ctrl-C to end.\n");
+       else
+               printf(" for %ld secs.\n", duration);
+
+       printf("%-8s %-16s %-7s %1s %-7s %-8s %7s %s\n",
+              "TIME", "COMM", "PID", "T", "BYTES", "OFF_KB", "LAT(ms)", "FILENAME");
+}
+
+/*
+ * Perf buffer sample callback: print one event, either as a CSV record or
+ * as a human-readable row stamped with the local wall-clock time at which
+ * the event was received. A size of LLONG_MAX is printed as "LL_MAX".
+ *
+ * Conversion specifiers follow the field types of struct event: %llu for
+ * the __u64 fields, %lld for the __s64 offset and %zd for ssize_t.
+ */
+static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
+{
+       const struct event *e = data;
+       struct tm *tm;
+       char ts[32];
+       time_t t;
+
+       if (csv) {
+               printf("%llu,%s,%d,%c,", e->end_ns, e->task, e->pid, file_op[e->op]);
+               if (e->size == LLONG_MAX)
+                       printf("LL_MAX,");
+               else
+                       printf("%zd,", e->size);
+               printf("%lld,%llu,%s\n", e->offset, e->delta_us, e->file);
+               return;
+       }
+
+       time(&t);
+       tm = localtime(&t);
+       strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+
+       printf("%-8s %-16s %-7d %c ", ts, e->task, e->pid, file_op[e->op]);
+       if (e->size == LLONG_MAX)
+               printf("%-7s ", "LL_MAX");
+       else
+               printf("%-7zd ", e->size);
+       /* offset is shown in KB and latency in ms with two decimals */
+       printf("%-8lld %7.2f %s\n", e->offset / 1024, (double)e->delta_us / 1000, e->file);
+}
+
+/* Perf buffer lost-sample callback: report the number of dropped events on stderr. */
+static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
+{
+       warn("lost %llu events on CPU #%d\n", lost_cnt, cpu);
+}
+
+/*
+ * Entry point: parse options (the file system may also come from the
+ * invoked program name), open/load/attach the BPF programs, then poll the
+ * perf buffer until SIGINT, the optional duration expires, or polling
+ * fails. Returns 0 on a normal stop and non-zero on error.
+ */
+int main(int argc, char **argv)
+{
+       static const struct argp argp = {
+               .options = opts,
+               .parser = parse_arg,
+               .doc = argp_program_doc,
+       };
+       /* zero-initialized so fields that are not set below are not stack garbage */
+       struct perf_buffer_opts pb_opts = {};
+       struct perf_buffer *pb = NULL;
+       struct fsslower_bpf *skel;
+       __u64 time_end = 0;
+       int err;
+       bool support_fentry;
+
+       alias_parse(argv[0]);
+       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+       if (err)
+               return err;
+       if (fs_type == NONE) {
+               warn("filesystem must be specified using -t option.\n");
+               return 1;
+       }
+
+       libbpf_set_print(libbpf_print_fn);
+
+       err = bump_memlock_rlimit();
+       if (err) {
+               warn("failed to increase rlimit: %d\n", err);
+               return 1;
+       }
+
+       skel = fsslower_bpf__open();
+       if (!skel) {
+               warn("failed to open BPF object\n");
+               return 1;
+       }
+
+       skel->rodata->target_pid = target_pid;
+       skel->rodata->min_lat_ns = min_lat_ms * 1000 * 1000;
+
+       /*
+        * before load
+        * if fentry is supported, we set attach target and disable kprobes
+        * otherwise, we disable fentry and attach kprobes after loading
+        */
+       support_fentry = check_fentry();
+       if (support_fentry) {
+               err = fentry_set_attach_target(skel);
+               if (err) {
+                       warn("failed to set attach target: %d\n", err);
+                       goto cleanup;
+               }
+               disable_kprobes(skel);
+       } else {
+               disable_fentry(skel);
+       }
+
+       err = fsslower_bpf__load(skel);
+       if (err) {
+               warn("failed to load BPF object: %d\n", err);
+               goto cleanup;
+       }
+
+       /*
+        * after load
+        * if fentry is supported, let libbpf do auto load
+        * otherwise, we attach to kprobes manually
+        */
+       err = support_fentry ? fsslower_bpf__attach(skel) : attach_kprobes(skel);
+       if (err) {
+               warn("failed to attach BPF programs: %d\n", err);
+               goto cleanup;
+       }
+
+       signal(SIGINT, sig_handler);
+
+       pb_opts.sample_cb = handle_event;
+       pb_opts.lost_cb = handle_lost_events;
+       pb = perf_buffer__new(bpf_map__fd(skel->maps.events), PERF_BUFFER_PAGES,
+                             &pb_opts);
+       err = libbpf_get_error(pb);
+       if (err) {
+               pb = NULL;
+               warn("failed to open perf buffer: %d\n", err);
+               goto cleanup;
+       }
+
+       print_headers();
+
+       if (duration)
+               time_end = get_ktime_ns() + duration * NSEC_PER_SEC;
+
+       while (!exiting) {
+               err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
+               /* -EINTR only means a signal (e.g. Ctrl-C) interrupted the wait */
+               if (err < 0 && err != -EINTR) {
+                       warn("failed with polling perf buffer: %d\n", err);
+                       goto cleanup;
+               }
+               /*
+                * A non-negative result is the number of events consumed, not
+                * an error; reset it so a normal stop exits with status 0.
+                */
+               err = 0;
+               if (duration && get_ktime_ns() > time_end)
+                       break;
+       }
+
+cleanup:
+       perf_buffer__free(pb);
+       fsslower_bpf__destroy(skel);
+
+       return err != 0;
+}
diff --git a/libbpf-tools/fsslower.h b/libbpf-tools/fsslower.h
new file mode 100644 (file)
index 0000000..5c5ec4b
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __FSSLOWER_H
+#define __FSSLOWER_H
+
+#define FILE_NAME_LEN  32
+#define TASK_COMM_LEN  16
+
+/* File operations traced by fsslower; MAX_OP is the number of operations, not an operation. */
+enum fs_file_op {
+       READ,
+       WRITE,
+       OPEN,
+       FSYNC,
+       MAX_OP,
+};
+
+/* Record emitted by the BPF side for each slow operation and printed by user space. */
+struct event {
+       __u64 delta_us;                 /* latency of the operation, in microseconds */
+       __u64 end_ns;                   /* bpf_ktime_get_ns() value taken at exit */
+       __s64 offset;                   /* start offset recorded at entry, in bytes */
+       ssize_t size;                   /* return value of the op, or (end - start) for FSYNC */
+       pid_t pid;                      /* upper 32 bits of pid_tgid (process id) */
+       enum fs_file_op op;             /* which operation completed */
+       char file[FILE_NAME_LEN];       /* dentry name of the file, truncated to fit */
+       char task[TASK_COMM_LEN];       /* comm of the current task */
+};
+
+#endif /* __FSSLOWER_H */
diff --git a/libbpf-tools/xfsslower.bpf.c b/libbpf-tools/xfsslower.bpf.c
deleted file mode 100644 (file)
index 05962f4..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2020 Wenbo Zhang
-#include <vmlinux.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_core_read.h>
-#include <bpf/bpf_tracing.h>
-#include "xfsslower.h"
-
-const volatile pid_t targ_tgid = 0;
-const volatile __u64 min_lat = 0;
-
-struct piddata {
-       u64 ts;
-       loff_t start;
-       loff_t end;
-       struct file *fp;
-};
-
-struct {
-       __uint(type, BPF_MAP_TYPE_HASH);
-       __uint(max_entries, 8192);
-       __type(key, u32);
-       __type(value, struct piddata);
-} start SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-       __uint(key_size, sizeof(u32));
-       __uint(value_size, sizeof(u32));
-} events SEC(".maps");
-
-static __always_inline int
-probe_entry(struct file *fp, loff_t s, loff_t e)
-{
-       u64 id = bpf_get_current_pid_tgid();
-       struct piddata piddata;
-       u32 tgid = id >> 32;
-       u32 pid = id;
-
-       if (!fp)
-               return 0;
-       if (targ_tgid && targ_tgid != tgid)
-               return 0;
-
-       piddata.ts = bpf_ktime_get_ns();
-       piddata.start = s;
-       piddata.end = e;
-       piddata.fp = fp;
-       bpf_map_update_elem(&start, &pid, &piddata, 0);
-       return 0;
-}
-
-SEC("kprobe/xfs_file_read_iter")
-int BPF_KPROBE(xfs_file_read_iter, struct kiocb *iocb)
-{
-       struct file *fp = BPF_CORE_READ(iocb, ki_filp);
-       loff_t start = BPF_CORE_READ(iocb, ki_pos);
-
-       return probe_entry(fp, start, 0);
-}
-
-SEC("kprobe/xfs_file_write_iter")
-int BPF_KPROBE(xfs_file_write_iter, struct kiocb *iocb)
-{
-       struct file *fp = BPF_CORE_READ(iocb, ki_filp);
-       loff_t start = BPF_CORE_READ(iocb, ki_pos);
-
-       return probe_entry(fp, start, 0);
-}
-
-SEC("kprobe/xfs_file_open")
-int BPF_KPROBE(xfs_file_open, struct inode *inode, struct file *file)
-{
-       return probe_entry(file, 0, 0);
-}
-
-SEC("kprobe/xfs_file_fsync")
-int BPF_KPROBE(xfs_file_fsync, struct file *file, loff_t start,
-              loff_t end)
-{
-       return probe_entry(file, start, end);
-}
-
-static __always_inline int
-probe_exit(struct pt_regs *ctx, char type, ssize_t size)
-{
-       u64 id = bpf_get_current_pid_tgid();
-       u64 end_ns = bpf_ktime_get_ns();
-       struct piddata *piddatap;
-       struct event event = {};
-       struct dentry *dentry;
-       const u8 *qs_name_ptr;
-       u32 tgid = id >> 32;
-       struct file *fp;
-       u32 pid = id;
-       u64 delta_us;
-       u32 qs_len;
-
-       if (targ_tgid && targ_tgid != tgid)
-               return 0;
-
-       piddatap = bpf_map_lookup_elem(&start, &pid);
-       if (!piddatap)
-               return 0;    /* missed entry */
-
-       delta_us = (end_ns - piddatap->ts) / 1000;
-       bpf_map_delete_elem(&start, &pid);
-
-       if ((s64)delta_us < 0 || delta_us <= min_lat * 1000)
-               return 0;
-
-       fp = piddatap->fp;
-       dentry = BPF_CORE_READ(fp, f_path.dentry);
-       qs_len = BPF_CORE_READ(dentry, d_name.len);
-       qs_name_ptr = BPF_CORE_READ(dentry, d_name.name);
-       bpf_probe_read_kernel_str(&event.file, sizeof(event.file), qs_name_ptr);
-       bpf_get_current_comm(&event.task, sizeof(event.task));
-       event.delta_us = delta_us;
-       event.end_ns = end_ns;
-       event.offset = piddatap->start;
-       if (type != TRACE_FSYNC)
-               event.size = size;
-       else
-               event.size = piddatap->end - piddatap->start;
-       event.type = type;
-       event.tgid = tgid;
-
-       /* output */
-       bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
-                             &event, sizeof(event));
-       return 0;
-}
-
-SEC("kretprobe/xfs_file_read_iter")
-int BPF_KRETPROBE(xfs_file_read_iters_ret, ssize_t ret)
-{
-       return probe_exit(ctx, TRACE_READ, ret);
-}
-
-SEC("kretprobe/xfs_file_write_iter")
-int BPF_KRETPROBE(xfs_file_write_iter_ret, ssize_t ret)
-{
-       return probe_exit(ctx, TRACE_WRITE, ret);
-}
-
-SEC("kretprobe/xfs_file_open")
-int BPF_KRETPROBE(xfs_file_open_ret)
-{
-       return probe_exit(ctx, TRACE_OPEN, 0);
-}
-
-SEC("kretprobe/xfs_file_fsync")
-int BPF_KRETPROBE(xfs_file_sync_ret)
-{
-       return probe_exit(ctx, TRACE_FSYNC, 0);
-}
-
-char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/xfsslower.c b/libbpf-tools/xfsslower.c
deleted file mode 100644 (file)
index 9f000d9..0000000
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-// Copyright (c) 2020 Wenbo Zhang
-//
-// Based on xfsslower(8) from BCC by Brendan Gregg & Dina Goldshtein.
-// 9-Mar-2020   Wenbo Zhang   Created this.
-#include <argp.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include "xfsslower.h"
-#include "xfsslower.skel.h"
-#include "trace_helpers.h"
-
-#define PERF_BUFFER_PAGES      64
-#define PERF_BUFFER_TIME_MS    10
-#define PERF_POLL_TIMEOUT_MS   100
-
-static struct env {
-       pid_t pid;
-       time_t duration;
-       __u64 min_lat;
-       bool csv;
-       bool verbose;
-} env = {
-       .min_lat = 10000,
-};
-
-const char *argp_program_version = "xfsslower 0.1";
-const char *argp_program_bug_address =
-       "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
-const char argp_program_doc[] =
-"Trace common XFS file operations slower than a threshold.\n"
-"\n"
-"Usage: xfslower [--help] [-p PID] [-m MIN] [-d DURATION] [-c]\n"
-"\n"
-"EXAMPLES:\n"
-"    xfsslower          # trace operations slower than 10 ms (default)"
-"    xfsslower 0        # trace all operations (warning: verbose)\n"
-"    xfsslower -p 123   # trace pid 123\n"
-"    xfsslower -c -d 1  # ... 1s, parsable output (csv)";
-
-static const struct argp_option opts[] = {
-       { "csv", 'c', NULL, 0, "Output as csv" },
-       { "duration", 'd', "DURATION", 0, "Total duration of trace in seconds" },
-       { "pid", 'p', "PID", 0, "Process PID to trace" },
-       { "min", 'm', "MIN", 0, "Min latency of trace in ms (default 10)" },
-       { "verbose", 'v', NULL, 0, "Verbose debug output" },
-       { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
-       {},
-};
-
-static error_t parse_arg(int key, char *arg, struct argp_state *state)
-{
-       long long min_lat;
-       time_t duration;
-       int pid;
-
-       switch (key) {
-       case 'h':
-               argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
-               break;
-       case 'v':
-               env.verbose = true;
-               break;
-       case 'c':
-               env.csv = true;
-               break;
-       case 'd':
-               errno = 0;
-               duration = strtol(arg, NULL, 10);
-               if (errno || duration <= 0) {
-                       fprintf(stderr, "invalid DURATION: %s\n", arg);
-                       argp_usage(state);
-               }
-               env.duration = duration;
-               break;
-       case 'm':
-               errno = 0;
-               min_lat = strtoll(arg, NULL, 10);
-               if (errno || min_lat < 0) {
-                       fprintf(stderr, "invalid delay (in ms): %s\n", arg);
-               }
-               env.min_lat = min_lat;
-               break;
-       case 'p':
-               errno = 0;
-               pid = strtol(arg, NULL, 10);
-               if (errno || pid <= 0) {
-                       fprintf(stderr, "invalid PID: %s\n", arg);
-                       argp_usage(state);
-               }
-               env.pid = pid;
-               break;
-       default:
-               return ARGP_ERR_UNKNOWN;
-       }
-       return 0;
-}
-
-int libbpf_print_fn(enum libbpf_print_level level,
-                   const char *format, va_list args)
-{
-       if (level == LIBBPF_DEBUG && !env.verbose)
-               return 0;
-       return vfprintf(stderr, format, args);
-}
-
-void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
-{
-       const struct event *e = data;
-       struct tm *tm;
-       char ts[32];
-       time_t t;
-
-       time(&t);
-       tm = localtime(&t);
-       strftime(ts, sizeof(ts), "%H:%M:%S", tm);
-
-       if (env.csv) {
-               printf("%lld,%s,%d,%c,", e->end_ns, e->task, e->tgid, e->type);
-               if (e->size == LLONG_MAX)
-                       printf("LL_MAX,");
-               else
-                       printf("%ld,", e->size);
-               printf("%lld,%lld,%s\n", e->offset, e->delta_us, e->file);
-       } else {
-               printf("%-8s %-14.14s %-6d %c ", ts, e->task, e->tgid, e->type);
-               if (e->size == LLONG_MAX)
-                       printf("%-7s ", "LL_MAX");
-               else
-                       printf("%-7ld ", e->size);
-               printf("%-8lld %7.2f %s\n", e->offset / 1024,
-                      (double)e->delta_us / 1000, e->file);
-       }
-}
-
-void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
-{
-       fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
-}
-
-int main(int argc, char **argv)
-{
-       static const struct argp argp = {
-               .options = opts,
-               .parser = parse_arg,
-               .doc = argp_program_doc,
-       };
-       struct perf_buffer_opts pb_opts;
-       struct perf_buffer *pb = NULL;
-       struct xfsslower_bpf *obj;
-       __u64 time_end = 0;
-       int err;
-
-       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
-       if (err)
-               return err;
-
-       libbpf_set_print(libbpf_print_fn);
-
-       err = bump_memlock_rlimit();
-       if (err) {
-               fprintf(stderr, "failed to increase rlimit: %d\n", err);
-               return 1;
-       }
-
-       obj = xfsslower_bpf__open();
-       if (!obj) {
-               fprintf(stderr, "failed to open BPF object\n");
-               return 1;
-       }
-
-       /* initialize global data (filtering options) */
-       obj->rodata->min_lat = env.min_lat;
-       obj->rodata->targ_tgid = env.pid;
-
-       err = xfsslower_bpf__load(obj);
-       if (err) {
-               fprintf(stderr, "failed to load BPF object: %d\n", err);
-               goto cleanup;
-       }
-
-       err = xfsslower_bpf__attach(obj);
-       if (err) {
-               fprintf(stderr, "failed to attach BPF programs\n");
-               goto cleanup;
-       }
-
-       if (env.csv)
-               printf("ENDTIME_us,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE");
-       else {
-               if (env.min_lat)
-                       printf("Tracing XFS operations slower than %llu ms",
-                               env.min_lat);
-               else
-                       printf("Tracing XFS operations");
-               if (env.duration)
-                       printf(" for %ld secs.\n", env.duration);
-               else
-                       printf("... Hit Ctrl-C to end.\n");
-               printf("%-8s %-14s %-6s %1s %-7s %-8s %7s %s",
-                       "TIME", "COMM", "PID", "T", "BYTES", "OFF_KB", "LAT(ms)",
-                       "FILENAME\n");
-       }
-
-       pb_opts.sample_cb = handle_event;
-       pb_opts.lost_cb = handle_lost_events;
-       pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES,
-                             &pb_opts);
-       err = libbpf_get_error(pb);
-       if (err) {
-               pb = NULL;
-               fprintf(stderr, "failed to open perf buffer: %d\n", err);
-               goto cleanup;
-       }
-
-       /* setup duration */
-       if (env.duration)
-               time_end = get_ktime_ns() + env.duration * NSEC_PER_SEC;
-
-       /* main: poll */
-       while (1) {
-               usleep(PERF_BUFFER_TIME_MS * 1000);
-               if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0)
-                       break;
-               if (env.duration && get_ktime_ns() > time_end)
-                       goto cleanup;
-       }
-       fprintf(stderr, "failed with polling perf buffer: %d\n", err);
-
-cleanup:
-       perf_buffer__free(pb);
-       xfsslower_bpf__destroy(obj);
-
-       return err != 0;
-}
diff --git a/libbpf-tools/xfsslower.h b/libbpf-tools/xfsslower.h
deleted file mode 100644 (file)
index 16db77e..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __XFSSLOWER_H
-#define __XFSSLOWER_H
-
-#define DNAME_INLINE_LEN       32
-#define TASK_COMM_LEN          16
-
-#define TRACE_READ   'R'
-#define TRACE_WRITE  'W'
-#define TRACE_OPEN   'O'
-#define TRACE_FSYNC  'F'
-
-struct event {
-       char file[DNAME_INLINE_LEN];
-       char task[TASK_COMM_LEN];
-       __u64 delta_us;
-       __u64 end_ns;
-       __s64 offset;
-       ssize_t size;
-       pid_t tgid;
-       char type;
-};
-
-#endif /* __DRSNOOP_H */