From 9e38ee193b376fbada6ed68534329f6ed8848caf Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 5 Jun 2021 10:44:46 +0800 Subject: [PATCH] libbpf-tools: migrate xfsslower to fsslower This commit migrates xfsslower to a generic fsslower which supports tracing multiple file systems. It works the same way as the original tool except that the users are supposed to specify which file systems to trace using -t option. sudo ./fsslower -t ext4 -m 1 Tracing ext4 operations slower than 1 ms... Hit Ctrl-C to end. TIME COMM PID T BYTES OFF_KB LAT(ms) FILENAME 10:36:07 code 6896 F LL_MAX 0 2.40 state.vscdb-journal 10:36:07 code 6896 F LL_MAX 0 1.74 state.vscdb-journal 10:36:07 code 6896 F LL_MAX 0 1.78 state.vscdb Signed-off-by: Hengqi Chen --- libbpf-tools/.gitignore | 4 + libbpf-tools/Makefile | 17 +- libbpf-tools/fsslower.bpf.c | 208 ++++++++++++++++ libbpf-tools/fsslower.c | 464 +++++++++++++++++++++++++++++++++++ libbpf-tools/fsslower.h | 27 ++ libbpf-tools/xfsslower.bpf.c | 158 ------------ libbpf-tools/xfsslower.c | 240 ------------------ libbpf-tools/xfsslower.h | 24 -- 8 files changed, 716 insertions(+), 426 deletions(-) create mode 100644 libbpf-tools/fsslower.bpf.c create mode 100644 libbpf-tools/fsslower.c create mode 100644 libbpf-tools/fsslower.h delete mode 100644 libbpf-tools/xfsslower.bpf.c delete mode 100644 libbpf-tools/xfsslower.c delete mode 100644 libbpf-tools/xfsslower.h diff --git a/libbpf-tools/.gitignore b/libbpf-tools/.gitignore index b5ff0eff..a345b351 100644 --- a/libbpf-tools/.gitignore +++ b/libbpf-tools/.gitignore @@ -4,18 +4,22 @@ /biosnoop /biostacks /bitesize +/btrfsslower /cachestat /cpudist /cpufreq /drsnoop /execsnoop /ext4dist +/ext4slower /filelife /fsdist +/fsslower /funclatency /gethostlatency /hardirqs /llcstat +/nfsslower /numamove /offcputime /opensnoop diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile index fae9aa34..1f2dddcf 100644 --- a/libbpf-tools/Makefile +++ b/libbpf-tools/Makefile @@ -29,6 +29,7 @@ APPS = \ 
ext4dist \ filelife \ fsdist \ + fsslower \ funclatency \ gethostlatency \ hardirqs \ @@ -46,9 +47,12 @@ APPS = \ tcpconnect \ tcpconnlat \ vfsstat \ - xfsslower \ # +FSSLOWER_ALIASES = btrfsslower ext4slower nfsslower xfsslower + +APP_ALIASES = $(FSSLOWER_ALIASES) + COMMON_OBJ = \ $(OUTPUT)/trace_helpers.o \ $(OUTPUT)/syscall_helpers.o \ @@ -58,7 +62,7 @@ COMMON_OBJ = \ # .PHONY: all -all: $(APPS) +all: $(APPS) $(APP_ALIASES) ifeq ($(V),1) Q = @@ -72,7 +76,7 @@ endif .PHONY: clean clean: $(call msg,CLEAN) - $(Q)rm -rf $(OUTPUT) $(APPS) + $(Q)rm -rf $(OUTPUT) $(APPS) $(APP_ALIASES) $(OUTPUT) $(OUTPUT)/libbpf: $(call msg,MKDIR,$@) @@ -106,10 +110,15 @@ $(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch]) | $(OUTPUT)/libbpf INCLUDEDIR= LIBDIR= UAPIDIR= \ install -install: $(APPS) +$(FSSLOWER_ALIASES): fsslower + $(call msg,SYMLINK,$@) + $(Q)ln -s $^ $@ + +install: $(APPS) $(APP_ALIASES) $(call msg, INSTALL libbpf-tools) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) $(APPS) $(DESTDIR)$(prefix)/bin + $(Q)cp -a $(APP_ALIASES) $(DESTDIR)$(prefix)/bin # delete failed targets .DELETE_ON_ERROR: diff --git a/libbpf-tools/fsslower.bpf.c b/libbpf-tools/fsslower.bpf.c new file mode 100644 index 00000000..cd6b66c2 --- /dev/null +++ b/libbpf-tools/fsslower.bpf.c @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Wenbo Zhang */ +#include +#include +#include +#include +#include "bits.bpf.h" +#include "fsslower.h" + +#define MAX_ENTRIES 8192 + +const volatile pid_t target_pid = 0; +const volatile __u64 min_lat_ns = 0; + +struct data { + __u64 ts; + loff_t start; + loff_t end; + struct file *fp; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, struct data); +} starts SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} events SEC(".maps"); + +static int probe_entry(struct 
file *fp, loff_t start, loff_t end) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 pid = pid_tgid >> 32; + __u32 tid = (__u32)pid_tgid; + struct data data; + + if (!fp) + return 0; + + if (target_pid && target_pid != pid) + return 0; + + data.ts = bpf_ktime_get_ns(); + data.start = start; + data.end = end; + data.fp = fp; + bpf_map_update_elem(&starts, &tid, &data, BPF_ANY); + return 0; +} +/* Match a traced call's return with its entry record in `starts` and emit an event if it took longer than min_lat_ns. */ +static int probe_exit(void *ctx, enum fs_file_op op, ssize_t size) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 pid = pid_tgid >> 32; + __u32 tid = (__u32)pid_tgid; + __u64 end_ns, delta_ns; + const __u8 *file_name; + struct data *datap, data; + struct event event = {}; + struct dentry *dentry; + struct file *fp; + + if (target_pid && target_pid != pid) + return 0; + + datap = bpf_map_lookup_elem(&starts, &tid); + if (!datap) + return 0; + + data = *datap; /* copy first: datap must not be read once the entry is deleted */ + bpf_map_delete_elem(&starts, &tid); + end_ns = bpf_ktime_get_ns(); + delta_ns = end_ns - data.ts; + if (delta_ns <= min_lat_ns) + return 0; + + event.delta_us = delta_ns / 1000; + event.end_ns = end_ns; + event.offset = data.start; + if (op != FSYNC) + event.size = size; + else + event.size = data.end - data.start; /* FSYNC: report the start..end range recorded at entry */ + event.pid = pid; + event.op = op; + fp = data.fp; + dentry = BPF_CORE_READ(fp, f_path.dentry); + file_name = BPF_CORE_READ(dentry, d_name.name); + bpf_probe_read_kernel_str(&event.file, sizeof(event.file), file_name); + bpf_get_current_comm(&event.task, sizeof(event.task)); + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); + return 0; +} + +SEC("kprobe/dummy_file_read") +int BPF_KPROBE(file_read_entry, struct kiocb *iocb) +{ + struct file *fp = BPF_CORE_READ(iocb, ki_filp); + loff_t start = BPF_CORE_READ(iocb, ki_pos); + + return probe_entry(fp, start, 0); +} + +SEC("kretprobe/dummy_file_read") +int BPF_KRETPROBE(file_read_exit, ssize_t ret) +{ + return probe_exit(ctx, READ, ret); +} + +SEC("kprobe/dummy_file_write") +int BPF_KPROBE(file_write_entry, struct kiocb 
*iocb) +{ + struct file *fp = BPF_CORE_READ(iocb, ki_filp); + loff_t start = BPF_CORE_READ(iocb, ki_pos); + + return probe_entry(fp, start, 0); +} + +SEC("kretprobe/dummy_file_write") +int BPF_KRETPROBE(file_write_exit, ssize_t ret) +{ + return probe_exit(ctx, WRITE, ret); +} + +SEC("kprobe/dummy_file_open") +int BPF_KPROBE(file_open_entry, struct inode *inode, struct file *file) +{ + return probe_entry(file, 0, 0); +} + +SEC("kretprobe/dummy_file_open") +int BPF_KRETPROBE(file_open_exit) +{ + return probe_exit(ctx, OPEN, 0); +} + +SEC("kprobe/dummy_file_sync") +int BPF_KPROBE(file_sync_entry, struct file *file, loff_t start, loff_t end) +{ + return probe_entry(file, start, end); +} + +SEC("kretprobe/dummy_file_sync") +int BPF_KRETPROBE(file_sync_exit) +{ + return probe_exit(ctx, FSYNC, 0); +} + +SEC("fentry/dummy_file_read") +int BPF_PROG(file_read_fentry, struct kiocb *iocb) +{ + struct file *fp = iocb->ki_filp; + loff_t start = iocb->ki_pos; + + return probe_entry(fp, start, 0); +} + +SEC("fexit/dummy_file_read") +int BPF_PROG(file_read_fexit, struct kiocb *iocb, struct iov_iter *to, ssize_t ret) +{ + return probe_exit(ctx, READ, ret); +} + +SEC("fentry/dummy_file_write") +int BPF_PROG(file_write_fentry, struct kiocb *iocb) +{ + struct file *fp = iocb->ki_filp; + loff_t start = iocb->ki_pos; + + return probe_entry(fp, start, 0); +} + +SEC("fexit/dummy_file_write") +int BPF_PROG(file_write_fexit, struct kiocb *iocb, struct iov_iter *from, ssize_t ret) +{ + return probe_exit(ctx, WRITE, ret); +} + +SEC("fentry/dummy_file_open") +int BPF_PROG(file_open_fentry, struct inode *inode, struct file *file) +{ + return probe_entry(file, 0, 0); +} + +SEC("fexit/dummy_file_open") +int BPF_PROG(file_open_fexit) +{ + return probe_exit(ctx, OPEN, 0); +} + +SEC("fentry/dummy_file_sync") +int BPF_PROG(file_sync_fentry, struct file *file, loff_t start, loff_t end) +{ + return probe_entry(file, start, end); +} + +SEC("fexit/dummy_file_sync") +int BPF_PROG(file_sync_fexit) +{ + 
return probe_exit(ctx, FSYNC, 0); +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/libbpf-tools/fsslower.c b/libbpf-tools/fsslower.c new file mode 100644 index 00000000..b21cd7f0 --- /dev/null +++ b/libbpf-tools/fsslower.c @@ -0,0 +1,464 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * fsslower Trace file system operations slower than a threshold. + * + * Copyright (c) 2020 Wenbo Zhang + * Copyright (c) 2021 Hengqi Chen + * + * Based on xfsslower(8) from BCC by Brendan Gregg & Dina Goldshtein. + * 9-Mar-2020 Wenbo Zhang Created this. + * 27-May-2021 Hengqi Chen Migrated to fsslower. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fsslower.h" +#include "fsslower.skel.h" +#include "trace_helpers.h" + +#define PERF_BUFFER_PAGES 64 +#define PERF_POLL_TIMEOUT_MS 100 + +#define warn(...) fprintf(stderr, __VA_ARGS__) + +enum fs_type { + NONE, + BTRFS, + EXT4, + NFS, + XFS, +}; + +static struct fs_config { + const char *fs; + const char *op_funcs[MAX_OP]; +} fs_configs[] = { + [BTRFS] = { "btrfs", { + [READ] = "btrfs_file_read_iter", + [WRITE] = "btrfs_file_write_iter", + [OPEN] = "btrfs_file_open", + [FSYNC] = "btrfs_sync_file", + }}, + [EXT4] = { "ext4", { + [READ] = "ext4_file_read_iter", + [WRITE] = "ext4_file_write_iter", + [OPEN] = "ext4_file_open", + [FSYNC] = "ext4_sync_file", + }}, + [NFS] = { "nfs", { + [READ] = "nfs_file_read", + [WRITE] = "nfs_file_write", + [OPEN] = "nfs_file_open", + [FSYNC] = "nfs_file_fsync", + }}, + [XFS] = { "xfs", { + [READ] = "xfs_file_read_iter", + [WRITE] = "xfs_file_write_iter", + [OPEN] = "xfs_file_open", + [FSYNC] = "xfs_file_fsync", + }}, +}; + +static char file_op[] = { + [READ] = 'R', + [WRITE] = 'W', + [OPEN] = 'O', + [FSYNC] = 'F', +}; + +static volatile sig_atomic_t exiting; + +/* options */ +static enum fs_type fs_type = NONE; +static pid_t target_pid = 0; +static time_t duration = 0; +static __u64 min_lat_ms = 10; +static 
bool csv = false; +static bool verbose = false; + +const char *argp_program_version = "fsslower 0.1"; +const char *argp_program_bug_address = + "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; +const char argp_program_doc[] = +"Trace file system operations slower than a threshold.\n" +"\n" +"Usage: fsslower [-h] [-t FS] [-p PID] [-m MIN] [-d DURATION] [-c]\n" +"\n" +"EXAMPLES:\n" +" fsslower -t ext4 # trace ext4 operations slower than 10 ms\n" +" fsslower -t nfs -p 1216 # trace nfs operations with PID 1216 only\n" +" fsslower -t xfs -c -d 1 # trace xfs operations for 1s with csv output\n"; + +static const struct argp_option opts[] = { + { "csv", 'c', NULL, 0, "Output as csv" }, + { "duration", 'd', "DURATION", 0, "Total duration of trace in seconds" }, + { "pid", 'p', "PID", 0, "Process ID to trace" }, + { "min", 'm', "MIN", 0, "Min latency to trace, in ms (default 10)" }, + { "type", 't', "Filesystem", 0, "Which filesystem to trace, [btrfs/ext4/nfs/xfs]" }, + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case 'v': + verbose = true; + break; + case 'c': + csv = true; + break; + case 'd': + errno = 0; + duration = strtol(arg, NULL, 10); + if (errno || duration <= 0) { + warn("invalid DURATION: %s\n", arg); + argp_usage(state); + } + break; + case 'm': + errno = 0; + min_lat_ms = strtoll(arg, NULL, 10); + /* min_lat_ms is unsigned, so test the sign through a signed cast; a bad value must abort, not just warn */ + if (errno || (long long)min_lat_ms < 0) + argp_error(state, "invalid latency (in ms): %s", arg); + break; + case 't': + if (!strcmp(arg, "btrfs")) { + fs_type = BTRFS; + } else if (!strcmp(arg, "ext4")) { + fs_type = EXT4; + } else if (!strcmp(arg, "nfs")) { + fs_type = NFS; + } else if (!strcmp(arg, "xfs")) { + fs_type = XFS; + } else { + warn("invalid filesystem\n"); + argp_usage(state); + } + break; + case 'p': + errno = 0; + target_pid = strtol(arg, NULL, 10); + if (errno || target_pid <= 0) { + 
warn("invalid PID: %s\n", arg); + argp_usage(state); + } + break; + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static void alias_parse(char *prog) +{ + char *name = basename(prog); + + if (!strcmp(name, "btrfsslower")) { + fs_type = BTRFS; + } else if (!strcmp(name, "ext4slower")) { + fs_type = EXT4; + } else if (!strcmp(name, "nfsslower")) { + fs_type = NFS; + } else if (!strcmp(name, "xfsslower")) { + fs_type = XFS; + } +} + +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static void sig_handler(int sig) +{ + exiting = 1; +} + +static bool check_fentry() +{ + int i; + const char *fn_name, *module; + bool support_fentry = true; + + for (i = 0; i < MAX_OP; i++) { + fn_name = fs_configs[fs_type].op_funcs[i]; + module = fs_configs[fs_type].fs; + if (fn_name && !fentry_exists(fn_name, NULL) + && !fentry_exists(fn_name, module)) { + support_fentry = false; + break; + } + } + return support_fentry; +} + +static int fentry_set_attach_target(struct fsslower_bpf *obj) +{ + struct fs_config *cfg = &fs_configs[fs_type]; + int err = 0; + + err = err ?: bpf_program__set_attach_target(obj->progs.file_read_fentry, 0, cfg->op_funcs[READ]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_read_fexit, 0, cfg->op_funcs[READ]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_write_fentry, 0, cfg->op_funcs[WRITE]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_write_fexit, 0, cfg->op_funcs[WRITE]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_open_fentry, 0, cfg->op_funcs[OPEN]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_open_fexit, 0, cfg->op_funcs[OPEN]); + err = err ?: bpf_program__set_attach_target(obj->progs.file_sync_fentry, 0, cfg->op_funcs[FSYNC]); + err = err ?: 
bpf_program__set_attach_target(obj->progs.file_sync_fexit, 0, cfg->op_funcs[FSYNC]); + return err; +} + +static void disable_fentry(struct fsslower_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.file_read_fentry, false); + bpf_program__set_autoload(obj->progs.file_read_fexit, false); + bpf_program__set_autoload(obj->progs.file_write_fentry, false); + bpf_program__set_autoload(obj->progs.file_write_fexit, false); + bpf_program__set_autoload(obj->progs.file_open_fentry, false); + bpf_program__set_autoload(obj->progs.file_open_fexit, false); + bpf_program__set_autoload(obj->progs.file_sync_fentry, false); + bpf_program__set_autoload(obj->progs.file_sync_fexit, false); +} + +static void disable_kprobes(struct fsslower_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.file_read_entry, false); + bpf_program__set_autoload(obj->progs.file_read_exit, false); + bpf_program__set_autoload(obj->progs.file_write_entry, false); + bpf_program__set_autoload(obj->progs.file_write_exit, false); + bpf_program__set_autoload(obj->progs.file_open_entry, false); + bpf_program__set_autoload(obj->progs.file_open_exit, false); + bpf_program__set_autoload(obj->progs.file_sync_entry, false); + bpf_program__set_autoload(obj->progs.file_sync_exit, false); +} + +static int attach_kprobes(struct fsslower_bpf *obj) +{ + long err = 0; + struct fs_config *cfg = &fs_configs[fs_type]; + + /* READ */ + obj->links.file_read_entry = bpf_program__attach_kprobe(obj->progs.file_read_entry, false, cfg->op_funcs[READ]); + err = libbpf_get_error(obj->links.file_read_entry); + if (err) + goto errout; + obj->links.file_read_exit = bpf_program__attach_kprobe(obj->progs.file_read_exit, true, cfg->op_funcs[READ]); + err = libbpf_get_error(obj->links.file_read_exit); + if (err) + goto errout; + /* WRITE */ + obj->links.file_write_entry = bpf_program__attach_kprobe(obj->progs.file_write_entry, false, cfg->op_funcs[WRITE]); + err = libbpf_get_error(obj->links.file_write_entry); + if (err) + goto errout; + 
obj->links.file_write_exit = bpf_program__attach_kprobe(obj->progs.file_write_exit, true, cfg->op_funcs[WRITE]); + err = libbpf_get_error(obj->links.file_write_exit); + if (err) + goto errout; + /* OPEN */ + obj->links.file_open_entry = bpf_program__attach_kprobe(obj->progs.file_open_entry, false, cfg->op_funcs[OPEN]); + err = libbpf_get_error(obj->links.file_open_entry); + if (err) + goto errout; + obj->links.file_open_exit = bpf_program__attach_kprobe(obj->progs.file_open_exit, true, cfg->op_funcs[OPEN]); + err = libbpf_get_error(obj->links.file_open_exit); + if (err) + goto errout; + /* FSYNC */ + obj->links.file_sync_entry = bpf_program__attach_kprobe(obj->progs.file_sync_entry, false, cfg->op_funcs[FSYNC]); + err = libbpf_get_error(obj->links.file_sync_entry); + if (err) + goto errout; + obj->links.file_sync_exit = bpf_program__attach_kprobe(obj->progs.file_sync_exit, true, cfg->op_funcs[FSYNC]); + err = libbpf_get_error(obj->links.file_sync_exit); + if (err) + goto errout; + return 0; + +errout: + warn("failed to attach kprobe: %ld\n", err); + return err; +} + +static void print_headers() +{ + const char *fs = fs_configs[fs_type].fs; + + if (csv) { + printf("ENDTIME_ns,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE\n"); + return; + } + + if (min_lat_ms) + printf("Tracing %s operations slower than %llu ms", fs, min_lat_ms); + else + printf("Tracing %s operations", fs); + + if (duration) + printf(" for %ld secs.\n", duration); + else + printf("... 
Hit Ctrl-C to end.\n"); + + printf("%-8s %-16s %-7s %1s %-7s %-8s %7s %s\n", + "TIME", "COMM", "PID", "T", "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"); +} +/* Print one event received from the perf buffer, as a CSV record or as a table row. */ +static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + const struct event *e = data; + struct tm *tm; + char ts[32]; + time_t t; + + if (csv) { + printf("%llu,%s,%d,%c,", e->end_ns, e->task, e->pid, file_op[e->op]); + if (e->size == LLONG_MAX) + printf("LL_MAX,"); + else + printf("%zd,", e->size); /* size is ssize_t: %zd is correct on 32- and 64-bit */ + printf("%lld,%llu,%s\n", e->offset, e->delta_us, e->file); + return; + } + + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + + printf("%-8s %-16s %-7d %c ", ts, e->task, e->pid, file_op[e->op]); + if (e->size == LLONG_MAX) + printf("%-7s ", "LL_MAX"); + else + printf("%-7zd ", e->size); + printf("%-8lld %7.2f %s\n", e->offset / 1024, (double)e->delta_us / 1000, e->file); +} + +static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) +{ + warn("lost %llu events on CPU #%d\n", lost_cnt, cpu); +} + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct perf_buffer_opts pb_opts = {}; /* zero every field this function does not assign explicitly */ + struct perf_buffer *pb = NULL; + struct fsslower_bpf *skel; + __u64 time_end = 0; + int err; + bool support_fentry; + + alias_parse(argv[0]); + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + if (fs_type == NONE) { + warn("filesystem must be specified using -t option.\n"); + return 1; + } + + libbpf_set_print(libbpf_print_fn); + + err = bump_memlock_rlimit(); + if (err) { + warn("failed to increase rlimit: %d\n", err); + return 1; + } + + skel = fsslower_bpf__open(); + if (!skel) { + warn("failed to open BPF object\n"); + return 1; + } + + skel->rodata->target_pid = target_pid; + skel->rodata->min_lat_ns = min_lat_ms * 1000 * 1000; + + /* + * before load + * if fentry is supported, we set attach target and disable kprobes + * otherwise, we disable fentry 
and attach kprobes after loading + */ + support_fentry = check_fentry(); + if (support_fentry) { + err = fentry_set_attach_target(skel); + if (err) { + warn("failed to set attach target: %d\n", err); + goto cleanup; + } + disable_kprobes(skel); + } else { + disable_fentry(skel); + } + + err = fsslower_bpf__load(skel); + if (err) { + warn("failed to load BPF object: %d\n", err); + goto cleanup; + } + + /* + * after load + * if fentry is supported, let libbpf do auto attach + * otherwise, we attach to kprobes manually + */ + err = support_fentry ? fsslower_bpf__attach(skel) : attach_kprobes(skel); + if (err) { + warn("failed to attach BPF programs: %d\n", err); + goto cleanup; + } + + signal(SIGINT, sig_handler); + + pb_opts.sample_cb = handle_event; + pb_opts.lost_cb = handle_lost_events; + pb = perf_buffer__new(bpf_map__fd(skel->maps.events), PERF_BUFFER_PAGES, + &pb_opts); + err = libbpf_get_error(pb); + if (err) { + pb = NULL; + warn("failed to open perf buffer: %d\n", err); + goto cleanup; + } + + print_headers(); + + if (duration) + time_end = get_ktime_ns() + duration * NSEC_PER_SEC; + + while (!exiting && !(duration && get_ktime_ns() > time_end)) { /* stop on Ctrl-C or once the requested duration has elapsed */ + err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); + if (err < 0 && err != -EINTR) { + warn("failed with polling perf buffer: %d\n", err); + goto cleanup; + } + err = 0; /* a non-negative poll result or -EINTR (signal) is not a failure: keep the exit status clean */ + } +cleanup: + perf_buffer__free(pb); + fsslower_bpf__destroy(skel); + + return err != 0; +} diff --git a/libbpf-tools/fsslower.h b/libbpf-tools/fsslower.h new file mode 100644 index 00000000..5c5ec4b9 --- /dev/null +++ b/libbpf-tools/fsslower.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __FSSLOWER_H +#define __FSSLOWER_H + +#define FILE_NAME_LEN 32 +#define TASK_COMM_LEN 16 + +enum fs_file_op { + READ, + WRITE, + OPEN, + FSYNC, + MAX_OP, +}; + +struct event { + __u64 delta_us; + __u64 end_ns; + __s64 offset; + ssize_t size; + pid_t pid; + enum fs_file_op op; + char file[FILE_NAME_LEN]; + char task[TASK_COMM_LEN]; +}; + +#endif /* 
__FSSLOWER_H */ diff --git a/libbpf-tools/xfsslower.bpf.c b/libbpf-tools/xfsslower.bpf.c deleted file mode 100644 index 05962f46..00000000 --- a/libbpf-tools/xfsslower.bpf.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2020 Wenbo Zhang -#include -#include -#include -#include -#include "xfsslower.h" - -const volatile pid_t targ_tgid = 0; -const volatile __u64 min_lat = 0; - -struct piddata { - u64 ts; - loff_t start; - loff_t end; - struct file *fp; -}; - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 8192); - __type(key, u32); - __type(value, struct piddata); -} start SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(u32)); -} events SEC(".maps"); - -static __always_inline int -probe_entry(struct file *fp, loff_t s, loff_t e) -{ - u64 id = bpf_get_current_pid_tgid(); - struct piddata piddata; - u32 tgid = id >> 32; - u32 pid = id; - - if (!fp) - return 0; - if (targ_tgid && targ_tgid != tgid) - return 0; - - piddata.ts = bpf_ktime_get_ns(); - piddata.start = s; - piddata.end = e; - piddata.fp = fp; - bpf_map_update_elem(&start, &pid, &piddata, 0); - return 0; -} - -SEC("kprobe/xfs_file_read_iter") -int BPF_KPROBE(xfs_file_read_iter, struct kiocb *iocb) -{ - struct file *fp = BPF_CORE_READ(iocb, ki_filp); - loff_t start = BPF_CORE_READ(iocb, ki_pos); - - return probe_entry(fp, start, 0); -} - -SEC("kprobe/xfs_file_write_iter") -int BPF_KPROBE(xfs_file_write_iter, struct kiocb *iocb) -{ - struct file *fp = BPF_CORE_READ(iocb, ki_filp); - loff_t start = BPF_CORE_READ(iocb, ki_pos); - - return probe_entry(fp, start, 0); -} - -SEC("kprobe/xfs_file_open") -int BPF_KPROBE(xfs_file_open, struct inode *inode, struct file *file) -{ - return probe_entry(file, 0, 0); -} - -SEC("kprobe/xfs_file_fsync") -int BPF_KPROBE(xfs_file_fsync, struct file *file, loff_t start, - loff_t end) -{ - return probe_entry(file, start, end); -} - -static 
__always_inline int -probe_exit(struct pt_regs *ctx, char type, ssize_t size) -{ - u64 id = bpf_get_current_pid_tgid(); - u64 end_ns = bpf_ktime_get_ns(); - struct piddata *piddatap; - struct event event = {}; - struct dentry *dentry; - const u8 *qs_name_ptr; - u32 tgid = id >> 32; - struct file *fp; - u32 pid = id; - u64 delta_us; - u32 qs_len; - - if (targ_tgid && targ_tgid != tgid) - return 0; - - piddatap = bpf_map_lookup_elem(&start, &pid); - if (!piddatap) - return 0; /* missed entry */ - - delta_us = (end_ns - piddatap->ts) / 1000; - bpf_map_delete_elem(&start, &pid); - - if ((s64)delta_us < 0 || delta_us <= min_lat * 1000) - return 0; - - fp = piddatap->fp; - dentry = BPF_CORE_READ(fp, f_path.dentry); - qs_len = BPF_CORE_READ(dentry, d_name.len); - qs_name_ptr = BPF_CORE_READ(dentry, d_name.name); - bpf_probe_read_kernel_str(&event.file, sizeof(event.file), qs_name_ptr); - bpf_get_current_comm(&event.task, sizeof(event.task)); - event.delta_us = delta_us; - event.end_ns = end_ns; - event.offset = piddatap->start; - if (type != TRACE_FSYNC) - event.size = size; - else - event.size = piddatap->end - piddatap->start; - event.type = type; - event.tgid = tgid; - - /* output */ - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, - &event, sizeof(event)); - return 0; -} - -SEC("kretprobe/xfs_file_read_iter") -int BPF_KRETPROBE(xfs_file_read_iters_ret, ssize_t ret) -{ - return probe_exit(ctx, TRACE_READ, ret); -} - -SEC("kretprobe/xfs_file_write_iter") -int BPF_KRETPROBE(xfs_file_write_iter_ret, ssize_t ret) -{ - return probe_exit(ctx, TRACE_WRITE, ret); -} - -SEC("kretprobe/xfs_file_open") -int BPF_KRETPROBE(xfs_file_open_ret) -{ - return probe_exit(ctx, TRACE_OPEN, 0); -} - -SEC("kretprobe/xfs_file_fsync") -int BPF_KRETPROBE(xfs_file_sync_ret) -{ - return probe_exit(ctx, TRACE_FSYNC, 0); -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/libbpf-tools/xfsslower.c b/libbpf-tools/xfsslower.c deleted file mode 100644 index 9f000d95..00000000 --- 
a/libbpf-tools/xfsslower.c +++ /dev/null @@ -1,240 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -// Copyright (c) 2020 Wenbo Zhang -// -// Based on xfsslower(8) from BCC by Brendan Gregg & Dina Goldshtein. -// 9-Mar-2020 Wenbo Zhang Created this. -#include -#include -#include -#include -#include -#include -#include -#include -#include "xfsslower.h" -#include "xfsslower.skel.h" -#include "trace_helpers.h" - -#define PERF_BUFFER_PAGES 64 -#define PERF_BUFFER_TIME_MS 10 -#define PERF_POLL_TIMEOUT_MS 100 - -static struct env { - pid_t pid; - time_t duration; - __u64 min_lat; - bool csv; - bool verbose; -} env = { - .min_lat = 10000, -}; - -const char *argp_program_version = "xfsslower 0.1"; -const char *argp_program_bug_address = - "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; -const char argp_program_doc[] = -"Trace common XFS file operations slower than a threshold.\n" -"\n" -"Usage: xfslower [--help] [-p PID] [-m MIN] [-d DURATION] [-c]\n" -"\n" -"EXAMPLES:\n" -" xfsslower # trace operations slower than 10 ms (default)" -" xfsslower 0 # trace all operations (warning: verbose)\n" -" xfsslower -p 123 # trace pid 123\n" -" xfsslower -c -d 1 # ... 
1s, parsable output (csv)"; - -static const struct argp_option opts[] = { - { "csv", 'c', NULL, 0, "Output as csv" }, - { "duration", 'd', "DURATION", 0, "Total duration of trace in seconds" }, - { "pid", 'p', "PID", 0, "Process PID to trace" }, - { "min", 'm', "MIN", 0, "Min latency of trace in ms (default 10)" }, - { "verbose", 'v', NULL, 0, "Verbose debug output" }, - { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, - {}, -}; - -static error_t parse_arg(int key, char *arg, struct argp_state *state) -{ - long long min_lat; - time_t duration; - int pid; - - switch (key) { - case 'h': - argp_state_help(state, stderr, ARGP_HELP_STD_HELP); - break; - case 'v': - env.verbose = true; - break; - case 'c': - env.csv = true; - break; - case 'd': - errno = 0; - duration = strtol(arg, NULL, 10); - if (errno || duration <= 0) { - fprintf(stderr, "invalid DURATION: %s\n", arg); - argp_usage(state); - } - env.duration = duration; - break; - case 'm': - errno = 0; - min_lat = strtoll(arg, NULL, 10); - if (errno || min_lat < 0) { - fprintf(stderr, "invalid delay (in ms): %s\n", arg); - } - env.min_lat = min_lat; - break; - case 'p': - errno = 0; - pid = strtol(arg, NULL, 10); - if (errno || pid <= 0) { - fprintf(stderr, "invalid PID: %s\n", arg); - argp_usage(state); - } - env.pid = pid; - break; - default: - return ARGP_ERR_UNKNOWN; - } - return 0; -} - -int libbpf_print_fn(enum libbpf_print_level level, - const char *format, va_list args) -{ - if (level == LIBBPF_DEBUG && !env.verbose) - return 0; - return vfprintf(stderr, format, args); -} - -void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) -{ - const struct event *e = data; - struct tm *tm; - char ts[32]; - time_t t; - - time(&t); - tm = localtime(&t); - strftime(ts, sizeof(ts), "%H:%M:%S", tm); - - if (env.csv) { - printf("%lld,%s,%d,%c,", e->end_ns, e->task, e->tgid, e->type); - if (e->size == LLONG_MAX) - printf("LL_MAX,"); - else - printf("%ld,", e->size); - printf("%lld,%lld,%s\n", e->offset, 
e->delta_us, e->file); - } else { - printf("%-8s %-14.14s %-6d %c ", ts, e->task, e->tgid, e->type); - if (e->size == LLONG_MAX) - printf("%-7s ", "LL_MAX"); - else - printf("%-7ld ", e->size); - printf("%-8lld %7.2f %s\n", e->offset / 1024, - (double)e->delta_us / 1000, e->file); - } -} - -void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) -{ - fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu); -} - -int main(int argc, char **argv) -{ - static const struct argp argp = { - .options = opts, - .parser = parse_arg, - .doc = argp_program_doc, - }; - struct perf_buffer_opts pb_opts; - struct perf_buffer *pb = NULL; - struct xfsslower_bpf *obj; - __u64 time_end = 0; - int err; - - err = argp_parse(&argp, argc, argv, 0, NULL, NULL); - if (err) - return err; - - libbpf_set_print(libbpf_print_fn); - - err = bump_memlock_rlimit(); - if (err) { - fprintf(stderr, "failed to increase rlimit: %d\n", err); - return 1; - } - - obj = xfsslower_bpf__open(); - if (!obj) { - fprintf(stderr, "failed to open BPF object\n"); - return 1; - } - - /* initialize global data (filtering options) */ - obj->rodata->min_lat = env.min_lat; - obj->rodata->targ_tgid = env.pid; - - err = xfsslower_bpf__load(obj); - if (err) { - fprintf(stderr, "failed to load BPF object: %d\n", err); - goto cleanup; - } - - err = xfsslower_bpf__attach(obj); - if (err) { - fprintf(stderr, "failed to attach BPF programs\n"); - goto cleanup; - } - - if (env.csv) - printf("ENDTIME_us,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE"); - else { - if (env.min_lat) - printf("Tracing XFS operations slower than %llu ms", - env.min_lat); - else - printf("Tracing XFS operations"); - if (env.duration) - printf(" for %ld secs.\n", env.duration); - else - printf("... 
Hit Ctrl-C to end.\n"); - printf("%-8s %-14s %-6s %1s %-7s %-8s %7s %s", - "TIME", "COMM", "PID", "T", "BYTES", "OFF_KB", "LAT(ms)", - "FILENAME\n"); - } - - pb_opts.sample_cb = handle_event; - pb_opts.lost_cb = handle_lost_events; - pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, - &pb_opts); - err = libbpf_get_error(pb); - if (err) { - pb = NULL; - fprintf(stderr, "failed to open perf buffer: %d\n", err); - goto cleanup; - } - - /* setup duration */ - if (env.duration) - time_end = get_ktime_ns() + env.duration * NSEC_PER_SEC; - - /* main: poll */ - while (1) { - usleep(PERF_BUFFER_TIME_MS * 1000); - if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) - break; - if (env.duration && get_ktime_ns() > time_end) - goto cleanup; - } - fprintf(stderr, "failed with polling perf buffer: %d\n", err); - -cleanup: - perf_buffer__free(pb); - xfsslower_bpf__destroy(obj); - - return err != 0; -} diff --git a/libbpf-tools/xfsslower.h b/libbpf-tools/xfsslower.h deleted file mode 100644 index 16db77eb..00000000 --- a/libbpf-tools/xfsslower.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __XFSSLOWER_H -#define __XFSSLOWER_H - -#define DNAME_INLINE_LEN 32 -#define TASK_COMM_LEN 16 - -#define TRACE_READ 'R' -#define TRACE_WRITE 'W' -#define TRACE_OPEN 'O' -#define TRACE_FSYNC 'F' - -struct event { - char file[DNAME_INLINE_LEN]; - char task[TASK_COMM_LEN]; - __u64 delta_us; - __u64 end_ns; - __s64 offset; - ssize_t size; - pid_t tgid; - char type; -}; - -#endif /* __DRSNOOP_H */ -- 2.34.1