libbpf-tools: add filetop
authorHengqi Chen <chenhengqi@outlook.com>
Thu, 15 Jul 2021 16:04:28 +0000 (00:04 +0800)
committeryonghong-song <ys114321@gmail.com>
Thu, 29 Jul 2021 05:41:50 +0000 (22:41 -0700)
Signed-off-by: Hengqi Chen <chenhengqi@outlook.com>
libbpf-tools/.gitignore
libbpf-tools/Makefile
libbpf-tools/filetop.bpf.c [new file with mode: 0644]
libbpf-tools/filetop.c [new file with mode: 0644]
libbpf-tools/filetop.h [new file with mode: 0644]
libbpf-tools/stat.h [new file with mode: 0644]

index 422d8604a01f62ee1ff5c0c07735051557bb382a..8de1bb151e2747d83eecc561137489d20023f133 100644 (file)
@@ -15,6 +15,7 @@
 /ext4dist
 /ext4slower
 /filelife
+/filetop
 /fsdist
 /fsslower
 /funclatency
index f0f67626898b73e8249feed999c72ca704efb9f6..d78a701f88be68f4360a117537a91249c3c9ad0d 100644 (file)
@@ -28,6 +28,7 @@ APPS = \
        drsnoop \
        execsnoop \
        filelife \
+       filetop \
        fsdist \
        fsslower \
        funclatency \
diff --git a/libbpf-tools/filetop.bpf.c b/libbpf-tools/filetop.bpf.c
new file mode 100644 (file)
index 0000000..c02a205
--- /dev/null
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Hengqi Chen */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "filetop.h"
+#include "stat.h"
+
+#define MAX_ENTRIES    10240  /* max_entries of the entries hash map */
+
+const volatile pid_t target_pid = 0;  /* when non-zero, only this process ID is accounted */
+const volatile bool regular_file_only = true;  /* when true, files failing S_ISREG() are ignored */
+static struct file_stat zero_value = {};  /* all-zero template used to create a new map entry */
+
+struct {  /* hash map of I/O counters: struct file_id key -> struct file_stat value */
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, MAX_ENTRIES);
+       __type(key, struct file_id);
+       __type(value, struct file_stat);
+} entries SEC(".maps");
+
+/*
+ * Copy the dentry name of @file (its last path component, not the whole
+ * path) into @buf, which holds @size bytes.
+ *
+ * The string variant of the probe-read helper is used so the copy stops at
+ * the name's NUL terminator instead of always reading @size bytes from the
+ * name pointer; the result is NUL-terminated and truncated to @size - 1
+ * characters if the name is longer.
+ */
+static void get_file_path(struct file *file, char *buf, size_t size)
+{
+       struct qstr dname;
+
+       dname = BPF_CORE_READ(file, f_path.dentry, d_name);
+       bpf_probe_read_kernel_str(buf, size, dname.name);
+}
+
+/*
+ * Account one read or write on @file to the calling thread.
+ *
+ * @ctx:   probe context (unused here, kept for the callers' signature)
+ * @file:  file being read or written
+ * @count: byte count passed to the traced call; added to the byte counter
+ * @op:    READ or WRITE, selects which pair of counters is bumped
+ *
+ * Calls from processes other than target_pid (when it is set) and, when
+ * regular_file_only is set, calls on non-regular files are ignored.
+ * The map entry is created on first use and filled with the thread's
+ * identity, command name, file name and a one-letter file type.
+ * Always returns 0.
+ */
+static int probe_entry(struct pt_regs *ctx, struct file *file, size_t count, enum op op)
+{
+       __u64 pid_tgid = bpf_get_current_pid_tgid();
+       __u32 pid = pid_tgid >> 32;
+       __u32 tid = (__u32)pid_tgid;
+       int mode;
+       struct file_id key = {};
+       struct file_stat *valuep;
+
+       if (target_pid && target_pid != pid)
+               return 0;
+
+       mode = BPF_CORE_READ(file, f_inode, i_mode);
+       if (regular_file_only && !S_ISREG(mode))
+               return 0;
+
+       /*
+        * Identify the file by the device of the filesystem it lives on plus
+        * its inode number. i_rdev only describes device special files, so
+        * using it here would give every regular file the same device value
+        * and merge equal inode numbers from different filesystems.
+        */
+       key.dev = BPF_CORE_READ(file, f_inode, i_sb, s_dev);
+       key.inode = BPF_CORE_READ(file, f_inode, i_ino);
+       key.pid = pid;
+       key.tid = tid;
+       valuep = bpf_map_lookup_elem(&entries, &key);
+       if (!valuep) {
+               /* First access: insert a zeroed entry, then fetch it to fill it in. */
+               bpf_map_update_elem(&entries, &key, &zero_value, BPF_ANY);
+               valuep = bpf_map_lookup_elem(&entries, &key);
+               if (!valuep)
+                       return 0;
+               valuep->pid = pid;
+               valuep->tid = tid;
+               bpf_get_current_comm(&valuep->comm, sizeof(valuep->comm));
+               get_file_path(file, valuep->filename, sizeof(valuep->filename));
+               if (S_ISREG(mode)) {
+                       valuep->type = 'R';
+               } else if (S_ISSOCK(mode)) {
+                       valuep->type = 'S';
+               } else {
+                       valuep->type = 'O';
+               }
+       }
+       if (op == READ) {
+               valuep->reads++;
+               valuep->read_bytes += count;
+       } else {        /* op == WRITE */
+               valuep->writes++;
+               valuep->write_bytes += count;
+       }
+       return 0;
+}
+
+SEC("kprobe/vfs_read")  /* kprobe on vfs_read: forwards file and count to probe_entry() as a READ */
+int BPF_KPROBE(vfs_read_entry, struct file *file, char *buf, size_t count, loff_t *pos)
+{
+       return probe_entry(ctx, file, count, READ);
+}
+
+SEC("kprobe/vfs_write")  /* kprobe on vfs_write: forwards file and count to probe_entry() as a WRITE */
+int BPF_KPROBE(vfs_write_entry, struct file *file, const char *buf, size_t count, loff_t *pos)
+{
+       return probe_entry(ctx, file, count, WRITE);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";  /* license string placed in the object's "license" section */
diff --git a/libbpf-tools/filetop.c b/libbpf-tools/filetop.c
new file mode 100644 (file)
index 0000000..e7fb74d
--- /dev/null
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * filetop Trace file reads/writes by process.
+ * Copyright (c) 2021 Hengqi Chen
+ *
+ * Based on filetop(8) from BCC by Brendan Gregg.
+ * 17-Jul-2021   Hengqi Chen   Created this.
+ */
+#include <argp.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "filetop.h"
+#include "filetop.skel.h"
+#include "trace_helpers.h"
+
+#define warn(...) fprintf(stderr, __VA_ARGS__)  /* printf-style diagnostic written to stderr */
+#define OUTPUT_ROWS_LIMIT 10240  /* upper bound for -r and size of the values[] buffer in print_stat() */
+
+enum SORT {  /* sort keys selectable with -s; the chosen one is stored in sort_by */
+       ALL,     /* sum of reads, writes, read_bytes and write_bytes */
+       READS,   /* number of reads */
+       WRITES,  /* number of writes */
+       RBYTES,  /* bytes read */
+       WBYTES,  /* bytes written */
+};
+
+static volatile sig_atomic_t exiting = 0;  /* set to 1 by the SIGINT handler, checked by the main loop */
+
+static pid_t target_pid = 0;  /* -p: process ID handed to the BPF program; 0 means no PID given */
+static bool clear_screen = true;  /* cleared by -C; when true the screen is cleared before each summary */
+static bool regular_file_only = true;  /* cleared by -a */
+static int output_rows = 20;  /* -r: maximum rows printed per summary */
+static int sort_by = ALL;  /* -s: one of enum SORT */
+static int interval = 1;  /* first positional argument: seconds slept between summaries */
+static int count = 99999999;  /* second positional argument: number of summaries before exiting */
+
+const char *argp_program_version = "filetop 0.1";
+const char *argp_program_bug_address =
+       "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =  /* handed to argp as the .doc member in main() */
+"Trace file reads/writes by process.\n"
+"\n"
+"USAGE: filetop [-h] [-p PID] [interval] [count]\n"
+"\n"
+"EXAMPLES:\n"
+"    filetop            # file I/O top, refresh every 1s\n"
+"    filetop -p 1216    # only trace PID 1216\n"
+"    filetop 5 10       # 5s summaries, 10 times\n";
+
+static const struct argp_option opts[] = {  /* option table; each key is handled in parse_arg() */
+       { "pid", 'p', "PID", 0, "Process ID to trace" },
+       { "noclear", 'C', NULL, 0, "Don't clear the screen" },
+       { "all", 'a', NULL, 0, "Include special files" },
+       { "sort", 's', "SORT", 0, "Sort columns, default all [all, reads, writes, rbytes, wbytes]" },
+       { "rows", 'r', "ROWS", 0, "Maximum rows to print, default 20" },
+       { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+       {},  /* empty entry terminating the table */
+};
+
+/*
+ * Parse @arg as a strictly positive base-10 integer.
+ *
+ * Returns the value, or -1 when @arg contains no digits, has trailing
+ * characters, does not fit in a long, or is not greater than zero.
+ */
+static long parse_positive(const char *arg)
+{
+       char *end;
+       long val;
+
+       errno = 0;
+       val = strtol(arg, &end, 10);
+       if (errno || end == arg || *end != '\0' || val <= 0)
+               return -1;
+       return val;
+}
+
+/*
+ * argp callback: stores each recognised option in the matching file-scope
+ * setting.
+ *
+ * @key:   option key from opts[], or ARGP_KEY_ARG for a positional argument
+ * @arg:   option or positional argument text (unused for flag options)
+ * @state: argp parser state, used to print usage/help
+ *
+ * The first positional argument is the interval, the second the count;
+ * any further positional argument is reported as an error.
+ * Returns 0 for handled keys and ARGP_ERR_UNKNOWN otherwise.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       long val;
+       static int pos_args;
+
+       switch (key) {
+       case 'p':
+               val = parse_positive(arg);
+               /* Also reject values that change when narrowed to pid_t. */
+               if (val < 0 || val != (pid_t)val) {
+                       warn("invalid PID: %s\n", arg);
+                       argp_usage(state);
+               }
+               target_pid = val;
+               break;
+       case 'C':
+               clear_screen = false;
+               break;
+       case 'a':
+               regular_file_only = false;
+               break;
+       case 's':
+               if (!strcmp(arg, "all")) {
+                       sort_by = ALL;
+               } else if (!strcmp(arg, "reads")) {
+                       sort_by = READS;
+               } else if (!strcmp(arg, "writes")) {
+                       sort_by = WRITES;
+               } else if (!strcmp(arg, "rbytes")) {
+                       sort_by = RBYTES;
+               } else if (!strcmp(arg, "wbytes")) {
+                       sort_by = WBYTES;
+               } else {
+                       warn("invalid sort method: %s\n", arg);
+                       argp_usage(state);
+               }
+               break;
+       case 'r':
+               val = parse_positive(arg);
+               if (val < 0) {
+                       warn("invalid rows: %s\n", arg);
+                       argp_usage(state);
+               }
+               /* Clamp while still a long so a huge value cannot wrap when stored in an int. */
+               if (val > OUTPUT_ROWS_LIMIT)
+                       val = OUTPUT_ROWS_LIMIT;
+               output_rows = val;
+               break;
+       case 'h':
+               argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+               break;
+       case ARGP_KEY_ARG:
+               if (pos_args == 0) {
+                       val = parse_positive(arg);
+                       if (val < 0 || val != (int)val) {
+                               warn("invalid interval\n");
+                               argp_usage(state);
+                       }
+                       interval = val;
+               } else if (pos_args == 1) {
+                       val = parse_positive(arg);
+                       if (val < 0 || val != (int)val) {
+                               warn("invalid count\n");
+                               argp_usage(state);
+                       }
+                       count = val;
+               } else {
+                       warn("unrecognized positional argument: %s\n", arg);
+                       argp_usage(state);
+               }
+               pos_args++;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+/* SIGINT handler: records the exit request in the exiting flag and does nothing else. */
+static void sig_int(int signo)
+{
+       (void)signo;
+       exiting = 1;
+}
+
+/*
+ * qsort() comparator over struct file_stat: orders entries from the largest
+ * to the smallest value of the column selected by sort_by (for ALL, the sum
+ * of the four counters).
+ *
+ * Returns a negative value when @obj1 sorts before @obj2, a positive value
+ * when it sorts after, and 0 when both carry the same value.
+ */
+static int sort_column(const void *obj1, const void *obj2)
+{
+       const struct file_stat *s1 = obj1;
+       const struct file_stat *s2 = obj2;
+       __u64 v1, v2;
+
+       if (sort_by == READS) {
+               v1 = s1->reads;
+               v2 = s2->reads;
+       } else if (sort_by == WRITES) {
+               v1 = s1->writes;
+               v2 = s2->writes;
+       } else if (sort_by == RBYTES) {
+               v1 = s1->read_bytes;
+               v2 = s2->read_bytes;
+       } else if (sort_by == WBYTES) {
+               v1 = s1->write_bytes;
+               v2 = s2->write_bytes;
+       } else {
+               v1 = s1->reads + s1->writes + s1->read_bytes + s1->write_bytes;
+               v2 = s2->reads + s2->writes + s2->read_bytes + s2->write_bytes;
+       }
+
+       /*
+        * Compare instead of subtracting: the counters are 64-bit unsigned, so
+        * a difference squeezed into an int can come out with the wrong sign.
+        */
+       return (v1 < v2) - (v1 > v2);
+}
+
+/*
+ * Print one summary and reset the counters.
+ *
+ * Steps: print a timestamped /proc/loadavg line (skipped if the file cannot
+ * be opened), copy every entry of the BPF map into a local buffer, sort it
+ * with sort_column(), print at most output_rows rows, then delete every key
+ * from the map so the next interval starts from zero.
+ *
+ * @obj: loaded filetop skeleton whose entries map is read and cleared
+ *
+ * Returns 0 on success, or the non-zero result of the failing map call.
+ */
+static int print_stat(struct filetop_bpf *obj)
+{
+       FILE *f;
+       time_t t;
+       struct tm *tm;
+       char ts[16], buf[256];
+       struct file_id key, *prev_key = NULL;
+       static struct file_stat values[OUTPUT_ROWS_LIMIT];
+       int n, i, err = 0, rows = 0;
+       int fd = bpf_map__fd(obj->maps.entries);
+
+       f = fopen("/proc/loadavg", "r");
+       if (f) {
+               time(&t);
+               tm = localtime(&t);
+               /* Fall back to an empty timestamp rather than printing an unset buffer. */
+               if (!tm || !strftime(ts, sizeof(ts), "%H:%M:%S", tm))
+                       ts[0] = '\0';
+               memset(buf, 0 , sizeof(buf));
+               /* Read one byte less than the buffer so the zero fill always terminates it. */
+               n = fread(buf, 1, sizeof(buf) - 1, f);
+               if (n)
+                       printf("%8s loadavg: %s\n", ts, buf);
+               fclose(f);
+       }
+
+       printf("%-7s %-16s %-6s %-6s %-7s %-7s %1s %s\n",
+              "TID", "COMM", "READS", "WRITES", "R_Kb", "W_Kb", "T", "FILE");
+
+       /* Snapshot the map; never store more entries than values[] can hold. */
+       while (rows < OUTPUT_ROWS_LIMIT) {
+               err = bpf_map_get_next_key(fd, prev_key, &key);
+               if (err) {
+                       if (errno == ENOENT) {
+                               err = 0;
+                               break;
+                       }
+                       warn("bpf_map_get_next_key failed: %s\n", strerror(errno));
+                       return err;
+               }
+               err = bpf_map_lookup_elem(fd, &key, &values[rows]);
+               if (err) {
+                       warn("bpf_map_lookup_elem failed: %s\n", strerror(errno));
+                       return err;
+               }
+               rows++;
+               prev_key = &key;
+       }
+
+       qsort(values, rows, sizeof(struct file_stat), sort_column);
+       rows = rows < output_rows ? rows : output_rows;
+       for (i = 0; i < rows; i++)
+               printf("%-7d %-16s %-6lld %-6lld %-7lld %-7lld %c %s\n",
+                      values[i].tid, values[i].comm, values[i].reads, values[i].writes,
+                      values[i].read_bytes / 1024, values[i].write_bytes / 1024,
+                      values[i].type, values[i].filename);
+
+       printf("\n");
+       prev_key = NULL;
+
+       /* Second pass: walk the keys again and delete each one. */
+       while (1) {
+               err = bpf_map_get_next_key(fd, prev_key, &key);
+               if (err) {
+                       if (errno == ENOENT) {
+                               err = 0;
+                               break;
+                       }
+                       warn("bpf_map_get_next_key failed: %s\n", strerror(errno));
+                       return err;
+               }
+               err = bpf_map_delete_elem(fd, &key);
+               if (err) {
+                       warn("bpf_map_delete_elem failed: %s\n", strerror(errno));
+                       return err;
+               }
+               prev_key = &key;
+       }
+       return err;
+}
+
+/*
+ * Entry point: parse the command line, open/configure/load/attach the BPF
+ * skeleton, then print a summary every interval seconds until count
+ * summaries have been printed, SIGINT is received, or a step fails.
+ *
+ * Returns 0 on a clean exit and non-zero when any step failed.
+ */
+int main(int argc, char **argv)
+{
+       static const struct argp argp = {
+               .options = opts,
+               .parser = parse_arg,
+               .doc = argp_program_doc,
+       };
+       struct filetop_bpf *obj;
+       int err;
+
+       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+       if (err)
+               return err;
+
+       err = bump_memlock_rlimit();
+       if (err) {
+               warn("failed to increase rlimit: %d\n", err);
+               return 1;
+       }
+
+       obj = filetop_bpf__open();
+       if (!obj) {
+               warn("failed to open BPF object\n");
+               return 1;
+       }
+
+       /* The filter settings live in read-only data, so set them before loading. */
+       obj->rodata->target_pid = target_pid;
+       obj->rodata->regular_file_only = regular_file_only;
+
+       err = filetop_bpf__load(obj);
+       if (err) {
+               warn("failed to load BPF object: %d\n", err);
+               goto cleanup;
+       }
+
+       err = filetop_bpf__attach(obj);
+       if (err) {
+               warn("failed to attach BPF programs: %d\n", err);
+               goto cleanup;
+       }
+
+       if (signal(SIGINT, sig_int) == SIG_ERR) {
+               warn("can't set signal handler: %s\n", strerror(errno));
+               /* err is still 0 from the attach step; mark this path as a failure. */
+               err = 1;
+               goto cleanup;
+       }
+
+       while (1) {
+               sleep(interval);
+
+               if (clear_screen) {
+                       err = system("clear");
+                       if (err)
+                               goto cleanup;
+               }
+
+               err = print_stat(obj);
+               if (err)
+                       goto cleanup;
+
+               count--;
+               if (exiting || !count)
+                       goto cleanup;
+       }
+
+cleanup:
+       filetop_bpf__destroy(obj);
+
+       return err != 0;
+}
diff --git a/libbpf-tools/filetop.h b/libbpf-tools/filetop.h
new file mode 100644 (file)
index 0000000..2974ebf
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __FILETOP_H
+#define __FILETOP_H
+
+#define PATH_MAX       4096  /* size in bytes of file_stat.filename */
+#define TASK_COMM_LEN  16  /* size in bytes of file_stat.comm */
+
+enum op {  /* kind of access reported to probe_entry() */
+       READ,   /* bumps reads and read_bytes */
+       WRITE,  /* bumps writes and write_bytes */
+};
+
+struct file_id {  /* key of the entries map: one entry per file per thread */
+       __u64 inode;  /* inode number (i_ino) of the file */
+       __u32 dev;    /* device number recorded for the file's inode */
+       __u32 pid;    /* upper 32 bits of bpf_get_current_pid_tgid() */
+       __u32 tid;    /* lower 32 bits of bpf_get_current_pid_tgid() */
+};
+
+struct file_stat {  /* value of the entries map: counters plus identity of the thread and file */
+       __u64 reads;        /* number of traced reads */
+       __u64 read_bytes;   /* sum of the count argument over those reads */
+       __u64 writes;       /* number of traced writes */
+       __u64 write_bytes;  /* sum of the count argument over those writes */
+       __u32 pid;          /* same value as file_id.pid */
+       __u32 tid;          /* same value as file_id.tid */
+       char filename[PATH_MAX];   /* dentry name of the file (last path component) */
+       char comm[TASK_COMM_LEN];  /* command name from bpf_get_current_comm() */
+       char type;          /* 'R' regular file, 'S' socket, 'O' anything else */
+};
+
+#endif /* __FILETOP_H */
diff --git a/libbpf-tools/stat.h b/libbpf-tools/stat.h
new file mode 100644 (file)
index 0000000..e34e3cf
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __STAT_H
+#define __STAT_H
+
+/* From include/uapi/linux/stat.h */
+
+#define S_IFMT         00170000  /* mask applied to a mode by the S_IS*() tests below */
+#define S_IFSOCK       0140000
+#define S_IFLNK                0120000
+#define S_IFREG                0100000
+#define S_IFBLK                0060000
+#define S_IFDIR                0040000
+#define S_IFCHR                0020000
+#define S_IFIFO                0010000
+#define S_ISUID                0004000
+#define S_ISGID                0002000
+#define S_ISVTX                0001000
+
+#define S_ISLNK(m)     (((m) & S_IFMT) == S_IFLNK)  /* each S_IS*(m) is true when the masked mode equals that S_IF* value */
+#define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m)     (((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m)     (((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m)     (((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m)    (((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m)    (((m) & S_IFMT) == S_IFSOCK)
+
+#endif /* __STAT_H */