libbpf-tools: add mountsnoop
author Hengqi Chen <chenhengqi@outlook.com>
Sun, 30 May 2021 08:36:37 +0000 (16:36 +0800)
committer yonghong-song <ys114321@gmail.com>
Mon, 2 Aug 2021 20:35:57 +0000 (13:35 -0700)
This commit adds a new libbpf tool, mountsnoop.
It provides the same functionality as its
counterpart in BCC tools, and its default
output is the same.

```
$ mountsnoop
COMM             PID     TID     MNT_NS      CALL
dockerd          1827    1903    4026531840  mount("overlay", "/data/docker/overlay2/153e6b58322c64cf4b2aac1b9caba42d390481a7d33a2bffe0eb858943d49fb6-init/merged", "overlay", 0x0, "index=off,lowerdir=/data/docker/overlay2/l/GWTHHZ2C3PYGAJ5GLTWLHMHHKR,upperdir=/data/docker/overlay2/153e6b58322c64cf4b2aac1b9caba42d390481a7d33a2bffe0eb858943d49fb6-init/diff,workdir=/data/docker/overlay2/153e6b58322c64cf4b2aac1b9caba42d390481a7d33a2bffe0eb858943d49fb6-init/work") = 0
dockerd          1827    1903    4026531840  umount("/data/docker/overlay2/153e6b58322c64cf4b2aac1b9caba42d390481a7d33a2bffe0eb858943d49fb6-init/merged", MS_NOSUID) = 0
```

In addition, a detailed mode, enabled with the -d
option, displays each mount/umount syscall
vertically with more fields, which makes the
output easier to read.

```
$ mountsnoop -d -t
PID:    1827
TID:    1864
COMM:   dockerd
OP:     MOUNT
RET:    0
LAT:    246us
MNT_NS: 4026531840
FS:     overlay
SOURCE: overlay
TARGET: /data/docker/overlay2/5fc51d4e4820082177751a8aadf3f42a751c86aff1e0efbc1a5e6af345ee205a-init/merged
DATA:   index=off,lowerdir=/data/docker/overlay2/l/GWTHHZ2C3PYGAJ5GLTWLHMHHKR,upperdir=/data/docker/overlay2/5fc51d4e4820082177751a8aadf3f42a751c86aff1e0efbc1a5e6af345ee205a-init/diff,workdir=/data/docker/overlay2/5fc51d4e4820082177751a8aadf3f42a751c86aff1e0efbc1a5e6af345ee205a-init/work
FLAGS:  0x0

PID:    1827
TID:    1864
COMM:   dockerd
OP:     UMOUNT
RET:    0
LAT:    95us
MNT_NS: 4026531840
FS:
SOURCE:
TARGET: /data/docker/overlay2/5fc51d4e4820082177751a8aadf3f42a751c86aff1e0efbc1a5e6af345ee205a-init/merged
DATA:
FLAGS:  MS_NOSUID
```

Signed-off-by: Hengqi Chen <chenhengqi@outlook.com>
libbpf-tools/.gitignore
libbpf-tools/Makefile
libbpf-tools/mountsnoop.bpf.c [new file with mode: 0644]
libbpf-tools/mountsnoop.c [new file with mode: 0644]
libbpf-tools/mountsnoop.h [new file with mode: 0644]

index 8de1bb151e2747d83eecc561137489d20023f133..2e9583634a5cafb1296df355007d37768a51cb9b 100644 (file)
@@ -24,6 +24,7 @@
 /llcstat
 /nfsdist
 /nfsslower
+/mountsnoop
 /numamove
 /offcputime
 /opensnoop
index d78a701f88be68f4360a117537a91249c3c9ad0d..c6745042b608b4ba5b694388b4904286844eb5c4 100644 (file)
@@ -35,6 +35,7 @@ APPS = \
        gethostlatency \
        hardirqs \
        llcstat \
+       mountsnoop \
        numamove \
        offcputime \
        opensnoop \
diff --git a/libbpf-tools/mountsnoop.bpf.c b/libbpf-tools/mountsnoop.bpf.c
new file mode 100644 (file)
index 0000000..30a5de4
--- /dev/null
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "mountsnoop.h"
+
+#define MAX_ENTRIES 10240      /* max_entries of the args hash map */
+
+const volatile pid_t target_pid = 0;   /* 0 = trace every process; set from user space before load */
+
+struct {       /* args: syscall arguments saved by probe_entry(), keyed by TID */
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, MAX_ENTRIES);
+       __type(key, __u32);
+       __type(value, struct arg);
+} args SEC(".maps");
+
+struct {       /* heap: one per-CPU struct event that probe_exit() fills in before emitting it */
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, int);
+       __type(value, struct event);
+} heap SEC(".maps");
+
+struct {       /* events: perf event array that probe_exit() writes finished events to */
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(__u32));
+} events SEC(".maps");
+
+/*
+ * Record the arguments of a mount/umount syscall at entry.
+ *
+ * The user-space pointers are stored as-is in the args map, keyed by the
+ * calling thread's TID; they are only dereferenced in probe_exit(), once
+ * the return value is known. When target_pid is non-zero, calls made by
+ * any other process are ignored.
+ *
+ * Always returns 0.
+ */
+static int probe_entry(const char *src, const char *dest, const char *fs,
+                      __u64 flags, const char *data, enum op op)
+{
+       __u64 pid_tgid = bpf_get_current_pid_tgid();
+       __u32 pid = pid_tgid >> 32;     /* upper half: process ID */
+       __u32 tid = (__u32)pid_tgid;    /* lower half: thread ID */
+       struct arg arg = {};
+
+       if (target_pid && target_pid != pid)
+               return 0;
+
+       arg.ts = bpf_ktime_get_ns();    /* start of the latency measurement */
+       arg.flags = flags;
+       arg.src = src;
+       arg.dest = dest;
+       arg.fs = fs;
+       arg.data = data;
+       arg.op = op;
+       bpf_map_update_elem(&args, &tid, &arg, BPF_ANY);
+       return 0;
+}
+
+/*
+ * Finish a mount/umount syscall: combine the arguments saved by
+ * probe_entry() with the return value and emit one struct event.
+ *
+ * @ctx: tracepoint context, forwarded to bpf_perf_event_output()
+ * @ret: syscall return value
+ *
+ * Does nothing if no entry was recorded for the current thread (for
+ * example because it was filtered out by target_pid). The saved entry is
+ * removed on every path once it has been found.
+ *
+ * Always returns 0.
+ */
+static int probe_exit(void *ctx, int ret)
+{
+       __u64 pid_tgid = bpf_get_current_pid_tgid();
+       __u32 pid = pid_tgid >> 32;
+       __u32 tid = (__u32)pid_tgid;
+       struct arg *argp;
+       struct event *eventp;
+       struct task_struct *task;
+       int zero = 0;
+
+       argp = bpf_map_lookup_elem(&args, &tid);
+       if (!argp)
+               return 0;
+
+       /* The event is assembled in the per-CPU heap map, not on the stack. */
+       eventp = bpf_map_lookup_elem(&heap, &zero);
+       if (!eventp)
+               goto cleanup;   /* still drop the saved entry so it cannot leak */
+
+       task = (struct task_struct *)bpf_get_current_task();
+       eventp->delta = bpf_ktime_get_ns() - argp->ts;
+       eventp->flags = argp->flags;
+       eventp->pid = pid;
+       eventp->tid = tid;
+       eventp->mnt_ns = BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
+       eventp->ret = ret;
+       eventp->op = argp->op;
+       bpf_get_current_comm(&eventp->comm, sizeof(eventp->comm));
+
+       /*
+        * The heap slot is reused between events, so every string field is
+        * either re-read from user memory or explicitly emptied.
+        */
+       if (argp->src)
+               bpf_probe_read_user_str(eventp->src, sizeof(eventp->src), argp->src);
+       else
+               eventp->src[0] = '\0';
+       if (argp->dest)
+               bpf_probe_read_user_str(eventp->dest, sizeof(eventp->dest), argp->dest);
+       else
+               eventp->dest[0] = '\0';
+       if (argp->fs)
+               bpf_probe_read_user_str(eventp->fs, sizeof(eventp->fs), argp->fs);
+       else
+               eventp->fs[0] = '\0';
+       if (argp->data)
+               bpf_probe_read_user_str(eventp->data, sizeof(eventp->data), argp->data);
+       else
+               eventp->data[0] = '\0';
+       bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, eventp, sizeof(*eventp));
+
+cleanup:
+       bpf_map_delete_elem(&args, &tid);
+       return 0;
+}
+
+/* sys_enter_mount: args[0..4] are src, dest, fs, flags and data, in that order. */
+SEC("tracepoint/syscalls/sys_enter_mount")
+int mount_entry(struct trace_event_raw_sys_enter *ctx)
+{
+       return probe_entry((const char *)ctx->args[0],
+                          (const char *)ctx->args[1],
+                          (const char *)ctx->args[2],
+                          (__u64)ctx->args[3],
+                          (const char *)ctx->args[4],
+                          MOUNT);
+}
+
+/* sys_exit_mount: hand the syscall's return value to probe_exit(). */
+SEC("tracepoint/syscalls/sys_exit_mount")
+int mount_exit(struct trace_event_raw_sys_exit *ctx)
+{
+       int retval = (int)ctx->ret;
+
+       return probe_exit(ctx, retval);
+}
+
+/*
+ * sys_enter_umount: args[0] is the target path and args[1] the flags; the
+ * source, filesystem and data strings do not apply and are passed as NULL.
+ */
+SEC("tracepoint/syscalls/sys_enter_umount")
+int umount_entry(struct trace_event_raw_sys_enter *ctx)
+{
+       return probe_entry(NULL, (const char *)ctx->args[0], NULL,
+                          (__u64)ctx->args[1], NULL, UMOUNT);
+}
+
+/* sys_exit_umount: hand the syscall's return value to probe_exit(). */
+SEC("tracepoint/syscalls/sys_exit_umount")
+int umount_exit(struct trace_event_raw_sys_exit *ctx)
+{
+       int retval = (int)ctx->ret;
+
+       return probe_exit(ctx, retval);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";        /* placed in the "license" section of the BPF object */
diff --git a/libbpf-tools/mountsnoop.c b/libbpf-tools/mountsnoop.c
new file mode 100644 (file)
index 0000000..ff041ef
--- /dev/null
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * mountsnoop  Trace mount and umount[2] syscalls
+ *
+ * Copyright (c) 2021 Hengqi Chen
+ * 30-May-2021   Hengqi Chen   Created this.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <argp.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "mountsnoop.h"
+#include "mountsnoop.skel.h"
+#include "trace_helpers.h"
+
+#define PERF_BUFFER_PAGES      64      /* page count passed to perf_buffer__new() */
+#define PERF_POLL_TIMEOUT_MS   100     /* timeout passed to perf_buffer__poll() */
+#define warn(...) fprintf(stderr, __VA_ARGS__) /* printf-style message written to stderr */
+
+/*
+ * Fallback strerrorname_np(), compiled in unless the C library is glibc
+ * 2.32 or newer. It always returns NULL, in which case strerrno() prints
+ * the numeric error code instead of a symbolic name.
+ *
+ * https://www.gnu.org/software/gnulib/manual/html_node/strerrorname_005fnp.html
+ */
+#if !defined(__GLIBC__) || __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 32)
+       const char *strerrorname_np(int errnum)
+       {
+               return NULL;
+       }
+#endif
+
+/* Set by the SIGINT handler; tells the poll loop in main() to stop. */
+static volatile sig_atomic_t exiting = 0;
+
+/* Command-line settings, filled in by parse_arg(). */
+static pid_t target_pid = 0;           /* -p: 0 means "all processes" */
+static bool emit_timestamp = false;    /* -t */
+static bool output_vertically = false; /* -d */
+
+/*
+ * Symbolic names of the mount(2) flag bits; the array index is the bit
+ * number, matching the MS_* definitions in <linux/mount.h>.
+ *
+ * Bit 9 (0x200) has no MS_* name, so it is shown as a raw hex value; the
+ * table deliberately has no NULL holes. Bit 15 is shared by MS_SILENT and
+ * its deprecated alias MS_VERBOSE; only MS_SILENT is shown.
+ */
+static const char *flag_names[] = {
+       [0] = "MS_RDONLY",
+       [1] = "MS_NOSUID",
+       [2] = "MS_NODEV",
+       [3] = "MS_NOEXEC",
+       [4] = "MS_SYNCHRONOUS",
+       [5] = "MS_REMOUNT",
+       [6] = "MS_MANDLOCK",
+       [7] = "MS_DIRSYNC",
+       [8] = "MS_NOSYMFOLLOW",
+       [9] = "0x200",
+       [10] = "MS_NOATIME",
+       [11] = "MS_NODIRATIME",
+       [12] = "MS_BIND",
+       [13] = "MS_MOVE",
+       [14] = "MS_REC",
+       [15] = "MS_SILENT",
+       [16] = "MS_POSIXACL",
+       [17] = "MS_UNBINDABLE",
+       [18] = "MS_PRIVATE",
+       [19] = "MS_SLAVE",
+       [20] = "MS_SHARED",
+       [21] = "MS_RELATIME",
+       [22] = "MS_KERNMOUNT",
+       [23] = "MS_I_VERSION",
+       [24] = "MS_STRICTATIME",
+       [25] = "MS_LAZYTIME",
+       [26] = "MS_SUBMOUNT",
+       [27] = "MS_NOREMOTELOCK",
+       [28] = "MS_NOSEC",
+       [29] = "MS_BORN",
+       [30] = "MS_ACTIVE",
+       [31] = "MS_NOUSER",
+};
+static const int flag_count = sizeof(flag_names) / sizeof(flag_names[0]);
+
+/* Strings picked up by argp for --version, bug reports and --help. */
+const char *argp_program_version = "mountsnoop 0.1";
+const char *argp_program_bug_address =
+       "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Trace mount and umount syscalls.\n"
+"\n"
+"USAGE: mountsnoop [-h] [-t] [-p PID] [-d]\n"
+"\n"
+"EXAMPLES:\n"
+"    mountsnoop         # trace mount and umount syscalls\n"
+"    mountsnoop -d      # detailed output (one line per column value)\n"
+"    mountsnoop -p 1216 # only trace PID 1216\n";
+
+/* Options handled by parse_arg(); the short keys must match its switch. */
+static const struct argp_option opts[] = {
+       { "pid", 'p', "PID", 0, "Process ID to trace" },
+       { "timestamp", 't', NULL, 0, "Include timestamp on output" },
+       { "detailed", 'd', NULL, 0, "Output result in detail mode" },
+       { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+       {},
+};
+
+/*
+ * argp callback: apply one command-line option to the global settings.
+ *
+ * @key:   short option character from opts[]
+ * @arg:   option argument, only used for -p
+ * @state: argp parser state, used to print usage/help
+ *
+ * Returns 0 when the key was handled and ARGP_ERR_UNKNOWN otherwise. An
+ * invalid PID is reported through argp_usage().
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       char *end;
+       long pid;
+
+       switch (key) {
+       case 'p':
+               errno = 0;
+               pid = strtol(arg, &end, 10);
+               /*
+                * Reject empty input, trailing garbage ("12abc"), values
+                * that are not positive and values that do not fit pid_t.
+                */
+               if (errno || end == arg || *end != '\0' ||
+                   pid <= 0 || (long)(pid_t)pid != pid) {
+                       warn("Invalid PID: %s\n", arg);
+                       argp_usage(state);
+               }
+               target_pid = pid;
+               break;
+       case 't':
+               emit_timestamp = true;
+               break;
+       case 'd':
+               output_vertically = true;
+               break;
+       case 'h':
+               argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+/* SIGINT handler: ask the poll loop in main() to stop. */
+static void sig_int(int signo)
+{
+       (void)signo;    /* the signal number is not needed */
+       exiting = 1;
+}
+
+/*
+ * Render a flags word as "NAME | NAME | ..." using flag_names[].
+ *
+ * Returns "0x0" when no bit is set. If none of the set bits has a name in
+ * the table, the raw value is returned in hex instead of an empty string.
+ * The result lives in a static buffer that is overwritten by the next
+ * call, so it must be consumed before strflags() is called again.
+ */
+static const char *strflags(__u64 flags)
+{
+       static char str[512];
+       size_t len = 0;
+       int i;
+
+       if (!flags)
+               return "0x0";
+
+       str[0] = '\0';
+       for (i = 0; i < flag_count; i++) {
+               int n;
+
+               /*
+                * Use a 64-bit one: "1 << 31" overflows int and would be
+                * sign-extended when combined with the 64-bit flags.
+                */
+               if (!(flags & (1ULL << i)) || !flag_names[i])
+                       continue;
+               n = snprintf(str + len, sizeof(str) - len, "%s%s",
+                            len ? " | " : "", flag_names[i]);
+               if (n < 0 || (size_t)n >= sizeof(str) - len)
+                       break;  /* buffer full: keep what fits */
+               len += n;
+       }
+       if (!str[0])
+               snprintf(str, sizeof(str), "0x%llx", (unsigned long long)flags);
+       return str;
+}
+
+/*
+ * Format a syscall return value for display: "0" for success, "-ENAME"
+ * when strerrorname_np() knows the error, or the plain number otherwise.
+ * The non-zero forms are written to a static buffer that the next call
+ * overwrites.
+ */
+static const char *strerrno(int errnum)
+{
+       static char ret[32] = {};
+       const char *name;
+
+       if (errnum == 0)
+               return "0";
+
+       /* errnum is a negative errno value, hence the sign flip. */
+       name = strerrorname_np(-errnum);
+       if (name)
+               snprintf(ret, sizeof(ret), "-%s", name);
+       else
+               snprintf(ret, sizeof(ret), "%d", errnum);
+       return ret;
+}
+
+/*
+ * Build the one-line, strace-like rendering of an event, e.g.
+ * mount("src", "dest", "fs", FLAGS, "data") = RET. The string is kept in
+ * a static buffer that the next call overwrites.
+ */
+static const char *gen_call(const struct event *e)
+{
+       static char call[10240];
+
+       memset(call, 0, sizeof(call));
+       if (e->op != UMOUNT) {
+               snprintf(call, sizeof(call), "mount(\"%s\", \"%s\", \"%s\", %s, \"%s\") = %s",
+                        e->src, e->dest, e->fs, strflags(e->flags), e->data, strerrno(e->ret));
+               return call;
+       }
+
+       snprintf(call, sizeof(call), "umount(\"%s\", %s) = %s",
+                e->dest, strflags(e->flags), strerrno(e->ret));
+       return call;
+}
+
+/*
+ * Perf buffer sample callback: print one event, either as a single table
+ * row or, in detailed mode, as one "KEY: value" line per field.
+ *
+ * @data points to a struct event; @ctx, @cpu and @data_sz are unused.
+ */
+static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
+{
+       static const char *op_name[] = {
+               [MOUNT] = "MOUNT",
+               [UMOUNT] = "UMOUNT",
+       };
+       const struct event *e = data;
+       const char *indent = "";
+
+       if (emit_timestamp) {
+               char ts[32];
+               time_t now;
+
+               time(&now);
+               strftime(ts, sizeof(ts), "%H:%M:%S ", localtime(&now));
+               printf("%s", ts);
+               /* detailed fields are indented under the timestamp line */
+               indent = "    ";
+       }
+
+       if (output_vertically) {
+               if (emit_timestamp)
+                       printf("\n");
+               printf("%sPID:    %d\n", indent, e->pid);
+               printf("%sTID:    %d\n", indent, e->tid);
+               printf("%sCOMM:   %s\n", indent, e->comm);
+               printf("%sOP:     %s\n", indent, op_name[e->op]);
+               printf("%sRET:    %s\n", indent, strerrno(e->ret));
+               printf("%sLAT:    %lldus\n", indent, e->delta / 1000);
+               printf("%sMNT_NS: %u\n", indent, e->mnt_ns);
+               printf("%sFS:     %s\n", indent, e->fs);
+               printf("%sSOURCE: %s\n", indent, e->src);
+               printf("%sTARGET: %s\n", indent, e->dest);
+               printf("%sDATA:   %s\n", indent, e->data);
+               printf("%sFLAGS:  %s\n", indent, strflags(e->flags));
+               printf("\n");
+               return;
+       }
+
+       printf("%-16s %-7d %-7d %-11u %s\n",
+              e->comm, e->pid, e->tid, e->mnt_ns, gen_call(e));
+}
+
+/* Perf buffer callback: report on stderr how many events a CPU dropped. */
+static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
+{
+       fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
+}
+
+/*
+ * Entry point: parse options, load and attach the BPF programs, then
+ * print events from the perf buffer until SIGINT arrives or polling fails.
+ *
+ * Returns 0 on a clean exit (including Ctrl-C) and non-zero on error.
+ */
+int main(int argc, char **argv)
+{
+       static const struct argp argp = {
+               .options = opts,
+               .parser = parse_arg,
+               .doc = argp_program_doc,
+       };
+       /* zero-initialised so fields that are not set below are not garbage */
+       struct perf_buffer_opts pb_opts = {};
+       struct perf_buffer *pb = NULL;
+       struct mountsnoop_bpf *obj;
+       int err;
+
+       err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+       if (err)
+               return err;
+
+       err = bump_memlock_rlimit();
+       if (err) {
+               warn("failed to increase rlimit: %d\n", err);
+               return 1;
+       }
+
+       obj = mountsnoop_bpf__open();
+       if (!obj) {
+               warn("failed to open BPF object\n");
+               return 1;
+       }
+
+       /* read-only data must be set between open and load */
+       obj->rodata->target_pid = target_pid;
+
+       err = mountsnoop_bpf__load(obj);
+       if (err) {
+               warn("failed to load BPF object: %d\n", err);
+               goto cleanup;
+       }
+
+       err = mountsnoop_bpf__attach(obj);
+       if (err) {
+               warn("failed to attach BPF programs: %d\n", err);
+               goto cleanup;
+       }
+
+       pb_opts.sample_cb = handle_event;
+       pb_opts.lost_cb = handle_lost_events;
+       pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &pb_opts);
+       err = libbpf_get_error(pb);
+       if (err) {
+               warn("failed to open perf buffer: %d\n", err);
+               goto cleanup;
+       }
+
+       if (signal(SIGINT, sig_int) == SIG_ERR) {
+               /* errno is positive; also make sure we exit with failure */
+               warn("can't set signal handler: %s\n", strerror(errno));
+               err = 1;
+               goto cleanup;
+       }
+
+       if (!output_vertically) {
+               if (emit_timestamp)
+                       printf("%-8s ", "TIME");
+               printf("%-16s %-7s %-7s %-11s %s\n", "COMM", "PID", "TID", "MNT_NS", "CALL");
+       }
+
+       while (!exiting) {
+               err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
+               /* -EINTR is how Ctrl-C shows up here; it is not a failure */
+               if (err < 0 && err != -EINTR) {
+                       warn("error polling perf buffer: %d\n", err);
+                       goto cleanup;
+               }
+               /* a non-negative result or -EINTR must not become the exit status */
+               err = 0;
+       }
+
+cleanup:
+       perf_buffer__free(pb);
+       mountsnoop_bpf__destroy(obj);
+
+       return err != 0;
+}
diff --git a/libbpf-tools/mountsnoop.h b/libbpf-tools/mountsnoop.h
new file mode 100644 (file)
index 0000000..b79fd17
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __MOUNTSNOOP_H
+#define __MOUNTSNOOP_H
+
+#define TASK_COMM_LEN  16      /* size of event.comm */
+#define FS_NAME_LEN    8       /* size of event.fs; NOTE(review): 8-char names such as "squashfs" do not fit with the NUL */
+#define DATA_LEN       512     /* size of event.data */
+#define PATH_MAX       4096    /* size of event.src and event.dest */
+
+enum op {      /* which syscall an arg/event belongs to */
+       MOUNT,
+       UMOUNT,
+};
+
+struct arg {   /* syscall arguments saved at entry until the syscall returns */
+       __u64 ts;               /* bpf_ktime_get_ns() at syscall entry */
+       __u64 flags;            /* flags argument as passed to the syscall */
+       const char *src;        /* user-space pointer; NULL for umount */
+       const char *dest;       /* user-space pointer to the target path */
+       const char *fs;         /* user-space pointer; NULL for umount */
+       const char *data;       /* user-space pointer; NULL for umount */
+       enum op op;
+};
+
+struct event { /* record sent from the BPF program to user space for each completed call */
+       __u64 delta;            /* nanoseconds between syscall entry and exit */
+       __u64 flags;
+       __u32 pid;              /* upper 32 bits of bpf_get_current_pid_tgid() */
+       __u32 tid;              /* lower 32 bits of bpf_get_current_pid_tgid() */
+       unsigned int mnt_ns;    /* ns.inum of the task's mount namespace */
+       int ret;                /* syscall return value */
+       char comm[TASK_COMM_LEN];
+       char fs[FS_NAME_LEN];
+       char src[PATH_MAX];
+       char dest[PATH_MAX];
+       char data[DATA_LEN];
+       enum op op;
+};
+
+#endif /* __MOUNTSNOOP_H */