From e14f27fcb44ea5642c612f01f6751ca6ab89c181 Mon Sep 17 00:00:00 2001 From: mcaleavya Date: Wed, 17 Feb 2016 22:12:44 +0000 Subject: [PATCH] migrated statsnoop to use bpf_perf_event_output --- man/man8/statsnoop.8 | 4 ++ tools/old/statsnoop.py | 118 +++++++++++++++++++++++++++++++++++++++++++++++ tools/statsnoop.py | 121 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 217 insertions(+), 26 deletions(-) create mode 100755 tools/old/statsnoop.py diff --git a/man/man8/statsnoop.8 b/man/man8/statsnoop.8 index 335895c..f33294e 100644 --- a/man/man8/statsnoop.8 +++ b/man/man8/statsnoop.8 @@ -12,6 +12,10 @@ applications that are failing, especially on startup. This works by tracing various kernel sys_stat() functions using dynamic tracing, and will need updating to match any changes to these functions. +This makes use of a Linux 4.5 feature (bpf_perf_event_output()); +for kernels older than 4.5, see the version under tools/old, +which uses an older mechanism. + Since this uses BPF, only the root user can use this tool. .SH REQUIREMENTS CONFIG_BPF and bcc. diff --git a/tools/old/statsnoop.py b/tools/old/statsnoop.py new file mode 100755 index 0000000..ad54ac7 --- /dev/null +++ b/tools/old/statsnoop.py @@ -0,0 +1,118 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# statsnoop Trace stat() syscalls. +# For Linux, uses BCC, eBPF. Embedded C. +# +# USAGE: statsnoop [-h] [-t] [-x] [-p PID] +# +# Copyright 2016 Netflix, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 08-Feb-2016 Brendan Gregg Created this. + +from __future__ import print_function +from bcc import BPF +import argparse + +# arguments +examples = """examples: + ./statsnoop # trace all stat() syscalls + ./statsnoop -t # include timestamps + ./statsnoop -x # only show failed stats + ./statsnoop -p 181 # only trace PID 181 +""" +parser = argparse.ArgumentParser( + description="Trace stat() syscalls", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-t", "--timestamp", action="store_true", + help="include timestamp on output") +parser.add_argument("-x", "--failed", action="store_true", + help="only show failed stats") +parser.add_argument("-p", "--pid", + help="trace this PID only") +args = parser.parse_args() +debug = 0 + +# define BPF program +bpf_text = """ +#include + +BPF_HASH(args_filename, u32, const char *); + +int trace_entry(struct pt_regs *ctx, const char __user *filename) +{ + u32 pid = bpf_get_current_pid_tgid(); + + FILTER + args_filename.update(&pid, &filename); + + return 0; +}; + +int trace_return(struct pt_regs *ctx) +{ + const char **filenamep; + int ret = ctx->ax; + u32 pid = bpf_get_current_pid_tgid(); + + filenamep = args_filename.lookup(&pid); + if (filenamep == 0) { + // missed entry + return 0; + } + + bpf_trace_printk("%d %s\\n", ret, *filenamep); + args_filename.delete(&pid); + + return 0; +} +""" +if args.pid: + bpf_text = bpf_text.replace('FILTER', + 'if (pid != %s) { return 0; }' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER', '') +if debug: + print(bpf_text) + +# initialize BPF +b = BPF(text=bpf_text) +b.attach_kprobe(event="sys_stat", fn_name="trace_entry") +b.attach_kprobe(event="sys_statfs", fn_name="trace_entry") +b.attach_kprobe(event="sys_newstat", fn_name="trace_entry") +b.attach_kretprobe(event="sys_stat", fn_name="trace_return") +b.attach_kretprobe(event="sys_statfs", fn_name="trace_return") +b.attach_kretprobe(event="sys_newstat", fn_name="trace_return") + +# header +if args.timestamp: + print("%-14s" % ("TIME(s)"), end="") +print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH")) + +start_ts = 0 + +# format output +while 1: + (task, pid, cpu, flags, ts, msg) = b.trace_fields() + (ret_s, filename) = msg.split(" ", 1) + + ret = int(ret_s) + if (args.failed and (ret >= 0)): + continue + + # split return value into FD and errno columns + if ret >= 0: + fd_s = ret + err = 0 + else: + fd_s = "-1" + err = - ret + + # print columns + if args.timestamp: + if start_ts == 0: + start_ts = ts + print("%-14.9f" % (ts - start_ts), end="") + print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename)) diff --git a/tools/statsnoop.py b/tools/statsnoop.py index ad54ac7..0e50a63 100755 --- a/tools/statsnoop.py +++ b/tools/statsnoop.py @@ -10,10 +10,12 @@ # Licensed under the Apache License, Version 2.0 (the "License") # # 08-Feb-2016 Brendan Gregg Created this. +# 17-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT from __future__ import print_function from bcc import BPF import argparse +import ctypes as ct # arguments examples = """examples: @@ -38,32 +40,67 @@ debug = 0 # define BPF program bpf_text = """ #include +#include +#include + +struct val_t { + u32 pid; + u64 ts; + char comm[TASK_COMM_LEN]; + const char *fname; +}; + +struct data_t { + u32 pid; + u64 ts; + u64 delta; + int ret; + char comm[TASK_COMM_LEN]; + char fname[NAME_MAX]; +}; BPF_HASH(args_filename, u32, const char *); +BPF_HASH(infotmp, u32,struct val_t); +BPF_PERF_OUTPUT(events); int trace_entry(struct pt_regs *ctx, const char __user *filename) { + struct val_t val = {}; u32 pid = bpf_get_current_pid_tgid(); FILTER - args_filename.update(&pid, &filename); + if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) { + val.pid = bpf_get_current_pid_tgid(); + val.ts = bpf_ktime_get_ns(); + val.fname = filename; + infotmp.update(&pid, &val); + } return 0; }; int trace_return(struct pt_regs *ctx) { - const char **filenamep; - int ret = ctx->ax; u32 pid = bpf_get_current_pid_tgid(); + struct val_t *valp; + struct data_t data = {}; + + u64 tsp = bpf_ktime_get_ns(); - filenamep = args_filename.lookup(&pid); - if (filenamep == 0) { + valp = infotmp.lookup(&pid); + if (valp == 0) { // missed entry return 0; } - - bpf_trace_printk("%d %s\\n", ret, *filenamep); + bpf_probe_read(&data.comm,sizeof(data.comm), valp->comm); + bpf_probe_read(&data.fname,sizeof(data.fname),(void *)valp->fname); + data.pid = valp->pid; + data.delta = tsp - valp->ts; + data.ts = tsp /1000; + data.ret = ctx->ax; + + events.perf_submit(ctx,&data,sizeof(data)); + infotmp.delete(&pid); args_filename.delete(&pid); return 0; @@ -86,33 +123,65 @@ b.attach_kretprobe(event="sys_stat", fn_name="trace_return") b.attach_kretprobe(event="sys_statfs", fn_name="trace_return") b.attach_kretprobe(event="sys_newstat", fn_name="trace_return") +TASK_COMM_LEN = 16 # linux/sched.h +NAME_MAX = 255 # linux/limits.h + +class Data(ct.Structure): + _fields_ = [ + ("pid", ct.c_ulonglong), + ("ts", ct.c_ulonglong), + ("delta", ct.c_ulonglong), + ("ret", ct.c_int), + ("comm", ct.c_char * TASK_COMM_LEN), + ("fname", ct.c_char * NAME_MAX) + ] + +start_ts = 0 +prev_ts = 0 +delta = 0 + # header if args.timestamp: print("%-14s" % ("TIME(s)"), end="") print("%-6s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH")) -start_ts = 0 - -# format output -while 1: - (task, pid, cpu, flags, ts, msg) = b.trace_fields() - (ret_s, filename) = msg.split(" ", 1) - - ret = int(ret_s) - if (args.failed and (ret >= 0)): - continue +# process event +def print_event(cpu, data, size): + event = ct.cast(data, ct.POINTER(Data)).contents + global start_ts + global prev_ts + global delta + global cont # split return value into FD and errno columns - if ret >= 0: - fd_s = ret + if event.ret >= 0: + fd_s = event.ret err = 0 else: - fd_s = "-1" - err = - ret + fd_s = -1 + err = - event.ret + + if start_ts == 0: + prev_ts = start_ts + + if start_ts == 1: + delta = float(delta) + (event.ts - prev_ts) + + if (args.failed and (event.ret >= 0)): + start_ts = 1 + prev_ts = event.ts + return - # print columns if args.timestamp: - if start_ts == 0: - start_ts = ts - print("%-14.9f" % (ts - start_ts), end="") - print("%-6d %-16s %4s %3s %s" % (pid, task, fd_s, err, filename)) + print("%-14.9f" % (delta / 1000000), end="") + + print("%-6d %-16s %4d %3d %s" % (event.pid, event.comm, + fd_s, err, event.fname)) + + prev_ts = event.ts + start_ts = 1 + +# loop with callback to print_event +b["events"].open_perf_buffer(print_event) +while 1: + b.kprobe_poll() -- 2.7.4