from __future__ import print_function
from bcc import BPF
import argparse
+import ctypes as ct
import signal
+import time
# arguments
examples = """examples:
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/fs.h>
+#include <linux/sched.h>
-#define TRACE_READ 0
-#define TRACE_WRITE 1
+enum trace_mode {
+ MODE_READ,
+ MODE_WRITE
+};
struct val_t {
u32 sz;
u64 ts;
char name[DNAME_INLINE_LEN];
+ char comm[TASK_COMM_LEN];
+};
+
+struct data_t {
+ enum trace_mode mode;
+ u32 pid;
+ u32 sz;
+ u64 delta_us;
+ char name[DNAME_INLINE_LEN];
+ char comm[TASK_COMM_LEN];
};
BPF_HASH(entryinfo, pid_t, struct val_t);
+BPF_PERF_OUTPUT(events);
// store timestamp and size on entry
static int trace_rw_entry(struct pt_regs *ctx, struct file *file,
val.sz = count;
val.ts = bpf_ktime_get_ns();
__builtin_memcpy(&val.name, de->d_iname, sizeof(val.name));
+ bpf_get_current_comm(&val.comm, sizeof(val.comm));
entryinfo.update(&pid, &val);
return 0;
if (delta_us < MIN_US)
return 0;
- if (type == TRACE_READ) {
- bpf_trace_printk("R %d %d %s\\n", valp->sz, delta_us, valp->name);
- } else {
- bpf_trace_printk("W %d %d %s\\n", valp->sz, delta_us, valp->name);
- }
+ struct data_t data = {};
+ data.mode = type;
+ data.pid = pid;
+ data.sz = valp->sz;
+ data.delta_us = delta_us;
+ bpf_probe_read(&data.name, sizeof(data.name), valp->name);
+ bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
+ events.perf_submit(ctx, &data, sizeof(data));
return 0;
}
int trace_read_return(struct pt_regs *ctx)
{
- return trace_rw_return(ctx, TRACE_READ);
+ return trace_rw_return(ctx, MODE_READ);
}
int trace_write_return(struct pt_regs *ctx)
{
- return trace_rw_return(ctx, TRACE_WRITE);
+ return trace_rw_return(ctx, MODE_WRITE);
}
"""
b.attach_kretprobe(event="__vfs_read", fn_name="trace_read_return")
b.attach_kretprobe(event="__vfs_write", fn_name="trace_write_return")
+TASK_COMM_LEN = 16 # linux/sched.h
+DNAME_INLINE_LEN = 32 # linux/dcache.h
+
+class Data(ct.Structure):
+ _fields_ = [
+ ("mode", ct.c_int),
+ ("pid", ct.c_uint),
+ ("sz", ct.c_uint),
+ ("delta_us", ct.c_ulonglong),
+ ("name", ct.c_char * DNAME_INLINE_LEN),
+ ("comm", ct.c_char * TASK_COMM_LEN),
+ ]
+
+mode_s = {
+ 0: 'R',
+ 1: 'W',
+}
+
# header
print("Tracing sync read/writes slower than %d ms" % min_ms)
print("%-8s %-14s %-6s %1s %-7s %7s %s" % ("TIME(s)", "COMM", "PID", "D",
"BYTES", "LAT(ms)", "FILENAME"))
-start_ts = 0
+start_ts = time.time()
-# format output
-while 1:
- (task, pid, cpu, flags, ts, msg) = b.trace_fields()
- args = msg.split(" ", 3)
- (type_s, sz, delta_us_s) = (args[0], args[1], args[2])
- try:
- filename = args[3]
- except:
- filename = "?"
- if start_ts == 0:
- start_ts = ts
-
- ms = float(int(delta_us_s, 10)) / 1000
+def print_event(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(Data)).contents
+
+ ms = float(event.delta_us) / 1000
print("%-8.3f %-14.14s %-6s %1s %-7s %7.2f %s" % (
- ts - start_ts, task, pid, type_s, sz, ms, filename))
+ time.time() - start_ts, event.comm, event.pid, mode_s[event.mode],
+ event.sz, ms, event.name))
+
+b["events"].open_perf_buffer(print_event)
+while 1:
+ b.kprobe_poll()
local program = [[
#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+struct data_t {
+ u64 stack_id;
+ u32 pid;
+ char comm[TASK_COMM_LEN];
+};
BPF_STACK_TRACE(stack_traces, 128)
+BPF_PERF_OUTPUT(events);
void trace_stack(struct pt_regs *ctx) {
+ u32 pid = bpf_get_current_pid_tgid();
FILTER
- int stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
- if (stack_id >= 0)
- bpf_trace_printk("stack_id=%d\n", stack_id);
+ struct data_t data = {};
+ data.stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID),
+ data.pid = pid;
+ bpf_get_current_comm(&data.comm, sizeof(data.comm));
+ events.perf_submit(ctx, &data, sizeof(data));
}
]]
+local ffi = require("ffi")
+
return function(BPF, utils)
local parser = utils.argparse("stacksnoop",
"Trace and print kernel stack traces for a kernel function")
local filter = ""
if args.pid then
- filter = [[
- u32 pid;
- pid = bpf_get_current_pid_tgid();
- if (pid != %d) { return; }
- ]] % args.pid
+ filter = "if (pid != %d) { return; }" % args.pid
end
local text = program:gsub("FILTER", filter)
bpf:attach_kprobe{event=args.fn, fn_name="trace_stack"}
if BPF.num_open_kprobes() == 0 then
- print("Function \"%s\" not found. Exiting." % args.fn)
+ print("Function \"%s\" not found. Exiting." % {args.fn})
return
end
if args.verbose then
print("%-18s %-12s %-6s %-3s %s" %
- {"TIME(s)", "COMM", "PID", "CPU", "SYSCALL"})
+ {"TIME(s)", "COMM", "PID", "CPU", "FUNCTION"})
else
- print("%-18s %s" % {"TIME(s)", "SYSCALL"})
+ print("%-18s %s" % {"TIME(s)", "FUNCTION"})
end
local stack_traces = bpf:get_table("stack_traces")
- local pipe = bpf:pipe()
+ local start_ts = utils.posix.time_ns()
- while true do
- local task, pid, cpu, flags, ts, msg = pipe:trace_fields()
- local stack_id = string.match(msg, "stack_id=(%d+)")
+ local function print_event(cpu, event)
+ local ts = (utils.posix.time_ns() - start_ts) / 1e9
- if stack_id then
- if args.verbose then
- print("%-18.9f %-12.12s %-6d %-3d %s" % {ts, task, pid, cpu, args.fn})
- else
- print("%-18.9f %s" % {ts, args.fn})
- end
+ if args.verbose then
+ print("%-18.9f %-12.12s %-6d %-3d %s" %
+ {ts, ffi.string(event.comm), event.pid, cpu, args.fn})
+ else
+ print("%-18.9f %s" % {ts, args.fn})
+ end
- for addr in stack_traces:walk(tonumber(stack_id)) do
- local sym, offset = ksym:resolve(addr)
- if args.offset then
- print("\t%-16p %s+0x%x" % {addr, sym, tonumber(offset)})
- else
- print("\t%-16p %s" % {addr, sym})
- end
+ for addr in stack_traces:walk(tonumber(event.stack_id)) do
+ local sym, offset = ksym:resolve(addr)
+ if args.offset then
+ print("\t%-16p %s+0x%x" % {addr, sym, tonumber(offset)})
+ else
+ print("\t%-16p %s" % {addr, sym})
end
end
+
print()
end
+
+ local TASK_COMM_LEN = 16 -- linux/sched.h
+
+ bpf:get_table("events"):open_perf_buffer(print_event, [[
+ struct {
+ uint64_t stack_id;
+ uint32_t pid;
+ char comm[%d];
+ }
+ ]] % {TASK_COMM_LEN})
+ bpf:kprobe_poll_loop()
end
from __future__ import print_function
from bcc import BPF
import argparse
-import re
+import ctypes as ct
+import time
# arguments
examples = """examples:
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+struct data_t {
+ u64 stack_id;
+ u32 pid;
+ char comm[TASK_COMM_LEN];
+};
BPF_STACK_TRACE(stack_traces, 128)
+BPF_PERF_OUTPUT(events);
void trace_stack(struct pt_regs *ctx) {
+ u32 pid = bpf_get_current_pid_tgid();
FILTER
- int stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
- if (stack_id >= 0)
- bpf_trace_printk("stack_id=%d\\n", stack_id);
+ struct data_t data = {};
+ data.stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID),
+ data.pid = pid;
+ bpf_get_current_comm(&data.comm, sizeof(data.comm));
+ events.perf_submit(ctx, &data, sizeof(data));
}
"""
if args.pid:
bpf_text = bpf_text.replace('FILTER',
- ('u32 pid; pid = bpf_get_current_pid_tgid(); ' +
- 'if (pid != %s) { return; }') % (args.pid))
+ 'if (pid != %s) { return; }' % args.pid)
else:
bpf_text = bpf_text.replace('FILTER', '')
if debug:
# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event=function, fn_name="trace_stack")
+
+TASK_COMM_LEN = 16 # linux/sched.h
+
+class Data(ct.Structure):
+ _fields_ = [
+ ("stack_id", ct.c_ulonglong),
+ ("pid", ct.c_uint),
+ ("comm", ct.c_char * TASK_COMM_LEN),
+ ]
+
matched = b.num_open_kprobes()
if matched == 0:
print("Function \"%s\" not found. Exiting." % function)
exit()
stack_traces = b.get_table("stack_traces")
-msg_regexp = re.compile("stack_id=(\d+)")
+start_ts = time.time()
# header
if verbose:
- print("%-18s %-12s %-6s %-3s %s" % ("TIME(s)", "COMM", "PID", "CPU", "SYSCALL"))
+ print("%-18s %-12s %-6s %-3s %s" %
+ ("TIME(s)", "COMM", "PID", "CPU", "FUNCTION"))
else:
- print("%-18s %s" % ("TIME(s)", "SYSCALL"))
+ print("%-18s %s" % ("TIME(s)", "FUNCTION"))
+
+def print_event(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(Data)).contents
+
+ ts = time.time() - start_ts
+
+ if verbose:
+ print("%-18.9f %-12.12s %-6d %-3d %s" % (ts, event.comm, event.pid, cpu,
+ function))
+ else:
+ print("%-18.9f %s" % (ts, function))
+
+ for addr in stack_traces.walk(event.stack_id):
+ sym = b.ksymaddr(addr) if offset else b.ksym(addr)
+ print("\t%016x %s" % (addr, sym))
+
+ print()
-# format output
+b["events"].open_perf_buffer(print_event)
while 1:
- (task, pid, cpu, flags, ts, msg) = b.trace_fields()
- m = msg_regexp.match(msg)
- if m:
- if verbose:
- print("%-18.9f %-12.12s %-6d %-3d %s" % (ts, task, pid, cpu, function))
- else:
- print("%-18.9f %s" % (ts, function))
-
- stack_id = int(m.group(1))
- for addr in stack_traces.walk(stack_id):
- sym = b.ksymaddr(addr) if offset else b.ksym(addr)
- print("\t%016x %s" % (addr, sym))
-
- print()
+ b.kprobe_poll()