From dc642c52317124022ed50b23b671ca13400fea65 Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Tue, 9 Feb 2016 00:32:51 -0800 Subject: [PATCH] filelife --- README.md | 1 + man/man8/filelife.8 | 68 +++++++++++++++++++++++++++++ tools/filelife.py | 104 +++++++++++++++++++++++++++++++++++++++++++++ tools/filelife_example.txt | 52 +++++++++++++++++++++++ 4 files changed, 225 insertions(+) create mode 100644 man/man8/filelife.8 create mode 100755 tools/filelife.py create mode 100644 tools/filelife_example.txt diff --git a/README.md b/README.md index 3bf8599..b9eb6cb 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ Tools: - tools/[bitesize](tools/bitesize.py): Show per process I/O size histogram. [Examples](tools/bitesize_example.txt). - tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt). - tools/[execsnoop](tools/execsnoop.py): Trace new processes via exec() syscalls. [Examples](tools/execsnoop_example.txt). +- tools/[filelife](tools/filelife.py): Trace the lifespan of short-lived files. [Examples](tools/filelife_example.txt). - tools/[fileslower](tools/fileslower.py): Trace slow synchronous file reads and writes. [Examples](tools/fileslower_example.txt). - tools/[funccount](tools/funccount.py): Count kernel function calls. [Examples](tools/funccount_example.txt). - tools/[funclatency](tools/funclatency.py): Time kernel functions and show their latency distribution. [Examples](tools/funclatency_example.txt). diff --git a/man/man8/filelife.8 b/man/man8/filelife.8 new file mode 100644 index 0000000..b57e21a --- /dev/null +++ b/man/man8/filelife.8 @@ -0,0 +1,68 @@ +.TH filelife 8 "2016-02-08" "USER COMMANDS" +.SH NAME +filelife \- Trace the lifespan of short-lived files. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B filelife [\-h] [\-p PID] +.SH DESCRIPTION +This traces the creation and deletion of files, providing information +on who deleted the file, the file age, and the file name. The intent is to +provide information on short-lived files, for debugging or performance +analysis. + +This works by tracing the kernel vfs_create() and vfs_delete() functions using +dynamic tracing, and will need updating to match any changes to these +functions. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +.TP +\-h +Print usage message. +.TP +\-p PID +Trace this process ID only (filtered in-kernel). +.SH EXAMPLES +.TP +Trace all short-lived files, and print details: +# +.B filelife +.TP +Trace all short-lived files created AND deleted by PID 181: +# +.B filelife \-p 181 +.SH FIELDS +.TP +TIME +Time of the deletion. +.TP +PID +Process ID that deleted the file. +.TP +COMM +Process name for the PID. +.TP +AGE(s) +Age of the file, from creation to deletion, in seconds. +.TP +FILE +Filename. +.SH OVERHEAD +This traces the kernel VFS file create and delete functions and prints output +for each delete. As the rate of this is generally expected to be low +(< 1000/s), the overhead is also expected to be negligible. +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Brendan Gregg +.SH SEE ALSO +opensnoop(1) diff --git a/tools/filelife.py b/tools/filelife.py new file mode 100755 index 0000000..9786fd7 --- /dev/null +++ b/tools/filelife.py @@ -0,0 +1,104 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# filelife Trace the lifespan of short-lived files. +# For Linux, uses BCC, eBPF. Embedded C. +# +# This traces the creation and deletion of files, providing information +# on who deleted the file, the file age, and the file name. The intent is to +# provide information on short-lived files, for debugging or performance +# analysis. +# +# USAGE: filelife [-h] [-p PID] +# +# Copyright 2016 Netflix, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 08-Feb-2015 Brendan Gregg Created this. + +from __future__ import print_function +from bcc import BPF +import argparse +from time import strftime + +# arguments +examples = """examples: + ./filelife # trace all stat() syscalls + ./filelife -p 181 # only trace PID 181 +""" +parser = argparse.ArgumentParser( + description="Trace stat() syscalls", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-p", "--pid", + help="trace this PID only") +args = parser.parse_args() +debug = 0 + +# define BPF program +bpf_text = """ +#include +#include + +BPF_HASH(birth, struct dentry *); + +// trace file creation time +int trace_create(struct pt_regs *ctx, struct inode *dir, struct dentry *dentry) +{ + u32 pid = bpf_get_current_pid_tgid(); + FILTER + + u64 ts = bpf_ktime_get_ns(); + birth.update(&dentry, &ts); + + return 0; +}; + +// trace file deletion and output details +int trace_unlink(struct pt_regs *ctx, struct inode *dir, struct dentry *dentry) +{ + u32 pid = bpf_get_current_pid_tgid(); + FILTER + + u64 *tsp, delta; + tsp = birth.lookup(&dentry); + if (tsp == 0) { + return 0; // missed create + } + delta = (bpf_ktime_get_ns() - *tsp) / 1000000; + birth.delete(&dentry); + + if (dentry->d_iname[0] == 0) + return 0; + + bpf_trace_printk("%d %s\\n", delta, dentry->d_iname); + + return 0; +} +""" +if args.pid: + bpf_text = bpf_text.replace('FILTER', + 'if (pid != %s) { return 0; }' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER', '') +if debug: + print(bpf_text) + +# initialize BPF +b = BPF(text=bpf_text) +b.attach_kprobe(event="vfs_create", fn_name="trace_create") +b.attach_kprobe(event="vfs_unlink", fn_name="trace_unlink") + +# header +print("%-8s %-6s %-16s %-7s %s" % ("TIME", "PID", "COMM", "AGE(s)", "FILE")) + +start_ts = 0 + +# format output +while 1: + (task, pid, cpu, flags, ts, msg) = b.trace_fields() + (delta, filename) = msg.split(" ") + + # print columns + print("%-8s %-6d %-16s %-7.2f %s" % (strftime("%H:%M:%S"), pid, task, + float(delta) / 1000, filename)) diff --git a/tools/filelife_example.txt b/tools/filelife_example.txt new file mode 100644 index 0000000..c3d6795 --- /dev/null +++ b/tools/filelife_example.txt @@ -0,0 +1,52 @@ +Demonstrations of filelife, the Linux eBPF/bcc version. + + +filelife traces short-lived files: those that have been created and then +deleted while tracing. For example: + +# ./filelife +TIME PID COMM AGE(s) FILE +05:57:59 8556 gcc 0.04 ccCB5EDe.s +05:57:59 8560 rm 0.02 .entry_64.o.d +05:57:59 8563 gcc 0.02 cc5UFHXf.s +05:57:59 8567 rm 0.01 .thunk_64.o.d +05:57:59 8578 rm 0.02 .syscall_64.o.d +05:58:00 8589 rm 0.03 .common.o.d +05:58:00 8596 rm 0.01 .8592.tmp +05:58:00 8601 rm 0.01 .8597.tmp +05:58:00 8606 rm 0.01 .8602.tmp +05:58:00 8639 rm 0.02 .vma.o.d +05:58:00 8650 rm 0.02 .vdso32-setup.o.d +05:58:00 8656 rm 0.00 .vdso.lds.d +05:58:00 8659 gcc 0.01 ccveeJAz.s +05:58:00 8663 rm 0.01 .vdso-note.o.d +05:58:00 8674 rm 0.02 .vclock_gettime.o.d +05:58:01 8684 rm 0.01 .vgetcpu.o.d +05:58:01 8690 collect2 0.00 ccvKMxdm.ld + +This has caught short-lived files that were created during a Linux kernel +build. The PID shows the process ID that finally deleted the file, and COMM +is its process name. The AGE(s) column shows the age of the file, in seconds, +when it was deleted. These are all short-lived, and existed for less than +one tenth of a second. + +Creating, populating, and then deleting files as part of another process can +be an inefficient method of inter-process communication. It can cause disk I/O +as files are closed and their file descriptors flushed, only later to be +deleted. As such, short-lived files can be a target of performance +optimizations. + +USAGE message: + +# ./filelife -h +usage: filelife [-h] [-p PID] + +Trace stat() syscalls + +optional arguments: + -h, --help show this help message and exit + -p PID, --pid PID trace this PID only + +examples: + ./filelife # trace all stat() syscalls + ./filelife -p 181 # only trace PID 181 -- 2.7.4