- tools/[bitesize](tools/bitesize.py): Show per process I/O size histogram. [Examples](tools/bitesize_example.txt).
- tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt).
- tools/[execsnoop](tools/execsnoop.py): Trace new processes via exec() syscalls. [Examples](tools/execsnoop_example.txt).
+- tools/[dcsnoop](tools/dcsnoop.py): Trace directory entry cache (dcache) lookups. [Examples](tools/dcsnoop_example.txt).
- tools/[dcstat](tools/dcstat.py): Directory entry cache (dcache) stats. [Examples](tools/dcstat_example.txt).
- tools/[filelife](tools/filelife.py): Trace the lifespan of short-lived files. [Examples](tools/filelife_example.txt).
- tools/[fileslower](tools/fileslower.py): Trace slow synchronous file reads and writes. [Examples](tools/fileslower_example.txt).
--- /dev/null
+.TH dcsnoop 8 "2016-02-10" "USER COMMANDS"
+.SH NAME
+dcsnoop \- Trace directory entry cache (dcache) lookups. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B dcsnoop [\-h] [\-a]
+.SH DESCRIPTION
+By default, this traces every failed dcache lookup (cache miss), and shows the
+process performing the lookup and the filename requested. A \-a option can be
+used to show all lookups, not just failed ones.
+
+The output of this tool can be verbose, and is intended for further
+investigations of dcache performance beyond dcstat(8), which prints
+per-second summaries.
+
+This uses kernel dynamic tracing of the d_lookup() function, and will need
+and will need updating to match any changes to this function.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-a
+Trace references, not just failed lookups.
+.SH EXAMPLES
+.TP
+Trace failed dcache lookups:
+#
+.B dcsnoop
+.TP
+Trace all dcache lookups:
+#
+.B dcsnoop \-a
+.SH FIELDS
+.TP
+TIME(s)
+Time of lookup, in seconds.
+.TP
+PID
+Process ID.
+.TP
+COMM
+Process name.
+.TP
+T
+Type: R == reference (only visible with \-a), M == miss. A miss will print two
+lines, one for the reference, and one for the miss.
+.TP
+FILE
+The file name component that was being looked up. This contains trailing
+pathname components (after '/'), which will be the subject of subsequent
+lookups.
+.SH OVERHEAD
+File name lookups can be frequent (depending on the workload), and this tool
+prints a line for each failed lookup, and with \-a, each reference as well. The
+output may be verbose, and the incurred overhead, while optimized to some
+extent, may still be from noticeable to significant. This is only really
+intended for deeper investigations beyond dcstat(8), when absolutely necessary.
+Measure and quantify the overhead in a test environment before use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+dcstat(1)
--- /dev/null
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# dcsnoop Trace directory entry cache (dcache) lookups.
+# For Linux, uses BCC, eBPF. Embedded C.
+#
+# USAGE: dcsnoop [-h] [-a]
+#
+# By default, this traces every failed dcache lookup, and shows the process
+# performing the lookup and the filename requested. A -a option can be used
+# to show all lookups, not just failed ones.
+#
+# This uses kernel dynamic tracing of the d_lookup() function, and will need
+# to be modified to match kernel changes.
+#
+# Also see dcstat(8), for per-second summaries.
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 09-Feb-2016 Brendan Gregg Created this.
+
+from __future__ import print_function
+from bcc import BPF
+import argparse
+import re
+
+# arguments
+examples = """examples:
+ ./dcsnoop # trace failed dcache lookups
+ ./dcsnoop -a # trace all dcache lookups
+"""
+parser = argparse.ArgumentParser(
+ description="Trace directory entry cache (dcache) lookups",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("-a", "--all", action="store_true",
+ help="trace all lookups (default is fails only)")
+args = parser.parse_args()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+
+#define MAX_FILE_LEN 64
+
+struct entry_t {
+ char name[MAX_FILE_LEN];
+};
+
+BPF_HASH(entrybypid, u32, struct entry_t);
+
+/* from fs/namei.c: */
+struct nameidata {
+ struct path path;
+ struct qstr last;
+ // [...]
+};
+
+int trace_fast(struct pt_regs *ctx, struct nameidata *nd, struct path *path)
+{
+ bpf_trace_printk("R %s\\n", nd->last.name);
+ return 1;
+}
+
+int kprobe__d_lookup(struct pt_regs *ctx, const struct dentry *parent,
+ const struct qstr *name)
+{
+ u32 pid = bpf_get_current_pid_tgid();
+ struct entry_t entry = {};
+ if (name->name) {
+ bpf_probe_read(&entry.name, sizeof(entry.name), (void *)name->name);
+ }
+ entrybypid.update(&pid, &entry);
+ return 0;
+}
+
+int kretprobe__d_lookup(struct pt_regs *ctx)
+{
+ u32 pid = bpf_get_current_pid_tgid();
+ struct entry_t *ep;
+ ep = entrybypid.lookup(&pid);
+ if (ep == 0) {
+ return 0; // missed entry
+ }
+ if (ctx->ax == 0) {
+ bpf_trace_printk("M %s\\n", ep->name);
+ }
+ entrybypid.delete(&pid);
+ return 0;
+}
+"""
+
+# initialize BPF
+b = BPF(text=bpf_text)
+if args.all:
+ b.attach_kprobe(event="lookup_fast", fn_name="trace_fast")
+
+# header
+print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))
+
+start_ts = 0
+
+# format output
+while 1:
+ (task, pid, cpu, flags, ts, msg) = b.trace_fields()
+ try:
+ (type, file) = msg.split(" ", 1)
+ except ValueError:
+ continue
+
+ if start_ts == 0:
+ start_ts = ts
+
+ print("%-11.6f %-6s %-16s %1s %s" % (ts - start_ts, pid, task, type, file))
--- /dev/null
+Demonstrations of dcsnoop, the Linux eBPF/bcc version.
+
+
+dcsnoop traces directory entry cache (dcache) lookups, and can be used for
+further investigation beyond dcstat(8). The output is likely verbose, as
+dcache lookups are likely frequent. By default, only failed lookups are shown.
+For example:
+
+# ./dcsnoop.py
+TIME(s) PID COMM T FILE
+0.002837 1643 snmpd M net/dev
+0.002852 1643 snmpd M 1643
+0.002856 1643 snmpd M net
+0.002863 1643 snmpd M dev
+0.002952 1643 snmpd M net/if_inet6
+0.002964 1643 snmpd M if_inet6
+0.003180 1643 snmpd M net/ipv4/neigh/eth0/retrans_time_ms
+0.003192 1643 snmpd M ipv4/neigh/eth0/retrans_time_ms
+0.003197 1643 snmpd M neigh/eth0/retrans_time_ms
+0.003203 1643 snmpd M eth0/retrans_time_ms
+0.003206 1643 snmpd M retrans_time_ms
+0.003245 1643 snmpd M ipv6/neigh/eth0/retrans_time_ms
+0.003249 1643 snmpd M neigh/eth0/retrans_time_ms
+0.003252 1643 snmpd M eth0/retrans_time_ms
+0.003255 1643 snmpd M retrans_time_ms
+0.003287 1643 snmpd M conf/eth0/forwarding
+0.003292 1643 snmpd M eth0/forwarding
+0.003295 1643 snmpd M forwarding
+0.003326 1643 snmpd M base_reachable_time_ms
+[...]
+
+I ran a drop caches at the same time as executing this tool. The output shows
+the processes, the type of event ("T" column: M == miss, R == reference),
+and the filename for the dcache lookup.
+
+The way the dcache is currently implemented, each component of a path is
+checked in turn. The first line, showing "net/dev" from snmp, will be a lookup
+for "net" in a directory (that isn't shown here). If it finds "net", it will
+then lookup "dev" inside net. You can see this sequence a little later,
+starting at time 0.003180, where a pathname is being searched
+directory by directory.
+
+
+The -a option will show all lookups, although be warned, the output will be
+very verbose. For example:
+
+# ./dcsnoop
+TIME(s) PID COMM T FILE
+0.000000 20279 dcsnoop.py M p_lookup_fast
+0.000010 20279 dcsnoop.py M enable
+0.000013 20279 dcsnoop.py M id
+0.000015 20279 dcsnoop.py M filter
+0.000017 20279 dcsnoop.py M trigger
+0.000019 20279 dcsnoop.py M format
+0.006148 20279 dcsnoop.py R sys/kernel/debug/tracing/trace_pipe
+0.006158 20279 dcsnoop.py R kernel/debug/tracing/trace_pipe
+0.006161 20279 dcsnoop.py R debug/tracing/trace_pipe
+0.006164 20279 dcsnoop.py R tracing/trace_pipe
+0.006166 20279 dcsnoop.py R trace_pipe
+0.015900 1643 snmpd R proc/sys/net/ipv6/conf/lo/forwarding
+0.015901 1643 snmpd R sys/net/ipv6/conf/lo/forwarding
+0.015901 1643 snmpd R net/ipv6/conf/lo/forwarding
+0.015902 1643 snmpd R ipv6/conf/lo/forwarding
+0.015903 1643 snmpd R conf/lo/forwarding
+0.015904 1643 snmpd R lo/forwarding
+0.015905 1643 snmpd M lo/forwarding
+0.015908 1643 snmpd R forwarding
+0.015909 1643 snmpd M forwarding
+0.015937 1643 snmpd R proc/sys/net/ipv6/neigh/lo/base_reachable_time_ms
+0.015937 1643 snmpd R sys/net/ipv6/neigh/lo/base_reachable_time_ms
+0.015938 1643 snmpd R net/ipv6/neigh/lo/base_reachable_time_ms
+0.015939 1643 snmpd R ipv6/neigh/lo/base_reachable_time_ms
+0.015940 1643 snmpd R neigh/lo/base_reachable_time_ms
+0.015941 1643 snmpd R lo/base_reachable_time_ms
+0.015941 1643 snmpd R base_reachable_time_ms
+0.015943 1643 snmpd M base_reachable_time_ms
+0.043569 1876 supervise M 20281
+0.043573 1886 supervise M 20280
+0.043582 1886 supervise R supervise/status.new
+[...]
+
+
+USAGE message:
+
+# ./dcsnoop.py -h
+usage: dcsnoop.py [-h] [-a]
+
+Trace directory entry cache (dcache) lookups
+
+optional arguments:
+ -h, --help show this help message and exit
+ -a, --all trace all lookups (default is fails only)
+
+examples:
+ ./dcsnoop # trace failed dcache lookups
+ ./dcsnoop -a # trace all dcache lookups