From 75dfd5a4ef69be75305b996e0237a9ac5523bfde Mon Sep 17 00:00:00 2001 From: chantra Date: Tue, 19 Jul 2016 00:17:45 +0200 Subject: [PATCH] [cachetop] add example and man page. make interval a positional parameter. --- man/man8/cachetop.8 | 76 ++++++++++++++++++++++++++++++++++++++++++++++ tools/cachetop.py | 9 ++++-- tools/cachetop_example.txt | 70 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 man/man8/cachetop.8 create mode 100644 tools/cachetop_example.txt diff --git a/man/man8/cachetop.8 b/man/man8/cachetop.8 new file mode 100644 index 0000000..6e6ee89 --- /dev/null +++ b/man/man8/cachetop.8 @@ -0,0 +1,76 @@ +.TH cachetop 8 "2016-01-30" "USER COMMANDS" +.SH NAME +cachetop \- Statistics for Linux page cache hit/miss ratios per process. Uses Linux eBPF/bcc. +.SH SYNOPSIS +.B cachetop +[interval] +.SH DESCRIPTION +This traces four kernel functions and prints per-process summaries every +\fBinterval\fR seconds. This can be useful for process workload characterization, +and looking for patterns in operation usage over time. + +This works by tracing kernel page cache functions using dynamic tracing, and will +need updating to match any changes to these functions. Edit the script to +customize which functions are traced. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Update summaries every five seconds: +# +.B cachetop +.TP +Print summaries each second: +# +.B cachetop 1 +.SH FIELDS +.TP +PID +Process ID of the process causing the cache activity. +.TP +UID +User ID of the process causing the cache activity. +.TP +HITS +Number of page cache hits. +.TP +MISSES +Number of page cache misses. +.TP +DIRTIES +Number of dirty pages added to the page cache. +.TP +READ_HIT% +Read hit percent of page cache usage. +.TP +WRITE_HIT% +Write hit percent of page cache usage. +.TP +BUFFERS_MB +Buffers size taken from /proc/meminfo. 
+.TP +CACHED_MB +Cached amount of data in current page cache taken from /proc/meminfo. +.SH OVERHEAD +This traces various kernel page cache functions and maintains in-kernel counts, which +are asynchronously copied to user-space. While the rate of operations can +be very high (>1G/sec) and the overhead can reach 34%, this is still a relatively efficient way to trace +these events, and so the overhead is expected to be small for normal workloads. +Measure in a test environment. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _example.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. +.SH AUTHOR +Emmanuel Bretelle +.SH SEE ALSO +cachestat(8) diff --git a/tools/cachetop.py b/tools/cachetop.py index 428c3c2..b1ea9a6 100755 --- a/tools/cachetop.py +++ b/tools/cachetop.py @@ -225,7 +225,7 @@ def handle_loop(stdscr, args): for i, stat in enumerate(process_stats): stdscr.addstr( i + 2, 0, - "{0:8} {username:8} {2:16} {3:8} {4:8} " + "{0:8} {username:8.8} {2:16} {3:8} {4:8} " "{5:8} {6:9.1f}% {7:9.1f}%".format( *stat, username=pwd.getpwuid(int(stat[1]))[0] ) @@ -239,9 +239,12 @@ def handle_loop(stdscr, args): def parse_arguments(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description='show Linux page cache hit/miss statistics including read ' + 'and write hit % per processes in a UI like top.' + ) parser.add_argument( - '--interval', '-i', type=int, default=5, nargs='?', + 'interval', type=int, default=5, nargs='?', help='Interval between probes.' ) diff --git a/tools/cachetop_example.txt b/tools/cachetop_example.txt new file mode 100644 index 0000000..95fd425 --- /dev/null +++ b/tools/cachetop_example.txt @@ -0,0 +1,70 @@ +# ./cachetop -h +usage: cachetop.py [-h] [interval] + +show Linux page cache hit/miss statistics including read and write hit % per +processes in a UI like top. 
+ +positional arguments: + interval Interval between probes. + +optional arguments: + -h, --help show this help message and exit + +examples: + ./cachetop # run with default option of 5 seconds delay + ./cachetop 1 # print every second hit/miss stats + +# ./cachetop 5 +Buffers MB: 76 / Cached MB: 114 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 1 root systemd 2 0 0 100.0% 0.0% + 680 root vminfo 3 4 2 14.3% 42.9% + 567 syslog rs:main Q:Reg 10 4 2 57.1% 21.4% + 986 root kworker/u2:2 10 2457 4 0.2% 99.5% + 988 root kworker/u2:2 10 9 4 31.6% 36.8% + 877 vagrant systemd 18 4 2 72.7% 13.6% + 983 root python 148 3 143 3.3% 1.3% + 981 root strace 419 3 143 65.4% 0.5% + 544 messageb dbus-daemon 455 371 454 0.1% 0.4% + 243 root jbd2/dm-0-8 457 371 454 0.4% 0.4% + 985 root (mount) 560 2457 4 18.4% 81.4% + 987 root systemd-udevd 566 9 4 97.7% 1.2% + 988 root systemd-cgroups 569 9 4 97.8% 1.2% + 986 root modprobe 578 9 4 97.8% 1.2% + 287 root systemd-journal 598 371 454 14.9% 0.3% + 985 root mount 692 2457 4 21.8% 78.0% + 984 vagrant find 9529 2457 4 79.5% 20.5% + +Above shows the run of `find /` on a newly booted system. + +Command used to generate the activity: +# find / + +Below shows that the hit rate increases as we run find a second time and it gets +its pages from the cache. 
+# ./cachetop.py +Buffers MB: 76 / Cached MB: 115 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 544 messageb dbus-daemon 2 2 1 25.0% 50.0% + 680 root vminfo 2 2 1 25.0% 50.0% + 243 root jbd2/dm-0-8 3 2 1 40.0% 40.0% + 1068 root python 5 0 0 100.0% 0.0% + 1071 vagrant bash 350 0 0 100.0% 0.0% + 1071 vagrant find 12959 0 0 100.0% 0.0% + + +Below shows that the number of dirty pages increases as a file of 80M is created by running +# dd if=/dev/urandom of=/tmp/c bs=8192 count=10000 + +# ./cachetop.py 10 +Buffers MB: 77 / Cached MB: 193 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 544 messageb dbus-daemon 9 10 7 10.5% 15.8% + 680 root vminfo 9 10 7 10.5% 15.8% + 1109 root python 22 0 0 100.0% 0.0% + 243 root jbd2/dm-0-8 25 10 7 51.4% 8.6% + 1070 root kworker/u2:2 85 0 0 100.0% 0.0% + 1110 vagrant bash 366 0 0 100.0% 0.0% + 1110 vagrant dd 42183 40000 20000 27.0% 24.3% + +The file copied into page cache was named /tmp/c with a size of 81920000 bytes (81920000/4096 = 20000 pages) -- 2.7.4