some bcc examples and tools

author Brendan Gregg <brendan.d.gregg@gmail.com>

Tue, 18 Aug 2015 21:56:14 +0000 (14:56 -0700)

committer Brendan Gregg <brendan.d.gregg@gmail.com>

Tue, 18 Aug 2015 21:56:14 +0000 (14:56 -0700)
author Brendan Gregg <brendan.d.gregg@gmail.com>
Tue, 18 Aug 2015 21:56:14 +0000 (14:56 -0700)
committer Brendan Gregg <brendan.d.gregg@gmail.com>
Tue, 18 Aug 2015 21:56:14 +0000 (14:56 -0700)
diff --git a/examples/bitehist.c b/examples/bitehist.c

new file mode 100644 (file)

index 0000000..e4e6772
--- /dev/null
+++ b/examples/bitehist.c
@@ -0,0 +1,48 @@
+/*
+ * bitehist.c  Block I/O size histogram.
+ *             For Linux, uses BCC, eBPF. See .py file.
+ *
+ * Based on eBPF sample tracex2 by Alexi Starovoitov.
+ * Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * 15-Aug-2015 Brendan Gregg   Created this.
+ */
+
+#include <uapi/linux/ptrace.h>
+#include "../../linux-4.2-rc5/include/linux/blkdev.h"
+
+BPF_TABLE("array", int, u64, dist, 64);
+
+static unsigned int log2(unsigned int v)
+{
+       unsigned int r;
+       unsigned int shift;
+
+       r = (v > 0xFFFF) << 4; v >>= r;
+       shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+       shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+       shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+       r |= (v >> 1);
+       return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+       unsigned int hi = v >> 32;
+       if (hi)
+               return log2(hi) + 32 + 1;
+       else
+               return log2(v) + 1;
+}
+
+int do_request(struct pt_regs *ctx, struct request *req)
+{
+       int index = log2l(req->__data_len / 1024);
+       u64 *leaf = dist.lookup(&index);
+       if (leaf) (*leaf)++;
+
+       return 0;
+}
diff --git a/examples/bitehist.py b/examples/bitehist.py

new file mode 100755 (executable)

index 0000000..ae2112b
--- /dev/null
+++ b/examples/bitehist.py
@@ -0,0 +1,109 @@
+#!/usr/bin/python
+#
+# bitehist.py  Block I/O size histogram.
+#              For Linux, uses BCC, eBPF. See .c file.
+#
+# Written as a basic example of using a histogram to show a distribution.
+#
+# USAGE: bitehist.py [interval [count]]
+#
+# The default interval is 5 seconds. A Ctrl-C will print the partially
+# gathered histogram then exit.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 15-Aug-2015  Brendan Gregg   Created this.
+
+from bpf import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+       print("USAGE: %s [interval [count]]" % argv[0])
+       exit()
+
+# arguments
+interval = 5
+count = -1
+if len(argv) > 1:
+       try:
+               interval = int(argv[1])
+               if interval == 0:
+                       raise
+               if len(argv) > 2:
+                       count = int(argv[2])
+       except: # also catches -h, --help
+               usage()
+
+# load BPF program
+b = BPF(src_file = "bitehist.c")
+BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request")
+dist = b.get_table("dist", c_int, c_ulonglong)
+dist_max = 64
+
+# header
+print("Tracing... Hit Ctrl-C to end.")
+last = {}
+for i in range(1, dist_max + 1):
+       last[i] = 0
+
+# functions
+stars_max = 38
+def stars(val, val_max, width):
+       i = 0
+       text = ""
+       while (1):
+               if (i > (width * val / val_max) - 1) or (i > width - 1):
+                       break
+               text += "*"
+               i += 1
+       if val > val_max:
+               text = text[:-1] + "+"
+       return text
+
+def print_log2_hist(d, val_type):
+       idx_max = -1
+       val_max = 0
+       for i in range(1, dist_max + 1):
+               try:
+                       val = dist[c_int(i)].value - last[i]
+                       if (val > 0):
+                               idx_max = i
+                       if (val > val_max):
+                               val_max = val
+               except:
+                       break
+       if idx_max > 0:
+               print("     %-15s : count     distribution" % val_type);
+       for i in range(1, idx_max + 1):
+               low = (1 << i) >> 1
+               high = (1 << i) - 1
+               if (low == high):
+                       low -= 1
+               try:
+                       val = dist[c_int(i)].value - last[i]
+                       print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val,
+                           stars_max, stars(val, val_max, stars_max)))
+                       last[i] = dist[c_int(i)].value
+               except:
+                       break
+
+# output
+loop = 0
+do_exit = 0
+while (1):
+       if count > 0:
+               loop += 1
+               if loop > count:
+                       exit()
+       try:
+               sleep(interval)
+       except KeyboardInterrupt:
+               pass; do_exit = 1
+
+       print
+       print_log2_hist(dist, "kbytes")
+       if do_exit:
+               exit()
diff --git a/examples/bitehist_example.txt b/examples/bitehist_example.txt

new file mode 100644 (file)

index 0000000..5af66e1
--- /dev/null
+++ b/examples/bitehist_example.txt
@@ -0,0 +1,70 @@
+Demonstrations of bitehist.py, the Linux eBPF/bcc version.
+
+This prints a power-of-2 histogram to show the block I/O size distribution.
+By default, a summary is printed every five seconds:
+
+# ./bitehist.py
+Tracing... Hit Ctrl-C to end.
+
+     kbytes          : count     distribution
+       0 -> 1        : 0        |                                      |
+       2 -> 3        : 0        |                                      |
+       4 -> 7        : 26       |*************                         |
+       8 -> 15       : 3        |*                                     |
+      16 -> 31       : 5        |**                                    |
+      32 -> 63       : 6        |***                                   |
+      64 -> 127      : 7        |***                                   |
+     128 -> 255      : 75       |**************************************|
+
+     kbytes          : count     distribution
+       0 -> 1        : 0        |                                      |
+       2 -> 3        : 0        |                                      |
+       4 -> 7        : 83       |**************************************|
+       8 -> 15       : 2        |                                      |
+      16 -> 31       : 6        |**                                    |
+      32 -> 63       : 6        |**                                    |
+      64 -> 127      : 5        |**                                    |
+     128 -> 255      : 21       |*********                             |
+^C
+     kbytes          : count     distribution
+       0 -> 1        : 0        |                                      |
+       2 -> 3        : 0        |                                      |
+       4 -> 7        : 8        |**************************************|
+
+The first output shows a bimodal distribution. The largest mode of 75 I/O were
+between 128 and 255 Kbytes in size, and another mode of 26 I/O were between 4
+and 7 Kbytes in size. 
+
+The next output summary shows the workload is doing more 4 - 7 Kbyte I/O.
+
+The final output is partial, showing what was measured until Ctrl-C was hit.
+
+
+For an output interval of one second, and three summaries:
+
+# ./bitehist.py 1 3
+Tracing... Hit Ctrl-C to end.
+
+     kbytes          : count     distribution
+       0 -> 1        : 0        |                                      |
+       2 -> 3        : 0        |                                      |
+       4 -> 7        : 4        |**************************************|
+
+     kbytes          : count     distribution
+       0 -> 1        : 0        |                                      |
+       2 -> 3        : 0        |                                      |
+       4 -> 7        : 5        |**************************************|
+       8 -> 15       : 0        |                                      |
+      16 -> 31       : 0        |                                      |
+      32 -> 63       : 1        |*******                               |
+
+     kbytes          : count     distribution
+       0 -> 1        : 0        |                                      |
+       2 -> 3        : 0        |                                      |
+       4 -> 7        : 4        |**************************************|
+
+
+Full usage:
+
+# ./bitehist.py -h
+USAGE: ./bitehist.py [interval [count]]
diff --git a/examples/disksnoop.c b/examples/disksnoop.c

new file mode 100644 (file)

index 0000000..cd24583
--- /dev/null
+++ b/examples/disksnoop.c
@@ -0,0 +1,48 @@
+/*
+ * disksnoop.c Trace block device I/O: basic version of iosnoop.
+ *             For Linux, uses BCC, eBPF. See .py file.
+ *
+ * Copyright (c) 2015 Brendan Gregg.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * 11-Aug-2015 Brendan Gregg   Created this.
+ */
+
+#include <uapi/linux/ptrace.h>
+#include "../../linux-4.2-rc5/include/linux/blkdev.h"
+
+struct key_t {
+       struct request *req;
+};
+BPF_TABLE("hash", struct key_t, u64, start, 10240);
+
+int do_request(struct pt_regs *ctx, struct request *req) {
+       struct key_t key = {};
+       u64 ts;
+
+       // stash start timestamp by request ptr
+       ts = bpf_ktime_get_ns();
+       key.req = req;
+       start.update(&key, &ts);
+
+       return 0;
+}
+
+int do_completion(struct pt_regs *ctx, struct request *req) {
+       struct key_t key = {};
+       u64 *tsp, delta;
+
+       key.req = req;
+       tsp = start.lookup(&key);
+
+       if (tsp != 0) {
+               delta = bpf_ktime_get_ns() - *tsp;
+               bpf_trace_printk("%d %x %d\n", req->__data_len,
+                   req->cmd_flags, delta / 1000);
+               start.delete(&key);
+       }
+
+       return 0;
+}
diff --git a/examples/disksnoop.py b/examples/disksnoop.py

new file mode 100755 (executable)

index 0000000..11e5ab6
--- /dev/null
+++ b/examples/disksnoop.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+#
+# disksnoop.py Trace block device I/O: basic version of iosnoop.
+#              For Linux, uses BCC, eBPF. See .c file.
+#
+# Written as a basic example of tracing latency.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 11-Aug-2015  Brendan Gregg   Created this.
+
+from bpf import BPF
+import sys
+
+REQ_WRITE = 1          # from include/linux/blk_types.h
+
+# load BPF program
+b = BPF(src_file="disksnoop.c")
+BPF.attach_kprobe(b.load_func("do_request", BPF.KPROBE), "blk_start_request")
+BPF.attach_kprobe(b.load_func("do_completion", BPF.KPROBE), "blk_update_request")
+
+# header
+print "%-18s %-2s %-7s %8s" % ("TIME(s)", "T", "BYTES", "LAT(ms)")
+
+# open trace pipe
+try:
+       trace = open("/sys/kernel/debug/tracing/trace_pipe", "r")
+except:
+       print >> sys.stderr, "ERROR: opening trace_pipe"
+       exit(1)
+
+# format output
+while 1:
+       try:
+               line = trace.readline().rstrip()
+       except KeyboardInterrupt:
+               pass; exit()
+       prolog, time_s, colon, bytes_s, flags_s, us_s = \
+               line.rsplit(" ", 5)
+
+       time_s = time_s[:-1]    # strip trailing ":"
+       flags = int(flags_s, 16)
+       if flags & REQ_WRITE:
+               type_s = "W"
+       elif bytes_s == "0":    # see blk_fill_rwbs() for logic
+               type_s = "M"
+       else:
+               type_s = "R"
+       ms = float(int(us_s, 10)) / 1000
+
+       print "%-18s %-2s %-7s %8.2f" % (time_s, type_s, bytes_s, ms)
diff --git a/examples/disksnoop_example.txt b/examples/disksnoop_example.txt

new file mode 100644 (file)

index 0000000..8352912
--- /dev/null
+++ b/examples/disksnoop_example.txt
@@ -0,0 +1,40 @@
+Demonstrations of disksnoop.py, the Linux eBPF/bcc version.
+
+
+This traces block I/O, a prints a line to summarize each I/O completed:
+
+# ./disksnoop.py 
+TIME(s)            T  BYTES    LAT(ms)
+16458043.435457    W  4096        2.73
+16458043.435981    W  4096        3.24
+16458043.436012    W  4096        3.13
+16458043.437326    W  4096        4.44
+16458044.126545    R  4096       42.82
+16458044.129872    R  4096        3.24
+16458044.130705    R  4096        0.73
+16458044.142813    R  4096       12.01
+16458044.147302    R  4096        4.33
+16458044.148117    R  4096        0.71
+16458044.148950    R  4096        0.70
+16458044.164332    R  4096       15.29
+16458044.168003    R  4096        3.58
+16458044.171676    R  4096        3.59
+16458044.172453    R  4096        0.72
+16458044.173213    R  4096        0.71
+16458044.173989    R  4096        0.72
+16458044.174739    R  4096        0.70
+16458044.190334    R  4096       15.52
+16458044.196608    R  4096        6.17
+16458044.203091    R  4096        6.35
+
+The output includes a basic timestamp (in seconds), the type of I/O (W == write,
+R == read, M == metadata), the size of the I/O in bytes, and the latency (or
+duration) of the I/O in milliseconds.
+
+The latency is measured from I/O request to the device, to the device
+completion. This excludes latency spent queued in the OS.
+
+Most of the I/O in this example were 0.7 and 4 milliseconds in duration. There
+was an outlier of 42.82 milliseconds, a read which followed many writes (the
+high latency may have been caused by the writes still being serviced on the
+storage device).
diff --git a/examples/vfsreadlat.c b/examples/vfsreadlat.c

new file mode 100644 (file)

index 0000000..2d3141c
--- /dev/null
+++ b/examples/vfsreadlat.c
@@ -0,0 +1,73 @@
+/*
+ * vfsreadlat.c                VFS read latency distribution.
+ *                     For Linux, uses BCC, eBPF. See .py file.
+ *
+ * Based on eBPF sample tracex2 by Alexi Starovoitov.
+ * Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * 15-Aug-2015 Brendan Gregg   Created this.
+ */
+
+#include <uapi/linux/ptrace.h>
+
+struct key_t {
+       u32 pid;
+};
+
+BPF_TABLE("hash", struct key_t, u64, start, 10240);
+BPF_TABLE("array", int, u64, dist, 64);
+
+static unsigned int log2(unsigned int v)
+{
+       unsigned int r;
+       unsigned int shift;
+
+       r = (v > 0xFFFF) << 4; v >>= r;
+       shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+       shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+       shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+       r |= (v >> 1);
+       return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+       unsigned int hi = v >> 32;
+       if (hi)
+               return log2(hi) + 32 + 1;
+       else
+               return log2(v) + 1;
+}
+
+int do_entry(struct pt_regs *ctx)
+{
+       struct key_t key = {};
+       u64 ts, *val, zero = 0;
+
+       key.pid = bpf_get_current_pid_tgid();
+       ts = bpf_ktime_get_ns();
+       start.update(&key, &ts);
+       return 0;
+}
+
+int do_return(struct pt_regs *ctx)
+{
+       struct key_t key = {};
+       u64 *tsp, delta;
+
+       key.pid = bpf_get_current_pid_tgid();
+       tsp = start.lookup(&key);
+
+       if (tsp != 0) {
+               delta = bpf_ktime_get_ns() - *tsp;
+               int index = log2l(delta / 1000);
+               u64 *leaf = dist.lookup(&index);
+               if (leaf) (*leaf)++;
+               start.delete(&key);
+       }
+
+       return 0;
+}
diff --git a/examples/vfsreadlat.py b/examples/vfsreadlat.py

new file mode 100755 (executable)

index 0000000..798ccf1
--- /dev/null
+++ b/examples/vfsreadlat.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+#
+# vfsreadlat.py                VFS read latency distribution.
+#                      For Linux, uses BCC, eBPF. See .c file.
+#
+# Written as a basic example of a function latency distribution histogram.
+#
+# USAGE: vfsreadlat.py [interval [count]]
+#
+# The default interval is 5 seconds. A Ctrl-C will print the partially
+# gathered histogram then exit.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 15-Aug-2015  Brendan Gregg   Created this.
+
+from bpf import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+       print("USAGE: %s [interval [count]]" % argv[0])
+       exit()
+
+# arguments
+interval = 5
+count = -1
+if len(argv) > 1:
+       try:
+               interval = int(argv[1])
+               if interval == 0:
+                       raise
+               if len(argv) > 2:
+                       count = int(argv[2])
+       except: # also catches -h, --help
+               usage()
+
+# load BPF program
+b = BPF(src_file = "vfsreadlat.c")
+BPF.attach_kprobe(b.load_func("do_entry", BPF.KPROBE), "vfs_read")
+BPF.attach_kretprobe(b.load_func("do_return", BPF.KPROBE), "vfs_read")
+dist = b.get_table("dist", c_int, c_ulonglong)
+dist_max = 64
+
+# header
+print("Tracing... Hit Ctrl-C to end.")
+last = {}
+for i in range(1, dist_max + 1):
+       last[i] = 0
+
+# functions
+stars_max = 38
+def stars(val, val_max, width):
+       i = 0
+       text = ""
+       while (1):
+               if (i > (width * val / val_max) - 1) or (i > width - 1):
+                       break
+               text += "*"
+               i += 1
+       if val > val_max:
+               text = text[:-1] + "+"
+       return text
+
+def print_log2_hist(d, val_type):
+       idx_max = -1
+       val_max = 0
+       for i in range(1, dist_max + 1):
+               try:
+                       val = dist[c_int(i)].value - last[i]
+                       if (val > 0):
+                               idx_max = i
+                       if (val > val_max):
+                               val_max = val
+               except:
+                       break
+       if idx_max > 0:
+               print("     %-15s : count     distribution" % val_type);
+       for i in range(1, idx_max + 1):
+               low = (1 << i) >> 1
+               high = (1 << i) - 1
+               if (low == high):
+                       low -= 1
+               try:
+                       val = dist[c_int(i)].value - last[i]
+                       print("%8d -> %-8d : %-8d |%-*s|" % (low, high, val,
+                           stars_max, stars(val, val_max, stars_max)))
+                       last[i] = dist[c_int(i)].value
+               except:
+                       break
+
+# output
+loop = 0
+do_exit = 0
+while (1):
+       if count > 0:
+               loop += 1
+               if loop > count:
+                       exit()
+       try:
+               sleep(interval)
+       except KeyboardInterrupt:
+               pass; do_exit = 1
+
+       print
+       print_log2_hist(dist, "usecs")
+       if do_exit:
+               exit()
diff --git a/examples/vfsreadlat_example.txt b/examples/vfsreadlat_example.txt

new file mode 100644 (file)

index 0000000..1d95f6a
--- /dev/null
+++ b/examples/vfsreadlat_example.txt
@@ -0,0 +1,63 @@
+Demonstrations of vfsreadlat.py, the Linux eBPF/bcc version.
+
+
+This example traces the latency of vfs_read (time from call to return), printing 
+it as a histogram distribution. By default, output is every five seconds:
+
+# ./vfsreadlat.py 
+Tracing... Hit Ctrl-C to end.
+
+     usecs           : count     distribution
+       0 -> 1        : 4457     |*************************************+|
+       2 -> 3        : 447      |***                                   |
+       4 -> 7        : 2059     |*****************                     |
+       8 -> 15       : 1179     |**********                            |
+      16 -> 31       : 63       |                                      |
+      32 -> 63       : 0        |                                      |
+      64 -> 127      : 2        |                                      |
+     128 -> 255      : 0        |                                      |
+     256 -> 511      : 3        |                                      |
+     512 -> 1023     : 1        |                                      |
+    1024 -> 2047     : 3        |                                      |
+    2048 -> 4095     : 2        |                                      |
+    4096 -> 8191     : 0        |                                      |
+    8192 -> 16383    : 0        |                                      |
+   16384 -> 32767    : 0        |                                      |
+   32768 -> 65535    : 0        |                                      |
+   65536 -> 131071   : 4        |                                      |
+  131072 -> 262143   : 2        |                                      |
+  262144 -> 524287   : 0        |                                      |
+  524288 -> 1048575  : 4        |                                      |
+^C
+     usecs           : count     distribution
+       0 -> 1        : 241      |*************************************+|
+       2 -> 3        : 17       |**                                    |
+       4 -> 7        : 2        |                                      |
+       8 -> 15       : 4        |                                      |
+      16 -> 31       : 2        |                                      |
+      32 -> 63       : 0        |                                      |
+      64 -> 127      : 1        |                                      |
+     128 -> 255      : 0        |                                      |
+     256 -> 511      : 1        |                                      |
+     512 -> 1023     : 1        |                                      |
+    1024 -> 2047     : 0        |                                      |
+    2048 -> 4095     : 1        |                                      |
+    4096 -> 8191     : 0        |                                      |
+    8192 -> 16383    : 0        |                                      |
+   16384 -> 32767    : 0        |                                      |
+   32768 -> 65535    : 0        |                                      |
+   65536 -> 131071   : 0        |                                      |
+  131072 -> 262143   : 0        |                                      |
+  262144 -> 524287   : 0        |                                      |
+  524288 -> 1048575  : 1        |                                      |
+
+These examples show outliers in the 524 - 1048 milliseconds range. Since
+vfs_read() will catch many types of events, this could be anything including
+keystroke latency on ssh sessions. Further drilling with bcc will be necessary
+to identify more details.
+
+
+Full usage:
+
+# ./vfsreadlat.py -h
+USAGE: ./vfsreadlat.py [interval [count]]
diff --git a/man/man8/pidpersec.8 b/man/man8/pidpersec.8

new file mode 100644 (file)

index 0000000..2164ffa
--- /dev/null
+++ b/man/man8/pidpersec.8
@@ -0,0 +1,41 @@
+.TH pidpersec 8  "2015-08-18" "USER COMMANDS"
+.SH NAME
+pidpersec \- Count new processes (via fork()). Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B pidpersec
+.SH DESCRIPTION
+pidpersec shows how many new processes were created each second. There
+can be performance issues caused by many short-lived processes, which may not
+be visible in sampling tools like top(1). pidpersec provides one way to
+investigate this behavior.
+
+This works by tracing the kernel sched_fork() function using dynamic tracing,
+and will need updating to match any changes to this function.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH EXAMPLES
+.TP
+Count new processes created each second:
+#
+.B pidpersec
+.SH OVERHEAD
+This traces the kernel fork function, and maintains an in-kernel count which is
+read asynchronously from user-space. As the rate of this is generally expected to
+be low (<< 1000/s), the overhead is also expected to be negligible.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+top(1)
diff --git a/man/man8/syncsnoop.8 b/man/man8/syncsnoop.8

new file mode 100644 (file)

index 0000000..eb641d6
--- /dev/null
+++ b/man/man8/syncsnoop.8
@@ -0,0 +1,49 @@
+.TH syncsnoop 8  "2015-08-18" "USER COMMANDS"
+.SH NAME
+syncsnoop \- Trace sync() syscall. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B syncsnoop
+.SH DESCRIPTION
+syncsnoop traces calls to sync(), which flushes file system buffers to
+storage devices. These calls can cause performance perturbations, and it can
+be useful to know if they are happening and how frequently.
+
+This works by tracing the kernel sys_sync() function using dynamic tracing, and
+will need updating to match any changes to this function.
+
+This program is also a basic example of eBPF/bcc.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH EXAMPLES
+.TP
+Trace calls to sync():
+#
+.B syncsnoop
+.SH FIELDS
+.TP
+TIME(s)
+Time of the call, in seconds.
+.TP
+CALL
+Call traced.
+.SH OVERHEAD
+This traces the kernel sync function and prints output for each event. As the
+rate of this is generally expected to be low (<< 100/s), the overhead is also
+expected to be negligible.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+iostat(1)
diff --git a/man/man8/vfscount.8 b/man/man8/vfscount.8

new file mode 100644 (file)

index 0000000..af5459b
--- /dev/null
+++ b/man/man8/vfscount.8
@@ -0,0 +1,52 @@
+.TH vfscount 8  "2015-08-18" "USER COMMANDS"
+.SH NAME
+vfscount \- Count some common VFS calls. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B vfscount
+.SH DESCRIPTION
+This counts common VFS calls. This can be useful for general workload
+characterization of these operations.
+
+This works by tracing some kernel vfs functions using dynamic tracing, and will
+need updating to match any changes to these functions. Edit the script to
+customize and add functions to trace, which is easy to do.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH EXAMPLES
+.TP
+Count some VFS calls until Ctrl-C is hit:
+#
+.B vfscount
+.SH FIELDS
+.TP
+ADDR
+Address of the instruction pointer that was traced (only useful if the FUNC column is suspicious and you would like to double check the translation).
+.TP
+FUNC
+Kernel function name
+.TP
+COUNT
+Number of calls while tracing
+.SH OVERHEAD
+This traces various kernel vfs functions and maintains in-kernel counts, which
+are asynchronously copied to user-space. While the rate of VFS operations can
+be very high (>1M/sec), this is a relatively efficient way to trace these
+events, and so the overhead is expected to be small for normal workloads.
+Measure in a test environment.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+vfsstat(8)
diff --git a/man/man8/vfsstat.8 b/man/man8/vfsstat.8

new file mode 100644 (file)

index 0000000..929e6b6
--- /dev/null
+++ b/man/man8/vfsstat.8
@@ -0,0 +1,65 @@
+.TH vfsstat 8  "2015-08-18" "USER COMMANDS"
+.SH NAME
+vfsstat \- Statistics for some common VFS calls. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B vfsstat
+[interval [count]]
+.SH DESCRIPTION
+This traces some common VFS calls and prints per-second summaries. This can
+be useful for general workload characterization, and looking for patterns
+in operation usage over time.
+
+This works by tracing some kernel vfs functions using dynamic tracing, and will
+need updating to match any changes to these functions. Edit the script to
+customize which functions are traced. Also see vfscount, which is more
+easily customized to trace multiple functions.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH EXAMPLES
+.TP
+Print summaries each second:
+#
+.B vfsstat
+.TP
+Print output every five seconds, three times:
+#
+.B vfsstat 5 3
+.SH FIELDS
+.TP
+READ/s
+Number of vfs_read() calls as a per-second average.
+.TP
+WRITE/s
+Number of vfs_write() calls as a per-second average.
+.TP
+CREATE/s
+Number of vfs_create() calls as a per-second average.
+.TP
+OPEN/s
+Number of vfs_open() calls as a per-second average.
+.TP
+FSYNC/s
+Number of vfs_fsync() calls as a per-second average.
+.SH OVERHEAD
+This traces various kernel vfs functions and maintains in-kernel counts, which
+are asynchronously copied to user-space. While the rate of VFS operations can
+be very high (>1M/sec), this is a relatively efficient way to trace these
+events, and so the overhead is expected to be small for normal workloads.
+Measure in a test environment.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+vfscount(8)
diff --git a/tools/pidpersec b/tools/pidpersec

new file mode 100755 (executable)

index 0000000..b84fbe6
--- /dev/null
+++ b/tools/pidpersec
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+#
+# pidpersec    Count new processes (via fork).
+#              For Linux, uses BCC, eBPF. See .c file.
+#
+# USAGE: pidpersec
+#
+# Written as a basic example of counting an event.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 11-Aug-2015  Brendan Gregg   Created this.
+
+from bpf import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep, strftime
+
+# load BPF program
+b = BPF(src_file = "pidpersec.c")
+BPF.attach_kprobe(b.load_func("do_count", BPF.KPROBE), "sched_fork")
+stats = b.get_table("stats", c_int, c_ulonglong)
+
+# stat indexes
+S_COUNT = 1
+
+# header
+print "Tracing... Ctrl-C to end."
+
+# output
+last = 0
+while (1):
+       try:
+               sleep(1)
+       except KeyboardInterrupt:
+               pass; exit()
+
+       print "%s: PIDs/sec: %d" % (strftime("%H:%M:%S"),
+           (stats[c_int(S_COUNT)].value - last))
+       last = stats[c_int(S_COUNT)].value
diff --git a/tools/pidpersec.c b/tools/pidpersec.c

new file mode 100644 (file)

index 0000000..512d6ca
--- /dev/null
+++ b/tools/pidpersec.c
@@ -0,0 +1,29 @@
+/*
+ * pidpersec.c Count new processes (via fork).
+ *             For Linux, uses BCC, eBPF. See the Python front-end.
+ *
+ * USAGE: pidpersec.py
+ *
+ * Copyright (c) 2015 Brendan Gregg.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * 11-Aug-2015 Brendan Gregg   Created this.
+ */
+
+#include <uapi/linux/ptrace.h>
+
+enum stat_types {
+       S_COUNT = 1,
+       S_MAXSTAT
+};
+
+BPF_TABLE("array", int, u64, stats, S_MAXSTAT + 1);
+
+void stats_increment(int key) {
+       u64 *leaf = stats.lookup(&key);
+       if (leaf) (*leaf)++;
+}
+
+void do_count(struct pt_regs *ctx) { stats_increment(S_COUNT); }
diff --git a/tools/pidpersec_example.txt b/tools/pidpersec_example.txt

new file mode 100644 (file)

index 0000000..b274dd4
--- /dev/null
+++ b/tools/pidpersec_example.txt
@@ -0,0 +1,22 @@
+Demonstrations of pidpersec, the Linux eBPF/bcc version.
+
+
+This shows the number of new processes created per second, measured by tracing
+the kernel fork() routine:
+
+# ./pidpersec
+Tracing... Ctrl-C to end.
+18:33:06: PIDs/sec: 4
+18:33:07: PIDs/sec: 5
+18:33:08: PIDs/sec: 4
+18:33:09: PIDs/sec: 4
+18:33:10: PIDs/sec: 21
+18:33:11: PIDs/sec: 5
+18:33:12: PIDs/sec: 4
+18:33:13: PIDs/sec: 4
+
+Each second there are four new processes (this happens to be caused by a
+launcher script that is retrying in a loop, and encountering errors).
+
+At 18:33:10, I typed "man ls" in another server session, which caused an
+increase in the number of new processes as the necessary commands were run.
diff --git a/tools/syncsnoop b/tools/syncsnoop

new file mode 100755 (executable)

index 0000000..bc2447d
--- /dev/null
+++ b/tools/syncsnoop
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+#
+# syncsnoop    Trace sync() syscall.
+#              For Linux, uses BCC, eBPF. Embedded C.
+#
+# Written as a basic example of BCC trace & reformat. See
+# examples/hello_world.py for a BCC trace with default output example.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 13-Aug-2015  Brendan Gregg   Created this.
+
+from bpf import BPF
+import sys
+
+# load BPF program
+b = BPF(text = """
+int do_sync(void *ctx) {
+       bpf_trace_printk("sync()\\n");
+       return 0;
+};
+""")
+BPF.attach_kprobe(b.load_func("do_sync", BPF.KPROBE), "sys_sync")
+
+# header
+print "%-18s %s" % ("TIME(s)", "CALL")
+
+# open trace pipe
+try:
+       trace = open("/sys/kernel/debug/tracing/trace_pipe", "r")
+except:
+       print >> sys.stderr, "ERROR: opening trace_pipe"
+       exit(1)
+
+# format output
+while 1:
+       try:
+               line = trace.readline().rstrip()
+       except KeyboardInterrupt:
+               pass; exit()
+
+       prolog, time_s, colon, word = line.rsplit(" ", 3)
+       time_s = time_s[:-1]    # strip trailing ":"
+
+       print "%-18s %s" % (time_s, word)
diff --git a/tools/syncsnoop_example.txt b/tools/syncsnoop_example.txt

new file mode 100644 (file)

index 0000000..56cc21e
--- /dev/null
+++ b/tools/syncsnoop_example.txt
@@ -0,0 +1,14 @@
+Demonstrations of syncsnoop, the Linux eBPF/bcc version.
+
+
+This program traces calls to the kernel sync() routine, with basic timestamps:
+
+# ./syncsnoop 
+TIME(s)            CALL
+16458148.611952    sync()
+16458151.533709    sync()
+^C
+
+While tracing, the "sync" command was executed in another server session.
+
+This can be useful to identify that sync() is being called, and its frequency.
diff --git a/tools/vfscount b/tools/vfscount

new file mode 100755 (executable)

index 0000000..60adcb3
--- /dev/null
+++ b/tools/vfscount
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+#
+# vfscount     Count some VFS calls.
+#              For Linux, uses BCC, eBPF. See .c file.
+#
+# Written as a basic example of counting functions.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 14-Aug-2015  Brendan Gregg   Created this.
+
+from bpf import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep, strftime
+from sys import stderr
+
+# kernel symbol translation
+ksym_addrs = []                        # addresses for binary search
+ksym_names = []                        # same index as ksym_addrs
+def load_kallsyms():
+       symfile = "/proc/kallsyms"
+       try:
+               syms = open(symfile, "r")
+       except:
+               print >> stderr, "ERROR: reading " + symfile
+               exit()
+       line = syms.readline()
+       for line in iter(syms):
+               cols = line.split()
+               name = cols[2]
+               if name[:4] != "vfs_":  # perf optimization
+                       continue
+               addr = int(cols[0], 16)
+               ksym_addrs.append(addr)
+               ksym_names.append(name)
+       syms.close()
+def ksym(addr):
+       start = -1
+       end = len(ksym_addrs)
+       while end != start + 1:
+               mid = (start + end) / 2
+               if addr < ksym_addrs[mid]:
+                       end = mid
+               else:
+                       start = mid
+       if start == -1:
+               return "[unknown]"
+       return ksym_names[start]
+load_kallsyms()
+
+# load BPF program
+b = BPF(src_file = "vfscount.c")
+fn = b.load_func("do_count", BPF.KPROBE)
+BPF.attach_kprobe(fn, "vfs_read")
+BPF.attach_kprobe(fn, "vfs_write")
+BPF.attach_kprobe(fn, "vfs_fsync")
+BPF.attach_kprobe(fn, "vfs_open")
+BPF.attach_kprobe(fn, "vfs_create")
+counts = b.get_table("counts")
+
+# header
+print "Tracing... Ctrl-C to end."
+
+# output
+try:
+       sleep(99999999)
+except KeyboardInterrupt:
+       pass
+
+print "\n%-16s %-12s %8s" % ("ADDR", "FUNC", "COUNT")
+for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
+       print "%-16x %-12s %8d" % (k.ip, ksym(k.ip), v.value)
diff --git a/tools/vfscount.c b/tools/vfscount.c

new file mode 100644 (file)

index 0000000..1920c25
--- /dev/null
+++ b/tools/vfscount.c
@@ -0,0 +1,30 @@
+/*
+ * vfscount.c  Count some VFS calls.
+ *             For Linux, uses BCC, eBPF. See the Python front-end.
+ *
+ * USAGE: vfscount.py
+ *
+ * Copyright (c) 2015 Brendan Gregg.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * 14-Aug-2015 Brendan Gregg   Created this.
+ */
+
+#include <uapi/linux/ptrace.h>
+
+struct key_t {
+       u64 ip;
+};
+
+BPF_TABLE("hash", struct key_t, u64, counts, 256);
+
+int do_count(struct pt_regs *ctx) {
+       struct key_t key = {};
+       u64 zero = 0, *val;
+       key.ip = ctx->ip;
+       val = counts.lookup_or_init(&key, &zero);
+       (*val)++;
+       return 0;
+}
diff --git a/tools/vfscount_example.txt b/tools/vfscount_example.txt

new file mode 100644 (file)

index 0000000..fc5865e
--- /dev/null
+++ b/tools/vfscount_example.txt
@@ -0,0 +1,17 @@
+Demonstrations of vfscount, the Linux eBPF/bcc version.
+
+
+This counts VFS calls, by tracing various kernel calls beginning with "vfs_"
+(edit the script to customize):
+
+# ./vfscount 
+Tracing... Ctrl-C to end.
+^C
+ADDR             FUNC            COUNT
+ffffffff811f2cc1 vfs_create         24
+ffffffff811e71c1 vfs_write         203
+ffffffff811e6061 vfs_open          765
+ffffffff811e7091 vfs_read         1852
+
+This can be useful for workload characterization, to see what types of
+operations are in use.
diff --git a/tools/vfsstat b/tools/vfsstat

new file mode 100755 (executable)

index 0000000..3c5ee99
--- /dev/null
+++ b/tools/vfsstat
@@ -0,0 +1,87 @@
+#!/usr/bin/python
+#
+# vfsstat      Count some VFS calls.
+#              For Linux, uses BCC, eBPF. See .c file.
+#
+# Written as a basic example of counting multiple events as a stat tool.
+#
+# USAGE: vfsstat [interval [count]]
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 14-Aug-2015  Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bpf import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep, strftime
+from sys import argv
+
+def usage():
+       print("USAGE: %s [interval [count]]" % argv[0])
+       exit()
+
+# arguments
+interval = 1
+count = -1
+if len(argv) > 1:
+       try:
+               interval = int(argv[1])
+               if interval == 0:
+                       raise
+               if len(argv) > 2:
+                       count = int(argv[2])
+       except: # also catches -h, --help
+               usage()
+
+# load BPF program
+b = BPF(src_file = "vfsstat.c")
+BPF.attach_kprobe(b.load_func("do_read", BPF.KPROBE), "vfs_read")
+BPF.attach_kprobe(b.load_func("do_write", BPF.KPROBE), "vfs_write")
+BPF.attach_kprobe(b.load_func("do_fsync", BPF.KPROBE), "vfs_fsync")
+BPF.attach_kprobe(b.load_func("do_open", BPF.KPROBE), "vfs_open")
+BPF.attach_kprobe(b.load_func("do_create", BPF.KPROBE), "vfs_create")
+stats = b.get_table("stats", c_int, c_ulonglong)
+
+# stat column labels and indexes
+stat_types = {
+       "READ" : 1,
+       "WRITE" : 2,
+       "FSYNC" : 3,
+       "OPEN" : 4,
+       "CREATE" : 5
+}
+
+# header
+print("%-8s  " % "TIME", end="")
+last = {}
+for stype in stat_types.keys():
+       print(" %8s" % (stype + "/s"), end="")
+       idx = stat_types[stype]
+       last[idx] = 0
+print("")
+
+# output
+i = 0
+while (1):
+       if count > 0:
+               i += 1
+               if i > count:
+                       exit()
+       try:
+               sleep(interval)
+       except KeyboardInterrupt:
+               pass; exit()
+
+       print("%-8s: " % strftime("%H:%M:%S"), end="")
+       # print each statistic as a column
+       for stype in stat_types.keys():
+               idx = stat_types[stype]
+               try:
+                       delta = stats[c_int(idx)].value - last[idx]
+                       print(" %8d" % (delta / interval), end="")
+                       last[idx] = stats[c_int(idx)].value
+               except:
+                       print(" %8d" % 0, end="")
+       print("")
diff --git a/tools/vfsstat.c b/tools/vfsstat.c

new file mode 100644 (file)

index 0000000..d72047e
--- /dev/null
+++ b/tools/vfsstat.c
@@ -0,0 +1,37 @@
+/*
+ * vfsstat.c   Count some VFS calls.
+ *             For Linux, uses BCC, eBPF. See the Python front-end.
+ *
+ * USAGE: vfsstat.py [interval [count]]
+ *
+ * Copyright (c) 2015 Brendan Gregg.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * 14-Aug-2015 Brendan Gregg   Created this.
+ */
+
+#include <uapi/linux/ptrace.h>
+
+enum stat_types {
+       S_READ = 1,
+       S_WRITE,
+       S_FSYNC,
+       S_OPEN,
+       S_CREATE,
+       S_MAXSTAT
+};
+
+BPF_TABLE("array", int, u64, stats, S_MAXSTAT + 1);
+
+void stats_increment(int key) {
+       u64 *leaf = stats.lookup(&key);
+       if (leaf) (*leaf)++;
+}
+
+void do_read(struct pt_regs *ctx) { stats_increment(S_READ); }
+void do_write(struct pt_regs *ctx) { stats_increment(S_WRITE); }
+void do_fsync(struct pt_regs *ctx) { stats_increment(S_FSYNC); }
+void do_open(struct pt_regs *ctx) { stats_increment(S_OPEN); }
+void do_create(struct pt_regs *ctx) { stats_increment(S_CREATE); }
diff --git a/tools/vfsstat_example.txt b/tools/vfsstat_example.txt

new file mode 100644 (file)

index 0000000..48ef514
--- /dev/null
+++ b/tools/vfsstat_example.txt
@@ -0,0 +1,36 @@
+Demonstrations of vfsstat, the Linux eBPF/bcc version.
+
+
+This traces some common VFS calls and prints per-second summaries. By default,
+the output interval is one second:
+
+# ./vfsstat 
+TIME         READ/s  WRITE/s CREATE/s   OPEN/s  FSYNC/s
+18:35:32:       231       12        4       98        0
+18:35:33:       274       13        4      106        0
+18:35:34:       586       86        4      251        0
+18:35:35:       241       15        4       99        0
+18:35:36:       232       10        4       98        0
+18:35:37:       244       10        4      107        0
+18:35:38:       235       13        4       97        0
+18:35:39:      6749     2633        4     1446        0
+18:35:40:       277       31        4      115        0
+18:35:41:       238       16        6      102        0
+18:35:42:       284       50        8      114        0
+^C
+
+
+Here we are using an output interval of five seconds, and printing three output
+lines:
+
+# ./vfsstat 5 3
+TIME         READ/s  WRITE/s CREATE/s   OPEN/s  FSYNC/s
+18:35:55:       238        8        3      101        0
+18:36:00:       962      233        4      247        0
+18:36:05:       241        8        3      100        0
+
+
+Full usage:
+
+# ./vfsstat -h
+USAGE: ./vfsstat [interval [count]]
author	Brendan Gregg <brendan.d.gregg@gmail.com>
	Tue, 18 Aug 2015 21:56:14 +0000 (14:56 -0700)
committer	Brendan Gregg <brendan.d.gregg@gmail.com>
	Tue, 18 Aug 2015 21:56:14 +0000 (14:56 -0700)
examples/bitehist.c	[new file with mode: 0644]	patch \| blob
examples/bitehist.py	[new file with mode: 0755]	patch \| blob
examples/bitehist_example.txt	[new file with mode: 0644]	patch \| blob
examples/disksnoop.c	[new file with mode: 0644]	patch \| blob
examples/disksnoop.py	[new file with mode: 0755]	patch \| blob
examples/disksnoop_example.txt	[new file with mode: 0644]	patch \| blob
examples/vfsreadlat.c	[new file with mode: 0644]	patch \| blob
examples/vfsreadlat.py	[new file with mode: 0755]	patch \| blob
examples/vfsreadlat_example.txt	[new file with mode: 0644]	patch \| blob
man/man8/pidpersec.8	[new file with mode: 0644]	patch \| blob
man/man8/syncsnoop.8	[new file with mode: 0644]	patch \| blob
man/man8/vfscount.8	[new file with mode: 0644]	patch \| blob
man/man8/vfsstat.8	[new file with mode: 0644]	patch \| blob
tools/pidpersec	[new file with mode: 0755]	patch \| blob
tools/pidpersec.c	[new file with mode: 0644]	patch \| blob
tools/pidpersec_example.txt	[new file with mode: 0644]	patch \| blob
tools/syncsnoop	[new file with mode: 0755]	patch \| blob
tools/syncsnoop_example.txt	[new file with mode: 0644]	patch \| blob
tools/vfscount	[new file with mode: 0755]	patch \| blob
tools/vfscount.c	[new file with mode: 0644]	patch \| blob
tools/vfscount_example.txt	[new file with mode: 0644]	patch \| blob
tools/vfsstat	[new file with mode: 0755]	patch \| blob
tools/vfsstat.c	[new file with mode: 0644]	patch \| blob
tools/vfsstat_example.txt	[new file with mode: 0644]	patch \| blob