--- /dev/null
+.TH virtiostat 8 "2021-02-15" "USER COMMANDS"
+.SH NAME
+virtiostat \- Trace VIRTIO devices input/output statistics. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B virtiostat [\-h] [\-T] [\-D] [INTERVAL] [COUNT]
+.SH DESCRIPTION
+This tool traces VIRTIO devices input/output statistics. It works in lower
+layer of VIRTIO base driver, so it could trace all the devices of VIRTIO
+family. For example, we can't get IO statistics of 9p-fs in a guest virtual
+machine by iostat command, but we can analyze IO statistics by virtiostat.
+The outputing result shows In/Out SGs(scatter list operation) to represent
+positive correlation IOPS, and In/Out BW to represent throughput.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-T
+Include a time column on output (HH:MM:SS).
+.TP
+\-D
+Show debug infomation of bpf text.
+.TP
+INTERVAL
+Print output every interval seconds.
+.TP
+COUNT
+Total count of trace in seconds.
+.SH EXAMPLES
+.TP
+Trace virtio device statistics and print 1 second summaries, 10 times:
+#
+.B virtiostat 1 10
+.SH OVERHEAD
+This traces the kernel virtqueue_add_sgs, virtqueue_add_outbuf,
+virtqueue_add_inbuf, virtqueue_add_inbuf_ctx functions.
+The rate of this depends on all the VIRTIO devices IOPS.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+zhenwei pi
+.SH SEE ALSO
+iostat(1), iftop(8), funccount(8)
--- /dev/null
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# virtiostat Show virtio devices input/output statistics.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: virtiostat [-h] [-T] [-D] [INTERVAL] [COUNT]
+#
+# Copyright (c) 2021 zhenwei pi
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 13-Feb-2021 zhenwei pi Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+
+# arguments
+examples = """examples:
+ ./virtiostat # print 3(default) second summaries
+ ./virtiostat 1 10 # print 1 second summaries, 10 times
+ ./virtiostat -T # show timestamps
+ ./virtiostat -D # show debug bpf text
+"""
+parser = argparse.ArgumentParser(
+ description="Show virtio devices input/output statistics",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("interval", nargs="?", default=3,
+ help="output interval, in seconds")
+parser.add_argument("count", nargs="?", default=99999999,
+ help="number of outputs")
+parser.add_argument("-T", "--timestamp", action="store_true",
+ help="show timestamp on output")
+parser.add_argument("-D", "--debug", action="store_true",
+ help="print BPF program before starting (for debugging purposes)")
+parser.add_argument("--ebpf", action="store_true",
+ help=argparse.SUPPRESS)
+args = parser.parse_args()
+
+# define BPF program
+bpf_text = """
+#ifndef KBUILD_MODNAME
+#define KBUILD_MODNAME "bcc"
+#endif
+#include <linux/virtio.h>
+#include <bcc/proto.h>
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS 6
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX 128
+
+typedef struct virtio_stat {
+ char driver[16];
+ char dev[12];
+ char vqname[12];
+ u32 in_sgs;
+ u32 out_sgs;
+ u64 in_bw;
+ u64 out_bw;
+} virtio_stat_t;
+
+BPF_HASH(stats, u64, virtio_stat_t);
+
+static struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+ struct scatterlist sg;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_last(&sg))
+ return NULL;
+
+ sgp++;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (unlikely(sg_is_chain(&sg)))
+ sgp = sg_chain_ptr(&sg);
+
+ return sgp;
+}
+
+static u64 count_len(struct scatterlist **sgs, unsigned int num)
+{
+ u64 length = 0;
+ unsigned int i, n;
+ struct scatterlist *sgp = NULL;
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ for (n = 0, sgp = sgs[i]; sgp && (n < SG_MAX); sgp = __sg_next(sgp)) {
+ length += sgp->length;
+ n++;
+ }
+
+ /* Suggested by Yonghong Song:
+ * IndVarSimplifyPass with clang 12 may cause verifier failure:
+ * ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) { // Line 60
+ * 90: 15 08 15 00 00 00 00 00 if r8 == 0 goto +21
+ * 91: bf 81 00 00 00 00 00 00 r1 = r8
+ * 92: 07 01 00 00 ff ff ff ff r1 += -1
+ * 93: 67 01 00 00 20 00 00 00 r1 <<= 32
+ * 94: 77 01 00 00 20 00 00 00 r1 >>= 32
+ * 95: b7 02 00 00 05 00 00 00 r2 = 5
+ * 96: 2d 12 01 00 00 00 00 00 if r2 > r1 goto +1
+ * 97: b7 08 00 00 06 00 00 00 r8 = 6
+ * 98: b7 02 00 00 00 00 00 00 r2 = 0
+ * 99: b7 09 00 00 00 00 00 00 r9 = 0
+ * 100: 7b 8a 68 ff 00 00 00 00 *(u64 *)(r10 - 152) = r8
+ * 101: 05 00 35 00 00 00 00 00 goto +53
+ * Note that r1 is refined by r8 is saved to stack for later use.
+ * This will give verifier u64_max loop bound and eventually cause
+ * verification failure. Workaround with the below asm code.
+ */
+#if __clang_major__ >= 7
+ asm volatile("" : "=r"(i) : "0"(i));
+#endif
+ }
+
+ return length;
+}
+
+static void record(struct virtqueue *vq, struct scatterlist **sgs,
+ unsigned int out_sgs, unsigned int in_sgs)
+{
+ virtio_stat_t newvs = {0};
+ virtio_stat_t *vs;
+ u64 key = (u64)vq;
+ u64 in_bw = 0;
+
+ /* Workaround: separate two count_len() calls, one here and the
+ * other below. Otherwise, compiler may generate some spills which
+ * harms verifier pruning. This happens in llvm12, but not llvm4.
+ * Below code works on both cases.
+ */
+ if (in_sgs)
+ in_bw = count_len(sgs + out_sgs, in_sgs);
+
+ vs = stats.lookup(&key);
+ if (!vs) {
+ bpf_probe_read_kernel(newvs.driver, sizeof(newvs.driver), vq->vdev->dev.driver->name);
+ bpf_probe_read_kernel(newvs.dev, sizeof(newvs.dev), vq->vdev->dev.kobj.name);
+ bpf_probe_read_kernel(newvs.vqname, sizeof(newvs.vqname), vq->name);
+ newvs.out_sgs = out_sgs;
+ newvs.in_sgs = in_sgs;
+ if (out_sgs)
+ newvs.out_bw = count_len(sgs, out_sgs);
+ newvs.in_bw = in_bw;
+ stats.update(&key, &newvs);
+ } else {
+ vs->out_sgs += out_sgs;
+ vs->in_sgs += in_sgs;
+ if (out_sgs)
+ vs->out_bw += count_len(sgs, out_sgs);
+ vs->in_bw += in_bw;
+ }
+}
+
+int trace_virtqueue_add_sgs(struct pt_regs *ctx, struct virtqueue *vq,
+ struct scatterlist **sgs, unsigned int out_sgs,
+ unsigned int in_sgs, void *data, gfp_t gfp)
+
+{
+ record(vq, sgs, out_sgs, in_sgs);
+
+ return 0;
+}
+
+int trace_virtqueue_add_outbuf(struct pt_regs *ctx, struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data, gfp_t gfp)
+{
+ record(vq, &sg, 1, 0);
+
+ return 0;
+}
+
+int trace_virtqueue_add_inbuf(struct pt_regs *ctx, struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data, gfp_t gfp)
+{
+ record(vq, &sg, 0, 1);
+
+ return 0;
+}
+
+int trace_virtqueue_add_inbuf_ctx(struct pt_regs *ctx, struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data, void *_ctx, gfp_t gfp)
+{
+ record(vq, &sg, 0, 1);
+
+ return 0;
+}
+"""
+
+# debug mode: print bpf text
+if args.debug:
+ print(bpf_text)
+
+# dump mode: print bpf text and exit
+if args.ebpf:
+ print(bpf_text)
+ exit()
+
+# load BPF program
+b = BPF(text=bpf_text)
+b.attach_kprobe(event="virtqueue_add_sgs", fn_name="trace_virtqueue_add_sgs")
+b.attach_kprobe(event="virtqueue_add_outbuf", fn_name="trace_virtqueue_add_outbuf")
+b.attach_kprobe(event="virtqueue_add_inbuf", fn_name="trace_virtqueue_add_inbuf")
+b.attach_kprobe(event="virtqueue_add_inbuf_ctx", fn_name="trace_virtqueue_add_inbuf_ctx")
+
+print("Tracing virtio devices statistics ... Hit Ctrl-C to end.")
+
+# start main loop
+exiting = 0 if args.interval else 1
+seconds = 0
+while (1):
+ try:
+ sleep(int(args.interval))
+ seconds = seconds + int(args.interval)
+ except KeyboardInterrupt:
+ exiting = 1
+
+ if args.timestamp:
+ print("%-8s\n" % strftime("%H:%M:%S"), end="")
+ else:
+ print("--------", end="\n")
+
+ print("%14s %8s %10s %7s %7s %14s %14s" % ("Driver", "Device", "VQ Name", "In SGs", "Out SGs", "In BW", "Out BW"))
+ stats = b.get_table("stats")
+ for k, v in sorted(stats.items(), key=lambda vs: vs[1].dev):
+ print("%14s %8s %10s %7d %7d %14d %14d" % (v.driver, v.dev, v.vqname, v.in_sgs, v.out_sgs, v.in_bw, v.out_bw))
+
+ stats.clear()
+
+ if exiting or seconds >= int(args.count):
+ exit()
--- /dev/null
+Demonstrations of virtiostat, the Linux eBPF/bcc version.
+
+
+This program traces virtio devices to analyze the IO operations and
+throughput. For example, guest side mounts a 9p fs, and we can't get
+io statistics by `iostat` command any more. In this scenario, we can
+only get statistics from VIRTIO layer instead of block layer.
+
+Example
+#./virtiostat -T
+Tracing virtio devices statistics ... Hit Ctrl-C to end.
+14:48:30
+ Driver Device VQ Name In SGs Out SGs In BW Out BW
+ virtio_net virtio0 input.0 260669 0 406743040 0
+ virtio_net virtio0 output.0 0 9873 0 833344
+ virtio_blk virtio4 req.0 28 46 448 278976
+ 9pnet_virtio virtio6 requests 99083 99354 1883687 137537263
+14:48:33
+ Driver Device VQ Name In SGs Out SGs In BW Out BW
+ virtio_net virtio0 input.0 260718 0 406819328 0
+ virtio_net virtio0 output.0 0 7139 0 562355
+ virtio_blk virtio4 req.0 11 18 176 110768
+ 9pnet_virtio virtio6 requests 91520 91141 1737364 125320785
+
+
+Full USAGE:
+
+#./virtiostat -h
+usage: virtiostat.py [-h] [-T] [-D] [interval] [count]
+
+Show virtio devices input/output statistics
+
+positional arguments:
+ interval output interval, in seconds
+ count number of outputs
+
+optional arguments:
+ -h, --help show this help message and exit
+ -T, --timestamp show timestamp on output
+ -D, --debug print BPF program before starting (for debugging purposes)
+
+examples:
+ ./virtiostat # print 3(default) second summaries
+ ./virtiostat 1 10 # print 1 second summaries, 10 times
+ ./virtiostat -T # show timestamps
+ ./virtiostat -D # show debug bpf text