add runqlen tool
authorBrendan Gregg <brendan.d.gregg@gmail.com>
Wed, 14 Dec 2016 20:10:59 +0000 (12:10 -0800)
committerBrendan Gregg <brendan.d.gregg@gmail.com>
Wed, 14 Dec 2016 20:10:59 +0000 (12:10 -0800)
README.md
man/man8/runqlat.8
man/man8/runqlen.8 [new file with mode: 0644]
tools/runqlen.py [new file with mode: 0755]
tools/runqlen_example.txt [new file with mode: 0644]

index fb35f5d..719ee12 100644 (file)
--- a/README.md
+++ b/README.md
@@ -112,6 +112,7 @@ Examples:
 - tools/[profile](tools/profile.py): Profile CPU usage by sampling stack traces at a timed interval. [Examples](tools/profile_example.txt).
 - tools/[reset-trace](tools/reset-trace.sh): Reset the state of tracing. Maintenance tool only. [Examples](tools/reset-trace_example.txt).
 - tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt).
+- tools/[runqlen](tools/runqlen.py): Run queue length as a histogram. [Examples](tools/runqlen_example.txt).
 - tools/[softirqs](tools/softirqs.py):  Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt).
 - tools/[solisten](tools/solisten.py): Trace TCP socket listen. [Examples](tools/solisten_example.txt).
 - tools/[sslsniff](tools/sslsniff.py): Sniff OpenSSL written and readed data. [Examples](tools/sslsniff_example.txt).
index 212479a..9cc6176 100644 (file)
@@ -102,4 +102,4 @@ Unstable - in development.
 .SH AUTHOR
 Brendan Gregg
 .SH SEE ALSO
-pidstat(1)
+runqlen(8), pidstat(1)
diff --git a/man/man8/runqlen.8 b/man/man8/runqlen.8
new file mode 100644 (file)
index 0000000..1cc2789
--- /dev/null
@@ -0,0 +1,86 @@
+.TH runqlen 8  "2016-12-12" "USER COMMANDS"
+.SH NAME
+runqlen \- Scheduler run queue length as a histogram.
+.SH SYNOPSIS
+.B runqlen [\-h] [\-T] [\-O] [\-C] [interval] [count]
+.SH DESCRIPTION
+This program summarizes scheduler queue length as a histogram, and can also
+show run queue occupancy. It works by sampling the run queue length on all
+CPUs at 99 Hertz.
+
+This tool can be used to identify imbalances, eg, when processes are bound
+to CPUs causing queueing, or interrupt mappings causing the same.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-T
+Include timestamps on output.
+.TP
+\-O
+Report run queue occupancy.
+.TP
+\-C
+Report for each CPU.
+.TP
+interval
+Output interval, in seconds.
+.TP
+count
+Number of outputs.
+.SH EXAMPLES
+.TP
+Summarize run queue length as a histogram:
+#
+.B runqlen
+.TP
+Print 1 second summaries, 10 times:
+#
+.B runqlen 1 10
+.TP
+Print output every second, with timestamps, and show each CPU separately:
+#
+.B runqlen \-CT 1
+.TP
+Print run queue occupancy every second:
+#
+.B runqlen \-O 1
+.TP
+Print run queue occupancy, with timetamps, for each CPU:
+#
+.B runqlen \-COT 1
+.SH FIELDS
+.TP
+runqlen
+Scheduler run queue length: the number of threads (tasks) waiting to run,
+(excluding including the currently running task).
+.TP
+count
+Number of samples at this queue length.
+.TP
+distribution
+An ASCII bar chart to visualize the distribution (count column)
+.SH OVERHEAD
+This uses sampling at 99 Hertz (on all CPUs), and in-kernel summaries, which
+should make overhead negligible. This does not trace scheduler events, like
+runqlen does, which comes at a much higher overhead cost.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+runqlat(8), pidstat(1)
diff --git a/tools/runqlen.py b/tools/runqlen.py
new file mode 100755 (executable)
index 0000000..204ce19
--- /dev/null
@@ -0,0 +1,185 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# runqlen    Summarize scheduler run queue length as a histogram.
+#            For Linux, uses BCC, eBPF.
+#
+# This counts the length of the run queue, excluding the currently running
+# thread, and shows it as a histogram.
+#
+# Also answers run queue occupancy.
+#
+# USAGE: runqlen [-h] [-T] [-Q] [-m] [-D] [interval] [count]
+#
+# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
+# a version of this tool that may work on Linux 4.6 - 4.8.
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 12-Dec-2016   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF, PerfType, PerfSWConfig
+from time import sleep, strftime
+import argparse
+
+# arguments
+examples = """examples:
+    ./runqlen            # summarize run queue length as a histogram
+    ./runqlen 1 10       # print 1 second summaries, 10 times
+    ./runqlen -T 1       # 1s summaries and timestamps
+    ./runqlen -O         # report run queue occupancy
+    ./runqlen -C         # show each CPU separately
+"""
+parser = argparse.ArgumentParser(
+    description="Summarize scheduler run queue length as a histogram",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-T", "--timestamp", action="store_true",
+    help="include timestamp on output")
+parser.add_argument("-O", "--runqocc", action="store_true",
+    help="report run queue occupancy")
+parser.add_argument("-C", "--cpus", action="store_true",
+    help="print output for each CPU separately")
+parser.add_argument("interval", nargs="?", default=99999999,
+    help="output interval, in seconds")
+parser.add_argument("count", nargs="?", default=99999999,
+    help="number of outputs")
+args = parser.parse_args()
+countdown = int(args.count)
+debug = 0
+frequency = 99
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+// Declare enough of cfs_rq to find nr_running, since we can't #import the
+// header. This will need maintenance. It is from kernel/sched/sched.h:
+struct cfs_rq_partial {
+    struct load_weight load;
+    unsigned int nr_running, h_nr_running;
+};
+
+typedef struct cpu_key {
+    int cpu;
+    unsigned int slot;
+} cpu_key_t;
+STORAGE
+
+int do_perf_event()
+{
+    unsigned int len = 0;
+    pid_t pid = 0;
+    struct task_struct *task = NULL;
+    struct cfs_rq_partial *my_q = NULL;
+
+    // Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
+    // unstable interface and may need maintenance. Perhaps a future version
+    // of BPF will support task_rq(p) or something similar as a more reliable
+    // interface.
+    task = (struct task_struct *)bpf_get_current_task();
+    bpf_probe_read(&my_q, sizeof(my_q), &task->se.cfs_rq);
+    bpf_probe_read(&len, sizeof(len), &my_q->nr_running);
+
+    // Decrement idle thread by dropping the run queue by one. We could do
+    // this other ways if needed, like matching on task->pid.
+    if (len > 0)
+        len--;
+
+    STORE
+
+    return 0;
+}
+"""
+
+if args.cpus:
+    bpf_text = bpf_text.replace('STORAGE',
+        'BPF_HISTOGRAM(dist, cpu_key_t);')
+    bpf_text = bpf_text.replace('STORE', 'cpu_key_t key = {.slot = len}; ' +
+        'bpf_probe_read(&key.cpu, sizeof(key.cpu), &task->wake_cpu);' +
+        'dist.increment(key);')
+else:
+    bpf_text = bpf_text.replace('STORAGE',
+        'BPF_HISTOGRAM(dist, unsigned int);')
+    bpf_text = bpf_text.replace('STORE', 'dist.increment(len);')
+
+# code substitutions
+if debug:
+    print(bpf_text)
+
+# load BPF program
+b = BPF(text=bpf_text)
+# initialize BPF & perf_events
+b = BPF(text=bpf_text)
+b.attach_perf_event(ev_type=PerfType.SOFTWARE,
+    ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
+    sample_period=0, sample_freq=frequency)
+
+print("Sampling run queue length... Hit Ctrl-C to end.")
+
+# output
+exiting = 0 if args.interval else 1
+dist = b.get_table("dist")
+while (1):
+    try:
+        sleep(int(args.interval))
+    except KeyboardInterrupt:
+        exiting = 1
+
+    print()
+    if args.timestamp:
+        print("%-8s\n" % strftime("%H:%M:%S"), end="")
+
+    if args.runqocc:
+        if args.cpus:
+            # run queue occupancy, per-CPU summary
+            idle = {}
+            queued = {}
+            cpumax = 0
+            for k, v in dist.items():
+                if k.cpu > cpumax:
+                    cpumax = k.cpu
+            for c in range(0, cpumax + 1):
+                idle[c] = 0
+                queued[c] = 0
+            for k, v in dist.items():
+                if k.slot == 0:
+                    idle[k.cpu] += v.value
+                else:
+                    queued[k.cpu] += v.value
+            for c in range(0, cpumax + 1):
+                samples = idle[c] + queued[c]
+                if samples:
+                    runqocc = float(queued[c]) / samples
+                else:
+                    runqocc = 0
+                print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))
+
+        else:
+            # run queue occupancy, system-wide summary
+            idle = 0
+            queued = 0
+            for k, v in dist.items():
+                if k.value == 0:
+                    idle += v.value
+                else:
+                    queued += v.value
+            samples = idle + queued
+            if samples:
+                runqocc = float(queued) / samples
+            else:
+                runqocc = 0
+            print("runqocc: %0.2f%%" % (100 * runqocc))
+
+    else:
+        # run queue length histograms
+        dist.print_linear_hist("runqlen", "cpu")
+
+    dist.clear()
+
+    countdown -= 1
+    if exiting or countdown == 0:
+        exit()
diff --git a/tools/runqlen_example.txt b/tools/runqlen_example.txt
new file mode 100644 (file)
index 0000000..4c10ed4
--- /dev/null
@@ -0,0 +1,295 @@
+Demonstrations of runqlen, the Linux eBPF/bcc version.
+
+
+This program summarizes scheduler queue length as a histogram, and can also
+show run queue occupancy. It works by sampling the run queue length on all
+CPUs at 99 Hertz.
+
+As an example, here is an idle system:
+
+# ./runqlen.py
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+     runqlen       : count     distribution
+        0          : 1776     |****************************************|
+
+This shows a zero run queue length each time it was sampled.
+
+And now a heavily loaded system:
+
+# ./runqlen.py
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+     runqlen       : count     distribution
+        0          : 1068     |****************************************|
+        1          : 642      |************************                |
+        2          : 369      |*************                           |
+        3          : 183      |******                                  |
+        4          : 104      |***                                     |
+        5          : 42       |*                                       |
+        6          : 13       |                                        |
+        7          : 2        |                                        |
+        8          : 1        |                                        |
+
+Now there is often threads queued, with one sample reaching a queue length
+of 8. This will cause run queue latency, which can be measured by the bcc
+runqlat tool.
+
+
+Here's an example of an issue that runqlen can indentify. Starting with the
+system-wide summary:
+
+# ./runqlen.py 
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+     runqlen       : count     distribution
+        0          : 1209     |****************************************|
+        1          : 372      |************                            |
+        2          : 73       |**                                      |
+        3          : 3        |                                        |
+        4          : 1        |                                        |
+        5          : 0        |                                        |
+        6          : 0        |                                        |
+        7          : 237      |*******                                 |
+
+This shows there is often a run queue length of 7. Now using the -C option to
+see per-CPU histograms:
+
+# ./runqlen.py -C
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+
+cpu = 0
+     runqlen       : count     distribution
+        0          : 257      |****************************************|
+        1          : 64       |*********                               |
+        2          : 5        |                                        |
+        3          : 0        |                                        |
+        4          : 0        |                                        |
+        5          : 0        |                                        |
+        6          : 1        |                                        |
+
+cpu = 1
+     runqlen       : count     distribution
+        0          : 226      |****************************************|
+        1          : 90       |***************                         |
+        2          : 11       |*                                       |
+
+cpu = 2
+     runqlen       : count     distribution
+        0          : 264      |****************************************|
+        1          : 52       |*******                                 |
+        2          : 8        |*                                       |
+        3          : 1        |                                        |
+        4          : 0        |                                        |
+        5          : 0        |                                        |
+        6          : 1        |                                        |
+        7          : 0        |                                        |
+        8          : 1        |                                        |
+
+cpu = 3
+     runqlen       : count     distribution
+        0          : 0        |                                        |
+        1          : 0        |                                        |
+        2          : 0        |                                        |
+        3          : 0        |                                        |
+        4          : 0        |                                        |
+        5          : 0        |                                        |
+        6          : 0        |                                        |
+        7          : 327      |****************************************|
+
+cpu = 4
+     runqlen       : count     distribution
+        0          : 255      |****************************************|
+        1          : 63       |*********                               |
+        2          : 9        |*                                       |
+
+cpu = 5
+     runqlen       : count     distribution
+        0          : 244      |****************************************|
+        1          : 78       |************                            |
+        2          : 3        |                                        |
+        3          : 2        |                                        |
+
+cpu = 6
+     runqlen       : count     distribution
+        0          : 253      |****************************************|
+        1          : 66       |**********                              |
+        2          : 6        |                                        |
+        3          : 1        |                                        |
+        4          : 1        |                                        |
+
+cpu = 7
+     runqlen       : count     distribution
+        0          : 243      |****************************************|
+        1          : 74       |************                            |
+        2          : 6        |                                        |
+        3          : 1        |                                        |
+        4          : 0        |                                        |
+        5          : 1        |                                        |
+        6          : 2        |                                        |
+
+The run queue length of 7 is isolated to CPU 3. It was caused by CPU binding
+(taskset). This can sometimes happen by applications that try to auto-bind
+to CPUs, leaving other CPUs idle while work is queued.
+
+
+runqlat accepts an interval and a count. For example, with -T for timestamps:
+
+# ./runqlen.py -T 1 5
+Sampling run queue length... Hit Ctrl-C to end.
+
+19:51:34
+     runqlen       : count     distribution
+        0          : 635      |****************************************|
+        1          : 142      |********                                |
+        2          : 13       |                                        |
+        3          : 0        |                                        |
+        4          : 1        |                                        |
+
+19:51:35
+     runqlen       : count     distribution
+        0          : 640      |****************************************|
+        1          : 136      |********                                |
+        2          : 13       |                                        |
+        3          : 1        |                                        |
+        4          : 0        |                                        |
+        5          : 0        |                                        |
+        6          : 0        |                                        |
+        7          : 0        |                                        |
+        8          : 0        |                                        |
+        9          : 0        |                                        |
+        10         : 1        |                                        |
+
+19:51:36
+     runqlen       : count     distribution
+        0          : 603      |****************************************|
+        1          : 170      |***********                             |
+        2          : 16       |*                                       |
+        3          : 1        |                                        |
+        4          : 0        |                                        |
+        5          : 0        |                                        |
+        6          : 0        |                                        |
+        7          : 0        |                                        |
+        8          : 0        |                                        |
+        9          : 1        |                                        |
+
+19:51:37
+     runqlen       : count     distribution
+        0          : 617      |****************************************|
+        1          : 154      |*********                               |
+        2          : 20       |*                                       |
+        3          : 0        |                                        |
+        4          : 0        |                                        |
+        5          : 0        |                                        |
+        6          : 0        |                                        |
+        7          : 0        |                                        |
+        8          : 0        |                                        |
+        9          : 0        |                                        |
+        10         : 0        |                                        |
+        11         : 1        |                                        |
+
+19:51:38
+     runqlen       : count     distribution
+        0          : 603      |****************************************|
+        1          : 161      |**********                              |
+        2          : 24       |*                                       |
+        3          : 4        |                                        |
+
+The spikes in run queue length of 11 are likely threads waking up at the same
+time (a thundering herd), and then are scheduled and complete their execution
+quickly.
+
+
+The -O option prints run queue occupancy: the percentage of time that there
+was work queued waiting its turn. Eg:
+
+# ./runqlen.py -OT 1 
+Sampling run queue length... Hit Ctrl-C to end.
+
+19:54:53
+runqocc: 41.09%
+
+19:54:54
+runqocc: 41.85%
+
+19:54:55
+runqocc: 41.47%
+
+19:54:56
+runqocc: 42.35%
+
+19:54:57
+runqocc: 40.83%
+[...]
+
+This can also be examined per-CPU:
+
+# ./runqlen.py -COT 1 
+Sampling run queue length... Hit Ctrl-C to end.
+
+19:55:03
+runqocc, CPU 0    32.32%
+runqocc, CPU 1    26.26%
+runqocc, CPU 2    38.38%
+runqocc, CPU 3   100.00%
+runqocc, CPU 4    26.26%
+runqocc, CPU 5    32.32%
+runqocc, CPU 6    39.39%
+runqocc, CPU 7    46.46%
+
+19:55:04
+runqocc, CPU 0    35.00%
+runqocc, CPU 1    32.32%
+runqocc, CPU 2    37.00%
+runqocc, CPU 3   100.00%
+runqocc, CPU 4    43.43%
+runqocc, CPU 5    31.31%
+runqocc, CPU 6    28.00%
+runqocc, CPU 7    31.31%
+
+19:55:05
+runqocc, CPU 0    43.43%
+runqocc, CPU 1    32.32%
+runqocc, CPU 2    45.45%
+runqocc, CPU 3   100.00%
+runqocc, CPU 4    29.29%
+runqocc, CPU 5    36.36%
+runqocc, CPU 6    36.36%
+runqocc, CPU 7    30.30%
+
+19:55:06
+runqocc, CPU 0    40.00%
+runqocc, CPU 1    38.00%
+runqocc, CPU 2    31.31%
+runqocc, CPU 3   100.00%
+runqocc, CPU 4    31.31%
+runqocc, CPU 5    28.28%
+runqocc, CPU 6    31.00%
+runqocc, CPU 7    29.29%
+[...]
+
+
+USAGE message:
+
+# ./runqlen -h
+usage: runqlen [-h] [-T] [-O] [-C] [interval] [count]
+
+Summarize scheduler run queue length as a histogram
+
+positional arguments:
+  interval         output interval, in seconds
+  count            number of outputs
+
+optional arguments:
+  -h, --help       show this help message and exit
+  -T, --timestamp  include timestamp on output
+  -O, --runqocc    report run queue occupancy
+  -C, --cpus       print output for each CPU separately
+
+examples:
+    ./runqlen            # summarize run queue length as a histogram
+    ./runqlen 1 10       # print 1 second summaries, 10 times
+    ./runqlen -T 1       # 1s summaries and timestamps
+    ./runqlen -O         # report run queue occupancy
+    ./runqlen -C         # show each CPU separately