- tools/[profile](tools/profile.py): Profile CPU usage by sampling stack traces at a timed interval. [Examples](tools/profile_example.txt).
- tools/[reset-trace](tools/reset-trace.sh): Reset the state of tracing. Maintenance tool only. [Examples](tools/reset-trace_example.txt).
- tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt).
+- tools/[runqlen](tools/runqlen.py): Run queue length as a histogram. [Examples](tools/runqlen_example.txt).
- tools/[softirqs](tools/softirqs.py): Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt).
- tools/[solisten](tools/solisten.py): Trace TCP socket listen. [Examples](tools/solisten_example.txt).
- tools/[sslsniff](tools/sslsniff.py): Sniff OpenSSL written and readed data. [Examples](tools/sslsniff_example.txt).
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
-pidstat(1)
+runqlen(8), pidstat(1)
--- /dev/null
+.TH runqlen 8 "2016-12-12" "USER COMMANDS"
+.SH NAME
+runqlen \- Scheduler run queue length as a histogram.
+.SH SYNOPSIS
+.B runqlen [\-h] [\-T] [\-O] [\-C] [interval] [count]
+.SH DESCRIPTION
+This program summarizes scheduler queue length as a histogram, and can also
+show run queue occupancy. It works by sampling the run queue length on all
+CPUs at 99 Hertz.
+
+This tool can be used to identify imbalances, eg, when processes are bound
+to CPUs causing queueing, or interrupt mappings causing the same.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-T
+Include timestamps on output.
+.TP
+\-O
+Report run queue occupancy.
+.TP
+\-C
+Report for each CPU.
+.TP
+interval
+Output interval, in seconds.
+.TP
+count
+Number of outputs.
+.SH EXAMPLES
+.TP
+Summarize run queue length as a histogram:
+#
+.B runqlen
+.TP
+Print 1 second summaries, 10 times:
+#
+.B runqlen 1 10
+.TP
+Print output every second, with timestamps, and show each CPU separately:
+#
+.B runqlen \-CT 1
+.TP
+Print run queue occupancy every second:
+#
+.B runqlen \-O 1
+.TP
+Print run queue occupancy, with timetamps, for each CPU:
+#
+.B runqlen \-COT 1
+.SH FIELDS
+.TP
+runqlen
+Scheduler run queue length: the number of threads (tasks) waiting to run,
+(excluding including the currently running task).
+.TP
+count
+Number of samples at this queue length.
+.TP
+distribution
+An ASCII bar chart to visualize the distribution (count column)
+.SH OVERHEAD
+This uses sampling at 99 Hertz (on all CPUs), and in-kernel summaries, which
+should make overhead negligible. This does not trace scheduler events, like
+runqlen does, which comes at a much higher overhead cost.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+runqlat(8), pidstat(1)
--- /dev/null
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# runqlen Summarize scheduler run queue length as a histogram.
+# For Linux, uses BCC, eBPF.
+#
+# This counts the length of the run queue, excluding the currently running
+# thread, and shows it as a histogram.
+#
+# Also answers run queue occupancy.
+#
+# USAGE: runqlen [-h] [-T] [-Q] [-m] [-D] [interval] [count]
+#
+# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
+# a version of this tool that may work on Linux 4.6 - 4.8.
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 12-Dec-2016 Brendan Gregg Created this.
+
+from __future__ import print_function
+from bcc import BPF, PerfType, PerfSWConfig
+from time import sleep, strftime
+import argparse
+
+# arguments
+examples = """examples:
+ ./runqlen # summarize run queue length as a histogram
+ ./runqlen 1 10 # print 1 second summaries, 10 times
+ ./runqlen -T 1 # 1s summaries and timestamps
+ ./runqlen -O # report run queue occupancy
+ ./runqlen -C # show each CPU separately
+"""
+parser = argparse.ArgumentParser(
+ description="Summarize scheduler run queue length as a histogram",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("-T", "--timestamp", action="store_true",
+ help="include timestamp on output")
+parser.add_argument("-O", "--runqocc", action="store_true",
+ help="report run queue occupancy")
+parser.add_argument("-C", "--cpus", action="store_true",
+ help="print output for each CPU separately")
+parser.add_argument("interval", nargs="?", default=99999999,
+ help="output interval, in seconds")
+parser.add_argument("count", nargs="?", default=99999999,
+ help="number of outputs")
+args = parser.parse_args()
+countdown = int(args.count)
+debug = 0
+frequency = 99
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+// Declare enough of cfs_rq to find nr_running, since we can't #import the
+// header. This will need maintenance. It is from kernel/sched/sched.h:
+struct cfs_rq_partial {
+ struct load_weight load;
+ unsigned int nr_running, h_nr_running;
+};
+
+typedef struct cpu_key {
+ int cpu;
+ unsigned int slot;
+} cpu_key_t;
+STORAGE
+
+int do_perf_event()
+{
+ unsigned int len = 0;
+ pid_t pid = 0;
+ struct task_struct *task = NULL;
+ struct cfs_rq_partial *my_q = NULL;
+
+ // Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
+ // unstable interface and may need maintenance. Perhaps a future version
+ // of BPF will support task_rq(p) or something similar as a more reliable
+ // interface.
+ task = (struct task_struct *)bpf_get_current_task();
+ bpf_probe_read(&my_q, sizeof(my_q), &task->se.cfs_rq);
+ bpf_probe_read(&len, sizeof(len), &my_q->nr_running);
+
+ // Decrement idle thread by dropping the run queue by one. We could do
+ // this other ways if needed, like matching on task->pid.
+ if (len > 0)
+ len--;
+
+ STORE
+
+ return 0;
+}
+"""
+
+if args.cpus:
+ bpf_text = bpf_text.replace('STORAGE',
+ 'BPF_HISTOGRAM(dist, cpu_key_t);')
+ bpf_text = bpf_text.replace('STORE', 'cpu_key_t key = {.slot = len}; ' +
+ 'bpf_probe_read(&key.cpu, sizeof(key.cpu), &task->wake_cpu);' +
+ 'dist.increment(key);')
+else:
+ bpf_text = bpf_text.replace('STORAGE',
+ 'BPF_HISTOGRAM(dist, unsigned int);')
+ bpf_text = bpf_text.replace('STORE', 'dist.increment(len);')
+
+# code substitutions
+if debug:
+ print(bpf_text)
+
+# load BPF program
+b = BPF(text=bpf_text)
+# initialize BPF & perf_events
+b = BPF(text=bpf_text)
+b.attach_perf_event(ev_type=PerfType.SOFTWARE,
+ ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
+ sample_period=0, sample_freq=frequency)
+
+print("Sampling run queue length... Hit Ctrl-C to end.")
+
+# output
+exiting = 0 if args.interval else 1
+dist = b.get_table("dist")
+while (1):
+ try:
+ sleep(int(args.interval))
+ except KeyboardInterrupt:
+ exiting = 1
+
+ print()
+ if args.timestamp:
+ print("%-8s\n" % strftime("%H:%M:%S"), end="")
+
+ if args.runqocc:
+ if args.cpus:
+ # run queue occupancy, per-CPU summary
+ idle = {}
+ queued = {}
+ cpumax = 0
+ for k, v in dist.items():
+ if k.cpu > cpumax:
+ cpumax = k.cpu
+ for c in range(0, cpumax + 1):
+ idle[c] = 0
+ queued[c] = 0
+ for k, v in dist.items():
+ if k.slot == 0:
+ idle[k.cpu] += v.value
+ else:
+ queued[k.cpu] += v.value
+ for c in range(0, cpumax + 1):
+ samples = idle[c] + queued[c]
+ if samples:
+ runqocc = float(queued[c]) / samples
+ else:
+ runqocc = 0
+ print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))
+
+ else:
+ # run queue occupancy, system-wide summary
+ idle = 0
+ queued = 0
+ for k, v in dist.items():
+ if k.value == 0:
+ idle += v.value
+ else:
+ queued += v.value
+ samples = idle + queued
+ if samples:
+ runqocc = float(queued) / samples
+ else:
+ runqocc = 0
+ print("runqocc: %0.2f%%" % (100 * runqocc))
+
+ else:
+ # run queue length histograms
+ dist.print_linear_hist("runqlen", "cpu")
+
+ dist.clear()
+
+ countdown -= 1
+ if exiting or countdown == 0:
+ exit()
--- /dev/null
+Demonstrations of runqlen, the Linux eBPF/bcc version.
+
+
+This program summarizes scheduler queue length as a histogram, and can also
+show run queue occupancy. It works by sampling the run queue length on all
+CPUs at 99 Hertz.
+
+As an example, here is an idle system:
+
+# ./runqlen.py
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+ runqlen : count distribution
+ 0 : 1776 |****************************************|
+
+This shows a zero run queue length each time it was sampled.
+
+And now a heavily loaded system:
+
+# ./runqlen.py
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+ runqlen : count distribution
+ 0 : 1068 |****************************************|
+ 1 : 642 |************************ |
+ 2 : 369 |************* |
+ 3 : 183 |****** |
+ 4 : 104 |*** |
+ 5 : 42 |* |
+ 6 : 13 | |
+ 7 : 2 | |
+ 8 : 1 | |
+
+Now there is often threads queued, with one sample reaching a queue length
+of 8. This will cause run queue latency, which can be measured by the bcc
+runqlat tool.
+
+
+Here's an example of an issue that runqlen can indentify. Starting with the
+system-wide summary:
+
+# ./runqlen.py
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+ runqlen : count distribution
+ 0 : 1209 |****************************************|
+ 1 : 372 |************ |
+ 2 : 73 |** |
+ 3 : 3 | |
+ 4 : 1 | |
+ 5 : 0 | |
+ 6 : 0 | |
+ 7 : 237 |******* |
+
+This shows there is often a run queue length of 7. Now using the -C option to
+see per-CPU histograms:
+
+# ./runqlen.py -C
+Sampling run queue length... Hit Ctrl-C to end.
+^C
+
+cpu = 0
+ runqlen : count distribution
+ 0 : 257 |****************************************|
+ 1 : 64 |********* |
+ 2 : 5 | |
+ 3 : 0 | |
+ 4 : 0 | |
+ 5 : 0 | |
+ 6 : 1 | |
+
+cpu = 1
+ runqlen : count distribution
+ 0 : 226 |****************************************|
+ 1 : 90 |*************** |
+ 2 : 11 |* |
+
+cpu = 2
+ runqlen : count distribution
+ 0 : 264 |****************************************|
+ 1 : 52 |******* |
+ 2 : 8 |* |
+ 3 : 1 | |
+ 4 : 0 | |
+ 5 : 0 | |
+ 6 : 1 | |
+ 7 : 0 | |
+ 8 : 1 | |
+
+cpu = 3
+ runqlen : count distribution
+ 0 : 0 | |
+ 1 : 0 | |
+ 2 : 0 | |
+ 3 : 0 | |
+ 4 : 0 | |
+ 5 : 0 | |
+ 6 : 0 | |
+ 7 : 327 |****************************************|
+
+cpu = 4
+ runqlen : count distribution
+ 0 : 255 |****************************************|
+ 1 : 63 |********* |
+ 2 : 9 |* |
+
+cpu = 5
+ runqlen : count distribution
+ 0 : 244 |****************************************|
+ 1 : 78 |************ |
+ 2 : 3 | |
+ 3 : 2 | |
+
+cpu = 6
+ runqlen : count distribution
+ 0 : 253 |****************************************|
+ 1 : 66 |********** |
+ 2 : 6 | |
+ 3 : 1 | |
+ 4 : 1 | |
+
+cpu = 7
+ runqlen : count distribution
+ 0 : 243 |****************************************|
+ 1 : 74 |************ |
+ 2 : 6 | |
+ 3 : 1 | |
+ 4 : 0 | |
+ 5 : 1 | |
+ 6 : 2 | |
+
+The run queue length of 7 is isolated to CPU 3. It was caused by CPU binding
+(taskset). This can sometimes happen by applications that try to auto-bind
+to CPUs, leaving other CPUs idle while work is queued.
+
+
+runqlat accepts an interval and a count. For example, with -T for timestamps:
+
+# ./runqlen.py -T 1 5
+Sampling run queue length... Hit Ctrl-C to end.
+
+19:51:34
+ runqlen : count distribution
+ 0 : 635 |****************************************|
+ 1 : 142 |******** |
+ 2 : 13 | |
+ 3 : 0 | |
+ 4 : 1 | |
+
+19:51:35
+ runqlen : count distribution
+ 0 : 640 |****************************************|
+ 1 : 136 |******** |
+ 2 : 13 | |
+ 3 : 1 | |
+ 4 : 0 | |
+ 5 : 0 | |
+ 6 : 0 | |
+ 7 : 0 | |
+ 8 : 0 | |
+ 9 : 0 | |
+ 10 : 1 | |
+
+19:51:36
+ runqlen : count distribution
+ 0 : 603 |****************************************|
+ 1 : 170 |*********** |
+ 2 : 16 |* |
+ 3 : 1 | |
+ 4 : 0 | |
+ 5 : 0 | |
+ 6 : 0 | |
+ 7 : 0 | |
+ 8 : 0 | |
+ 9 : 1 | |
+
+19:51:37
+ runqlen : count distribution
+ 0 : 617 |****************************************|
+ 1 : 154 |********* |
+ 2 : 20 |* |
+ 3 : 0 | |
+ 4 : 0 | |
+ 5 : 0 | |
+ 6 : 0 | |
+ 7 : 0 | |
+ 8 : 0 | |
+ 9 : 0 | |
+ 10 : 0 | |
+ 11 : 1 | |
+
+19:51:38
+ runqlen : count distribution
+ 0 : 603 |****************************************|
+ 1 : 161 |********** |
+ 2 : 24 |* |
+ 3 : 4 | |
+
+The spikes in run queue length of 11 are likely threads waking up at the same
+time (a thundering herd), and then are scheduled and complete their execution
+quickly.
+
+
+The -O option prints run queue occupancy: the percentage of time that there
+was work queued waiting its turn. Eg:
+
+# ./runqlen.py -OT 1
+Sampling run queue length... Hit Ctrl-C to end.
+
+19:54:53
+runqocc: 41.09%
+
+19:54:54
+runqocc: 41.85%
+
+19:54:55
+runqocc: 41.47%
+
+19:54:56
+runqocc: 42.35%
+
+19:54:57
+runqocc: 40.83%
+[...]
+
+This can also be examined per-CPU:
+
+# ./runqlen.py -COT 1
+Sampling run queue length... Hit Ctrl-C to end.
+
+19:55:03
+runqocc, CPU 0 32.32%
+runqocc, CPU 1 26.26%
+runqocc, CPU 2 38.38%
+runqocc, CPU 3 100.00%
+runqocc, CPU 4 26.26%
+runqocc, CPU 5 32.32%
+runqocc, CPU 6 39.39%
+runqocc, CPU 7 46.46%
+
+19:55:04
+runqocc, CPU 0 35.00%
+runqocc, CPU 1 32.32%
+runqocc, CPU 2 37.00%
+runqocc, CPU 3 100.00%
+runqocc, CPU 4 43.43%
+runqocc, CPU 5 31.31%
+runqocc, CPU 6 28.00%
+runqocc, CPU 7 31.31%
+
+19:55:05
+runqocc, CPU 0 43.43%
+runqocc, CPU 1 32.32%
+runqocc, CPU 2 45.45%
+runqocc, CPU 3 100.00%
+runqocc, CPU 4 29.29%
+runqocc, CPU 5 36.36%
+runqocc, CPU 6 36.36%
+runqocc, CPU 7 30.30%
+
+19:55:06
+runqocc, CPU 0 40.00%
+runqocc, CPU 1 38.00%
+runqocc, CPU 2 31.31%
+runqocc, CPU 3 100.00%
+runqocc, CPU 4 31.31%
+runqocc, CPU 5 28.28%
+runqocc, CPU 6 31.00%
+runqocc, CPU 7 29.29%
+[...]
+
+
+USAGE message:
+
+# ./runqlen -h
+usage: runqlen [-h] [-T] [-O] [-C] [interval] [count]
+
+Summarize scheduler run queue length as a histogram
+
+positional arguments:
+ interval output interval, in seconds
+ count number of outputs
+
+optional arguments:
+ -h, --help show this help message and exit
+ -T, --timestamp include timestamp on output
+ -O, --runqocc report run queue occupancy
+ -C, --cpus print output for each CPU separately
+
+examples:
+ ./runqlen # summarize run queue length as a histogram
+ ./runqlen 1 10 # print 1 second summaries, 10 times
+ ./runqlen -T 1 # 1s summaries and timestamps
+ ./runqlen -O # report run queue occupancy
+ ./runqlen -C # show each CPU separately