# This script traces high scheduling delays between a task becoming
# ready to run and it actually running on a CPU.
#
-# USAGE: runqslower [-p PID] [-t TID] [min_us]
+# USAGE: runqslower [-p PID] [-t TID] [-P] [min_us]
#
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support).
#
./runqslower 1000 # trace run queue latency higher than 1000 us
./runqslower -p 123 # trace pid 123
./runqslower -t 123 # trace tid 123 (use for threads only)
+ ./runqslower -P # also show previous task comm and TID
"""
parser = argparse.ArgumentParser(
    description="Trace high run queue latency")
thread_group = parser.add_mutually_exclusive_group()
thread_group.add_argument("-p", "--pid", metavar="PID", dest="pid",
    help="trace this PID only", type=int)
thread_group.add_argument("-t", "--tid", metavar="TID", dest="tid",
help="trace this TID only", type=int)
+thread_group.add_argument("-P", "--previous", action="store_true",
+ help="also show previous task name and TID")
parser.add_argument("min_us", nargs="?", default='10000',
    help="minimum run queue latency to trace, in us (default 10000)")
args = parser.parse_args()
min_us = int(args.min_us)
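// struct data_t below is the perf event payload handed to user space;
// the prev_pid/prev_task fields added here describe the task that was
// switched out, and are printed only when -P is given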
struct data_t {
u32 pid;
+ u32 prev_pid;
char task[TASK_COMM_LEN];
+ char prev_task[TASK_COMM_LEN];
u64 delta_us;
};
// calculate latency
int trace_run(struct pt_regs *ctx, struct task_struct *prev)
{
- u32 pid, tgid;
+ u32 pid, tgid, prev_pid;
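// cache the outgoing task's ID once: it keys the enqueue timestamp below
// and is later emitted as prev_pid for -P output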
// ivcsw: treat like an enqueue event and store timestamp
+ prev_pid = prev->pid;
if (prev->state == TASK_RUNNING) {
tgid = prev->tgid;
- pid = prev->pid;
u64 ts = bpf_ktime_get_ns();
- if (pid != 0) {
+ if (prev_pid != 0) {
if (!(FILTER_PID) && !(FILTER_TGID)) {
- start.update(&pid, &ts);
+ start.update(&prev_pid, &ts);
}
}
}
struct data_t data = {};
data.pid = pid;
+ data.prev_pid = prev_pid;
data.delta_us = delta_us;
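// by the time finish_task_switch() runs, the switch is complete, so the
// current task is the one just scheduled in; prev points at another
// task's task_struct and must be read with an explicit kernel read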
bpf_get_current_comm(&data.task, sizeof(data.task));
+ bpf_probe_read_kernel_str(&data.prev_task, sizeof(data.prev_task), prev->comm);
// output
events.perf_submit(ctx, &data, sizeof(data));
    return 0;
}

RAW_TRACEPOINT_PROBE(sched_switch)
{
    // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
    struct task_struct *prev = (struct task_struct *)ctx->args[1];
    struct task_struct *next = (struct task_struct *)ctx->args[2];
- u32 tgid, pid;
+ u32 tgid, pid, prev_pid;
long state;
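// raw tracepoint args are plain u64 values, so task_struct fields are
// fetched with bpf_probe_read_kernel() instead of direct dereference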
// ivcsw: treat like an enqueue event and store timestamp
bpf_probe_read_kernel(&state, sizeof(long), (const void *)&prev->state);
+ bpf_probe_read_kernel(&prev_pid, sizeof(prev->pid), &prev->pid);
if (state == TASK_RUNNING) {
bpf_probe_read_kernel(&tgid, sizeof(prev->tgid), &prev->tgid);
- bpf_probe_read_kernel(&pid, sizeof(prev->pid), &prev->pid);
u64 ts = bpf_ktime_get_ns();
- if (pid != 0) {
+ if (prev_pid != 0) {
if (!(FILTER_PID) && !(FILTER_TGID)) {
- start.update(&pid, &ts);
+ start.update(&prev_pid, &ts);
}
}
struct data_t data = {};
data.pid = pid;
+ data.prev_pid = prev_pid;
data.delta_us = delta_us;
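// the sched_switch tracepoint fires before the switch completes, so the
// incoming task's comm is read from next rather than via bpf_get_current_comm()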
bpf_probe_read_kernel_str(&data.task, sizeof(data.task), next->comm);
+ bpf_probe_read_kernel_str(&data.prev_task, sizeof(data.prev_task), prev->comm);
// output
events.perf_submit(ctx, &data, sizeof(data));
# process event
def print_event(cpu, data, size):
event = b["events"].event(data)
- print("%-8s %-16s %-6s %14s" % (strftime("%H:%M:%S"), event.task, event.pid, event.delta_us))
+ if args.previous:
+ print("%-8s %-16s %-6s %14s %-16s %-6s" % (strftime("%H:%M:%S"), event.task, event.pid, event.delta_us, event.prev_task, event.prev_pid))
+ else:
+ print("%-8s %-16s %-6s %14s" % (strftime("%H:%M:%S"), event.task, event.pid, event.delta_us))
# load BPF program
b = BPF(text=bpf_text)
if not BPF.support_raw_tracepoint():
    b.attach_kprobe(event="finish_task_switch", fn_name="trace_run")
print("Tracing run queue latency higher than %d us" % min_us)
-print("%-8s %-16s %-6s %14s" % ("TIME", "COMM", "TID", "LAT(us)"))
+if args.previous:
+ print("%-8s %-16s %-6s %14s %-16s %-6s" % ("TIME", "COMM", "TID", "LAT(us)", "PREV COMM", "PREV TID"))
+else:
+ print("%-8s %-16s %-6s %14s" % ("TIME", "COMM", "TID", "LAT(us)"))
# read events
b["events"].open_perf_buffer(print_event, page_cnt=64)