tools/offcputime Warn user when an event skipped by negative duration
authorYuto Kawamura <kawamuray.dadada@gmail.com>
Mon, 18 Jan 2021 11:30:34 +0000 (20:30 +0900)
committeryonghong-song <ys114321@gmail.com>
Thu, 21 Jan 2021 06:50:09 +0000 (22:50 -0800)
tools/offcputime.py

index c1bf79c0d13bd90de67343205dcdb3d2158e8e65..7ba5dc51bcb5b3394cef5a1413a9fda176603aef 100755 (executable)
@@ -13,7 +13,7 @@
 from __future__ import print_function
 from bcc import BPF
 from sys import stderr
-from time import sleep, strftime
+from time import strftime
 import argparse
 import errno
 import signal
@@ -126,6 +126,14 @@ BPF_HASH(counts, struct key_t);
 BPF_HASH(start, u32);
 BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
 
+struct warn_event_t {
+    u32 pid;
+    u32 tgid;
+    u32 t_start;
+    u32 t_end;
+};
+BPF_PERF_OUTPUT(warn_events);
+
 int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
     u32 pid = prev->pid;
     u32 tgid = prev->tgid;
@@ -150,6 +158,13 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
     u64 t_end = bpf_ktime_get_ns();
     start.delete(&pid);
     if (t_start > t_end) {
+        struct warn_event_t event = {
+            .pid = pid,
+            .tgid = tgid,
+            .t_start = t_start,
+            .t_end = t_end,
+        };
+        warn_events.perf_submit(ctx, &event, sizeof(event));
         return 0;
     }
     u64 delta = t_end - t_start;
@@ -251,8 +266,23 @@ if not folded:
     else:
         print("... Hit Ctrl-C to end.")
 
+
+def print_warn_event(cpu, data, size):
+    event = b["warn_events"].event(data)
+    # See https://github.com/iovisor/bcc/pull/3227 for those wondering how can this happen.
+    print("WARN: Skipped an event with negative duration: pid:%d, tgid:%d, off-cpu:%d, on-cpu:%d"
+          % (event.pid, event.tgid, event.t_start, event.t_end),
+          file=stderr)
+
+b["warn_events"].open_perf_buffer(print_warn_event)
 try:
-    sleep(duration)
+    duration_ms = duration * 1000
+    start_time_ms = int(BPF.monotonic_time() / 1000000)
+    while True:
+        elapsed_ms = int(BPF.monotonic_time() / 1000000) - start_time_ms
+        if elapsed_ms >= duration_ms:
+            break
+        b.perf_buffer_poll(timeout=duration_ms - elapsed_ms)
 except KeyboardInterrupt:
     # as cleanup can take many seconds, trap Ctrl-C:
     signal.signal(signal.SIGINT, signal_ignore)