handle ENOMEM in tools/offcputime
authorAndrew Birchall <abirchall@fb.com>
Tue, 3 May 2016 23:54:00 +0000 (16:54 -0700)
committerAndrew Birchall <abirchall@fb.com>
Mon, 16 May 2016 18:11:53 +0000 (11:11 -0700)
Summary:
BPF_STACK_TRACE(_name, _size) will allocate space for _size stack traces
(see https://github.com/torvalds/linux/blob/master/kernel/bpf/stackmap.c#L30-L50).

If we've already used all of this space, subsequent calls to bpf_get_stackid()
will return -ENOMEM (see https://github.com/torvalds/linux/blob/master/kernel/bpf/stackmap.c#L173-L176).

This causes our BPF bytecode to store this value in key_t.stack_id and
subsequently causes our python application to crash due to a KeyError when
invoking stack_traces.walk(k.stack_id).

Let's avoid calling stack_traces.walk(k.stack_id) with back stackid's

Test Plan:
Run offcputime.py in an extreme case; with space for only a single stack trace
```
devbig680[bcc](tools): sed_in_file 's/BPF_STACK_TRACE(stack_traces, 1024)/BPF_STACK_TRACE(stack_traces, 1)/' tools/offcputime.py && \
> ~/bcc_run_tool.sh offcputime -f 5; \
> git reset --hard HEAD
swapper/30;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 496
swapper/26;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 553
swapper/28;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 604
swapper/31;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 692
swapper/23;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 713
swapper/18;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 919
swapper/16;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1051
swapper/20;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1056
swapper/21;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1585
swapper/24;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1597
swapper/27;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1610
swapper/17;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1674
swapper/22;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 2390
swapper/25;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 2574
swapper/19;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 2589
swapper/29;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 8428
swapper/8;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 15272
swapper/15;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 15591
swapper/11;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 17934
swapper/9;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 18100
swapper/14;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 18266
swapper/10;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 20124
swapper/12;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 20887
swapper/13;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 23453
swapper/3;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 27296
swapper/5;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 29094
swapper/6;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 29799
swapper/7;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 31522
swapper/1;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 32269
swapper/4;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 35585
swapper/2;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 37627
WARNING: 249 stack traces could not be displayed. Consider increasing stack trace storage size.
HEAD is now at d3365e9 [RFC] handle ENOMEM in tools/offcputime`
```

tools/offcputime.py
tools/offcputime_example.txt

index 84f363e..e8f2120 100755 (executable)
@@ -16,6 +16,7 @@
 
 from __future__ import print_function
 from bcc import BPF
+from sys import stderr
 from time import sleep, strftime
 import argparse
 import signal
@@ -129,32 +130,46 @@ if not folded:
     else:
         print("... Hit Ctrl-C to end.")
 
-# output
-while (1):
-    try:
-        sleep(duration)
-    except KeyboardInterrupt:
-        # as cleanup can take many seconds, trap Ctrl-C:
-        signal.signal(signal.SIGINT, signal_ignore)
-
-    if not folded:
-        print()
-    counts = b.get_table("counts")
-    stack_traces = b.get_table("stack_traces")
-    for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
-        if folded:
-            # print folded stack output
-            stack = list(stack_traces.walk(k.stack_id))[1:]
-            line = [k.name.decode()] + [b.ksym(addr) for addr in reversed(stack)]
-            print("%s %d" % (";".join(line), v.value))
-        else:
-            # print default multi-line stack output
-            for addr in stack_traces.walk(k.stack_id):
-                print("    %-16x %s" % (addr, b.ksym(addr)))
-            print("    %-16s %s" % ("-", k.name))
-            print("        %d\n" % v.value)
-    counts.clear()
-
-    if not folded:
-        print("Detaching...")
-    exit()
+try:
+    sleep(duration)
+except KeyboardInterrupt:
+    # as cleanup can take many seconds, trap Ctrl-C:
+    signal.signal(signal.SIGINT, signal_ignore)
+
+if not folded:
+    print()
+
+missing_stacks = 0
+counts = b.get_table("counts")
+stack_traces = b.get_table("stack_traces")
+for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
+    """
+    bpf_get_stackid will return a negative value in the case of an error
+
+    BPF_STACK_TRACE(_name, _size) will allocate space for _size stack traces
+    on each CPU. -ENOMEM will be returned when this limit is reached within a
+    single CPU.
+    """
+    if k.stack_id < 0:
+        missing_stacks += 1
+        continue
+
+    stack = stack_traces.walk(k.stack_id)
+
+    if folded:
+        # print folded stack output
+        stack = list(stack)[1:]
+        line = [k.name.decode()] + [b.ksym(addr) for addr in reversed(stack)]
+        print("%s %d" % (";".join(line), v.value))
+    else:
+        # print default multi-line stack output
+        for addr in stack:
+            print("    %-16x %s" % (addr, b.ksym(addr)))
+        print("    %-16s %s" % ("-", k.name))
+        print("        %d\n" % v.value)
+
+if missing_stacks > 0:
+    print(("WARNING: %d stack traces could not be displayed. "
+        "You may be running out of storage space for stack traces.") %
+        missing_stacks,
+        file=stderr)
index 75b07e2..deab802 100644 (file)
@@ -586,8 +586,6 @@ Tracing off-CPU time (us) by kernel stack... Hit Ctrl-C to end.
     supervise
         81670888
 
-Detaching...
-
 The last few stack traces aren't very interesting, since they are threads that
 are ofter blocked off-CPU waiting for work.
 
@@ -619,8 +617,6 @@ Tracing off-CPU time (us) by kernel stack... Hit Ctrl-C to end.
     dd
         2405710
 
-Detaching...
-
 The stack trace shows "dd" is blocked waiting on disk I/O, as expected, for a
 total of 2.4 seconds during tracing.
 
@@ -645,8 +641,6 @@ Tracing off-CPU time (us) by kernel stack for 5 secs.
     dd
         4413909
 
-Detaching...
-
 Here, dd was blocked for 4.4 seconds out of 5. Or put differently, likely
 on-CPU for about 12% of the time. Which matches the ratio seen by time(1):