From: Andrew Birchall Date: Tue, 3 May 2016 23:54:00 +0000 (-0700) Subject: handle ENOMEM in tools/offcputime X-Git-Tag: v0.2.0~85^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ee7e5b46fecfa66b44fadf4ab546f0d63459a3e8;p=platform%2Fupstream%2Fbcc.git handle ENOMEM in tools/offcputime Summary: BPF_STACK_TRACE(_name, _size) will allocate space for _size stack traces (see https://github.com/torvalds/linux/blob/master/kernel/bpf/stackmap.c#L30-L50). If we've already used all of this space, subsequent calls to bpf_get_stackid() will return -ENOMEM (see https://github.com/torvalds/linux/blob/master/kernel/bpf/stackmap.c#L173-L176). This causes our BPF bytecode to store this value in key_t.stack_id and subsequently causes our python application to crash due to a KeyError when invoking stack_traces.walk(k.stack_id). Let's avoid calling stack_traces.walk(k.stack_id) with back stackid's Test Plan: Run offcputime.py in an extreme case; with space for only a single stack trace ``` devbig680[bcc](tools): sed_in_file 's/BPF_STACK_TRACE(stack_traces, 1024)/BPF_STACK_TRACE(stack_traces, 1)/' tools/offcputime.py && \ > ~/bcc_run_tool.sh offcputime -f 5; \ > git reset --hard HEAD swapper/30;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 496 swapper/26;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 553 swapper/28;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 604 swapper/31;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 692 swapper/23;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 713 swapper/18;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 919 swapper/16;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1051 swapper/20;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1056 swapper/21;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1585 swapper/24;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1597 swapper/27;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1610 swapper/17;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 1674 swapper/22;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 2390 swapper/25;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 2574 swapper/19;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 2589 swapper/29;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 8428 swapper/8;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 15272 swapper/15;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 15591 swapper/11;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 17934 swapper/9;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 18100 swapper/14;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 18266 swapper/10;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 20124 swapper/12;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 20887 swapper/13;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 23453 swapper/3;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 27296 swapper/5;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 29094 swapper/6;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 29799 swapper/7;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 31522 swapper/1;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 32269 swapper/4;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 35585 swapper/2;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule 37627 WARNING: 249 stack traces could not be displayed. Consider increasing stack trace storage size. HEAD is now at d3365e9 [RFC] handle ENOMEM in tools/offcputime` ``` --- diff --git a/tools/offcputime.py b/tools/offcputime.py index 84f363e..e8f2120 100755 --- a/tools/offcputime.py +++ b/tools/offcputime.py @@ -16,6 +16,7 @@ from __future__ import print_function from bcc import BPF +from sys import stderr from time import sleep, strftime import argparse import signal @@ -129,32 +130,46 @@ if not folded: else: print("... Hit Ctrl-C to end.") -# output -while (1): - try: - sleep(duration) - except KeyboardInterrupt: - # as cleanup can take many seconds, trap Ctrl-C: - signal.signal(signal.SIGINT, signal_ignore) - - if not folded: - print() - counts = b.get_table("counts") - stack_traces = b.get_table("stack_traces") - for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): - if folded: - # print folded stack output - stack = list(stack_traces.walk(k.stack_id))[1:] - line = [k.name.decode()] + [b.ksym(addr) for addr in reversed(stack)] - print("%s %d" % (";".join(line), v.value)) - else: - # print default multi-line stack output - for addr in stack_traces.walk(k.stack_id): - print(" %-16x %s" % (addr, b.ksym(addr))) - print(" %-16s %s" % ("-", k.name)) - print(" %d\n" % v.value) - counts.clear() - - if not folded: - print("Detaching...") - exit() +try: + sleep(duration) +except KeyboardInterrupt: + # as cleanup can take many seconds, trap Ctrl-C: + signal.signal(signal.SIGINT, signal_ignore) + +if not folded: + print() + +missing_stacks = 0 +counts = b.get_table("counts") +stack_traces = b.get_table("stack_traces") +for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): + """ + bpf_get_stackid will return a negative value in the case of an error + + BPF_STACK_TRACE(_name, _size) will allocate space for _size stack traces + on each CPU. -ENOMEM will be returned when this limit is reached within a + single CPU. + """ + if k.stack_id < 0: + missing_stacks += 1 + continue + + stack = stack_traces.walk(k.stack_id) + + if folded: + # print folded stack output + stack = list(stack)[1:] + line = [k.name.decode()] + [b.ksym(addr) for addr in reversed(stack)] + print("%s %d" % (";".join(line), v.value)) + else: + # print default multi-line stack output + for addr in stack: + print(" %-16x %s" % (addr, b.ksym(addr))) + print(" %-16s %s" % ("-", k.name)) + print(" %d\n" % v.value) + +if missing_stacks > 0: + print(("WARNING: %d stack traces could not be displayed. " + "You may be running out of storage space for stack traces.") % + missing_stacks, + file=stderr) diff --git a/tools/offcputime_example.txt b/tools/offcputime_example.txt index 75b07e2..deab802 100644 --- a/tools/offcputime_example.txt +++ b/tools/offcputime_example.txt @@ -586,8 +586,6 @@ Tracing off-CPU time (us) by kernel stack... Hit Ctrl-C to end. supervise 81670888 -Detaching... - The last few stack traces aren't very interesting, since they are threads that are ofter blocked off-CPU waiting for work. @@ -619,8 +617,6 @@ Tracing off-CPU time (us) by kernel stack... Hit Ctrl-C to end. dd 2405710 -Detaching... - The stack trace shows "dd" is blocked waiting on disk I/O, as expected, for a total of 2.4 seconds during tracing. @@ -645,8 +641,6 @@ Tracing off-CPU time (us) by kernel stack for 5 secs. dd 4413909 -Detaching... - Here, dd was blocked for 4.4 seconds out of 5. Or put differently, likely on-CPU for about 12% of the time. Which matches the ratio seen by time(1):