From: Vicent Marti
Date: Fri, 25 Mar 2016 16:21:44 +0000 (+0100)
Subject: offcputime: Resolve stacks using a StackTrace table
X-Git-Tag: v0.2.0~135^2~3
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e82fb1baa26b0e1e524923765ba70d5872e51eef;p=platform%2Fupstream%2Fbcc.git

offcputime: Resolve stacks using a StackTrace table

The manual kernel-stack walking in the eBPF code has been replaced with a
`BPF_STACK_TRACE` table; the stack ID is now stored as an integer in the
main key.

Thanks to the `StackTrace.walk` iterator, the printing code can also be
significantly simplified, for both the folded and the full multi-line
stack output modes.
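For reference, the adopted pattern looks roughly like the sketch below: a
minimal, hypothetical example rather than the tool itself. The kprobe on
finish_task_switch is the one offcputime uses, but the program name, table
names, storage size and the two-second sampling window are illustrative.

    #!/usr/bin/python
    # Minimal sketch of the BPF_STACK_TRACE pattern: count hits per
    # deduplicated kernel stack, then symbolize the frames in user space.
    from __future__ import print_function
    from bcc import BPF
    from time import sleep

    bpf_text = """
    #include <uapi/linux/ptrace.h>

    BPF_HASH(counts, int);              // stack id -> hit count
    BPF_STACK_TRACE(stack_traces, 1024) // deduplicated stack storage

    int trace(struct pt_regs *ctx) {
        u64 zero = 0, *val;
        // capture the current kernel stack; identical stacks share one id
        int stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
        val = counts.lookup_or_init(&stack_id, &zero);
        (*val)++;
        return 0;
    }
    """

    b = BPF(text=bpf_text)
    b.attach_kprobe(event="finish_task_switch", fn_name="trace")
    sleep(2)

    stack_traces = b.get_table("stack_traces")
    for k, v in b.get_table("counts").items():
        # walk() yields the raw addresses, innermost frame first
        for addr in stack_traces.walk(k.value):
            print("    %-16x %s" % (addr, b.ksym(addr)))
        print("        %d\n" % v.value)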
---

diff --git a/man/man8/offcputime.8 b/man/man8/offcputime.8
index 39e93a7..0c6f75a 100644
--- a/man/man8/offcputime.8
+++ b/man/man8/offcputime.8
@@ -22,10 +22,8 @@ especially the -f format, can be used to generate an
 "off-CPU time flame graph". See
 http://www.brendangregg.com/FlameGraphs/offcpuflamegraphs.html
 
-The stack depth is currently limited to 20, and the stack traces are kernel
-mode only. Check for newer versions where either may be improved.
-
-This currently only works on x86_64. Check for future versions.
+This tool only works on Linux 4.6+. It uses the new `BPF_STACK_TRACE` table
+APIs to generate the in-kernel stack traces.
 .SH REQUIREMENTS
 CONFIG_BPF and bcc.
 .SH OPTIONS
diff --git a/tools/offcputime.py b/tools/offcputime.py
index b98e920..84f363e 100755
--- a/tools/offcputime.py
+++ b/tools/offcputime.py
@@ -9,8 +9,6 @@
 # as a proof of concept. This implementation should be replaced in the future
 # with an appropriate bpf_ call, when available.
 #
-# Currently limited to a stack trace depth of 21 (maxdepth + 1).
-#
 # Copyright 2016 Netflix, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License")
 #
@@ -48,7 +46,6 @@ args = parser.parse_args()
 folded = args.folded
 duration = int(args.duration)
 debug = 0
-maxdepth = 20  # and MAXDEPTH
 if args.pid and args.useronly:
     print("ERROR: use either -p or -u.")
     exit()
@@ -62,31 +59,15 @@ bpf_text = """
 #include <uapi/linux/ptrace.h>
 #include <linux/sched.h>
 
-#define MAXDEPTH    20
 #define MINBLOCK_US 1
 
 struct key_t {
     char name[TASK_COMM_LEN];
-    // Skip saving the ip
-    u64 ret[MAXDEPTH];
+    int stack_id;
 };
 BPF_HASH(counts, struct key_t);
 BPF_HASH(start, u32);
-
-static u64 get_frame(u64 *bp) {
-    if (*bp) {
-        // The following stack walker is x86_64 specific
-        u64 ret = 0;
-        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
-            return 0;
-        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
-            *bp = 0;
-        if (ret < __START_KERNEL_map)
-            return 0;
-        return ret;
-    }
-    return 0;
-}
+BPF_STACK_TRACE(stack_traces, 1024)
 
 int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
     u32 pid;
@@ -111,36 +92,12 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
         return 0;
 
     // create map key
-    u64 zero = 0, *val, bp = 0;
-    int depth = 0;
+    u64 zero = 0, *val;
     struct key_t key = {};
+
     bpf_get_current_comm(&key.name, sizeof(key.name));
-    bp = ctx->bp;
-
-    // unrolled loop (MAXDEPTH):
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-
-out:
+    key.stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
+
     val = counts.lookup_or_init(&key, &zero);
     (*val) += delta;
     return 0;
@@ -183,24 +140,17 @@ while (1):
     if not folded:
         print()
     counts = b.get_table("counts")
+    stack_traces = b.get_table("stack_traces")
     for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
         if folded:
             # print folded stack output
-            line = k.name.decode() + ";"
-            for i in reversed(range(0, maxdepth)):
-                if k.ret[i] == 0:
-                    continue
-                line = line + b.ksym(k.ret[i])
-                if i != 0:
-                    line = line + ";"
-            print("%s %d" % (line, v.value))
+            stack = list(stack_traces.walk(k.stack_id))[1:]
+            line = [k.name.decode()] + [b.ksym(addr) for addr in reversed(stack)]
+            print("%s %d" % (";".join(line), v.value))
         else:
             # print default multi-line stack output
-            for i in range(0, maxdepth):
-                if k.ret[i] == 0:
-                    break
-                print("    %-16x %s" % (k.ret[i],
-                    b.ksym(k.ret[i])))
+            for addr in stack_traces.walk(k.stack_id):
+                print("    %-16x %s" % (addr, b.ksym(addr)))
         print("    %-16s %s" % ("-", k.name))
         print("        %d\n" % v.value)
     counts.clear()
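A note on the folded mode above: walk() yields frames innermost first, while
the semicolon-separated folded format consumed by flame graph tooling wants
the process name followed by the frames outermost first, hence the
reversed(); the [1:] drops the first captured frame. A self-contained
illustration, with made-up process name, frames and count:

    # Illustrative only: assembling one folded line from resolved frames.
    comm = "bash"                                            # hypothetical
    frames = ["schedule", "schedule_timeout", "wait_woken"]  # innermost first
    line = ";".join([comm] + list(reversed(frames)))
    print("%s %d" % (line, 1016))
    # -> bash;wait_woken;schedule_timeout;schedule 1016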
diff --git a/tools/old/offcputime.py b/tools/old/offcputime.py
new file mode 100755
index 0000000..b98e920
--- /dev/null
+++ b/tools/old/offcputime.py
@@ -0,0 +1,210 @@
+#!/usr/bin/python
+#
+# offcputime    Summarize off-CPU time by kernel stack trace
+#               For Linux, uses BCC, eBPF.
+#
+# USAGE: offcputime [-h] [-u] [-p PID] [-v] [-f] [duration]
+#
+# The current implementation uses an unrolled loop for x86_64, and was written
+# as a proof of concept. This implementation should be replaced in the future
+# with an appropriate bpf_ call, when available.
+#
+# Currently limited to a stack trace depth of 21 (maxdepth + 1).
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 13-Jan-2016   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+import signal
+
+# arguments
+examples = """examples:
+    ./offcputime             # trace off-CPU stack time until Ctrl-C
+    ./offcputime 5           # trace for 5 seconds only
+    ./offcputime -f 5        # 5 seconds, and output in folded format
+    ./offcputime -u          # don't include kernel threads (user only)
+    ./offcputime -p 185      # trace for PID 185 only
+"""
+parser = argparse.ArgumentParser(
+    description="Summarize off-CPU time by kernel stack trace",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-u", "--useronly", action="store_true",
+    help="user threads only (no kernel threads)")
+parser.add_argument("-p", "--pid",
+    help="trace this PID only")
+parser.add_argument("-v", "--verbose", action="store_true",
+    help="show raw addresses")
+parser.add_argument("-f", "--folded", action="store_true",
+    help="output folded format")
+parser.add_argument("duration", nargs="?", default=99999999,
+    help="duration of trace, in seconds")
+args = parser.parse_args()
+folded = args.folded
+duration = int(args.duration)
+debug = 0
+maxdepth = 20  # and MAXDEPTH
+if args.pid and args.useronly:
+    print("ERROR: use either -p or -u.")
+    exit()
+
+# signal handler
+def signal_ignore(signal, frame):
+    print()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+#define MAXDEPTH    20
+#define MINBLOCK_US 1
+
+struct key_t {
+    char name[TASK_COMM_LEN];
+    // Skip saving the ip
+    u64 ret[MAXDEPTH];
+};
+BPF_HASH(counts, struct key_t);
+BPF_HASH(start, u32);
+
+static u64 get_frame(u64 *bp) {
+    if (*bp) {
+        // The following stack walker is x86_64 specific
+        u64 ret = 0;
+        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
+            return 0;
+        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
+            *bp = 0;
+        if (ret < __START_KERNEL_map)
+            return 0;
+        return ret;
+    }
+    return 0;
+}
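+// The walker above follows saved frame pointers by hand: on x86_64, *bp
+// holds the caller's frame pointer and *bp+8 the return address, and any
+// address below __START_KERNEL_map cannot be kernel text, so it ends the
+// walk.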
+
+int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
+    u32 pid;
+    u64 ts, *tsp;
+
+    // record previous thread sleep time
+    if (FILTER) {
+        pid = prev->pid;
+        ts = bpf_ktime_get_ns();
+        start.update(&pid, &ts);
+    }
+
+    // calculate current thread's delta time
+    pid = bpf_get_current_pid_tgid();
+    tsp = start.lookup(&pid);
+    if (tsp == 0)
+        return 0;        // missed start or filtered
+    u64 delta = bpf_ktime_get_ns() - *tsp;
+    start.delete(&pid);
+    delta = delta / 1000;
+    if (delta < MINBLOCK_US)
+        return 0;
+
+    // create map key
+    u64 zero = 0, *val, bp = 0;
+    int depth = 0;
+    struct key_t key = {};
+    bpf_get_current_comm(&key.name, sizeof(key.name));
+    bp = ctx->bp;
+
+    // unrolled loop (MAXDEPTH):
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+
+out:
+    val = counts.lookup_or_init(&key, &zero);
+    (*val) += delta;
+    return 0;
+}
+"""
+if args.pid:
+    filter = 'pid == %s' % args.pid
+elif args.useronly:
+    filter = '!(prev->flags & PF_KTHREAD)'
+else:
+    filter = '1'
+bpf_text = bpf_text.replace('FILTER', filter)
+if debug:
+    print(bpf_text)
+
+# initialize BPF
+b = BPF(text=bpf_text)
+b.attach_kprobe(event="finish_task_switch", fn_name="oncpu")
+matched = b.num_open_kprobes()
+if matched == 0:
+    print("0 functions traced. Exiting.")
+    exit()
+
+# header
+if not folded:
+    print("Tracing off-CPU time (us) by kernel stack", end="")
+    if duration < 99999999:
+        print(" for %d secs." % duration)
+    else:
+        print("... Hit Ctrl-C to end.")
+
+# output
+while (1):
+    try:
+        sleep(duration)
+    except KeyboardInterrupt:
+        # as cleanup can take many seconds, trap Ctrl-C:
+        signal.signal(signal.SIGINT, signal_ignore)
+
+    if not folded:
+        print()
+    counts = b.get_table("counts")
+    for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
+        if folded:
+            # print folded stack output
+            line = k.name.decode() + ";"
+            for i in reversed(range(0, maxdepth)):
+                if k.ret[i] == 0:
+                    continue
+                line = line + b.ksym(k.ret[i])
+                if i != 0:
+                    line = line + ";"
+            print("%s %d" % (line, v.value))
+        else:
+            # print default multi-line stack output
+            for i in range(0, maxdepth):
+                if k.ret[i] == 0:
+                    break
+                print("    %-16x %s" % (k.ret[i],
+                    b.ksym(k.ret[i])))
+        print("    %-16s %s" % ("-", k.name))
+        print("        %d\n" % v.value)
+    counts.clear()
+
+    if not folded:
+        print("Detaching...")
+        exit()
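A closing note on the unrolled loop in this historical version: the eBPF
verifier rejects backward branches, so a plain C loop over the frames would
fail to load; before the stack-trace map type arrived in Linux 4.6, the walk
had to be repeated statement by statement. A hypothetical generator for such
a body (not part of the tool):

    # Sketch: emit the MAXDEPTH unrolled get_frame() statements seen above.
    MAXDEPTH = 20
    step = "    if (!(key.ret[depth++] = get_frame(&bp))) goto out;"
    print("\n".join([step] * MAXDEPTH))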