From: Vicent Marti
Date: Sun, 27 Mar 2016 16:39:18 +0000 (+0200)
Subject: stackcount: Resolve stacks using a StackTrace table
X-Git-Tag: v0.2.0~135^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b4ebed0dfe3894534ded7475ce1d6dfa119e7208;p=platform%2Fupstream%2Fbcc.git

stackcount: Resolve stacks using a StackTrace table

The changes to this script are minimal: the inline C probe has been
_significantly_ simplified, and should now perform better, since the stack
walk happens in native code inside the kernel thanks to the BPF_STACK_TRACE
table.

The output of the tool should be virtually identical, so the man page and
included examples have essentially no changes.
---

diff --git a/man/man8/stackcount.8 b/man/man8/stackcount.8
index dbc816f..ca22859 100644
--- a/man/man8/stackcount.8
+++ b/man/man8/stackcount.8
@@ -12,10 +12,8 @@ and their occurrence counts.
 The pattern is a string with optional '*' wildcards, similar to file globbing.
 If you'd prefer to use regular expressions, use the \-r option.
-The stack depth is currently limited to 10 (+1 for the current instruction
-pointer).
+This tool only works on Linux 4.6+. Stack traces are obtained using the new
+`BPF_STACK_TRACE` APIs.
-This currently only works on x86_64. Check for future versions.
 .SH REQUIREMENTS
 CONFIG_BPF and bcc.
 .SH OPTIONS
@@ -82,13 +80,12 @@ Only count stacks when PID 185 is on-CPU:
 This summarizes unique stack traces in-kernel for efficiency, allowing it to
 trace a higher rate of function calls than methods that post-process in user
 space. The stack trace data is only copied to user space when the output is
-printed, which usually only happens once. Given these techniques, I'd suspect
-that call rates of < 10,000/sec would incur negligible overhead (for this
-current version; future versions may improve this). Beyond that,
-there will be a point where the overhead is measurable, as this does add
-a number of instructions to each function call to walk and save stacks.
-Test before production use. You can also use funccount to get a handle on
-function call rates first.
+printed, which usually only happens once. The stack walking also happens in an
+optimized code path in the kernel, thanks to the new BPF_STACK_TRACE table
+APIs, which should be more efficient than the manual walker that older
+versions of this script embedded in the eBPF tracer. With this in mind, call
+rates of < 10,000/sec should incur negligible overhead. Test before production
+use. You can also use funccount to get a handle on function call rates first.
 .SH SOURCE
 This is from bcc.
 .IP
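
For reference, the new probe pattern distills to very little code. The
following is a minimal, self-contained sketch of the same BPF_STACK_TRACE +
get_stackid() + walk() flow that the diffs below adopt; the traced function
(ip_output, taken from the tool's own examples) and the 5-second window are
illustrative choices, not part of this commit:

    #!/usr/bin/python
    # Sketch: count kernel stacks for one function using BPF_STACK_TRACE.
    # Assumes Linux 4.6+ and a bcc build with stack-trace table support.
    from bcc import BPF
    from time import sleep

    b = BPF(text="""
    #include <uapi/linux/ptrace.h>

    BPF_HASH(counts, int);
    BPF_STACK_TRACE(stack_traces, 1024);

    int trace_count(struct pt_regs *ctx) {
        // the kernel walks and deduplicates the stack; we store only an id
        int key = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
        u64 zero = 0;
        u64 *val = counts.lookup_or_init(&key, &zero);
        (*val)++;
        return 0;
    }
    """)
    b.attach_kprobe(event="ip_output", fn_name="trace_count")
    sleep(5)                                # counts accumulate in-kernel
    for k, v in b["counts"].items():
        for addr in b["stack_traces"].walk(k.value):
            print("  %s" % b.ksym(addr))    # resolve address to symbol
        print("    %d" % v.value)
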
diff --git a/tools/old/stackcount.py b/tools/old/stackcount.py
new file mode 100755
index 0000000..7b4aa43
--- /dev/null
+++ b/tools/old/stackcount.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python
+#
+# stackcount    Count kernel function calls and their stack traces.
+#               For Linux, uses BCC, eBPF.
+#
+# USAGE: stackcount [-h] [-p PID] [-i INTERVAL] [-T] [-r] pattern
+#
+# The pattern is a string with optional '*' wildcards, similar to file
+# globbing. If you'd prefer to use regular expressions, use the -r option.
+#
+# The current implementation uses an unrolled loop for x86_64, and was written
+# as a proof of concept. This implementation should be replaced in the future
+# with an appropriate bpf_ call, when available.
+#
+# Currently limited to a stack trace depth of 11 (maxdepth + 1).
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 12-Jan-2016   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+import signal
+
+# arguments
+examples = """examples:
+    ./stackcount submit_bio         # count kernel stack traces for submit_bio
+    ./stackcount ip_output          # count kernel stack traces for ip_output
+    ./stackcount -s ip_output       # show symbol offsets
+    ./stackcount -sv ip_output      # show offsets and raw addresses (verbose)
+    ./stackcount 'tcp_send*'        # count stacks for funcs matching tcp_send*
+    ./stackcount -r '^tcp_send.*'   # same as above, using regular expressions
+    ./stackcount -Ti 5 ip_output    # output every 5 seconds, with timestamps
+    ./stackcount -p 185 ip_output   # count ip_output stacks for PID 185 only
+"""
+parser = argparse.ArgumentParser(
+    description="Count kernel function calls and their stack traces",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-p", "--pid",
+    help="trace this PID only")
+parser.add_argument("-i", "--interval", default=99999999,
+    help="summary interval, seconds")
+parser.add_argument("-T", "--timestamp", action="store_true",
+    help="include timestamp on output")
+parser.add_argument("-r", "--regexp", action="store_true",
+    help="use regular expressions. Default is \"*\" wildcards only.")
+parser.add_argument("-s", "--offset", action="store_true",
+    help="show address offsets")
+parser.add_argument("-v", "--verbose", action="store_true",
+    help="show raw addresses")
+parser.add_argument("pattern",
+    help="search expression for kernel functions")
+args = parser.parse_args()
+pattern = args.pattern
+if not args.regexp:
+    pattern = pattern.replace('*', '.*')
+    pattern = '^' + pattern + '$'
+offset = args.offset
+verbose = args.verbose
+debug = 0
+maxdepth = 10    # must match MAXDEPTH in the BPF text below
+
+# signal handler
+def signal_ignore(signal, frame):
+    print()
+
+# load BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+
+#define MAXDEPTH    10
+
+struct key_t {
+    u64 ip;
+    u64 ret[MAXDEPTH];
+};
+BPF_HASH(counts, struct key_t);
+
+static u64 get_frame(u64 *bp) {
+    if (*bp) {
+        // The following stack walker is x86_64 specific
+        u64 ret = 0;
+        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
+            return 0;
+        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
+            *bp = 0;
+        if (ret < __START_KERNEL_map)
+            return 0;
+        return ret;
+    }
+    return 0;
+}
+
+int trace_count(struct pt_regs *ctx) {
+    FILTER
+    struct key_t key = {};
+    u64 zero = 0, *val, bp = 0;
+    int depth = 0;
+
+    key.ip = ctx->ip;
+    bp = ctx->bp;
+
+    // unrolled loop, 10 (MAXDEPTH) frames deep:
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
+
+out:
+    val = counts.lookup_or_init(&key, &zero);
+    (*val)++;
+    return 0;
+}
+"""
+if args.pid:
+    bpf_text = bpf_text.replace('FILTER',
+        ('u32 pid; pid = bpf_get_current_pid_tgid(); ' +
+        'if (pid != %s) { return 0; }') % (args.pid))
+else:
+    bpf_text = bpf_text.replace('FILTER', '')
+if debug:
+    print(bpf_text)
+b = BPF(text=bpf_text)
+b.attach_kprobe(event_re=pattern, fn_name="trace_count")
+matched = b.num_open_kprobes()
+if matched == 0:
+    print("0 functions matched by \"%s\". Exiting." % args.pattern)
+    exit()
+
+# header
+print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
+    (matched, args.pattern))
+
+def print_frame(addr):
+    print("  ", end="")
+    if verbose:
+        print("%-16x " % addr, end="")
+    if offset:
+        print("%s" % b.ksymaddr(addr))
+    else:
+        print("%s" % b.ksym(addr))
+
+# output
+exiting = 0 if args.interval else 1
+while (1):
+    try:
+        sleep(int(args.interval))
+    except KeyboardInterrupt:
+        exiting = 1
+        # as cleanup can take many seconds, trap Ctrl-C:
+        signal.signal(signal.SIGINT, signal_ignore)
+
+    print()
+    if args.timestamp:
+        print("%-8s\n" % strftime("%H:%M:%S"), end="")
+
+    counts = b.get_table("counts")
+    for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
+        print_frame(k.ip)
+        for i in range(0, maxdepth):
+            if k.ret[i] == 0:
+                break
+            print_frame(k.ret[i])
+        print("    %d\n" % v.value)
+    counts.clear()
+
+    if exiting:
+        print("Detaching...")
+        exit()
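
A note on the unrolled loop in the old probe above: the eBPF verifier (at
least at the time) rejected backward jumps, so the walk cannot be written as
a loop and the same get_frame() line is pasted MAXDEPTH times instead. Had
this file stayed in use, one hypothetical cleanup (not part of this commit;
the UNROLLED_WALK placeholder is invented for illustration) would be to
generate those lines from Python:

    # Hypothetical: emit the unrolled walk instead of hand-pasting it.
    # MAXDEPTH must match the #define in the C text, which would contain
    # an UNROLLED_WALK marker in place of the ten pasted lines.
    MAXDEPTH = 10
    unrolled = "\n".join(
        "    if (!(key.ret[depth++] = get_frame(&bp))) goto out;"
        for _ in range(MAXDEPTH))
    bpf_text = bpf_text.replace("UNROLLED_WALK", unrolled)
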
diff --git a/tools/stackcount.py b/tools/stackcount.py
index 7b4aa43..d9cdb3e 100755
--- a/tools/stackcount.py
+++ b/tools/stackcount.py
@@ -72,52 +72,14 @@ def signal_ignore(signal, frame):
 bpf_text = """
 #include <uapi/linux/ptrace.h>
 
-#define MAXDEPTH    10
-
-struct key_t {
-    u64 ip;
-    u64 ret[MAXDEPTH];
-};
-BPF_HASH(counts, struct key_t);
-
-static u64 get_frame(u64 *bp) {
-    if (*bp) {
-        // The following stack walker is x86_64 specific
-        u64 ret = 0;
-        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
-            return 0;
-        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
-            *bp = 0;
-        if (ret < __START_KERNEL_map)
-            return 0;
-        return ret;
-    }
-    return 0;
-}
+BPF_HASH(counts, int);
+BPF_STACK_TRACE(stack_traces, 1024);
 
 int trace_count(struct pt_regs *ctx) {
     FILTER
-    struct key_t key = {};
-    u64 zero = 0, *val, bp = 0;
-    int depth = 0;
-
-    key.ip = ctx->ip;
-    bp = ctx->bp;
-
-    // unrolled loop, 10 (MAXDEPTH) frames deep:
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
-
-out:
-    val = counts.lookup_or_init(&key, &zero);
+    int key = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
+    u64 zero = 0;
+    u64 *val = counts.lookup_or_init(&key, &zero);
     (*val)++;
     return 0;
 }
@@ -164,13 +126,11 @@ while (1):
     if args.timestamp:
         print("%-8s\n" % strftime("%H:%M:%S"), end="")
 
-    counts = b.get_table("counts")
+    counts = b["counts"]
+    stack_traces = b["stack_traces"]
     for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
-        print_frame(k.ip)
-        for i in range(0, maxdepth):
-            if k.ret[i] == 0:
-                break
-            print_frame(k.ret[i])
+        for addr in stack_traces.walk(k.value):
+            print_frame(addr)
         print("    %d\n" % v.value)
     counts.clear()
 
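
One caveat the new probe body leaves unhandled (an observation about the
underlying bpf_get_stackid() helper, not something this diff changes):
get_stackid() returns a negative error code when the walk fails or when the
1024-slot stack_traces table fills up, and that negative value would then be
used as a key in the counts hash. A more defensive variant of the probe,
sketched here with the same table names, would drop those samples:

    defensive_probe = """
    int trace_count(struct pt_regs *ctx) {
        FILTER
        int key = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
        if (key < 0)
            return 0;   // walk failed or stack_traces is full; drop sample
        u64 zero = 0;
        u64 *val = counts.lookup_or_init(&key, &zero);
        (*val)++;
        return 0;
    }
    """
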
diff --git a/tools/stackcount_example.txt b/tools/stackcount_example.txt
index 4102238..01b5b8a 100644
--- a/tools/stackcount_example.txt
+++ b/tools/stackcount_example.txt
@@ -82,9 +82,6 @@ tracing.
 It can be useful to trace the path to submit_bio to explain unusual rates of
 disk IOPS. These could have in-kernel origins (eg, background scrub).
 
-This version of stackcount truncates stacks to 10 levels deep (plus 1 for
-the traced function, so 11).
-
 As another example, here are the code paths that led to ip_output(), which
 sends a packet at the IP level: