From 23e72b89c9018a653f3628ec87f1dfc8d507863c Mon Sep 17 00:00:00 2001 From: Sasha Goldshtein Date: Tue, 17 Jan 2017 08:49:36 +0000 Subject: [PATCH] trace: Allow function signatures in uprobes and kprobes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit §`trace` now allows uprobes and kprobes to have function signatures, which means function parameters can be named and typed, rather than relying on the positional arg1, arg2, etc. arguments. This also enables structure field access, which is impossible with the unnamed arguments due to rewriter limitations. The example requested by @brendangregg, which now works, is the following: §Â``` PID TID COMM FUNC - 777 785 automount SyS_nanosleep sleep for 500000000 ns 777 785 automount SyS_nanosleep sleep for 500000000 ns 777 785 automount SyS_nanosleep sleep for 500000000 ns 777 785 automount SyS_nanosleep sleep for 500000000 ns ^C ``` --- man/man8/trace.8 | 12 +++++++++++- tools/trace.py | 29 +++++++++++++++++++++-------- tools/trace_example.txt | 23 ++++++++++++++++++++++- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/man/man8/trace.8 b/man/man8/trace.8 index 536bbb8..6824ce7 100644 --- a/man/man8/trace.8 +++ b/man/man8/trace.8 @@ -62,7 +62,7 @@ information. See PROBE SYNTAX below. .SH PROBE SYNTAX The general probe syntax is as follows: -.B [{p,r}]:[library]:function [(predicate)] ["format string"[, arguments]] +.B [{p,r}]:[library]:function[(signature)] [(predicate)] ["format string"[, arguments]] .B {t:category:event,u:library:probe} [(predicate)] ["format string"[, arguments]] .TP @@ -84,6 +84,12 @@ The tracepoint category. For example, "sched" or "irq". .B function The function to probe. .TP +.B signature +The optional signature of the function to probe. This can make it easier to +access the function's arguments, instead of using the "arg1", "arg2" etc. +argument specifiers. For example, "(struct timespec *ts)" in the signature +position lets you use "ts" in the filter or print expressions. +.TP .B event The tracepoint event. For example, "block_rq_complete". .TP @@ -159,6 +165,10 @@ Trace the block:block_rq_complete tracepoint and print the number of sectors com Trace the pthread_create USDT probe from the pthread library and print the address of the thread's start function: # .B trace 'u:pthread:pthread_create """start addr = %llx"", arg3' +.TP +Trace the nanosleep system call and print the sleep duration in nanoseconds: +# +.B trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec' .SH SOURCE This is from bcc. .IP diff --git a/tools/trace.py b/tools/trace.py index 8ac9201..1a9e3d2 100755 --- a/tools/trace.py +++ b/tools/trace.py @@ -93,15 +93,24 @@ class Probe(object): def _parse_probe(self): text = self.raw_probe - # Everything until the first space is the probe specifier - first_space = text.find(' ') - spec = text[:first_space] if first_space >= 0 else text + # There might be a function signature preceding the actual + # filter/print part, or not. Find the probe specifier first -- + # it ends with either a space or an open paren ( for the + # function signature part. + # opt. signature + # probespec | rest + # --------- ---------- -- + (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', + text).groups() + self._parse_spec(spec) - if first_space >= 0: - text = text[first_space:].lstrip() - else: - text = "" + self.signature = sig[1:-1] if sig else None # remove the parens + if self.signature and self.probe_type in ['u', 't']: + self._bail("USDT and tracepoint probes can't have " + + "a function signature; use arg1, arg2, " + + "... instead") + text = rest.lstrip() # If we now have a (, wait for the balanced closing ) and that # will be the predicate self.filter = None @@ -394,6 +403,8 @@ BPF_PERF_OUTPUT(%s); prefix = "" signature = "struct pt_regs *ctx" + if self.signature: + signature += ", " + self.signature data_fields = "" for i, expr in enumerate(self.values): @@ -559,7 +570,7 @@ trace 'do_sys_open "%s", arg2' Trace the open syscall and print the filename being opened trace 'sys_read (arg3 > 20000) "read %d bytes", arg3' Trace the read syscall and print a message for reads >20000 bytes -trace 'r::do_sys_return "%llx", retval' +trace 'r::do_sys_open "%llx", retval' Trace the return from the open syscall and print the return value trace 'c:open (arg2 == 42) "%s %d", arg1, arg2' Trace the open() call from libc only if the flags (arg2) argument is 42 @@ -575,6 +586,8 @@ trace 't:block:block_rq_complete "sectors=%d", args->nr_sector' Trace the block_rq_complete kernel tracepoint and print # of tx sectors trace 'u:pthread:pthread_create (arg4 != 0)' Trace the USDT probe pthread_create when its 4th argument is non-zero +trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec' + Trace the nanosleep syscall and print the sleep duration in ns """ def __init__(self): diff --git a/tools/trace_example.txt b/tools/trace_example.txt index 46dc843..b392677 100644 --- a/tools/trace_example.txt +++ b/tools/trace_example.txt @@ -146,6 +146,25 @@ TIME PID COMM FUNC - ^C +In the preceding example, as well as in many others, readability may be +improved by providing the function's signature, which names the arguments and +lets you access structure sub-fields, which is hard with the "arg1", "arg2" +convention. For example: + +# trace 'p:c:open(char *filename) "opening %s", filename' +PID TID COMM FUNC - +17507 17507 cat open opening FAQ.txt +^C + +# trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec' +PID TID COMM FUNC - +777 785 automount SyS_nanosleep sleep for 500000000 ns +777 785 automount SyS_nanosleep sleep for 500000000 ns +777 785 automount SyS_nanosleep sleep for 500000000 ns +777 785 automount SyS_nanosleep sleep for 500000000 ns +^C + + As a final example, let's trace open syscalls for a specific process. By default, tracing is system-wide, but the -p switch overrides this: @@ -205,7 +224,7 @@ trace 'do_sys_open "%s", arg2' Trace the open syscall and print the filename being opened trace 'sys_read (arg3 > 20000) "read %d bytes", arg3' Trace the read syscall and print a message for reads >20000 bytes -trace 'r::do_sys_return "%llx", retval' +trace 'r::do_sys_open "%llx", retval' Trace the return from the open syscall and print the return value trace 'c:open (arg2 == 42) "%s %d", arg1, arg2' Trace the open() call from libc only if the flags (arg2) argument is 42 @@ -221,3 +240,5 @@ trace 't:block:block_rq_complete "sectors=%d", args->nr_sector' Trace the block_rq_complete kernel tracepoint and print # of tx sectors trace 'u:pthread:pthread_create (arg4 != 0)' Trace the USDT probe pthread_create when its 4th argument is non-zero +trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec' + Trace the nanosleep syscall and print the sleep duration in ns -- 2.7.4