#
# usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S] [-c cgroup_path]
# [-M MAX_EVENTS] [-s SYMBOLFILES] [-T] [-t] [-K] [-U] [-a] [-I header]
+# [-A]
# probe [probe ...]
#
# Licensed under the Apache License, Version 2.0 (the "License")
uid = -1
page_cnt = None
build_id_enabled = False
+ aggregate = False
+ symcount = {}
@classmethod
def configure(cls, args):
cls.page_cnt = args.buffer_pages
cls.bin_cmp = args.bin_cmp
cls.build_id_enabled = args.sym_file_list is not None
+ cls.aggregate = args.aggregate
+ if cls.aggregate and cls.max_events is None:
+ raise ValueError("-M/--max-events should be specified"
+ " with -A/--aggregate")
def __init__(self, probe, string_size, kernel_stack, user_stack,
cgroup_map_name, name, msg_filter):
else: # self.probe_type == 't'
return self.tp_event
- def print_stack(self, bpf, stack_id, tgid):
+ def _stack_to_string(self, bpf, stack_id, tgid):
if stack_id < 0:
- print(" %d" % stack_id)
- return
+ return (" %d" % stack_id)
+ stackstr = ''
stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
for addr in stack:
- print(" ", end="")
+ stackstr += ' '
if Probe.print_address:
- print("%16x " % addr, end="")
- print("%s" % (bpf.sym(addr, tgid,
- show_module=True, show_offset=True)))
+ stackstr += ("%16x " % addr)
+ symstr = bpf.sym(addr, tgid, show_module=True, show_offset=True)
+ stackstr += ('%s\n' % (symstr.decode('utf-8')))
+
+ return stackstr
def _format_message(self, bpf, tgid, values):
# Replace each %K with kernel sym and %U with user sym in tgid
show_module=True, show_offset=True)
return self.python_format % tuple(values)
+ def print_aggregate_events(self):
+ for k, v in sorted(self.symcount.items(), key=lambda item: \
+ item[1], reverse=True):
+ print("%s-->COUNT %d\n\n" % (k, v), end="")
+
def print_event(self, bpf, cpu, data, size):
# Cast as the generated structure type and display
# according to the format string in the probe.
msg = self._format_message(bpf, event.tgid, values)
if self.msg_filter and self.msg_filter not in msg:
return
+ eventstr = ''
if Probe.print_time:
time = strftime("%H:%M:%S") if Probe.use_localtime else \
Probe._time_off_str(event.timestamp_ns)
if Probe.print_unix_timestamp:
- print("%-17s " % time[:17], end="")
+ eventstr += ("%-17s " % time[:17])
else:
- print("%-8s " % time[:8], end="")
+ eventstr += ("%-8s " % time[:8])
if Probe.print_cpu:
- print("%-3s " % event.cpu, end="")
- print("%-7d %-7d %-15s %-16s %s" %
+ eventstr += ("%-3s " % event.cpu)
+ eventstr += ("%-7d %-7d %-15s %-16s %s\n" %
(event.tgid, event.pid,
event.comm.decode('utf-8', 'replace'),
self._display_function(), msg))
if self.kernel_stack:
- self.print_stack(bpf, event.kernel_stack_id, -1)
+ eventstr += self._stack_to_string(bpf, event.kernel_stack_id, -1)
if self.user_stack:
- self.print_stack(bpf, event.user_stack_id, event.tgid)
- if self.user_stack or self.kernel_stack:
+ eventstr += self._stack_to_string(bpf, event.user_stack_id, event.tgid)
+
+ if self.aggregate is False:
+ print(eventstr, end="")
+ if self.kernel_stack or self.user_stack:
print("")
+ else:
+ if eventstr in self.symcount:
+ self.symcount[eventstr] += 1
+ else:
+ self.symcount[eventstr] = 1
Probe.event_count += 1
if Probe.max_events is not None and \
Probe.event_count >= Probe.max_events:
- exit()
- sys.stdout.flush()
+ if self.aggregate:
+ self.print_aggregate_events()
+ sys.stdout.flush()
+ exit()
def attach(self, bpf, verbose):
if len(self.library) == 0:
trace kfree_skb+0x12
Trace the kfree_skb kernel function after the instruction on the 0x12 offset
trace 'do_sys_open "%s", arg2@user'
- Trace the open syscall and print the filename. being opened @user is
+ Trace the open syscall and print the filename being opened @user is
added to arg2 in kprobes to ensure that char * should be copied from
the userspace stack to the bpf stack. If not specified, previous
behaviour is expected.
to 53 (DNS; 13568 in big endian order)
trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
Trace the number of users accessing the file system of the current task
+trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
+ Trace inet_pton system call and use the specified libraries/executables for
+ symbol resolution.
"""
def __init__(self):
"as either full path, "
"or relative to current working directory, "
"or relative to default kernel header search path")
+ parser.add_argument("-A", "--aggregate", action="store_true",
+ help="aggregate amount of each trace")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
self.args = parser.parse_args()
need to do that here because `struct timespec` is used internally by the tool,
so it always includes this header file.
+To aggregate amount of trace, you need specify -A with -M EVENTS. A typical
+example:
+1, if we find that the sys CPU utilization is higher by 'top' command
+2, then find that the timer interrupt is more normal by 'irqtop' command
+3, to confirm kernel timer setting frequence by 'funccount -i 1 clockevents_program_event'
+4, to trace timer setting by 'trace clockevents_program_event -K -A -M 1000'
+
+1294576 1294584 CPU 0/KVM clockevents_program_event
+ clockevents_program_event+0x1 [kernel]
+ hrtimer_start_range_ns+0x209 [kernel]
+ start_sw_timer+0x173 [kvm]
+ restart_apic_timer+0x6c [kvm]
+ kvm_set_msr_common+0x442 [kvm]
+ __kvm_set_msr+0xa2 [kvm]
+ kvm_emulate_wrmsr+0x36 [kvm]
+ vcpu_enter_guest+0x326 [kvm]
+ kvm_arch_vcpu_ioctl_run+0xcc [kvm]
+ kvm_vcpu_ioctl+0x22f [kvm]
+ do_vfs_ioctl+0xa1 [kernel]
+ ksys_ioctl+0x60 [kernel]
+ __x64_sys_ioctl+0x16 [kernel]
+ do_syscall_64+0x59 [kernel]
+ entry_SYSCALL_64_after_hwframe+0x44 [kernel]
+-->COUNT 271
+...
+So we can know that 271 timer setting in recent 1000(~27%).
As a final example, let's trace open syscalls for a specific process. By
default, tracing is system-wide, but the -p switch overrides this:
as either full path, or relative to current working
directory, or relative to default kernel header search
path
+ -A, --aggregate aggregate amount of each trace
EXAMPLES:
trace kfree_skb+0x12
Trace the kfree_skb kernel function after the instruction on the 0x12 offset
trace 'do_sys_open "%s", arg2@user'
- Trace the open syscall and print the filename being opened. @user is
+ Trace the open syscall and print the filename being opened @user is
added to arg2 in kprobes to ensure that char * should be copied from
the userspace stack to the bpf stack. If not specified, previous
behaviour is expected.
+
trace 'do_sys_open "%s", arg2@user' -n main
Trace the open syscall and only print event that process names containing "main"
trace 'do_sys_open "%s", arg2@user' --uid 1001
Trace the block_rq_complete kernel tracepoint and print # of tx sectors
trace 'u:pthread:pthread_create (arg4 != 0)'
Trace the USDT probe pthread_create when its 4th argument is non-zero
+trace 'u:pthread:libpthread:pthread_create (arg4 != 0)'
+ Ditto, but the provider name "libpthread" is specified.
trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec'
Trace the nanosleep syscall and print the sleep duration in ns
trace -c /sys/fs/cgroup/system.slice/workload.service '__x64_sys_nanosleep' '__x64_sys_clone'
in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
package. So this command needs to run at the kernel source tree root directory
so that the added header file can be found by the compiler.
-trace -I 'net/sock.h' \\
+trace -I 'net/sock.h' \
'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)'
Trace udpv6 sendmsg calls only if socket's destination port is equal
to 53 (DNS; 13568 in big endian order)
trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
Trace inet_pton system call and use the specified libraries/executables for
symbol resolution.
-"