1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2021 Google LLC.
4 * Based on klockstat from BCC by Jiri Olsa and others
5 * 2021-10-26 Barret Rhoden Created this.
7 /* Differences from BCC python tool:
8 * - can specify a lock by ksym name, using '-L'
9 * - tracks whichever task had the max time for acquire and hold, outputted
10 * when '-s' > 1 (otherwise it's cluttered).
11 * - does not reset stats each interval by default. Can request with -R.
24 #include <sys/param.h>
26 #include <bpf/libbpf.h>
28 #include "klockstat.h"
29 #include "klockstat.skel.h"
30 #include "trace_helpers.h"
32 #define warn(...) fprintf(stderr, __VA_ARGS__)
43 static struct prog_env {
48 unsigned int nr_locks;
49 unsigned int nr_stack_entries;
50 unsigned int sort_acq;
51 unsigned int sort_hld;
52 unsigned int duration;
53 unsigned int interval;
54 unsigned int iterations;
59 .nr_stack_entries = 1,
60 .sort_acq = SORT_ACQ_MAX,
61 .sort_hld = SORT_HLD_MAX,
63 .iterations = 99999999,
66 const char *argp_program_version = "klockstat 0.1";
67 const char *argp_program_bug_address =
68 "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
69 static const char args_doc[] = "FUNCTION";
70 static const char program_doc[] =
71 "Trace mutex lock acquisition and hold times, in nsec\n"
73 "Usage: klockstat [-hRT] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
74 " [-s NR_STACKS] [-S SORT] [-d DURATION] [-i INTERVAL]\n"
77 " klockstat # trace system wide until ctrl-c\n"
78 " klockstat -d 5 # trace for 5 seconds\n"
79 " klockstat -i 5 # print stats every 5 seconds\n"
80 " klockstat -p 181 # trace process 181 only\n"
81 " klockstat -t 181 # trace thread 181 only\n"
82 " klockstat -c pipe_ # print only for lock callers with 'pipe_'\n"
84 " klockstat -L cgroup_mutex # trace the cgroup_mutex lock only\n"
85 " klockstat -S acq_count # sort lock acquired results by acquire count\n"
86 " klockstat -S hld_total # sort lock held results by total held time\n"
87 " klockstat -S acq_count,hld_total # combination of above\n"
88 " klockstat -n 3 # display top 3 locks\n"
89 " klockstat -s 6 # display 6 stack entries per lock\n"
92 static const struct argp_option opts[] = {
93 { "pid", 'p', "PID", 0, "Filter by process ID" },
94 { "tid", 't', "TID", 0, "Filter by thread ID" },
96 { "caller", 'c', "FUNC", 0, "Filter by caller string prefix" },
97 { "lock", 'L', "LOCK", 0, "Filter by specific ksym lock name" },
99 { "locks", 'n', "NR_LOCKS", 0, "Number of locks to print" },
100 { "stacks", 's', "NR_STACKS", 0, "Number of stack entries to print per lock" },
101 { "sort", 'S', "SORT", 0, "Sort by field:\n acq_[max|total|count]\n hld_[max|total|count]" },
103 { "duration", 'd', "SECONDS", 0, "Duration to trace" },
104 { "interval", 'i', "SECONDS", 0, "Print interval" },
105 { "reset", 'R', NULL, 0, "Reset stats each interval" },
106 { "timestamp", 'T', NULL, 0, "Print timestamp" },
108 { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
112 static bool parse_one_sort(struct prog_env *env, const char *sort)
114 const char *field = sort + 4;
116 if (!strncmp(sort, "acq_", 4)) {
117 if (!strcmp(field, "max")) {
118 env->sort_acq = SORT_ACQ_MAX;
120 } else if (!strcmp(field, "total")) {
121 env->sort_acq = SORT_ACQ_TOTAL;
123 } else if (!strcmp(field, "count")) {
124 env->sort_acq = SORT_ACQ_COUNT;
127 } else if (!strncmp(sort, "hld_", 4)) {
128 if (!strcmp(field, "max")) {
129 env->sort_hld = SORT_HLD_MAX;
131 } else if (!strcmp(field, "total")) {
132 env->sort_hld = SORT_HLD_TOTAL;
134 } else if (!strcmp(field, "count")) {
135 env->sort_hld = SORT_HLD_COUNT;
143 static bool parse_sorts(struct prog_env *env, char *arg)
145 char *comma = strchr(arg, ',');
150 if (!parse_one_sort(env, comma))
153 return parse_one_sort(env, arg);
156 static error_t parse_arg(int key, char *arg, struct argp_state *state)
158 struct prog_env *env = state->input;
159 long duration, interval;
164 env->pid = strtol(arg, NULL, 10);
165 if (errno || env->pid <= 0) {
166 warn("Invalid PID: %s\n", arg);
172 env->tid = strtol(arg, NULL, 10);
173 if (errno || env->tid <= 0) {
174 warn("Invalid TID: %s\n", arg);
182 env->lock_name = arg;
186 env->nr_locks = strtol(arg, NULL, 10);
187 if (errno || env->nr_locks <= 0) {
188 warn("Invalid NR_LOCKS: %s\n", arg);
194 env->nr_stack_entries = strtol(arg, NULL, 10);
195 if (errno || env->nr_stack_entries <= 0) {
196 warn("Invalid NR_STACKS: %s\n", arg);
201 if (!parse_sorts(env, arg)) {
202 warn("Bad sort string: %s\n", arg);
208 duration = strtol(arg, NULL, 10);
209 if (errno || duration <= 0) {
210 warn("Invalid duration: %s\n", arg);
213 env->duration = duration;
217 interval = strtol(arg, NULL, 10);
218 if (errno || interval <= 0) {
219 warn("Invalid interval: %s\n", arg);
222 env->interval = interval;
228 env->timestamp = true;
231 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
235 if (env->interval > env->duration)
236 env->interval = env->duration;
237 env->iterations = env->duration / env->interval;
241 return ARGP_ERR_UNKNOWN;
249 uint64_t bt[PERF_MAX_STACK_DEPTH];
252 static bool caller_is_traced(struct ksyms *ksyms, uint64_t caller_pc)
254 const struct ksym *ksym;
258 ksym = ksyms__map_addr(ksyms, caller_pc);
261 return strncmp(env.caller, ksym->name, strlen(env.caller)) == 0;
264 static int larger_first(uint64_t x, uint64_t y)
273 static int sort_by_acq(const void *x, const void *y)
275 struct stack_stat *ss_x = *(struct stack_stat**)x;
276 struct stack_stat *ss_y = *(struct stack_stat**)y;
278 switch (env.sort_acq) {
280 return larger_first(ss_x->ls.acq_max_time,
281 ss_y->ls.acq_max_time);
283 return larger_first(ss_x->ls.acq_count,
286 return larger_first(ss_x->ls.acq_total_time,
287 ss_y->ls.acq_total_time);
290 warn("bad sort_acq %d\n", env.sort_acq);
294 static int sort_by_hld(const void *x, const void *y)
296 struct stack_stat *ss_x = *(struct stack_stat**)x;
297 struct stack_stat *ss_y = *(struct stack_stat**)y;
299 switch (env.sort_hld) {
301 return larger_first(ss_x->ls.hld_max_time,
302 ss_y->ls.hld_max_time);
304 return larger_first(ss_x->ls.hld_count,
307 return larger_first(ss_x->ls.hld_total_time,
308 ss_y->ls.hld_total_time);
311 warn("bad sort_hld %d\n", env.sort_hld);
315 static char *symname(struct ksyms *ksyms, uint64_t pc, char *buf, size_t n)
317 const struct ksym *ksym = ksyms__map_addr(ksyms, pc);
321 snprintf(buf, n, "%s+0x%lx", ksym->name, pc - ksym->addr);
325 static void print_acq_header(void)
327 printf("\n Caller Avg Spin Count Max Spin Total Spin\n");
330 static void print_acq_stat(struct ksyms *ksyms, struct stack_stat *ss,
331 int nr_stack_entries)
336 printf("%37s %9llu %8llu %10llu %12llu\n",
337 symname(ksyms, ss->bt[0], buf, sizeof(buf)),
338 ss->ls.acq_total_time / ss->ls.acq_count,
341 ss->ls.acq_total_time);
342 for (i = 1; i < nr_stack_entries; i++) {
345 printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
347 if (nr_stack_entries > 1)
348 printf(" Max PID %llu, COMM %s\n",
349 ss->ls.acq_max_id >> 32,
350 ss->ls.acq_max_comm);
353 static void print_hld_header(void)
355 printf("\n Caller Avg Hold Count Max Hold Total Hold\n");
358 static void print_hld_stat(struct ksyms *ksyms, struct stack_stat *ss,
359 int nr_stack_entries)
364 printf("%37s %9llu %8llu %10llu %12llu\n",
365 symname(ksyms, ss->bt[0], buf, sizeof(buf)),
366 ss->ls.hld_total_time / ss->ls.hld_count,
369 ss->ls.hld_total_time);
370 for (i = 1; i < nr_stack_entries; i++) {
373 printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
375 if (nr_stack_entries > 1)
376 printf(" Max PID %llu, COMM %s\n",
377 ss->ls.hld_max_id >> 32,
378 ss->ls.hld_max_comm);
381 static int print_stats(struct ksyms *ksyms, int stack_map, int stat_map)
383 struct stack_stat **stats, *ss;
386 uint32_t lookup_key = 0;
390 stats = calloc(stats_sz, sizeof(void *));
392 warn("Out of memory\n");
396 while (bpf_map_get_next_key(stat_map, &lookup_key, &stack_id) == 0) {
397 if (stat_idx == stats_sz) {
399 stats = reallocarray(stats, stats_sz, sizeof(void *));
401 warn("Out of memory\n");
405 ss = malloc(sizeof(struct stack_stat));
407 warn("Out of memory\n");
410 ss->stack_id = stack_id;
412 ret = bpf_map_lookup_and_delete_elem(stat_map,
417 ret = bpf_map_lookup_elem(stat_map, &stack_id, &ss->ls);
418 lookup_key = stack_id;
424 if (bpf_map_lookup_elem(stack_map, &stack_id, &ss->bt)) {
425 /* Can still report the results without a backtrace. */
426 warn("failed to lookup stack_id %u\n", stack_id);
428 if (!caller_is_traced(ksyms, ss->bt[0])) {
432 stats[stat_idx++] = ss;
435 qsort(stats, stat_idx, sizeof(void*), sort_by_acq);
436 for (i = 0; i < MIN(env.nr_locks, stat_idx); i++) {
437 if (i == 0 || env.nr_stack_entries > 1)
439 print_acq_stat(ksyms, stats[i],
440 MIN(env.nr_stack_entries, PERF_MAX_STACK_DEPTH));
443 qsort(stats, stat_idx, sizeof(void*), sort_by_hld);
444 for (i = 0; i < MIN(env.nr_locks, stat_idx); i++) {
445 if (i == 0 || env.nr_stack_entries > 1)
447 print_hld_stat(ksyms, stats[i],
448 MIN(env.nr_stack_entries, PERF_MAX_STACK_DEPTH));
451 for (i = 0; i < stat_idx; i++)
458 static void *get_lock_addr(struct ksyms *ksyms, const char *lock_name)
460 const struct ksym *ksym = ksyms__get_symbol(ksyms, lock_name);
462 return ksym ? (void*)ksym->addr : NULL;
465 static volatile bool exiting;
467 static void sig_hand(int signr)
472 static struct sigaction sigact = {.sa_handler = sig_hand};
474 int main(int argc, char **argv)
476 static const struct argp argp = {
479 .args_doc = args_doc,
482 struct klockstat_bpf *obj = NULL;
483 struct ksyms *ksyms = NULL;
488 void *lock_addr = NULL;
490 err = argp_parse(&argp, argc, argv, 0, NULL, &env);
494 sigaction(SIGINT, &sigact, 0);
496 libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
498 ksyms = ksyms__load();
500 warn("failed to load kallsyms\n");
505 lock_addr = get_lock_addr(ksyms, env.lock_name);
507 warn("failed to find lock %s\n", env.lock_name);
513 obj = klockstat_bpf__open();
515 warn("failed to open BPF object\n");
520 obj->rodata->targ_tgid = env.pid;
521 obj->rodata->targ_pid = env.tid;
522 obj->rodata->targ_lock = lock_addr;
524 err = klockstat_bpf__load(obj);
526 warn("failed to load BPF object\n");
529 err = klockstat_bpf__attach(obj);
531 warn("failed to attach BPF object\n");
535 printf("Tracing mutex lock events... Hit Ctrl-C to end\n");
537 for (i = 0; i < env.iterations && !exiting; i++) {
544 strftime(ts, sizeof(ts), "%H:%M:%S", tm);
545 printf("%-8s\n", ts);
548 if (print_stats(ksyms, bpf_map__fd(obj->maps.stack_map),
549 bpf_map__fd(obj->maps.stat_map))) {
550 warn("print_stats error, aborting.\n");
555 printf("Exiting trace of mutex locks\n");
558 klockstat_bpf__destroy(obj);