From: Barret Rhoden
Date: Tue, 2 Nov 2021 21:05:35 +0000 (-0400)
Subject: libbpf-tools: add klockstat
X-Git-Tag: v0.24.0~15^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=66a86795000a913b787f7ae4cd7b9b1f464bdd5e;p=platform%2Fupstream%2Fbcc.git

libbpf-tools: add klockstat

This is a port of BCC's klockstat.

Differences from BCC:
- can specify a lock by ksym name, using -L
- tracks whichever task had the max time for acquire and hold, output
  when -s > 1 (otherwise the output is cluttered)
- does not reset stats each interval by default; request that with -R

-------------

Usage: klockstat [-hRT] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]
                 [-s NR_STACKS] [-S SORT] [-d DURATION] [-i INTERVAL]

  -p, --pid=PID              Filter by process ID
  -t, --tid=TID              Filter by thread ID
  -c, --caller=FUNC          Filter by caller string prefix
  -L, --lock=LOCK            Filter by specific ksym lock name
  -n, --locks=NR_LOCKS       Number of locks to print
  -s, --stacks=NR_STACKS     Number of stack entries to print per lock
  -S, --sort=SORT            Sort by field:
                               acq_[max|total|count]
                               hld_[max|total|count]
  -d, --duration=SECONDS     Duration to trace
  -i, --interval=SECONDS     Print interval
  -R, --reset                Reset stats each interval
  -T, --timestamp            Print timestamp
  -?, --help                 Give this help list
      --usage                Give a short usage message
  -V, --version              Print program version

Mandatory or optional arguments to long options are also mandatory or
optional for any corresponding short options.

Examples:
  klockstat                         # trace system wide until ctrl-c
  klockstat -d 5                    # trace for 5 seconds
  klockstat -i 5                    # print stats every 5 seconds
  klockstat -p 181                  # trace process 181 only
  klockstat -t 181                  # trace thread 181 only
  klockstat -c pipe_                # print only for lock callers with 'pipe_'
                                    # prefix
  klockstat -L cgroup_mutex         # trace the cgroup_mutex lock only
  klockstat -S acq_count            # sort lock acquired results by acquire count
  klockstat -S hld_total            # sort lock held results by total held time
  klockstat -S acq_count,hld_total  # combination of above
  klockstat -n 3                    # display top 3 locks
  klockstat -s 6                    # display 6 stack entries per lock

-------------

Signed-off-by: Barret Rhoden
---

diff --git a/libbpf-tools/.gitignore b/libbpf-tools/.gitignore
index 956b8181..6cdc9f40 100644
--- a/libbpf-tools/.gitignore
+++ b/libbpf-tools/.gitignore
@@ -22,6 +22,7 @@
 /funclatency
 /gethostlatency
 /hardirqs
+/klockstat
 /ksnoop
 /llcstat
 /nfsdist
diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile
index 5f7a9295..3a932ba4 100644
--- a/libbpf-tools/Makefile
+++ b/libbpf-tools/Makefile
@@ -35,6 +35,7 @@ APPS = \
 	funclatency \
 	gethostlatency \
 	hardirqs \
+	klockstat \
 	ksnoop \
 	llcstat \
 	mountsnoop \
diff --git a/libbpf-tools/bits.bpf.h b/libbpf-tools/bits.bpf.h
index e1511c0b..a2b7bb98 100644
--- a/libbpf-tools/bits.bpf.h
+++ b/libbpf-tools/bits.bpf.h
@@ -2,6 +2,9 @@
 #ifndef __BITS_BPF_H
 #define __BITS_BPF_H
 
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = val)
+
 static __always_inline u64 log2(u32 v)
 {
 	u32 shift, r;
diff --git a/libbpf-tools/klockstat.bpf.c b/libbpf-tools/klockstat.bpf.c
new file mode 100644
index 00000000..eddf8b7e
--- /dev/null
+++ b/libbpf-tools/klockstat.bpf.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC.
+ *
+ * Based on klockstat from BCC by Jiri Olsa and others
+ * 2021-10-26   Barret Rhoden   Created this.
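+ *
+ * How it works: fentry/fexit programs on mutex_lock(), mutex_trylock(), and
+ * mutex_unlock() record per-acquisition timestamps in lockholder_map, keyed
+ * by (task, lock). On release, the spin (acquire) and hold intervals are
+ * accumulated into stat_map, keyed by the acquiring call stack.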
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "klockstat.h"
+#include "bits.bpf.h"
+
+const volatile pid_t targ_tgid = 0;
+const volatile pid_t targ_pid = 0;
+struct mutex *const volatile targ_lock = NULL;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, MAX_ENTRIES);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+} stack_map SEC(".maps");
+
+/*
+ * Uniquely identifies a task grabbing a particular lock; a task can only hold
+ * the same lock once (non-recursive mutexes).
+ */
+struct task_lock {
+	u64 task_id;
+	u64 lock_ptr;
+};
+
+struct lockholder_info {
+	s32 stack_id;
+	u64 task_id;
+	u64 try_at;
+	u64 acq_at;
+	u64 rel_at;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, struct task_lock);
+	__type(value, struct lockholder_info);
+} lockholder_map SEC(".maps");
+
+/*
+ * Keyed by stack_id.
+ *
+ * Multiple call sites may have the same underlying lock, but we only know the
+ * stats for a particular stack frame. Multiple tasks may have the same
+ * stackframe.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, s32);
+	__type(value, struct lock_stat);
+} stat_map SEC(".maps");
+
+static bool tracing_task(u64 task_id)
+{
+	u32 tgid = task_id >> 32;
+	u32 pid = task_id;
+
+	if (targ_tgid && targ_tgid != tgid)
+		return false;
+	if (targ_pid && targ_pid != pid)
+		return false;
+	return true;
+}
+
+static void lock_contended(void *ctx, struct mutex *lock)
+{
+	u64 task_id;
+	struct lockholder_info li[1] = {0};
+	struct task_lock tl = {};
+
+	if (targ_lock && targ_lock != lock)
+		return;
+	task_id = bpf_get_current_pid_tgid();
+	if (!tracing_task(task_id))
+		return;
+
+	li->task_id = task_id;
+	/*
+	 * Skip 4 frames, e.g.:
+	 *	__this_module+0x34ef
+	 *	__this_module+0x34ef
+	 *	__this_module+0x8c44
+	 *	mutex_lock+0x5
+	 *
+	 * Note: if you make major changes to this bpf program, double check
+	 * that you aren't skipping too many frames.
+	 */
+	li->stack_id = bpf_get_stackid(ctx, &stack_map,
+				       4 | BPF_F_FAST_STACK_CMP);
+	/* Legit failures include EEXIST */
+	if (li->stack_id < 0)
+		return;
+	li->try_at = bpf_ktime_get_ns();
+
+	tl.task_id = task_id;
+	tl.lock_ptr = (u64)lock;
+	bpf_map_update_elem(&lockholder_map, &tl, li, BPF_ANY);
+}
+
+static void lock_acquired(struct mutex *lock)
+{
+	u64 task_id;
+	struct lockholder_info *li;
+	struct task_lock tl = {};
+
+	if (targ_lock && targ_lock != lock)
+		return;
+	task_id = bpf_get_current_pid_tgid();
+	if (!tracing_task(task_id))
+		return;
+
+	tl.task_id = task_id;
+	tl.lock_ptr = (u64)lock;
+	li = bpf_map_lookup_elem(&lockholder_map, &tl);
+	if (!li)
+		return;
+
+	li->acq_at = bpf_ktime_get_ns();
+}
+
+static void account(struct lockholder_info *li)
+{
+	struct lock_stat *ls;
+	u64 delta;
+
+	/*
+	 * Multiple threads may have the same stack_id. Even though we are
+	 * holding the lock, dynamically allocated mutexes can have the same
+	 * callgraph but represent different locks. They will be accounted as
+	 * the same lock, which is what we want, but we need to use atomics to
+	 * avoid corruption, especially for the total_time variables.
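+	 * The counts and totals are updated with __sync_fetch_and_add(),
+	 * which the BPF backend emits as atomic adds. The max_* fields are
+	 * only accessed via READ_ONCE/WRITE_ONCE, so a racing thread can
+	 * clobber a concurrent maximum (see the note below); that is an
+	 * accepted trade-off to avoid taking a lock here.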
+	 */
+	ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
+	if (!ls) {
+		struct lock_stat fresh = {0};
+
+		bpf_map_update_elem(&stat_map, &li->stack_id, &fresh, BPF_ANY);
+		ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
+		if (!ls)
+			return;
+	}
+
+	delta = li->acq_at - li->try_at;
+	__sync_fetch_and_add(&ls->acq_count, 1);
+	__sync_fetch_and_add(&ls->acq_total_time, delta);
+	if (delta > READ_ONCE(ls->acq_max_time)) {
+		WRITE_ONCE(ls->acq_max_time, delta);
+		WRITE_ONCE(ls->acq_max_id, li->task_id);
+		/*
+		 * Potentially racy, if multiple threads think they are the max,
+		 * so you may get a clobbered write.
+		 */
+		bpf_get_current_comm(ls->acq_max_comm, TASK_COMM_LEN);
+	}
+
+	delta = li->rel_at - li->acq_at;
+	__sync_fetch_and_add(&ls->hld_count, 1);
+	__sync_fetch_and_add(&ls->hld_total_time, delta);
+	if (delta > READ_ONCE(ls->hld_max_time)) {
+		WRITE_ONCE(ls->hld_max_time, delta);
+		WRITE_ONCE(ls->hld_max_id, li->task_id);
+		bpf_get_current_comm(ls->hld_max_comm, TASK_COMM_LEN);
+	}
+}
+
+static void lock_released(struct mutex *lock)
+{
+	u64 task_id;
+	struct lockholder_info *li;
+	struct task_lock tl = {};
+
+	if (targ_lock && targ_lock != lock)
+		return;
+	task_id = bpf_get_current_pid_tgid();
+	if (!tracing_task(task_id))
+		return;
+	tl.task_id = task_id;
+	tl.lock_ptr = (u64)lock;
+	li = bpf_map_lookup_elem(&lockholder_map, &tl);
+	if (!li)
+		return;
+
+	li->rel_at = bpf_ktime_get_ns();
+	account(li);
+
+	bpf_map_delete_elem(&lockholder_map, &tl);
+}
+
+SEC("fentry/mutex_lock")
+int BPF_PROG(mutex_lock, struct mutex *lock)
+{
+	lock_contended(ctx, lock);
+	return 0;
+}
+
+SEC("fexit/mutex_lock")
+int BPF_PROG(mutex_lock_exit, struct mutex *lock, long ret)
+{
+	lock_acquired(lock);
+	return 0;
+}
+
+SEC("fexit/mutex_trylock")
+int BPF_PROG(mutex_trylock_exit, struct mutex *lock, long ret)
+{
+	if (ret) {
+		lock_contended(ctx, lock);
+		lock_acquired(lock);
+	}
+	return 0;
+}
+
+SEC("fentry/mutex_unlock")
+int BPF_PROG(mutex_unlock, struct mutex *lock)
+{
+	lock_released(lock);
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/klockstat.c b/libbpf-tools/klockstat.c
new file mode 100644
index 00000000..330915dd
--- /dev/null
+++ b/libbpf-tools/klockstat.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Google LLC.
+ *
+ * Based on klockstat from BCC by Jiri Olsa and others
+ * 2021-10-26   Barret Rhoden   Created this.
+ */
+#include <argp.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "klockstat.h"
+#include "klockstat.skel.h"
+#include "trace_helpers.h"
+
+#define warn(...) fprintf(stderr, __VA_ARGS__)
+
+enum {
+	SORT_ACQ_MAX,
+	SORT_ACQ_COUNT,
+	SORT_ACQ_TOTAL,
+	SORT_HLD_MAX,
+	SORT_HLD_COUNT,
+	SORT_HLD_TOTAL,
+};
+
+static struct prog_env {
+	pid_t pid;
+	pid_t tid;
+	char *caller;
+	char *lock_name;
+	unsigned int nr_locks;
+	unsigned int nr_stack_entries;
+	unsigned int sort_acq;
+	unsigned int sort_hld;
+	unsigned int duration;
+	unsigned int interval;
+	unsigned int iterations;
+	bool reset;
+	bool timestamp;
+} env = {
+	.nr_locks = 99999999,
+	.nr_stack_entries = 1,
+	.sort_acq = SORT_ACQ_MAX,
+	.sort_hld = SORT_HLD_MAX,
+	.interval = 99999999,
+	.iterations = 99999999,
+};
+
+const char *argp_program_version = "klockstat 0.1";
+const char *argp_program_bug_address =
+	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+static const char args_doc[] = "FUNCTION";
+static const char program_doc[] =
+"Trace mutex lock acquisition and hold times, in nsec\n"
+"\n"
+"Usage: klockstat [-hRT] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
+"                 [-s NR_STACKS] [-S SORT] [-d DURATION] [-i INTERVAL]\n"
+"\v"
+"Examples:\n"
+"  klockstat                         # trace system wide until ctrl-c\n"
+"  klockstat -d 5                    # trace for 5 seconds\n"
+"  klockstat -i 5                    # print stats every 5 seconds\n"
+"  klockstat -p 181                  # trace process 181 only\n"
+"  klockstat -t 181                  # trace thread 181 only\n"
+"  klockstat -c pipe_                # print only for lock callers with 'pipe_'\n"
+"                                    # prefix\n"
+"  klockstat -L cgroup_mutex         # trace the cgroup_mutex lock only\n"
+"  klockstat -S acq_count            # sort lock acquired results by acquire count\n"
+"  klockstat -S hld_total            # sort lock held results by total held time\n"
+"  klockstat -S acq_count,hld_total  # combination of above\n"
+"  klockstat -n 3                    # display top 3 locks\n"
+"  klockstat -s 6                    # display 6 stack entries per lock\n"
+;
+
+static const struct argp_option opts[] = {
+	{ "pid", 'p', "PID", 0, "Filter by process ID" },
+	{ "tid", 't', "TID", 0, "Filter by thread ID" },
+	{ 0, 0, 0, 0, "" },
+	{ "caller", 'c', "FUNC", 0, "Filter by caller string prefix" },
+	{ "lock", 'L', "LOCK", 0, "Filter by specific ksym lock name" },
+	{ 0, 0, 0, 0, "" },
+	{ "locks", 'n', "NR_LOCKS", 0, "Number of locks to print" },
+	{ "stacks", 's', "NR_STACKS", 0, "Number of stack entries to print per lock" },
+	{ "sort", 'S', "SORT", 0, "Sort by field:\n  acq_[max|total|count]\n  hld_[max|total|count]" },
+	{ 0, 0, 0, 0, "" },
+	{ "duration", 'd', "SECONDS", 0, "Duration to trace" },
+	{ "interval", 'i', "SECONDS", 0, "Print interval" },
+	{ "reset", 'R', NULL, 0, "Reset stats each interval" },
+	{ "timestamp", 'T', NULL, 0, "Print timestamp" },
+
+	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+	{},
+};
+
+static bool parse_one_sort(struct prog_env *env, const char *sort)
+{
+	const char *field = sort + 4;
+
+	if (!strncmp(sort, "acq_", 4)) {
+		if (!strcmp(field, "max")) {
+			env->sort_acq = SORT_ACQ_MAX;
+			return true;
+		} else if (!strcmp(field, "total")) {
+			env->sort_acq = SORT_ACQ_TOTAL;
+			return true;
+		} else if (!strcmp(field, "count")) {
+			env->sort_acq = SORT_ACQ_COUNT;
+			return true;
+		}
+	} else if (!strncmp(sort, "hld_", 4)) {
+		if (!strcmp(field, "max")) {
+			env->sort_hld = SORT_HLD_MAX;
+			return true;
+		} else if (!strcmp(field, "total")) {
+			env->sort_hld = SORT_HLD_TOTAL;
+			return true;
+		} else if (!strcmp(field, "count")) {
+			env->sort_hld = SORT_HLD_COUNT;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool parse_sorts(struct prog_env *env, char *arg)
+{
+	char *comma = strchr(arg, ',');
+
+	if (comma) {
+		*comma = '\0';
+		comma++;
+		if (!parse_one_sort(env, comma))
+			return false;
+	}
+	return parse_one_sort(env, arg);
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	struct prog_env *env = state->input;
+	long duration, interval;
+
+	switch (key) {
+	case 'p':
+		errno = 0;
+		env->pid = strtol(arg, NULL, 10);
+		if (errno || env->pid <= 0) {
+			warn("Invalid PID: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 't':
+		errno = 0;
+		env->tid = strtol(arg, NULL, 10);
+		if (errno || env->tid <= 0) {
+			warn("Invalid TID: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'c':
+		env->caller = arg;
+		break;
+	case 'L':
+		env->lock_name = arg;
+		break;
+	case 'n':
+		errno = 0;
+		env->nr_locks = strtol(arg, NULL, 10);
+		if (errno || env->nr_locks <= 0) {
+			warn("Invalid NR_LOCKS: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 's':
+		errno = 0;
+		env->nr_stack_entries = strtol(arg, NULL, 10);
+		if (errno || env->nr_stack_entries <= 0) {
+			warn("Invalid NR_STACKS: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'S':
+		if (!parse_sorts(env, arg)) {
+			warn("Bad sort string: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'd':
+		errno = 0;
+		duration = strtol(arg, NULL, 10);
+		if (errno || duration <= 0) {
+			warn("Invalid duration: %s\n", arg);
+			argp_usage(state);
+		}
+		env->duration = duration;
+		break;
+	case 'i':
+		errno = 0;
+		interval = strtol(arg, NULL, 10);
+		if (errno || interval <= 0) {
+			warn("Invalid interval: %s\n", arg);
+			argp_usage(state);
+		}
+		env->interval = interval;
+		break;
+	case 'R':
+		env->reset = true;
+		break;
+	case 'T':
+		env->timestamp = true;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	case ARGP_KEY_END:
+		if (env->duration) {
+			if (env->interval > env->duration)
+				env->interval = env->duration;
+			env->iterations = env->duration / env->interval;
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+struct stack_stat {
+	uint32_t stack_id;
+	struct lock_stat ls;
+	uint64_t bt[PERF_MAX_STACK_DEPTH];
+};
+
+static bool caller_is_traced(struct ksyms *ksyms, uint64_t caller_pc)
+{
+	const struct ksym *ksym;
+
+	if (!env.caller)
+		return true;
+	ksym = ksyms__map_addr(ksyms, caller_pc);
+	if (!ksym)
+		return true;
+	return strncmp(env.caller, ksym->name, strlen(env.caller)) == 0;
+}
+
+static int larger_first(uint64_t x, uint64_t y)
+{
+	if (x > y)
+		return -1;
+	if (x == y)
+		return 0;
+	return 1;
+}
+
+static int sort_by_acq(const void *x, const void *y)
+{
+	struct stack_stat *ss_x = *(struct stack_stat**)x;
+	struct stack_stat *ss_y = *(struct stack_stat**)y;
+
+	switch (env.sort_acq) {
+	case SORT_ACQ_MAX:
+		return larger_first(ss_x->ls.acq_max_time,
+				    ss_y->ls.acq_max_time);
+	case SORT_ACQ_COUNT:
+		return larger_first(ss_x->ls.acq_count,
+				    ss_y->ls.acq_count);
+	case SORT_ACQ_TOTAL:
+		return larger_first(ss_x->ls.acq_total_time,
+				    ss_y->ls.acq_total_time);
+	}
+
+	warn("bad sort_acq %d\n", env.sort_acq);
+	return -1;
+}
+
+static int sort_by_hld(const void *x, const void *y)
+{
+	struct stack_stat *ss_x = *(struct stack_stat**)x;
+	struct stack_stat *ss_y = *(struct stack_stat**)y;
+
+	switch (env.sort_hld) {
+	case SORT_HLD_MAX:
+		return larger_first(ss_x->ls.hld_max_time,
+				    ss_y->ls.hld_max_time);
+	case SORT_HLD_COUNT:
+		return larger_first(ss_x->ls.hld_count,
+				    ss_y->ls.hld_count);
+	case SORT_HLD_TOTAL:
+		return larger_first(ss_x->ls.hld_total_time,
+				    ss_y->ls.hld_total_time);
+	}
+
+	warn("bad sort_hld %d\n", env.sort_hld);
+	return -1;
+}
+
+static char *symname(struct ksyms *ksyms, uint64_t pc, char *buf, size_t n)
+{
+	const struct ksym *ksym = ksyms__map_addr(ksyms, pc);
+
+	if (!ksym)
+		return "Unknown";
+	snprintf(buf, n, "%s+0x%lx", ksym->name, pc - ksym->addr);
+	return buf;
+}
+
+static void print_acq_header(void)
+{
+	printf("\n                               Caller  Avg Spin    Count   Max Spin   Total Spin\n");
+}
+
+static void print_acq_stat(struct ksyms *ksyms, struct stack_stat *ss,
+			   int nr_stack_entries)
+{
+	char buf[40];
+	int i;
+
+	printf("%37s %9llu %8llu %10llu %12llu\n",
+	       symname(ksyms, ss->bt[0], buf, sizeof(buf)),
+	       ss->ls.acq_total_time / ss->ls.acq_count,
+	       ss->ls.acq_count,
+	       ss->ls.acq_max_time,
+	       ss->ls.acq_total_time);
+	for (i = 1; i < nr_stack_entries; i++) {
+		if (!ss->bt[i])
+			break;
+		printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
+	}
+	if (nr_stack_entries > 1)
+		printf("                              Max PID %llu, COMM %s\n",
+		       ss->ls.acq_max_id >> 32,
+		       ss->ls.acq_max_comm);
+}
+
+static void print_hld_header(void)
+{
+	printf("\n                               Caller  Avg Hold    Count   Max Hold   Total Hold\n");
+}
+
+static void print_hld_stat(struct ksyms *ksyms, struct stack_stat *ss,
+			   int nr_stack_entries)
+{
+	char buf[40];
+	int i;
+
+	printf("%37s %9llu %8llu %10llu %12llu\n",
+	       symname(ksyms, ss->bt[0], buf, sizeof(buf)),
+	       ss->ls.hld_total_time / ss->ls.hld_count,
+	       ss->ls.hld_count,
+	       ss->ls.hld_max_time,
+	       ss->ls.hld_total_time);
+	for (i = 1; i < nr_stack_entries; i++) {
+		if (!ss->bt[i])
+			break;
+		printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
+	}
+	if (nr_stack_entries > 1)
+		printf("                              Max PID %llu, COMM %s\n",
+		       ss->ls.hld_max_id >> 32,
+		       ss->ls.hld_max_comm);
+}
+
+static int print_stats(struct ksyms *ksyms, int stack_map, int stat_map)
+{
+	struct stack_stat **stats, *ss;
+	size_t stat_idx = 0;
+	size_t stats_sz = 1;
+	uint32_t lookup_key = 0;
+	uint32_t stack_id;
+	int ret, i;
+
+	stats = calloc(stats_sz, sizeof(void *));
+	if (!stats) {
+		warn("Out of memory\n");
+		return -1;
+	}
+
+	while (bpf_map_get_next_key(stat_map, &lookup_key, &stack_id) == 0) {
+		if (stat_idx == stats_sz) {
+			stats_sz *= 2;
+			stats = reallocarray(stats, stats_sz, sizeof(void *));
+			if (!stats) {
+				warn("Out of memory\n");
+				return -1;
+			}
+		}
+		ss = malloc(sizeof(struct stack_stat));
+		if (!ss) {
+			warn("Out of memory\n");
+			return -1;
+		}
+		ss->stack_id = stack_id;
+		if (env.reset) {
+			ret = bpf_map_lookup_and_delete_elem(stat_map,
+							     &stack_id,
+							     &ss->ls);
+			lookup_key = 0;
+		} else {
+			ret = bpf_map_lookup_elem(stat_map, &stack_id, &ss->ls);
+			lookup_key = stack_id;
+		}
+		if (ret) {
+			free(ss);
+			continue;
+		}
+		if (bpf_map_lookup_elem(stack_map, &stack_id, &ss->bt)) {
+			/* Can still report the results without a backtrace.
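+			 * Every stack_id in stat_map came from a successful
+			 * bpf_get_stackid(), so a failed lookup here is
+			 * unexpected; warn but keep going.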
*/ + warn("failed to lookup stack_id %u\n", stack_id); + } + if (!caller_is_traced(ksyms, ss->bt[0])) { + free(ss); + continue; + } + stats[stat_idx++] = ss; + } + + qsort(stats, stat_idx, sizeof(void*), sort_by_acq); + for (i = 0; i < MIN(env.nr_locks, stat_idx); i++) { + if (i == 0 || env.nr_stack_entries > 1) + print_acq_header(); + print_acq_stat(ksyms, stats[i], + MIN(env.nr_stack_entries, PERF_MAX_STACK_DEPTH)); + } + + qsort(stats, stat_idx, sizeof(void*), sort_by_hld); + for (i = 0; i < MIN(env.nr_locks, stat_idx); i++) { + if (i == 0 || env.nr_stack_entries > 1) + print_hld_header(); + print_hld_stat(ksyms, stats[i], + MIN(env.nr_stack_entries, PERF_MAX_STACK_DEPTH)); + } + + for (i = 0; i < stat_idx; i++) + free(stats[i]); + free(stats); + + return 0; +} + +static void *get_lock_addr(struct ksyms *ksyms, const char *lock_name) +{ + const struct ksym *ksym = ksyms__get_symbol(ksyms, lock_name); + + return ksym ? (void*)ksym->addr : NULL; +} + +static volatile bool exiting; + +static void sig_hand(int signr) +{ + exiting = true; +} + +static struct sigaction sigact = {.sa_handler = sig_hand}; + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .args_doc = args_doc, + .doc = program_doc, + }; + struct klockstat_bpf *obj = NULL; + struct ksyms *ksyms = NULL; + int i, err; + struct tm *tm; + char ts[32]; + time_t t; + void *lock_addr = NULL; + + err = argp_parse(&argp, argc, argv, 0, NULL, &env); + if (err) + return err; + + sigaction(SIGINT, &sigact, 0); + + err = bump_memlock_rlimit(); + if (err) { + warn("failed to increase rlimit: %d\n", err); + err = 1; + goto cleanup; + } + + ksyms = ksyms__load(); + if (!ksyms) { + warn("failed to load kallsyms\n"); + err = 1; + goto cleanup; + } + if (env.lock_name) { + lock_addr = get_lock_addr(ksyms, env.lock_name); + if (!lock_addr) { + warn("failed to find lock %s\n", env.lock_name); + err = 1; + goto cleanup; + } + } + + obj = klockstat_bpf__open(); + if (!obj) { + warn("failed to open BPF object\n"); + err = 1; + goto cleanup; + } + + obj->rodata->targ_tgid = env.pid; + obj->rodata->targ_pid = env.tid; + obj->rodata->targ_lock = lock_addr; + + err = klockstat_bpf__load(obj); + if (err) { + warn("failed to load BPF object\n"); + return 1; + } + err = klockstat_bpf__attach(obj); + if (err) { + warn("failed to attach BPF object\n"); + goto cleanup; + } + + printf("Tracing mutex lock events... 
+
+	for (i = 0; i < env.iterations && !exiting; i++) {
+		sleep(env.interval);
+
+		printf("\n");
+		if (env.timestamp) {
+			time(&t);
+			tm = localtime(&t);
+			strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+			printf("%-8s\n", ts);
+		}
+
+		if (print_stats(ksyms, bpf_map__fd(obj->maps.stack_map),
+				bpf_map__fd(obj->maps.stat_map))) {
+			warn("print_stats error, aborting.\n");
+			break;
+		}
+	}
+
+	printf("Exiting trace of mutex locks\n");
+
+cleanup:
+	klockstat_bpf__destroy(obj);
+	ksyms__free(ksyms);
+
+	return err != 0;
+}
diff --git a/libbpf-tools/klockstat.h b/libbpf-tools/klockstat.h
new file mode 100644
index 00000000..01c9ad9a
--- /dev/null
+++ b/libbpf-tools/klockstat.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __KLOCKSTAT_H
+#define __KLOCKSTAT_H
+
+#define MAX_ENTRIES 102400
+#define TASK_COMM_LEN 16
+#define PERF_MAX_STACK_DEPTH 127
+
+struct lock_stat {
+	__u64 acq_count;
+	__u64 acq_total_time;
+	__u64 acq_max_time;
+	__u64 acq_max_id;
+	char acq_max_comm[TASK_COMM_LEN];
+	__u64 hld_count;
+	__u64 hld_total_time;
+	__u64 hld_max_time;
+	__u64 hld_max_id;
+	char hld_max_comm[TASK_COMM_LEN];
+};
+
+#endif /* __KLOCKSTAT_H */