--- /dev/null
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include "ksnoop.h"
+
+/* For kretprobes, the instruction pointer in the struct pt_regs context
+ * is the kretprobe trampoline, not the traced function. We recover the
+ * function's instruction pointer by pushing it onto a per-task function
+ * stack on entry and popping it on return.
+ *
+ * We could use bpf_get_func_ip() instead, but "stack mode" - where we
+ * specify functions "a", "b" and "c" and only want to see a trace if "a"
+ * calls "b" and "b" calls "c" - also uses this stack to determine whether
+ * trace data should be collected.
+ */
+#define FUNC_MAX_STACK_DEPTH 16
+
+#ifndef ENOSPC
+#define ENOSPC 28
+#endif
+
+struct func_stack {
+ __u64 task;
+ __u64 ips[FUNC_MAX_STACK_DEPTH];
+ __u8 stack_depth;
+};
+
+#define MAX_TASKS 2048
+
+/* function call stack hashed on a per-task key */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ /* function call stack for functions we are tracing */
+ __uint(max_entries, MAX_TASKS);
+ __type(key, __u64);
+ __type(value, struct func_stack);
+} ksnoop_func_stack SEC(".maps");
+
+/* per-cpu trace info hashed on function address */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, MAX_FUNC_TRACES);
+ __type(key, __u64);
+ __type(value, struct trace);
+} ksnoop_func_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(value_size, sizeof(int));
+ __uint(key_size, sizeof(int));
+} ksnoop_perf_map SEC(".maps");
+
+static void clear_trace(struct trace *trace)
+{
+ __builtin_memset(&trace->trace_data, 0, sizeof(trace->trace_data));
+ trace->data_flags = 0;
+ trace->buf_len = 0;
+}
+
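+/* Look up (creating if necessary) the per-task function stack, push the
+ * current ip on entry or pop it on return, and return the matching trace
+ * for the function, or NULL if tracing does not apply here.
+ */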
+static struct trace *get_trace(struct pt_regs *ctx, bool entry)
+{
+ __u8 stack_depth, last_stack_depth;
+ struct func_stack *func_stack;
+ __u64 ip, last_ip = 0, task;
+ struct trace *trace;
+
+ task = bpf_get_current_task();
+
+ func_stack = bpf_map_lookup_elem(&ksnoop_func_stack, &task);
+ if (!func_stack) {
+ struct func_stack new_stack = { .task = task };
+
+ bpf_map_update_elem(&ksnoop_func_stack, &task, &new_stack,
+ BPF_NOEXIST);
+ func_stack = bpf_map_lookup_elem(&ksnoop_func_stack, &task);
+ if (!func_stack)
+ return NULL;
+ }
+
+ stack_depth = func_stack->stack_depth;
+ if (stack_depth > FUNC_MAX_STACK_DEPTH)
+ return NULL;
+
+ if (entry) {
+ ip = KSNOOP_IP_FIX(PT_REGS_IP_CORE(ctx));
+ if (stack_depth >= FUNC_MAX_STACK_DEPTH - 1)
+ return NULL;
+ /* verifier doesn't like using "stack_depth - 1" as array index
+ * directly.
+ */
+ last_stack_depth = stack_depth - 1;
+ /* get address of last function we called */
+ if (last_stack_depth >= 0 &&
+ last_stack_depth < FUNC_MAX_STACK_DEPTH)
+ last_ip = func_stack->ips[last_stack_depth];
+ /* push ip onto stack. return will pop it. */
+ func_stack->ips[stack_depth++] = ip;
+ func_stack->stack_depth = stack_depth;
+ /* Rather than zeroing stack entries when popping, we zero the
+ * entry above the one just pushed. This preserves the history of
+ * functions we have returned from, which the return path uses to
+ * check which function we last returned from.
+ */
+ if (stack_depth < FUNC_MAX_STACK_DEPTH)
+ func_stack->ips[stack_depth] = 0;
+ } else {
+ if (stack_depth == 0 || stack_depth >= FUNC_MAX_STACK_DEPTH)
+ return NULL;
+ last_stack_depth = stack_depth;
+ /* get address of last function we returned from */
+ if (last_stack_depth >= 0 &&
+ last_stack_depth < FUNC_MAX_STACK_DEPTH)
+ last_ip = func_stack->ips[last_stack_depth];
+ if (stack_depth > 0)
+ stack_depth = stack_depth - 1;
+ /* retrieve ip from stack as IP in pt_regs is
+ * bpf kretprobe trampoline address.
+ */
+ if (stack_depth >= 0 && stack_depth < FUNC_MAX_STACK_DEPTH)
+ ip = func_stack->ips[stack_depth];
+ if (stack_depth >= 0 && stack_depth < FUNC_MAX_STACK_DEPTH)
+ func_stack->stack_depth = stack_depth;
+ }
+
+ trace = bpf_map_lookup_elem(&ksnoop_func_map, &ip);
+ if (!trace)
+ return NULL;
+
+ /* we may stash data on entry since predicates are a mix
+ * of entry/return; in such cases, trace->flags specifies
+ * KSNOOP_F_STASH, and we will output stashed data on return.
+ * If returning, make sure we don't clear our stashed data.
+ */
+ if (!entry && (trace->flags & KSNOOP_F_STASH)) {
+ if (!(trace->data_flags & KSNOOP_F_STASHED)) {
+ /* predicate must have failed */
+ return NULL;
+ }
+ /* skip clearing trace data */
+ } else {
+ /* clear trace data before starting. */
+ clear_trace(trace);
+ }
+
+ if (entry) {
+ /* if in stack mode, check if previous fn matches */
+ if (trace->prev_ip && trace->prev_ip != last_ip)
+ return NULL;
+ /* if tracing intermediate fn in stack of fns, stash data. */
+ if (trace->next_ip)
+ trace->data_flags |= KSNOOP_F_STASH;
+ /* we may stash data on entry since predicates are a mix
+ * of entry/return; in such cases, trace->flags specifies
+ * KSNOOP_F_STASH, and we will output stashed data on return.
+ */
+ if (trace->flags & KSNOOP_F_STASH)
+ trace->data_flags |= KSNOOP_F_STASH;
+ /* otherwise the data is output, since we have reached the
+ * last fn in the set of fns specified.
+ */
+ } else {
+ /* In stack mode, check if next fn matches the last fn
+ * we returned from; i.e. "a" called "b", and now
+ * we're at "a", was the last fn we returned from "b"?
+ * If so, stash data for later display (when we reach the
+ * first fn in the set of stack fns).
+ */
+ if (trace->next_ip && trace->next_ip != last_ip)
+ return NULL;
+ if (trace->prev_ip)
+ trace->data_flags |= KSNOOP_F_STASH;
+ /* If there is no "prev" function, i.e. we are at the
+ * first function in a set of stack functions, the trace
+ * info is shown (along with any stashed info associated
+ * with callers).
+ */
+ }
+ trace->task = task;
+ return trace;
+}
+
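+/* Send the accumulated trace data to the perf buffer, or, if we are only
+ * stashing values for later reporting, mark them stashed and return.
+ */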
+static void output_trace(struct pt_regs *ctx, struct trace *trace)
+{
+ __u16 trace_len;
+
+ if (trace->buf_len == 0)
+ goto skip;
+
+ /* we may be simply stashing values, and will report later */
+ if (trace->data_flags & KSNOOP_F_STASH) {
+ trace->data_flags &= ~KSNOOP_F_STASH;
+ trace->data_flags |= KSNOOP_F_STASHED;
+ return;
+ }
+ /* we may be outputting earlier stashed data */
+ if (trace->data_flags & KSNOOP_F_STASHED)
+ trace->data_flags &= ~KSNOOP_F_STASHED;
+
+ /* trim perf event size to only contain data we've recorded. */
+ trace_len = sizeof(*trace) + trace->buf_len - MAX_TRACE_BUF;
+
+ if (trace_len <= sizeof(*trace))
+ bpf_perf_event_output(ctx, &ksnoop_perf_map,
+ BPF_F_CURRENT_CPU,
+ trace, trace_len);
+skip:
+ clear_trace(trace);
+}
+
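+/* In stack mode, emit any stashed traces for the other functions on the
+ * per-task stack before emitting the current trace.
+ */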
+static void output_stashed_traces(struct pt_regs *ctx,
+ struct trace *currtrace,
+ bool entry)
+{
+ struct func_stack *func_stack;
+ struct trace *trace = NULL;
+ __u8 stack_depth, i;
+ __u64 task = 0;
+
+ task = bpf_get_current_task();
+ func_stack = bpf_map_lookup_elem(&ksnoop_func_stack, &task);
+ if (!func_stack)
+ return;
+
+ stack_depth = func_stack->stack_depth;
+
+ if (entry) {
+ /* iterate from bottom to top of stack, outputting stashed
+ * data we find. This corresponds to the set of functions
+ * we called before the current function.
+ */
+ for (i = 0;
+ i < func_stack->stack_depth - 1 && i < FUNC_MAX_STACK_DEPTH;
+ i++) {
+ trace = bpf_map_lookup_elem(&ksnoop_func_map,
+ &func_stack->ips[i]);
+ if (!trace || !(trace->data_flags & KSNOOP_F_STASHED))
+ break;
+ if (trace->task != task)
+ return;
+ output_trace(ctx, trace);
+ }
+ } else {
+ /* iterate from top to bottom of stack, outputting stashed
+ * data we find. This corresponds to the set of functions
+ * that returned prior to the current returning function.
+ */
+ for (i = FUNC_MAX_STACK_DEPTH - 1; i > 0; i--) {
+ __u64 ip;
+
+ ip = func_stack->ips[i];
+ if (!ip)
+ continue;
+ trace = bpf_map_lookup_elem(&ksnoop_func_map, &ip);
+ if (!trace || !(trace->data_flags & KSNOOP_F_STASHED))
+ break;
+ if (trace->task != task)
+ return;
+ output_trace(ctx, trace);
+ }
+ }
+ /* finally output the current trace info */
+ output_trace(ctx, currtrace);
+}
+
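+/* Fetch the raw argument or return value from pt_regs for the requested
+ * base argument.
+ */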
+static __u64 get_arg(struct pt_regs *ctx, enum arg argnum)
+{
+ switch (argnum) {
+ case KSNOOP_ARG1:
+ return PT_REGS_PARM1_CORE(ctx);
+ case KSNOOP_ARG2:
+ return PT_REGS_PARM2_CORE(ctx);
+ case KSNOOP_ARG3:
+ return PT_REGS_PARM3_CORE(ctx);
+ case KSNOOP_ARG4:
+ return PT_REGS_PARM4_CORE(ctx);
+ case KSNOOP_ARG5:
+ return PT_REGS_PARM5_CORE(ctx);
+ case KSNOOP_RETURN:
+ return PT_REGS_RC_CORE(ctx);
+ default:
+ return 0;
+ }
+}
+
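+/* Shared entry/return handler: find the matching trace, apply the pid
+ * filter, collect each requested value (dereferencing members where
+ * needed), evaluate predicates, and output or stash the results.
+ */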
+static int ksnoop(struct pt_regs *ctx, bool entry)
+{
+ void *data_ptr = NULL;
+ struct trace *trace;
+ struct func *func;
+ __u16 trace_len;
+ __u64 data, pg;
+ __u32 currpid;
+ int ret;
+ __u8 i;
+
+ trace = get_trace(ctx, entry);
+ if (!trace)
+ return 0;
+
+ func = &trace->func;
+
+ /* make sure we want events from this pid */
+ currpid = bpf_get_current_pid_tgid();
+ if (trace->filter_pid && trace->filter_pid != currpid)
+ return 0;
+ trace->pid = currpid;
+
+ trace->cpu = bpf_get_smp_processor_id();
+ trace->time = bpf_ktime_get_ns();
+
+ trace->data_flags &= ~(KSNOOP_F_ENTRY | KSNOOP_F_RETURN);
+ if (entry)
+ trace->data_flags |= KSNOOP_F_ENTRY;
+ else
+ trace->data_flags |= KSNOOP_F_RETURN;
+
+
+ for (i = 0; i < MAX_TRACES; i++) {
+ struct trace_data *currdata;
+ struct value *currtrace;
+ char *buf_offset = NULL;
+ __u32 tracesize;
+
+ currdata = &trace->trace_data[i];
+ currtrace = &trace->traces[i];
+
+ if ((entry && !base_arg_is_entry(currtrace->base_arg)) ||
+ (!entry && base_arg_is_entry(currtrace->base_arg)))
+ continue;
+
+ /* skip void (unused) trace arguments, ensuring not to
+ * skip "void *".
+ */
+ if (currtrace->type_id == 0 &&
+ !(currtrace->flags & KSNOOP_F_PTR))
+ continue;
+
+ data = get_arg(ctx, currtrace->base_arg);
+
+ /* look up member value and read into data field. */
+ if (currtrace->flags & KSNOOP_F_MEMBER) {
+ if (currtrace->offset)
+ data += currtrace->offset;
+
+ /* member is a pointer; read it in */
+ if (currtrace->flags & KSNOOP_F_PTR) {
+ void *dataptr = (void *)data;
+
+ ret = bpf_probe_read(&data, sizeof(data),
+ dataptr);
+ if (ret) {
+ currdata->err_type_id =
+ currtrace->type_id;
+ currdata->err = ret;
+ continue;
+ }
+ currdata->raw_value = data;
+ } else if (currtrace->size <=
+ sizeof(currdata->raw_value)) {
+ /* read member value for predicate comparison */
+ bpf_probe_read(&currdata->raw_value,
+ currtrace->size,
+ (void*)data);
+ }
+ } else {
+ currdata->raw_value = data;
+ }
+
+ /* simple predicate evaluation: if any predicate fails,
+ * skip all tracing for this function.
+ */
+ if (currtrace->flags & KSNOOP_F_PREDICATE_MASK) {
+ bool ok = false;
+
+ if (currtrace->flags & KSNOOP_F_PREDICATE_EQ &&
+ currdata->raw_value == currtrace->predicate_value)
+ ok = true;
+
+ if (currtrace->flags & KSNOOP_F_PREDICATE_NOTEQ &&
+ currdata->raw_value != currtrace->predicate_value)
+ ok = true;
+
+ if (currtrace->flags & KSNOOP_F_PREDICATE_GT &&
+ currdata->raw_value > currtrace->predicate_value)
+ ok = true;
+
+ if (currtrace->flags & KSNOOP_F_PREDICATE_LT &&
+ currdata->raw_value < currtrace->predicate_value)
+ ok = true;
+
+ if (!ok) {
+ clear_trace(trace);
+ return 0;
+ }
+ }
+
+ if (currtrace->flags & (KSNOOP_F_PTR | KSNOOP_F_MEMBER))
+ data_ptr = (void *)data;
+ else
+ data_ptr = &data;
+
+ if (trace->buf_len + MAX_TRACE_DATA >= MAX_TRACE_BUF)
+ break;
+
+ buf_offset = &trace->buf[trace->buf_len];
+ if (buf_offset > &trace->buf[MAX_TRACE_BUF]) {
+ currdata->err_type_id = currtrace->type_id;
+ currdata->err = -ENOSPC;
+ continue;
+ }
+ currdata->buf_offset = trace->buf_len;
+
+ tracesize = currtrace->size;
+ if (tracesize > MAX_TRACE_DATA)
+ tracesize = MAX_TRACE_DATA;
+ ret = bpf_probe_read(buf_offset, tracesize, data_ptr);
+ if (ret < 0) {
+ currdata->err_type_id = currtrace->type_id;
+ currdata->err = ret;
+ continue;
+ } else {
+ currdata->buf_len = tracesize;
+ trace->buf_len += tracesize;
+ }
+ }
+
+ /* show accumulated stashed traces (if any) */
+ if ((entry && trace->prev_ip && !trace->next_ip) ||
+ (!entry && trace->next_ip && !trace->prev_ip))
+ output_stashed_traces(ctx, trace, entry);
+ else
+ output_trace(ctx, trace);
+
+ return 0;
+}
+
+SEC("kprobe/foo")
+int kprobe_entry(struct pt_regs *ctx)
+{
+ return ksnoop(ctx, true);
+}
+
+SEC("kretprobe/foo")
+int kprobe_return(struct pt_regs *ctx)
+{
+ return ksnoop(ctx, false);
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
--- /dev/null
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <linux/bpf.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "ksnoop.h"
+#include "ksnoop.skel.h"
+
+#ifndef KSNOOP_VERSION
+#define KSNOOP_VERSION "0.1"
+#endif
+
+static struct btf *vmlinux_btf;
+static const char *bin_name;
+static int pages = PAGES_DEFAULT;
+
+enum log_level {
+ DEBUG,
+ WARN,
+ ERROR,
+};
+
+static enum log_level log_level = WARN;
+
+static __u32 filter_pid;
+static bool stack_mode;
+
+#define libbpf_errstr(val) strerror(-libbpf_get_error(val))
+
+static void __p(enum log_level level, char *level_str, char *fmt, ...)
+{
+ va_list ap;
+
+ if (level < log_level)
+ return;
+ va_start(ap, fmt);
+ fprintf(stderr, "%s: ", level_str);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ fflush(stderr);
+}
+
+#define p_err(fmt, ...) __p(ERROR, "Error", fmt, ##__VA_ARGS__)
+#define p_warn(fmt, ...) __p(WARN, "Warn", fmt, ##__VA_ARGS__)
+#define p_debug(fmt, ...) __p(DEBUG, "Debug", fmt, ##__VA_ARGS__)
+
+static int do_version(int argc, char **argv)
+{
+ printf("%s v%s\n", bin_name, KSNOOP_VERSION);
+ return 0;
+}
+
+static int cmd_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s [OPTIONS] [COMMAND | help] FUNC\n"
+ " COMMAND := { trace | info }\n"
+ " FUNC := { name | name(ARG[,ARG]*) }\n"
+ " ARG := { arg | arg [PRED] | arg->member [PRED] }\n"
+ " PRED := { == | != | > | >= | < | <= value }\n"
+ " OPTIONS := { {-d|--debug} | {-V|--version} |\n"
+ " {-p|--pid filter_pid}|\n"
+ " {-P|--pages nr_pages} }\n"
+ " {-s|--stack}\n",
+ bin_name);
+ fprintf(stderr,
+ "Examples:\n"
+ " %s info ip_send_skb\n"
+ " %s trace ip_send_skb\n"
+ " %s trace \"ip_send_skb(skb, return)\"\n"
+ " %s trace \"ip_send_skb(skb->sk, return)\"\n"
+ " %s trace \"ip_send_skb(skb->len > 128, skb)\"\n"
+ " %s trace -s udp_sendmsg ip_send_skb\n",
+ bin_name, bin_name, bin_name, bin_name, bin_name, bin_name);
+ return 0;
+}
+
+static void usage(void)
+{
+ cmd_help(0, NULL);
+ exit(1);
+}
+
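+/* Resolve a type id into a value description: strip qualifiers, flag
+ * pointers with KSNOOP_F_PTR and record the resolved type id and size.
+ */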
+static void type_to_value(struct btf *btf, char *name, __u32 type_id,
+ struct value *val)
+{
+ const struct btf_type *type;
+ __s32 id = type_id;
+
+ if (strlen(val->name) == 0) {
+ if (name)
+ strncpy(val->name, name,
+ sizeof(val->name) - 1);
+ else
+ val->name[0] = '\0';
+ }
+ do {
+ type = btf__type_by_id(btf, id);
+
+ switch (BTF_INFO_KIND(type->info)) {
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ id = type->type;
+ break;
+ case BTF_KIND_PTR:
+ val->flags |= KSNOOP_F_PTR;
+ id = type->type;
+ break;
+ default:
+ val->type_id = id;
+ goto done;
+ }
+ } while (id >= 0);
+
+ val->type_id = KSNOOP_ID_UNKNOWN;
+ return;
+done:
+ val->size = btf__resolve_size(btf, val->type_id);
+}
+
+static int member_to_value(struct btf *btf, const char *name, __u32 type_id,
+ struct value *val, int lvl)
+{
+ const struct btf_member *member;
+ const struct btf_type *type;
+ const char *pname;
+ __s32 id = type_id;
+ int i, nmembers;
+ __u8 kind;
+
+ /* type_to_value has already stripped qualifiers, so
+ * we either have a base type, a struct, union, etc.
+ * Only struct/unions have named members so anything
+ * else is invalid.
+ */
+ p_debug("Looking for member '%s' in type id %d", name, type_id);
+ type = btf__type_by_id(btf, id);
+ pname = btf__str_by_offset(btf, type->name_off);
+ if (strlen(pname) == 0)
+ pname = "<anon>";
+
+ kind = BTF_INFO_KIND(type->info);
+ switch (kind) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ nmembers = BTF_INFO_VLEN(type->info);
+ p_debug("Checking %d members...", nmembers);
+ for (member = (struct btf_member *)(type + 1), i = 0;
+ i < nmembers;
+ member++, i++) {
+ const char *mname;
+ __u16 offset;
+
+ type = btf__type_by_id(btf, member->type);
+ mname = btf__str_by_offset(btf, member->name_off);
+ offset = member->offset / 8;
+
+ p_debug("Checking member '%s' type %d offset %d",
+ mname, member->type, offset);
+
+ /* anonymous struct member? */
+ kind = BTF_INFO_KIND(type->info);
+ if (strlen(mname) == 0 &&
+ (kind == BTF_KIND_STRUCT ||
+ kind == BTF_KIND_UNION)) {
+ p_debug("Checking anon struct/union %d",
+ member->type);
+ val->offset += offset;
+ if (!member_to_value(btf, name, member->type,
+ val, lvl + 1))
+ return 0;
+ val->offset -= offset;
+ continue;
+ }
+
+ if (strcmp(mname, name) == 0) {
+ val->offset += offset;
+ val->flags |= KSNOOP_F_MEMBER;
+ type_to_value(btf, NULL, member->type, val);
+ p_debug("Member '%s', offset %d, flags %x size %d",
+ mname, val->offset, val->flags,
+ val->size);
+ return 0;
+ }
+ }
+ if (lvl > 0)
+ break;
+ p_err("No member '%s' found in %s [%d], offset %d", name, pname,
+ id, val->offset);
+ break;
+ default:
+ p_err("'%s' is not a struct/union", pname);
+ break;
+ }
+ return -ENOENT;
+}
+
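+/* Look up the function and its prototype in BTF and fill in type
+ * information for its arguments and return value.
+ */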
+static int get_func_btf(struct btf *btf, struct func *func)
+{
+ const struct btf_param *param;
+ const struct btf_type *type;
+ __u8 i;
+
+ func->id = btf__find_by_name_kind(btf, func->name, BTF_KIND_FUNC);
+ if (func->id <= 0) {
+ p_err("Cannot find function '%s' in BTF: %s",
+ func->name, strerror(-func->id));
+ return -ENOENT;
+ }
+ type = btf__type_by_id(btf, func->id);
+ if (libbpf_get_error(type) ||
+ BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
+ p_err("Error looking up function type via id '%d'", func->id);
+ return -EINVAL;
+ }
+ type = btf__type_by_id(btf, type->type);
+ if (libbpf_get_error(type) ||
+ BTF_INFO_KIND(type->info) != BTF_KIND_FUNC_PROTO) {
+ p_err("Error looking up function proto type via id '%d'",
+ func->id);
+ return -EINVAL;
+ }
+ for (param = (struct btf_param *)(type + 1), i = 0;
+ i < BTF_INFO_VLEN(type->info) && i < MAX_ARGS;
+ param++, i++) {
+ type_to_value(btf,
+ (char *)btf__str_by_offset(btf, param->name_off),
+ param->type, &func->args[i]);
+ p_debug("arg #%d: <name '%s', type id '%u'>",
+ i + 1, func->args[i].name, func->args[i].type_id);
+ }
+
+ /* real number of args, even if it is > number we recorded. */
+ func->nr_args = BTF_INFO_VLEN(type->info);
+
+ type_to_value(btf, KSNOOP_RETURN_NAME, type->type,
+ &func->args[KSNOOP_RETURN]);
+ p_debug("return value: type id '%u'>",
+ func->args[KSNOOP_RETURN].type_id);
+ return 0;
+}
+
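+/* Parse a predicate such as "==0" or ">128" into the appropriate
+ * KSNOOP_F_PREDICATE_* flags and comparison value.
+ */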
+int predicate_to_value(char *predicate, struct value *val)
+{
+ char pred[MAX_STR];
+ long v;
+
+ if (!predicate)
+ return 0;
+
+ p_debug("checking predicate '%s' for '%s'", predicate, val->name);
+
+ if (sscanf(predicate, "%[!=><]%li", pred, &v) != 2) {
+ p_err("Invalid specification; expected predicate, not '%s'",
+ predicate);
+ return -EINVAL;
+ }
+ if (!(val->flags & KSNOOP_F_PTR) &&
+ (val->size == 0 || val->size > sizeof(__u64))) {
+ p_err("'%s' (size %d) does not support predicate comparison",
+ val->name, val->size);
+ return -EINVAL;
+ }
+ val->predicate_value = (__u64)v;
+
+ if (strcmp(pred, "==") == 0) {
+ val->flags |= KSNOOP_F_PREDICATE_EQ;
+ goto out;
+ } else if (strcmp(pred, "!=") == 0) {
+ val->flags |= KSNOOP_F_PREDICATE_NOTEQ;
+ goto out;
+ }
+ if (pred[0] == '>')
+ val->flags |= KSNOOP_F_PREDICATE_GT;
+ else if (pred[0] == '<')
+ val->flags |= KSNOOP_F_PREDICATE_LT;
+
+ if (strlen(pred) == 1)
+ goto out;
+
+ if (pred[1] != '=') {
+ p_err("Invalid predicate specification '%s'", predicate);
+ return -EINVAL;
+ }
+ val->flags |= KSNOOP_F_PREDICATE_EQ;
+
+out:
+ p_debug("predicate '%s', flags 0x%x value %x",
+ pred, val->flags, val->predicate_value);
+
+ return 0;
+}
+
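+/* Resolve an "arg" or "arg->member" specification (with optional
+ * predicate) against the function's arguments/return value.
+ */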
+static int trace_to_value(struct btf *btf, struct func *func, char *argname,
+ char *membername, char *predicate, struct value *val)
+{
+ __u8 i;
+
+ if (strlen(membername) > 0)
+ snprintf(val->name, sizeof(val->name), "%s->%s",
+ argname, membername);
+ else
+ strncpy(val->name, argname, sizeof(val->name) - 1);
+
+ for (i = 0; i < MAX_TRACES; i++) {
+ if (!func->args[i].name)
+ continue;
+ if (strcmp(argname, func->args[i].name) != 0)
+ continue;
+ p_debug("setting base arg for val %s to %d", val->name, i);
+ val->base_arg = i;
+
+ if (strlen(membername) > 0) {
+ if (member_to_value(btf, membername,
+ func->args[i].type_id, val, 0))
+ return -ENOENT;
+ } else {
+ val->type_id = func->args[i].type_id;
+ val->flags |= func->args[i].flags;
+ val->size = func->args[i].size;
+ }
+ return predicate_to_value(predicate, val);
+ }
+ p_err("Could not find '%s' in arguments/return value for '%s'",
+ argname, func->name);
+ return -ENOENT;
+}
+
+static struct btf *get_btf(const char *name)
+{
+ struct btf *mod_btf;
+
+ p_debug("getting BTF for %s",
+ name && strlen(name) > 0 ? name : "vmlinux");
+
+ if (!vmlinux_btf) {
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (libbpf_get_error(vmlinux_btf)) {
+ p_err("No BTF, cannot determine type info: %s",
+ libbpf_errstr(vmlinux_btf));
+ return NULL;
+ }
+ }
+ if (!name || strlen(name) == 0)
+ return vmlinux_btf;
+
+ mod_btf = btf__load_module_btf(name, vmlinux_btf);
+ if (libbpf_get_error(mod_btf)) {
+ p_err("No BTF for module '%s': %s",
+ name, libbpf_errstr(mod_btf));
+ return NULL;
+ }
+ return mod_btf;
+}
+
+static void copy_without_spaces(char *target, char *src)
+{
+ for (; *src != '\0'; src++)
+ if (!isspace(*src))
+ *(target++) = *src;
+ *target = '\0';
+}
+
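+/* Render a type id as a C-style type string such as "struct sk_buff *". */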
+static char *type_id_to_str(struct btf *btf, __s32 type_id, char *str)
+{
+ const struct btf_type *type;
+ const char *name = "";
+ char *prefix = "";
+ char *suffix = " ";
+ char *ptr = "";
+
+ str[0] = '\0';
+
+ switch (type_id) {
+ case 0:
+ name = "void";
+ break;
+ case KSNOOP_ID_UNKNOWN:
+ name = "?";
+ break;
+ default:
+ do {
+ type = btf__type_by_id(btf, type_id);
+
+ if (libbpf_get_error(type)) {
+ name = "?";
+ break;
+ }
+ switch (BTF_INFO_KIND(type->info)) {
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ type_id = type->type;
+ break;
+ case BTF_KIND_PTR:
+ ptr = "* ";
+ type_id = type->type;
+ break;
+ case BTF_KIND_ARRAY:
+ suffix = "[]";
+ type_id = btf_array(type)->type;
+ break;
+ case BTF_KIND_STRUCT:
+ prefix = "struct ";
+ name = btf__str_by_offset(btf, type->name_off);
+ break;
+ case BTF_KIND_UNION:
+ prefix = "union";
+ name = btf__str_by_offset(btf, type->name_off);
+ break;
+ case BTF_KIND_ENUM:
+ prefix = "enum ";
+ name = btf__str_by_offset(btf, type->name_off);
+ break;
+ case BTF_KIND_TYPEDEF:
+ name = btf__str_by_offset(btf, type->name_off);
+ break;
+ default:
+ name = btf__str_by_offset(btf, type->name_off);
+ break;
+ }
+ } while (type_id >= 0 && strlen(name) == 0);
+ break;
+ }
+ snprintf(str, MAX_STR, "%s%s%s%s", prefix, name, suffix, ptr);
+
+ return str;
+}
+
+static char *value_to_str(struct btf *btf, struct value *val, char *str)
+{
+
+ str = type_id_to_str(btf, val->type_id, str);
+ if (val->flags & KSNOOP_F_PTR)
+ strncat(str, " * ", MAX_STR);
+ if (strlen(val->name) > 0 &&
+ strcmp(val->name, KSNOOP_RETURN_NAME) != 0)
+ strncat(str, val->name, MAX_STR);
+
+ return str;
+}
+
+/* based heavily on bpf_object__read_kallsyms_file() in libbpf.c */
+static int get_func_ip_mod(struct func *func)
+{
+ char sym_type, sym_name[MAX_STR], mod_info[MAX_STR];
+ unsigned long long sym_addr;
+ int ret, err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f) {
+ err = errno;
+ p_err("failed to open /proc/kallsyms: %d", strerror(err));
+ return err;
+ }
+
+ while (true) {
+ ret = fscanf(f, "%llx %c %128s%[^\n]\n",
+ &sym_addr, &sym_type, sym_name, mod_info);
+ if (ret == EOF && feof(f))
+ break;
+ if (ret < 3) {
+ p_err("failed to read kallsyms entry: %d", ret);
+ err = -EINVAL;
+ goto out;
+ }
+ if (strcmp(func->name, sym_name) != 0)
+ continue;
+ func->ip = sym_addr;
+ func->mod[0] = '\0';
+ /* get module name from [modname] */
+ if (ret == 4) {
+ if (sscanf(mod_info, "%*[\t ][%[^]]", func->mod) < 1) {
+ p_err("failed to read module name");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+ p_debug("%s = <ip %llx, mod %s>", func->name, func->ip,
+ strlen(func->mod) > 0 ? func->mod : "vmlinux");
+ break;
+ }
+out:
+ fclose(f);
+ return err;
+}
+
+static void trace_printf(void *ctx, const char *fmt, va_list args)
+{
+ vprintf(fmt, args);
+}
+
+#define VALID_NAME "%[A-Za-z0-9\\-_]"
+#define ARGDATA "%[^)]"
+
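+/* Parse a single trace specification - "func" or
+ * "func(arg, arg->member PRED, return)" - into a struct trace.
+ */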
+static int parse_trace(char *str, struct trace *trace)
+{
+ __u8 i, nr_predicates = 0, nr_entry = 0, nr_return = 0;
+ char argname[MAX_NAME], membername[MAX_NAME];
+ char tracestr[MAX_STR], argdata[MAX_STR];
+ struct func *func = &trace->func;
+ struct btf_dump_opts opts = { };
+ char *arg, *saveptr;
+ int ret;
+
+ copy_without_spaces(tracestr, str);
+
+ p_debug("Parsing trace '%s'", tracestr);
+
+ trace->filter_pid = (__u32)filter_pid;
+ if (filter_pid)
+ p_debug("Using pid %lu as filter", trace->filter_pid);
+
+ trace->btf = vmlinux_btf;
+
+ ret = sscanf(tracestr, VALID_NAME "(" ARGDATA ")", func->name, argdata);
+ if (ret <= 0)
+ usage();
+ if (ret == 1) {
+ if (strlen(tracestr) > strlen(func->name)) {
+ p_err("Invalid function specification '%s'", tracestr);
+ usage();
+ }
+ argdata[0] = '\0';
+ p_debug("got func '%s'", func->name);
+ } else {
+ if (strlen(tracestr) >
+ strlen(func->name) + strlen(argdata) + 2) {
+ p_err("Invalid function specification '%s'", tracestr);
+ usage();
+ }
+ p_debug("got func '%s', args '%s'", func->name, argdata);
+ trace->flags |= KSNOOP_F_CUSTOM;
+ }
+
+ ret = get_func_ip_mod(func);
+ if (ret) {
+ p_err("could not get address of '%s'", func->name);
+ return ret;
+ }
+ trace->btf = get_btf(func->mod);
+ if (libbpf_get_error(trace->btf)) {
+ p_err("could not get BTF for '%s': %s",
+ strlen(func->mod) ? func->mod : "vmlinux",
+ libbpf_errstr(trace->btf));
+ return -ENOENT;
+ }
+ trace->dump = btf_dump__new(trace->btf, NULL, &opts, trace_printf);
+ if (libbpf_get_error(trace->dump)) {
+ p_err("could not create BTF dump : %n",
+ libbpf_errstr(trace->btf));
+ return -EINVAL;
+ }
+
+ ret = get_func_btf(trace->btf, func);
+ if (ret) {
+ p_debug("unexpected return value '%d' getting function", ret);
+ return ret;
+ }
+
+ for (arg = strtok_r(argdata, ",", &saveptr), i = 0;
+ arg;
+ arg = strtok_r(NULL, ",", &saveptr), i++) {
+ char *predicate = NULL;
+
+ ret = sscanf(arg, VALID_NAME "->" VALID_NAME,
+ argname, membername);
+ if (ret == 2) {
+ if (strlen(arg) >
+ strlen(argname) + strlen(membername) + 2) {
+ predicate = arg + strlen(argname) +
+ strlen(membername) + 2;
+ }
+ p_debug("'%s' dereferences '%s', predicate '%s'",
+ argname, membername, predicate);
+ } else {
+ if (strlen(arg) > strlen(argname))
+ predicate = arg + strlen(argname);
+ p_debug("'%s' arg, predcate '%s'", argname, predicate);
+ membername[0] = '\0';
+ }
+
+ if (i >= MAX_TRACES) {
+ p_err("Too many arguments; up to %d are supported",
+ MAX_TRACES);
+ return -EINVAL;
+ }
+ if (trace_to_value(trace->btf, func, argname, membername,
+ predicate, &trace->traces[i]))
+ return -EINVAL;
+
+ if (predicate)
+ nr_predicates++;
+ if (trace->traces[i].base_arg == KSNOOP_RETURN)
+ nr_return++;
+ else
+ nr_entry++;
+ trace->nr_traces++;
+ }
+
+ if (trace->nr_traces > 0) {
+ trace->flags |= KSNOOP_F_CUSTOM;
+ p_debug("custom trace with %d args", trace->nr_traces);
+
+ /* If we have one or more predicates _and_ references to
+ * entry and return values, we need to activate "stash"
+ * mode where arg traces are stored on entry and not
+ * sent until return to ensure predicates are satisfied.
+ */
+ if (nr_predicates > 0 && nr_entry > 0 && nr_return > 0) {
+ trace->flags |= KSNOOP_F_STASH;
+ p_debug("activating stash mode on entry");
+ }
+ } else {
+ p_debug("Standard trace, function with %d arguments",
+ func->nr_args);
+ /* copy function arg/return value to trace specification. */
+ memcpy(trace->traces, func->args, sizeof(trace->traces));
+ for (i = 0; i < MAX_TRACES; i++)
+ trace->traces[i].base_arg = i;
+ trace->nr_traces = MAX_TRACES;
+ }
+
+ return 0;
+}
+
+static int parse_traces(int argc, char **argv, struct trace **traces)
+{
+ __u8 i;
+
+ if (argc == 0)
+ usage();
+
+ if (argc > MAX_FUNC_TRACES) {
+ p_err("A maximum of %d traces are supported", MAX_FUNC_TRACES);
+ return -EINVAL;
+ }
+ *traces = calloc(argc, sizeof(struct trace));
+ if (!*traces) {
+ p_err("Could not allocate %d traces", argc);
+ return -ENOMEM;
+ }
+ for (i = 0; i < argc; i++) {
+ if (parse_trace(argv[i], &((*traces)[i])))
+ return -EINVAL;
+ if (!stack_mode || i == 0)
+ continue;
+ /* tell stack mode trace which function to expect next */
+ (*traces)[i].prev_ip = (*traces)[i-1].func.ip;
+ (*traces)[i-1].next_ip = (*traces)[i].func.ip;
+ }
+ return i;
+}
+
+static int cmd_info(int argc, char **argv)
+{
+ struct trace *traces;
+ char str[MAX_STR];
+ int nr_traces;
+ __u8 i, j;
+
+ nr_traces = parse_traces(argc, argv, &traces);
+ if (nr_traces < 0)
+ return nr_traces;
+
+ for (i = 0; i < nr_traces; i++) {
+ struct func *func = &traces[i].func;
+
+ printf("%s %s(",
+ value_to_str(traces[i].btf, &func->args[KSNOOP_RETURN],
+ str),
+ func->name);
+ for (j = 0; j < func->nr_args; j++) {
+ if (j > 0)
+ printf(", ");
+ printf("%s", value_to_str(traces[i].btf, &func->args[j],
+ str));
+ }
+ if (func->nr_args > MAX_ARGS)
+ printf(" /* and %d more args that are not traceable */",
+ func->nr_args - MAX_ARGS);
+ printf(");\n");
+ }
+ return 0;
+}
+
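+/* Perf buffer callback: pretty-print a received trace, using BTF to
+ * dump each captured value.
+ */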
+static void trace_handler(void *ctx, int cpu, void *data, __u32 size)
+{
+ struct trace *trace = data;
+ int i, shown, ret;
+
+ p_debug("got trace, size %d", size);
+ if (size < (sizeof(*trace) - MAX_TRACE_BUF)) {
+ p_err("\t/* trace buffer size '%u' < min %ld */",
+ size, sizeof(trace) - MAX_TRACE_BUF);
+ return;
+ }
+ printf("%16lld %4d %8u %s(\n", trace->time, trace->cpu, trace->pid,
+ trace->func.name);
+
+ for (i = 0, shown = 0; i < trace->nr_traces; i++) {
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ bool entry = trace->data_flags & KSNOOP_F_ENTRY;
+ struct value *val = &trace->traces[i];
+ struct trace_data *data = &trace->trace_data[i];
+
+ opts.indent_level = 36;
+ opts.indent_str = " ";
+
+ /* skip if it's entry data and trace data is for return, or
+ * if it's return and trace data is entry; only exception in
+ * the latter case is if we stashed data; in such cases we
+ * want to see it as it's a mix of entry/return data with
+ * predicates.
+ */
+ if ((entry && !base_arg_is_entry(val->base_arg)) ||
+ (!entry && base_arg_is_entry(val->base_arg) &&
+ !(trace->flags & KSNOOP_F_STASH)))
+ continue;
+
+ if (val->type_id == 0)
+ continue;
+
+ if (shown > 0)
+ printf(",\n");
+ printf("%34s %s = ", "", val->name);
+ if (val->flags & KSNOOP_F_PTR)
+ printf("*(0x%llx)", data->raw_value);
+ printf("\n");
+
+ if (data->err_type_id != 0) {
+ char typestr[MAX_STR];
+
+ printf("%36s /* Cannot show '%s' as '%s%s'; invalid/userspace ptr? */\n",
+ "",
+ val->name,
+ type_id_to_str(trace->btf,
+ val->type_id,
+ typestr),
+ val->flags & KSNOOP_F_PTR ?
+ " *" : "");
+ } else {
+ ret = btf_dump__dump_type_data
+ (trace->dump, val->type_id,
+ trace->buf + data->buf_offset,
+ data->buf_len, &opts);
+ /* truncated? */
+ if (ret == -E2BIG)
+ printf("%36s... /* %d bytes of %d */", "",
+ data->buf_len,
+ val->size);
+ }
+ shown++;
+
+ }
+ printf("\n%31s);\n\n", "");
+ fflush(stdout);
+}
+
+static void lost_handler(void *ctx, int cpu, __u64 cnt)
+{
+ p_err("\t/* lost %llu events */", cnt);
+}
+
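+/* Add trace specifications to the per-CPU function map; each entry must
+ * be replicated once per possible CPU.
+ */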
+static int add_traces(struct bpf_map *func_map, struct trace *traces,
+ int nr_traces)
+{
+ int i, j, ret, nr_cpus = libbpf_num_possible_cpus();
+ struct trace *map_traces;
+
+ map_traces = calloc(nr_cpus, sizeof(struct trace));
+ if (!map_traces) {
+ p_err("Could not allocate memory for %d traces", nr_traces);
+ return -ENOMEM;
+ }
+ for (i = 0; i < nr_traces; i++) {
+ for (j = 0; j < nr_cpus; j++)
+ memcpy(&map_traces[j], &traces[i],
+ sizeof(map_traces[j]));
+
+ ret = bpf_map_update_elem(bpf_map__fd(func_map),
+ &traces[i].func.ip,
+ map_traces,
+ BPF_NOEXIST);
+ if (ret) {
+ p_err("Could not add map entry for '%s': %s",
+ traces[i].func.name, strerror(-ret));
+ break;
+ }
+ }
+ free(map_traces);
+ return ret;
+}
+
+static int attach_traces(struct ksnoop_bpf *skel, struct trace *traces,
+ int nr_traces)
+{
+ struct bpf_link *link;
+ int i, ret;
+
+ for (i = 0; i < nr_traces; i++) {
+ link = bpf_program__attach_kprobe(skel->progs.kprobe_entry,
+ false,
+ traces[i].func.name);
+ ret = libbpf_get_error(link);
+ if (ret) {
+ p_err("Could not attach kprobe to '%s': %s",
+ traces[i].func.name, strerror(-ret));
+ return ret;
+ }
+ p_debug("Attached kprobe for '%s'", traces[i].func.name);
+
+ link = bpf_program__attach_kprobe(skel->progs.kprobe_return,
+ true,
+ traces[i].func.name);
+ ret = libbpf_get_error(link);
+ if (ret) {
+ p_err("Could not attach kretprobe to '%s': %s",
+ traces[i].func.name, strerror(-ret));
+ return ret;
+ }
+ p_debug("Attached kretprobe for '%s'", traces[i].func.name);
+ }
+ return 0;
+}
+
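+/* Load the BPF skeleton, install the trace specifications, attach the
+ * kprobes/kretprobes and poll the perf buffer for events.
+ */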
+static int cmd_trace(int argc, char **argv)
+{
+ struct perf_buffer_opts pb_opts = {};
+ struct bpf_map *perf_map, *func_map;
+ struct perf_buffer *pb;
+ struct ksnoop_bpf *skel;
+ int nr_traces, ret = 0;
+ struct trace *traces;
+
+ nr_traces = parse_traces(argc, argv, &traces);
+ if (nr_traces < 0)
+ return nr_traces;
+
+ skel = ksnoop_bpf__open_and_load();
+ if (!skel) {
+ p_err("Could not load ksnoop BPF: %s", libbpf_errstr(skel));
+ return 1;
+ }
+
+ perf_map = skel->maps.ksnoop_perf_map;
+ if (!perf_map) {
+ p_err("Could not find '%s'", "ksnoop_perf_map");
+ return 1;
+ }
+ func_map = bpf_object__find_map_by_name(skel->obj, "ksnoop_func_map");
+ if (!func_map) {
+ p_err("Could not find '%s'", "ksnoop_func_map");
+ return 1;
+ }
+
+ if (add_traces(func_map, traces, nr_traces)) {
+ p_err("Could not add traces to '%s'", "ksnoop_func_map");
+ return 1;
+ }
+
+ if (attach_traces(skel, traces, nr_traces)) {
+ p_err("Could not attach %d traces", nr_traces);
+ return 1;
+ }
+
+ pb_opts.sample_cb = trace_handler;
+ pb_opts.lost_cb = lost_handler;
+ pb = perf_buffer__new(bpf_map__fd(perf_map), pages, &pb_opts);
+ if (libbpf_get_error(pb)) {
+ p_err("Could not create perf buffer: %s",
+ libbpf_errstr(pb));
+ return 1;
+ }
+
+ printf("%16s %4s %8s %s\n", "TIME", "CPU", "PID", "FUNCTION/ARGS");
+
+ while (1) {
+ ret = perf_buffer__poll(pb, 1);
+ if (ret < 0 && ret != -EINTR) {
+ p_err("Polling failed: %s", strerror(-ret));
+ break;
+ }
+ }
+
+ perf_buffer__free(pb);
+ ksnoop_bpf__destroy(skel);
+
+ return ret;
+}
+
+struct cmd {
+ const char *cmd;
+ int (*func)(int argc, char **argv);
+};
+
+struct cmd cmds[] = {
+ { "info", cmd_info },
+ { "trace", cmd_trace },
+ { "help", cmd_help },
+ { NULL, NULL }
+};
+
+static int cmd_select(int argc, char **argv)
+{
+ int i;
+
+ for (i = 0; cmds[i].cmd; i++) {
+ if (strncmp(*argv, cmds[i].cmd, strlen(*argv)) == 0)
+ return cmds[i].func(argc - 1, argv + 1);
+ }
+ return cmd_trace(argc, argv);
+}
+
+static int print_all_levels(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ return vfprintf(stderr, format, args);
+}
+
+int main(int argc, char *argv[])
+{
+ static const struct option options[] = {
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "pages", required_argument, NULL, 'P' },
+ { "pid", required_argument, NULL, 'p' },
+ { 0 }
+ };
+ int opt;
+
+ bin_name = argv[0];
+
+ while ((opt = getopt_long(argc, argv, "dhp:P:sV", options,
+ NULL)) >= 0) {
+ switch (opt) {
+ case 'd':
+ libbpf_set_print(print_all_levels);
+ log_level = DEBUG;
+ break;
+ case 'h':
+ return cmd_help(argc, argv);
+ case 'V':
+ return do_version(argc, argv);
+ case 'p':
+ filter_pid = atoi(optarg);
+ break;
+ case 'P':
+ pages = atoi(optarg);
+ break;
+ case 's':
+ stack_mode = true;
+ break;
+ default:
+ p_err("unrecognized option '%s'", argv[optind - 1]);
+ usage();
+ }
+ }
+ if (argc == 1)
+ usage();
+ argc -= optind;
+ argv += optind;
+ if (argc <= 0)
+ usage();
+
+ return cmd_select(argc, argv);
+}
--- /dev/null
+.\" Man page generated from reStructuredText.
+.
+.TH KSNOOP 8 "" "" ""
+.SH NAME
+KSNOOP \- tool for tracing kernel function entry/return showing arguments/return values
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.SH SYNOPSIS
+.INDENT 0.0
+.INDENT 3.5
+\fBksnoop\fP [\fIOPTIONS\fP] { \fICOMMAND\fP \fIFUNC\fP | \fBhelp\fP }
+.sp
+\fIOPTIONS\fP := { { \fB\-V\fP | \fB\-\-version\fP } | { \fB\-h\fP | \fB\-\-help\fP }
+| { [\fB\-P\fP | \fB\-\-pages\fP] nr_pages} | { [\fB\-p\fP | \fB\-\-pid\fP] pid} |
+[{ \fB\-s\fP | \fB\-\-stack\fP }] | [{ \fB\-d\fP | \fB\-\-debug\fP }] }
+.sp
+\fICOMMAND\fP := { \fBtrace\fP | \fBinfo\fP }
+.sp
+\fIFUNC\fP := { \fBname\fP | \fBname\fP(\fBarg\fP[,\fBarg\fP]) }
+.UNINDENT
+.UNINDENT
+.SH DESCRIPTION
+.INDENT 0.0
+.INDENT 3.5
+\fIksnoop\fP allows for inspection of arguments and return values
+associated with function entry/return.
+.INDENT 0.0
+.TP
+.B \fBksnoop info\fP \fIFUNC\fP
+Show function description, arguments and return value types.
+.TP
+.B \fBksnoop trace\fP \fIFUNC\fP [\fIFUNC\fP]
+Trace function entry and return, showing arguments and
+return values. A function name can be specified on its own,
+or together with named arguments and return values.
+\fBreturn\fP is used to specify the return value.
+.UNINDENT
+.sp
+\fIksnoop\fP requires the kernel to provide BTF for itself, and if
+tracing of module data is required, module BTF must also be present.
+Check /sys/kernel/btf to see if BTF is present.
+.sp
+\fBksnoop\fP requires \fICAP_BPF\fP and \fICAP_TRACING\fP capabilities.
+.UNINDENT
+.UNINDENT
+.SH OPTIONS
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \-h\fP,\fB \-\-help
+Show help information
+.TP
+.B \-V\fP,\fB \-\-version
+Show version.
+.TP
+.B \-d\fP,\fB \-\-debug
+Show debug output.
+.TP
+.B \-p\fP,\fB \-\-pid
+Filter events by pid.
+.TP
+.B \-P\fP,\fB \-\-pages
+Specify number of pages used per\-CPU for perf event
+collection. Default is 8.
+.TP
+.B \-s\fP,\fB \-\-stack
+The specified set of functions is traced if and only if
+the functions are encountered in the order specified.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH EXAMPLES
+.sp
+\fB# ksnoop info ip_send_skb\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+int ip_send_skb(struct net * net, struct sk_buff * skb);
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Show function description.
+.sp
+\fB# ksnoop trace ip_send_skb\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+78101668506811 1 2813 ip_send_skb(
+ net = *(0xffffffffb5959840)
+ (struct net){
+ .passive = (refcount_t){
+ .refs = (atomic_t){
+ .counter = (int)0x2,
+ },
+ },
+ .dev_base_seq = (unsigned int)0x18,
+ .ifindex = (int)0xf,
+ .list = (struct list_head){
+ .next = (struct list_head *)0xffff9895440dc120,
+ .prev = (struct list_head *)0xffffffffb595a8d0,
+ },
+ ...
+
+79561322965250 1 2813 ip_send_skb(
+ return =
+ (int)0x0
+ );
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Show entry/return for ip_send_skb() with arguments, return values.
+.sp
+\fB# ksnoop trace "ip_send_skb(skb)"\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+78142420834537 1 2813 ip_send_skb(
+ skb = *(0xffff989750797c00)
+ (struct sk_buff){
+ (union){
+ .sk = (struct sock *)0xffff98966ce19200,
+ .ip_defrag_offset = (int)0x6ce19200,
+ },
+ (union){
+ (struct){
+ ._skb_refdst = (long unsigned int)0xffff98981dde2d80,
+ .destructor = (void (*)(struct sk_buff *))0xffffffffb3e1beb0,
+ },
+ ...
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Show entry argument \fBskb\fP\&.
+.sp
+\fB# ksnoop trace "ip_send_skb(return)"\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+78178228354796 1 2813 ip_send_skb(
+ return =
+ (int)0x0
+ );
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Show return value from ip_send_skb().
+.sp
+\fB# ksnoop trace "ip_send_skb(skb\->sk)"\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+78207649138829 2 2813 ip_send_skb(
+ skb\->sk = *(0xffff98966ce19200)
+ (struct sock){
+ .__sk_common = (struct sock_common){
+ (union){
+ .skc_addrpair = (__addrpair)0x1701a8c017d38f8d,
+ (struct){
+ .skc_daddr = (__be32)0x17d38f8d,
+ .skc_rcv_saddr = (__be32)0x1701a8c0,
+ },
+ },
+ ...
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Trace member information associated with argument. Only one level of
+membership is supported.
+.sp
+\fB# ksnoop \-p 2813 "ip_rcv(dev)"\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+78254803164920 1 2813 ip_rcv(
+ dev = *(0xffff9895414cb000)
+ (struct net_device){
+ .name = (char[16])[
+ \(aql\(aq,
+ \(aqo\(aq,
+ ],
+ .name_node = (struct netdev_name_node *)0xffff989541515ec0,
+ .state = (long unsigned int)0x3,
+ ...
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Trace \fBdev\fP argument of \fBip_rcv()\fP\&. Specify process id 2813 for events
+for that process only.
+.sp
+\fB# ksnoop \-s tcp_sendmsg __tcp_transmit_skb ip_output\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+71827770952903 1 4777 __tcp_transmit_skb(
+ sk = *(0xffff9852460a2300)
+ (struct sock){
+ .__sk_common = (struct sock_common){
+ (union){
+ .skc_addrpair = (__addrpair)0x61b2af0a35cbfe0a,
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Trace entry/return of tcp_sendmsg, __tcp_transmit_skb and ip_output when
+tcp_sendmsg leads to a call to __tcp_transmit_skb and that in turn
+leads to a call to ip_output; i.e. with a call graph matching the order
+specified. The calls do not have to be direct; function A
+may call another function that in turn calls function B.
+.sp
+\fB# ksnoop "ip_send_skb(skb\->len > 100, skb)"\fP
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ TIME CPU PID FUNCTION/ARGS
+39267395709745 1 2955 ip_send_skb(
+ skb\->len =
+ (unsigned int)0x89,
+ skb = *(0xffff89c8be81e500)
+ (struct sk_buff){
+ (union){
+ .sk = (struct sock *)0xffff89c6c59e5580,
+ .ip_defrag_offset = (int)0xc59e5580,
+ },
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Trace ip_send_skb() skbs which have len > 100.
+.SH SEE ALSO
+.INDENT 0.0
+.INDENT 3.5
+\fBbpf\fP(2),
+.UNINDENT
+.UNINDENT
+.\" Generated by docutils manpage writer.
+.