/hardirqs
/llcstat
/numamove
+/offcputime
/opensnoop
/readahead
/runqlat
hardirqs \
llcstat \
numamove \
+ offcputime \
opensnoop \
readahead \
runqlat \
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2021 Wenbo Zhang
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "offcputime.h"
+
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+#define MAX_ENTRIES 10240
+
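+/*
+ * Filtering options, filled in by the userspace loader through the
+ * skeleton's rodata section before offcputime_bpf__load(). Because they
+ * are const volatile, the verifier treats them as constants and can
+ * dead-code-eliminate branches a given run never takes.
+ */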
+const volatile bool kernel_threads_only = false;
+const volatile bool user_threads_only = false;
+const volatile __u64 max_block_ns = -1;
+const volatile __u64 min_block_ns = 1;
+const volatile pid_t targ_tgid = -1;
+const volatile pid_t targ_pid = -1;
+const volatile long state = -1;
+
+struct internal_key {
+ u64 start_ts;
+ struct key_t key;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, struct internal_key);
+ __uint(max_entries, MAX_ENTRIES);
+} start SEC(".maps");
+
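+/*
+ * Holds the captured stack traces. value_size (perf_max_stack_depth
+ * slots of sizeof(unsigned long)) and max_entries are set by the loader
+ * before load; see bpf_map__set_value_size()/bpf_map__set_max_entries()
+ * in offcputime.c.
+ */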
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, struct val_t);
+ __uint(max_entries, MAX_ENTRIES);
+} info SEC(".maps");
+
+static bool allow_record(struct task_struct *t)
+{
+ if (targ_tgid != -1 && targ_tgid != t->tgid)
+ return false;
+ if (targ_pid != -1 && targ_pid != t->pid)
+ return false;
+ if (user_threads_only && t->flags & PF_KTHREAD)
+ return false;
+ else if (kernel_threads_only && !(t->flags & PF_KTHREAD))
+ return false;
+ if (state != -1 && t->state != state)
+ return false;
+ return true;
+}
+
+SEC("tp_btf/sched_switch")
+int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev,
+ struct task_struct *next)
+{
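+	/*
+	 * Two halves: first, stamp @prev with a start timestamp and its
+	 * stack-trace key as it goes off-CPU; second, if @next has a
+	 * pending timestamp, account the elapsed off-CPU time to the
+	 * entry recorded when @next was switched out.
+	 */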
+ struct internal_key *i_keyp, i_key;
+ struct val_t *valp, val;
+ s64 delta;
+ u32 pid;
+
+ if (allow_record(prev)) {
+ pid = prev->pid;
+ /* To distinguish idle threads of different cores */
+ if (!pid)
+ pid = bpf_get_smp_processor_id();
+ i_key.key.pid = pid;
+ i_key.key.tgid = prev->tgid;
+ i_key.start_ts = bpf_ktime_get_ns();
+
+ if (prev->flags & PF_KTHREAD)
+ i_key.key.user_stack_id = -1;
+ else
+ i_key.key.user_stack_id =
+ bpf_get_stackid(ctx, &stackmap,
+ BPF_F_USER_STACK);
+ i_key.key.kern_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
+ bpf_map_update_elem(&start, &pid, &i_key, 0);
+ bpf_probe_read_str(&val.comm, sizeof(prev->comm), prev->comm);
+ val.delta = 0;
+ bpf_map_update_elem(&info, &i_key.key, &val, BPF_NOEXIST);
+ }
+
+ pid = next->pid;
+ i_keyp = bpf_map_lookup_elem(&start, &pid);
+ if (!i_keyp)
+ return 0;
+ delta = (s64)(bpf_ktime_get_ns() - i_keyp->start_ts);
+ if (delta < 0)
+ goto cleanup;
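+	/*
+	 * Convert to microseconds: despite the _ns suffix, min_block_ns
+	 * and max_block_ns are supplied by userspace in microseconds.
+	 */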
+ delta /= 1000U;
+ if (delta < min_block_ns || delta > max_block_ns)
+ goto cleanup;
+ valp = bpf_map_lookup_elem(&info, &i_keyp->key);
+ if (!valp)
+ goto cleanup;
+ __sync_fetch_and_add(&valp->delta, delta);
+
+cleanup:
+ bpf_map_delete_elem(&start, &pid);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
--- /dev/null
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2021 Wenbo Zhang
+//
+// Based on offcputime(8) from BCC by Brendan Gregg.
+// 19-Mar-2021 Wenbo Zhang Created this.
+#include <argp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "offcputime.h"
+#include "offcputime.skel.h"
+#include "trace_helpers.h"
+
+static struct env {
+ pid_t pid;
+ pid_t tid;
+ bool user_threads_only;
+ bool kernel_threads_only;
+ int stack_storage_size;
+ int perf_max_stack_depth;
+ __u64 min_block_time;
+ __u64 max_block_time;
+ long state;
+ int duration;
+ bool verbose;
+} env = {
+ .pid = -1,
+ .tid = -1,
+ .stack_storage_size = 1024,
+ .perf_max_stack_depth = 127,
+ .min_block_time = 1,
+ .max_block_time = -1,
+ .state = -1,
+ .duration = 99999999,
+};
+
+static volatile bool exiting;
+
+const char *argp_program_version = "offcputime 0.1";
+const char *argp_program_bug_address =
+ "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Summarize off-CPU time by stack trace.\n"
+"\n"
+"USAGE: offcputime [--help] [-p PID | -u | -k] [-m MIN-BLOCK-TIME] "
+"[-M MAX-BLOCK-TIME] [--state] [--perf-max-stack-depth] [--stack-storage-size] "
+"[duration]\n"
+"EXAMPLES:\n"
+" offcputime # trace off-CPU stack time until Ctrl-C\n"
+" offcputime 5 # trace for 5 seconds only\n"
+" offcputime -m 1000 # trace only events that last more than 1000 usec\n"
+" offcputime -M 10000 # trace only events that last less than 10000 usec\n"
+" offcputime -p 185 # only trace threads for PID 185\n"
+" offcputime -t 188 # only trace thread 188\n"
+" offcputime -u # only trace user threads (no kernel)\n"
+" offcputime -k # only trace kernel threads (no user)\n";
+
+#define OPT_PERF_MAX_STACK_DEPTH 1 /* --perf-max-stack-depth */
+#define OPT_STACK_STORAGE_SIZE 2 /* --stack-storage-size */
+#define OPT_STATE 3 /* --state */
+
+static const struct argp_option opts[] = {
+ { "pid", 'p', "PID", 0, "Trace this PID only" },
+ { "tid", 't', "TID", 0, "Trace this TID only" },
+ { "user-threads-only", 'u', NULL, 0,
+ "User threads only (no kernel threads)" },
+ { "kernel-threads-only", 'k', NULL, 0,
+ "Kernel threads only (no user threads)" },
+ { "perf-max-stack-depth", OPT_PERF_MAX_STACK_DEPTH,
+ "PERF-MAX-STACK-DEPTH", 0, "the limit for both kernel and user stack traces (default 127)" },
+ { "stack-storage-size", OPT_STACK_STORAGE_SIZE, "STACK-STORAGE-SIZE", 0,
+ "the number of unique stack traces that can be stored and displayed (default 1024)" },
+ { "min-block-time", 'm', "MIN-BLOCK-TIME", 0,
+ "the amount of time in microseconds over which we store traces (default 1)" },
+ { "max-block-time", 'M', "MAX-BLOCK-TIME", 0,
+ "the amount of time in microseconds under which we store traces (default U64_MAX)" },
+	{ "state", OPT_STATE, "STATE", 0, "filter on this thread state bitmask (e.g., 2 == TASK_UNINTERRUPTIBLE); see include/linux/sched.h" },
+ { "verbose", 'v', NULL, 0, "Verbose debug output" },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ static int pos_args;
+
+ switch (key) {
+ case 'v':
+ env.verbose = true;
+ break;
+ case 'p':
+ errno = 0;
+ env.pid = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid PID: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 't':
+ errno = 0;
+ env.tid = strtol(arg, NULL, 10);
+ if (errno || env.tid <= 0) {
+ fprintf(stderr, "Invalid TID: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'u':
+ env.user_threads_only = true;
+ break;
+ case 'k':
+ env.kernel_threads_only = true;
+ break;
+ case OPT_PERF_MAX_STACK_DEPTH:
+ errno = 0;
+ env.perf_max_stack_depth = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid perf max stack depth: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case OPT_STACK_STORAGE_SIZE:
+ errno = 0;
+ env.stack_storage_size = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid stack storage size: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'm':
+ errno = 0;
+ env.min_block_time = strtoll(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "Invalid min block time (in us): %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'M':
+ errno = 0;
+ env.max_block_time = strtoll(arg, NULL, 10);
+ if (errno) {
+			fprintf(stderr, "Invalid max block time (in us): %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case OPT_STATE:
+ errno = 0;
+ env.state = strtol(arg, NULL, 10);
+ if (errno || env.state < 0 || env.state > 2) {
+ fprintf(stderr, "Invalid task state: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case ARGP_KEY_ARG:
+ if (pos_args++) {
+ fprintf(stderr,
+ "Unrecognized positional argument: %s\n", arg);
+ argp_usage(state);
+ }
+ errno = 0;
+ env.duration = strtol(arg, NULL, 10);
+ if (errno || env.duration <= 0) {
+ fprintf(stderr, "Invalid duration (in s): %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level == LIBBPF_DEBUG && !env.verbose)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static void sig_handler(int sig)
+{
+}
+
+static void print_map(struct ksyms *ksyms, struct syms_cache *syms_cache,
+ struct offcputime_bpf *obj)
+{
+ struct key_t lookup_key = {}, next_key;
+ const struct ksym *ksym;
+ const struct syms *syms;
+ const struct sym *sym;
+ int err, i, ifd, sfd;
+ unsigned long *ip;
+ struct val_t val;
+
+ ip = calloc(env.perf_max_stack_depth, sizeof(*ip));
+ if (!ip) {
+ fprintf(stderr, "failed to alloc ip\n");
+ return;
+ }
+
+ ifd = bpf_map__fd(obj->maps.info);
+ sfd = bpf_map__fd(obj->maps.stackmap);
+ while (!bpf_map_get_next_key(ifd, &lookup_key, &next_key)) {
+ err = bpf_map_lookup_elem(ifd, &next_key, &val);
+ if (err < 0) {
+ fprintf(stderr, "failed to lookup info: %d\n", err);
+ goto cleanup;
+ }
+ lookup_key = next_key;
+ if (val.delta == 0)
+ continue;
+ if (bpf_map_lookup_elem(sfd, &next_key.kern_stack_id, ip) != 0) {
+ fprintf(stderr, " [Missed Kernel Stack]\n");
+ goto print_ustack;
+ }
+ for (i = 0; i < env.perf_max_stack_depth && ip[i]; i++) {
+ ksym = ksyms__map_addr(ksyms, ip[i]);
+ printf(" %s\n", ksym ? ksym->name : "Unknown");
+ }
+
+print_ustack:
+ if (next_key.user_stack_id == -1)
+ goto skip_ustack;
+
+ if (bpf_map_lookup_elem(sfd, &next_key.user_stack_id, ip) != 0) {
+ fprintf(stderr, " [Missed User Stack]\n");
+			goto skip_ustack;
+ }
+
+ syms = syms_cache__get_syms(syms_cache, next_key.tgid);
+ if (!syms) {
+ fprintf(stderr, "failed to get syms\n");
+ goto skip_ustack;
+ }
+ for (i = 0; i < env.perf_max_stack_depth && ip[i]; i++) {
+ sym = syms__map_addr(syms, ip[i]);
+ if (sym)
+ printf(" %s\n", sym->name);
+ else
+ printf(" [unknown]\n");
+ }
+
+skip_ustack:
+ printf(" %-16s %s (%d)\n", "-", val.comm, next_key.pid);
+		printf(" %llu\n\n", val.delta);
+ }
+
+cleanup:
+ free(ip);
+}
+
+int main(int argc, char **argv)
+{
+ static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+ };
+ struct syms_cache *syms_cache = NULL;
+ struct ksyms *ksyms = NULL;
+ struct offcputime_bpf *obj;
+ int err;
+
+ err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+ if (err)
+ return err;
+ if (env.user_threads_only && env.kernel_threads_only) {
+		fprintf(stderr, "user_threads_only and kernel_threads_only can't be used together.\n");
+ return 1;
+ }
+ if (env.min_block_time >= env.max_block_time) {
+		fprintf(stderr, "min_block_time should be smaller than max_block_time\n");
+ return 1;
+ }
+
+ libbpf_set_print(libbpf_print_fn);
+
+ err = bump_memlock_rlimit();
+ if (err) {
+ fprintf(stderr, "failed to increase rlimit: %d\n", err);
+ return 1;
+ }
+
+ obj = offcputime_bpf__open();
+ if (!obj) {
+ fprintf(stderr, "failed to open BPF object\n");
+ return 1;
+ }
+
+ /* initialize global data (filtering options) */
+ obj->rodata->targ_tgid = env.pid;
+ obj->rodata->targ_pid = env.tid;
+ obj->rodata->user_threads_only = env.user_threads_only;
+ obj->rodata->kernel_threads_only = env.kernel_threads_only;
+ obj->rodata->state = env.state;
+ obj->rodata->min_block_ns = env.min_block_time;
+ obj->rodata->max_block_ns = env.max_block_time;
+
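+	/* map geometry must be set before load; it cannot change afterwards */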
+ bpf_map__set_value_size(obj->maps.stackmap,
+ env.perf_max_stack_depth * sizeof(unsigned long));
+ bpf_map__set_max_entries(obj->maps.stackmap, env.stack_storage_size);
+
+ err = offcputime_bpf__load(obj);
+ if (err) {
+ fprintf(stderr, "failed to load BPF programs\n");
+ goto cleanup;
+ }
+ ksyms = ksyms__load();
+ if (!ksyms) {
+ fprintf(stderr, "failed to load kallsyms\n");
+ goto cleanup;
+ }
+ syms_cache = syms_cache__new(0);
+ if (!syms_cache) {
+ fprintf(stderr, "failed to create syms_cache\n");
+ goto cleanup;
+ }
+ err = offcputime_bpf__attach(obj);
+ if (err) {
+ fprintf(stderr, "failed to attach BPF programs\n");
+ goto cleanup;
+ }
+
+ signal(SIGINT, sig_handler);
+
+ /*
+ * We'll get sleep interrupted when someone presses Ctrl-C (which will
+ * be "handled" with noop by sig_handler).
+ */
+ sleep(env.duration);
+
+ print_map(ksyms, syms_cache, obj);
+
+cleanup:
+ offcputime_bpf__destroy(obj);
+ syms_cache__free(syms_cache);
+ ksyms__free(ksyms);
+ return err != 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __OFFCPUTIME_H
+#define __OFFCPUTIME_H
+
+#define TASK_COMM_LEN 16
+
+struct key_t {
+ __u32 pid;
+ __u32 tgid;
+ int user_stack_id;
+ int kern_stack_id;
+};
+
+struct val_t {
+ __u64 delta;
+ char comm[TASK_COMM_LEN];
+};
+
+#endif /* __OFFCPUTIME_H */
//
// Based on ksyms improvements from Andrii Nakryiko, add more helpers.
// 28-Feb-2020 Wenbo Zhang Created this.
+#define _GNU_SOURCE
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
#include <sys/resource.h>
#include <time.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#include <limits.h>
#include "trace_helpers.h"
+#include "uprobe_helpers.h"
-#define min(x, y) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- (void) (&_min1 == &_min2); \
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#define DISK_NAME_LEN 32
return NULL;
}
+struct load_range {
+ uint64_t start;
+ uint64_t end;
+ uint64_t file_off;
+};
+
+enum elf_type {
+ EXEC,
+ DYN,
+ PERF_MAP,
+ VDSO,
+ UNKNOWN,
+};
+
+struct dso {
+ char *name;
+ struct load_range *ranges;
+ int range_sz;
+ /* Dyn's first text section virtual addr at execution */
+ uint64_t sh_addr;
+ /* Dyn's first text section file offset */
+ uint64_t sh_offset;
+ enum elf_type type;
+
+ struct sym *syms;
+ int syms_sz;
+ int syms_cap;
+
+ /*
+ * libbpf's struct btf is actually a pretty efficient
+ * "set of strings" data structure, so we create an
+ * empty one and use it to store symbol names.
+ */
+ struct btf *btf;
+};
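+
+/*
+ * A minimal sketch of that string-set round trip (these are the real
+ * libbpf APIs; the string is illustrative):
+ *
+ *   struct btf *set = btf__new_empty();
+ *   int off = btf__add_str(set, "malloc");          // dedups, returns offset
+ *   const char *s = btf__name_by_offset(set, off);  // -> "malloc"
+ *   btf__free(set);
+ */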
+
+struct map {
+ uint64_t start_addr;
+ uint64_t end_addr;
+ uint64_t file_off;
+ uint64_t dev_major;
+ uint64_t dev_minor;
+ uint64_t inode;
+};
+
+struct syms {
+ struct dso *dsos;
+ int dso_sz;
+};
+
+static bool is_file_backed(const char *mapname)
+{
+#define STARTS_WITH(mapname, prefix) \
+ (!strncmp(mapname, prefix, sizeof(prefix) - 1))
+
+ return mapname[0] && !(
+ STARTS_WITH(mapname, "//anon") ||
+ STARTS_WITH(mapname, "/dev/zero") ||
+ STARTS_WITH(mapname, "/anon_hugepage") ||
+ STARTS_WITH(mapname, "[stack") ||
+ STARTS_WITH(mapname, "/SYSV") ||
+ STARTS_WITH(mapname, "[heap]") ||
+ STARTS_WITH(mapname, "[vsyscall]"));
+}
+
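+/* stub: perf map files (/tmp/perf-<PID>.map) are not supported yet */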
+static bool is_perf_map(const char *path)
+{
+ return false;
+}
+
+static bool is_vdso(const char *path)
+{
+ return !strcmp(path, "[vdso]");
+}
+
+static int get_elf_type(const char *path)
+{
+ GElf_Ehdr hdr;
+ void *res;
+ Elf *e;
+ int fd;
+
+ if (is_vdso(path))
+ return -1;
+ e = open_elf(path, &fd);
+ if (!e)
+ return -1;
+ res = gelf_getehdr(e, &hdr);
+ close_elf(e, fd);
+ if (!res)
+ return -1;
+ return hdr.e_type;
+}
+
+static int get_elf_text_scn_info(const char *path, uint64_t *addr,
+ uint64_t *offset)
+{
+ Elf_Scn *section = NULL;
+ int fd = -1, err = -1;
+ GElf_Shdr header;
+ size_t stridx;
+ Elf *e = NULL;
+ char *name;
+
+ e = open_elf(path, &fd);
+ if (!e)
+ goto err_out;
+ err = elf_getshdrstrndx(e, &stridx);
+ if (err < 0)
+ goto err_out;
+
+ err = -1;
+ while ((section = elf_nextscn(e, section)) != 0) {
+ if (!gelf_getshdr(section, &header))
+ continue;
+
+ name = elf_strptr(e, stridx, header.sh_name);
+ if (name && !strcmp(name, ".text")) {
+ *addr = (uint64_t)header.sh_addr;
+ *offset = (uint64_t)header.sh_offset;
+ err = 0;
+ break;
+ }
+ }
+
+err_out:
+ close_elf(e, fd);
+ return err;
+}
+
+static int syms__add_dso(struct syms *syms, struct map *map, const char *name)
+{
+ struct dso *dso = NULL;
+ int i, type;
+ void *tmp;
+
+ for (i = 0; i < syms->dso_sz; i++) {
+ if (!strcmp(syms->dsos[i].name, name)) {
+ dso = &syms->dsos[i];
+ break;
+ }
+ }
+
+ if (!dso) {
+ tmp = realloc(syms->dsos, (syms->dso_sz + 1) *
+ sizeof(*syms->dsos));
+ if (!tmp)
+ return -1;
+ syms->dsos = tmp;
+ dso = &syms->dsos[syms->dso_sz++];
+ memset(dso, 0, sizeof(*dso));
+ dso->name = strdup(name);
+ dso->btf = btf__new_empty();
+ }
+
+ tmp = realloc(dso->ranges, (dso->range_sz + 1) * sizeof(*dso->ranges));
+ if (!tmp)
+ return -1;
+ dso->ranges = tmp;
+ dso->ranges[dso->range_sz].start = map->start_addr;
+ dso->ranges[dso->range_sz].end = map->end_addr;
+ dso->ranges[dso->range_sz].file_off = map->file_off;
+ dso->range_sz++;
+ type = get_elf_type(name);
+ if (type == ET_EXEC) {
+ dso->type = EXEC;
+ } else if (type == ET_DYN) {
+ dso->type = DYN;
+ if (get_elf_text_scn_info(name, &dso->sh_addr, &dso->sh_offset) < 0)
+ return -1;
+ } else if (is_perf_map(name)) {
+ dso->type = PERF_MAP;
+ } else if (is_vdso(name)) {
+ dso->type = VDSO;
+ } else {
+ dso->type = UNKNOWN;
+ }
+ return 0;
+}
+
+static struct dso *syms__find_dso(const struct syms *syms, unsigned long addr,
+ uint64_t *offset)
+{
+ struct load_range *range;
+ struct dso *dso;
+ int i, j;
+
+ for (i = 0; i < syms->dso_sz; i++) {
+ dso = &syms->dsos[i];
+ for (j = 0; j < dso->range_sz; j++) {
+ range = &dso->ranges[j];
+ if (addr <= range->start || addr >= range->end)
+ continue;
+ if (dso->type == DYN || dso->type == VDSO) {
+ /* Offset within the mmap */
+ *offset = addr - range->start + range->file_off;
+ /* Offset within the ELF for dyn symbol lookup */
+ *offset += dso->sh_addr - dso->sh_offset;
+ } else {
+ *offset = addr;
+ }
+
+ return dso;
+ }
+ }
+
+ return NULL;
+}
+
+static int dso__load_sym_table_from_perf_map(struct dso *dso)
+{
+ return -1;
+}
+
+static int dso__add_sym(struct dso *dso, const char *name, uint64_t start,
+ uint64_t size)
+{
+ struct sym *sym;
+ size_t new_cap;
+ void *tmp;
+ int off;
+
+ off = btf__add_str(dso->btf, name);
+ if (off < 0)
+ return off;
+
+ if (dso->syms_sz + 1 > dso->syms_cap) {
+ new_cap = dso->syms_cap * 4 / 3;
+ if (new_cap < 1024)
+ new_cap = 1024;
+ tmp = realloc(dso->syms, sizeof(*dso->syms) * new_cap);
+ if (!tmp)
+ return -1;
+ dso->syms = tmp;
+ dso->syms_cap = new_cap;
+ }
+
+ sym = &dso->syms[dso->syms_sz++];
+ /* while constructing, re-use pointer as just a plain offset */
+ sym->name = (void*)(unsigned long)off;
+ sym->start = start;
+ sym->size = size;
+
+ return 0;
+}
+
+static int sym_cmp(const void *p1, const void *p2)
+{
+ const struct sym *s1 = p1, *s2 = p2;
+
+ if (s1->start == s2->start)
+ return strcmp(s1->name, s2->name);
+ return s1->start < s2->start ? -1 : 1;
+}
+
+static int dso__add_syms(struct dso *dso, Elf *e, Elf_Scn *section,
+ size_t stridx, size_t symsize)
+{
+ Elf_Data *data = NULL;
+
+ while ((data = elf_getdata(section, data)) != 0) {
+ size_t i, symcount = data->d_size / symsize;
+
+ if (data->d_size % symsize)
+ return -1;
+
+ for (i = 0; i < symcount; ++i) {
+ const char *name;
+ GElf_Sym sym;
+
+ if (!gelf_getsym(data, (int)i, &sym))
+ continue;
+ if (!(name = elf_strptr(e, stridx, sym.st_name)))
+ continue;
+ if (name[0] == '\0')
+ continue;
+
+ if (sym.st_value == 0)
+ continue;
+
+ if (dso__add_sym(dso, name, sym.st_value, sym.st_size))
+ goto err_out;
+ }
+ }
+
+ return 0;
+
+err_out:
+ return -1;
+}
+
+static void dso__free_fields(struct dso *dso)
+{
+ if (!dso)
+ return;
+
+ free(dso->name);
+ free(dso->ranges);
+ free(dso->syms);
+ btf__free(dso->btf);
+}
+
+static int dso__load_sym_table_from_elf(struct dso *dso, int fd)
+{
+ Elf_Scn *section = NULL;
+ Elf *e;
+ int i;
+
+ e = fd > 0 ? open_elf_by_fd(fd) : open_elf(dso->name, &fd);
+ if (!e)
+ return -1;
+
+ while ((section = elf_nextscn(e, section)) != 0) {
+ GElf_Shdr header;
+
+ if (!gelf_getshdr(section, &header))
+ continue;
+
+ if (header.sh_type != SHT_SYMTAB &&
+ header.sh_type != SHT_DYNSYM)
+ continue;
+
+ if (dso__add_syms(dso, e, section, header.sh_link,
+ header.sh_entsize))
+ goto err_out;
+ }
+
+ /* now when strings are finalized, adjust pointers properly */
+ for (i = 0; i < dso->syms_sz; i++)
+ dso->syms[i].name =
+ btf__name_by_offset(dso->btf,
+ (unsigned long)dso->syms[i].name);
+
+ qsort(dso->syms, dso->syms_sz, sizeof(*dso->syms), sym_cmp);
+
+ close_elf(e, fd);
+ return 0;
+
+err_out:
+ dso__free_fields(dso);
+ close_elf(e, fd);
+ return -1;
+}
+
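+/*
+ * The vDSO has no backing file on disk, so the ELF loader above cannot
+ * open it by path. Instead, find the [vdso] mapping in our own
+ * /proc/<pid>/maps, copy the in-memory image into an unlinked temp file,
+ * and hand that fd to dso__load_sym_table_from_elf().
+ */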
+static int create_tmp_vdso_image(struct dso *dso)
+{
+ uint64_t start_addr, end_addr;
+ long pid = getpid();
+ char buf[PATH_MAX];
+ void *image = NULL;
+ char tmpfile[128];
+ int ret, fd = -1;
+ uint64_t sz;
+ char *name;
+	bool found = false;
+	FILE *f;
+
+ snprintf(tmpfile, sizeof(tmpfile), "/proc/%ld/maps", pid);
+ f = fopen(tmpfile, "r");
+ if (!f)
+ return -1;
+
+ while (true) {
+ ret = fscanf(f, "%lx-%lx %*s %*x %*x:%*x %*u%[^\n]",
+ &start_addr, &end_addr, buf);
+ if (ret == EOF && feof(f))
+ break;
+ if (ret != 3)
+ goto err_out;
+
+ name = buf;
+ while (isspace(*name))
+ name++;
+ if (!is_file_backed(name))
+ continue;
+		if (is_vdso(name)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		goto err_out;
+
+ sz = end_addr - start_addr;
+ image = malloc(sz);
+ if (!image)
+ goto err_out;
+ memcpy(image, (void *)start_addr, sz);
+
+ snprintf(tmpfile, sizeof(tmpfile),
+ "/tmp/libbpf_%ld_vdso_image_XXXXXX", pid);
+ fd = mkostemp(tmpfile, O_CLOEXEC);
+ if (fd < 0) {
+ fprintf(stderr, "failed to create temp file: %s\n",
+ strerror(errno));
+ goto err_out;
+ }
+ /* Unlink the file to avoid leaking */
+ if (unlink(tmpfile) == -1)
+ fprintf(stderr, "failed to unlink %s: %s\n", tmpfile,
+ strerror(errno));
+ if (write(fd, image, sz) == -1) {
+ fprintf(stderr, "failed to write to vDSO image: %s\n",
+ strerror(errno));
+ close(fd);
+ fd = -1;
+ goto err_out;
+ }
+
+err_out:
+ fclose(f);
+ free(image);
+ return fd;
+}
+
+static int dso__load_sym_table_from_vdso_image(struct dso *dso)
+{
+ int fd = create_tmp_vdso_image(dso);
+
+ if (fd < 0)
+ return -1;
+ return dso__load_sym_table_from_elf(dso, fd);
+}
+
+static int dso__load_sym_table(struct dso *dso)
+{
+ if (dso->type == UNKNOWN)
+ return -1;
+ if (dso->type == PERF_MAP)
+ return dso__load_sym_table_from_perf_map(dso);
+ if (dso->type == EXEC || dso->type == DYN)
+ return dso__load_sym_table_from_elf(dso, 0);
+ if (dso->type == VDSO)
+ return dso__load_sym_table_from_vdso_image(dso);
+ return -1;
+}
+
+static struct sym *dso__find_sym(struct dso *dso, uint64_t offset)
+{
+ unsigned long sym_addr;
+ int start, end, mid;
+
+ if (!dso->syms && dso__load_sym_table(dso))
+ return NULL;
+
+ start = 0;
+ end = dso->syms_sz - 1;
+
+	/* find the largest sym whose start addr <= offset, via binary search */
+ while (start < end) {
+ mid = start + (end - start + 1) / 2;
+ sym_addr = dso->syms[mid].start;
+
+ if (sym_addr <= offset)
+ start = mid;
+ else
+ end = mid - 1;
+ }
+
+ if (start == end && dso->syms[start].start <= offset)
+ return &dso->syms[start];
+ return NULL;
+}
+
+struct syms *syms__load_file(const char *fname)
+{
+ char buf[PATH_MAX], perm[5];
+ struct syms *syms;
+ struct map map;
+ char *name;
+ FILE *f;
+ int ret;
+
+ f = fopen(fname, "r");
+ if (!f)
+ return NULL;
+
+ syms = calloc(1, sizeof(*syms));
+ if (!syms)
+ goto err_out;
+
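+	/*
+	 * Parse /proc/<pid>/maps; a typical line (illustrative) is:
+	 *   7f1bca4a2000-7f1bca651000 r-xp 00000000 fd:01 269041 /usr/lib/libc.so.6
+	 * Only executable, file-backed mappings are kept.
+	 */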
+ while (true) {
+ ret = fscanf(f, "%lx-%lx %4s %lx %lx:%lx %lu%[^\n]",
+ &map.start_addr, &map.end_addr, perm,
+ &map.file_off, &map.dev_major,
+ &map.dev_minor, &map.inode, buf);
+ if (ret == EOF && feof(f))
+ break;
+		if (ret != 8) /* not a maps line; possibly a perf-<PID>.map */
+ goto err_out;
+
+ if (perm[2] != 'x')
+ continue;
+
+ name = buf;
+ while (isspace(*name))
+ name++;
+ if (!is_file_backed(name))
+ continue;
+
+ if (syms__add_dso(syms, &map, name))
+ goto err_out;
+ }
+
+ fclose(f);
+ return syms;
+
+err_out:
+ syms__free(syms);
+ fclose(f);
+ return NULL;
+}
+
+struct syms *syms__load_pid(pid_t tgid)
+{
+ char fname[128];
+
+ snprintf(fname, sizeof(fname), "/proc/%ld/maps", (long)tgid);
+ return syms__load_file(fname);
+}
+
+void syms__free(struct syms *syms)
+{
+ int i;
+
+ if (!syms)
+ return;
+
+ for (i = 0; i < syms->dso_sz; i++)
+ dso__free_fields(&syms->dsos[i]);
+ free(syms->dsos);
+ free(syms);
+}
+
+const struct sym *syms__map_addr(const struct syms *syms, unsigned long addr)
+{
+ struct dso *dso;
+ uint64_t offset;
+
+ dso = syms__find_dso(syms, addr, &offset);
+ if (!dso)
+ return NULL;
+ return dso__find_sym(dso, offset);
+}
+
+struct syms_cache {
+ struct {
+ struct syms *syms;
+ int tgid;
+ } *data;
+ int nr;
+};
+
+struct syms_cache *syms_cache__new(int nr)
+{
+ struct syms_cache *syms_cache;
+
+ syms_cache = calloc(1, sizeof(*syms_cache));
+ if (!syms_cache)
+ return NULL;
+ if (nr > 0)
+ syms_cache->data = calloc(nr, sizeof(*syms_cache->data));
+ return syms_cache;
+}
+
+void syms_cache__free(struct syms_cache *syms_cache)
+{
+ int i;
+
+ if (!syms_cache)
+ return;
+
+ for (i = 0; i < syms_cache->nr; i++)
+ syms__free(syms_cache->data[i].syms);
+ free(syms_cache->data);
+ free(syms_cache);
+}
+
+struct syms *syms_cache__get_syms(struct syms_cache *syms_cache, int tgid)
+{
+ void *tmp;
+ int i;
+
+ for (i = 0; i < syms_cache->nr; i++) {
+ if (syms_cache->data[i].tgid == tgid)
+ return syms_cache->data[i].syms;
+ }
+
+ tmp = realloc(syms_cache->data, (syms_cache->nr + 1) *
+ sizeof(*syms_cache->data));
+ if (!tmp)
+ return NULL;
+ syms_cache->data = tmp;
+ syms_cache->data[syms_cache->nr].syms = syms__load_pid(tgid);
+ syms_cache->data[syms_cache->nr].tgid = tgid;
+ return syms_cache->data[syms_cache->nr++].syms;
+}
+
struct partitions {
struct partition *items;
int sz;
}
void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base,
- unsigned int step, const char *val_type)
+ unsigned int step, const char *val_type)
{
int i, stars_max = 40, idx_min = -1, idx_max = -1;
unsigned int val, val_max = 0;
const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms,
const char *name);
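+/*
+ * User-space symbolization helpers. A sketch of the typical flow (this
+ * mirrors how offcputime.c uses them; tgid and addr are placeholders):
+ *
+ *   struct syms_cache *cache = syms_cache__new(0);
+ *   struct syms *syms = syms_cache__get_syms(cache, tgid);
+ *   const struct sym *sym = syms__map_addr(syms, addr);
+ *   printf("%s\n", sym ? sym->name : "[unknown]");
+ *   syms_cache__free(cache);
+ */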
+struct sym {
+ const char *name;
+ unsigned long start;
+ unsigned long size;
+};
+
+struct syms;
+
+struct syms *syms__load_pid(int tgid);
+struct syms *syms__load_file(const char *fname);
+void syms__free(struct syms *syms);
+const struct sym *syms__map_addr(const struct syms *syms, unsigned long addr);
+
+struct syms_cache;
+
+struct syms_cache *syms_cache__new(int nr);
+struct syms *syms_cache__get_syms(struct syms_cache *syms_cache, int tgid);
+void syms_cache__free(struct syms_cache *syms_cache);
+
struct partition {
char *name;
unsigned int dev;
* Opens an elf at `path` of kind ELF_K_ELF. Returns NULL on failure. On
* success, close with close_elf(e, fd_close).
*/
-static Elf *open_elf(const char *path, int *fd_close)
+Elf *open_elf(const char *path, int *fd_close)
{
int fd;
Elf *e;
return e;
}
-static void close_elf(Elf *e, int fd_close)
+Elf *open_elf_by_fd(int fd)
+{
+ Elf *e;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ warn("elf init failed\n");
+ return NULL;
+ }
+ e = elf_begin(fd, ELF_C_READ, NULL);
+ if (!e) {
+ warn("elf_begin failed: %s\n", elf_errmsg(-1));
+ close(fd);
+ return NULL;
+ }
+ if (elf_kind(e) != ELF_K_ELF) {
+ warn("elf kind %d is not ELF_K_ELF\n", elf_kind(e));
+ elf_end(e);
+ close(fd);
+ return NULL;
+ }
+ return e;
+}
+
+void close_elf(Elf *e, int fd_close)
{
elf_end(e);
close(fd_close);
#include <sys/types.h>
#include <unistd.h>
+#include <gelf.h>
int get_pid_binary_path(pid_t pid, char *path, size_t path_sz);
int get_pid_lib_path(pid_t pid, const char *lib, char *path, size_t path_sz);
int resolve_binary_path(const char *binary, pid_t pid, char *path, size_t path_sz);
off_t get_elf_func_offset(const char *path, const char *func);
+Elf *open_elf(const char *path, int *fd_close);
+Elf *open_elf_by_fd(int fd);
+void close_elf(Elf *e, int fd_close);
#endif /* __UPROBE_HELPERS_H */