--- /dev/null
+#!/usr/bin/env python
+# Copyright (c) PLUMgrid, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+
+# This is an example of sending per-event data to user space through a
+# BPF perf event array and counting the events received.
+# run in project examples directory with:
+# sudo ./trace_fields.py
+
+import atexit
+from bcc import BPF
+import ctypes
+
+counter = 0
+def cb(foo, data, size):
+ global counter
+ counter += 1
+
+prog = """
+BPF_PERF_ARRAY(events, 2);
+BPF_TABLE("array", int, u64, counters, 10);
+int kprobe__sys_write(void *ctx) {
+ struct {
+ u64 ts;
+ } data = {bpf_ktime_get_ns()};
+ if (events.perf_output(ctx, 0, &data, sizeof(data)) < 0)
+ bpf_trace_printk("perf_output failed\\n");
+ int zero = 0;
+ u64 *val = counters.lookup(&zero);
+ if (val) lock_xadd(val, 1);
+ return 0;
+}
+"""
+b = BPF(text=prog)  # hand the embedded C source above to bcc
+b["events"].open_perf_buffer(0, cb, None)  # samples written to events[0] are delivered to cb(); no cookie
+
+@atexit.register
+def print_counter():
+ global counter
+ global b
+ print("counter = %d vs %d" % (counter, b["counters"][ctypes.c_int(0)].value))
+
+while 1:  # keep polling the open perf readers until the process is interrupted
+ b.kprobe_poll()
__attribute__((section("maps/" _table_type))) \
struct _name##_table_t _name
+#define BPF_PERF_ARRAY(_name, _max_entries) \
+struct _name##_table_t { \
+ int key; \
+ u32 leaf; \
+ /* counter = map.perf_read(index) */ \
+ u64 (*perf_read) (int); \
+ /* map.perf_output(ctx, index, data, data_size) */ \
+ int (*perf_output) (void *, int, void *, u32); \
+ u32 data[_max_entries]; \
+}; \
+__attribute__((section("maps/perf_array"))) \
+struct _name##_table_t _name
+
#define BPF_HASH1(_name) \
BPF_TABLE("hash", u64, u64, _name, 10240)
#define BPF_HASH2(_name, _key_type) \
(void *) BPF_FUNC_skb_get_tunnel_key;
static int (*bpf_skb_set_tunnel_key)(void *ctx, void *from, u32 size, u64 flags) =
(void *) BPF_FUNC_skb_set_tunnel_key;
+/* Reads the perf counter stored at `index` of a perf event array map.
+ * Declared as u64 so the value is not truncated; this matches the
+ * u64 perf_read() member that BPF_PERF_ARRAY exposes for this helper. */
+static u64 (*bpf_perf_event_read)(void *map, u32 index) =
+  (void *) BPF_FUNC_perf_event_read;
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)  // helpers below are only declared when building against 4.4+ headers
+static int (*bpf_redirect)(int ifindex, u32 flags) =
+ (void *) BPF_FUNC_redirect;
+static u32 (*bpf_get_route_realm)(void *ctx) =
+ (void *) BPF_FUNC_get_route_realm;
+static int (*bpf_perf_event_output)(void *ctx, void *map, u32 index, void *data, u32 size) =
+ (void *) BPF_FUNC_perf_event_output;  // what map.perf_output(ctx, index, data, size) is rewritten into
#endif
/* llvm builtin functions that eBPF C program may use to
}
txt += "typeof(" + name + ".leaf) *_leaf = " + lookup + ", &_key); ";
txt += "if (_leaf) (*_leaf)++; })";
+ } else if (memb_name == "perf_output") {
+ string name = Ref->getDecl()->getName();
+ string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
+ Call->getArg(0)->getLocEnd()));
+ string args_other = rewriter_.getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(),
+ Call->getArg(3)->getLocEnd()));
+ txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + "), " + args_other + ")";
} else {
if (memb_name == "lookup") {
prefix = "bpf_map_lookup_elem";
} else if (memb_name == "call") {
prefix = "bpf_tail_call_";
suffix = ")";
+ } else if (memb_name == "perf_read") {
+ prefix = "bpf_perf_event_read";
+ suffix = ")";
} else {
C.getDiagnostics().Report(Call->getLocStart(), diag::err_expected)
<< "valid bpf_table operation";
}
const RecordDecl *RD = R->getDecl()->getDefinition();
+ int major = 0, minor = 0;
+ struct utsname un;
+ if (uname(&un) == 0) {
+ // release format: <major>.<minor>.<revision>[-<othertag>]
+ sscanf(un.release, "%d.%d.", &major, &minor);
+ }
+
TableDesc table;
table.name = Decl->getName();
diag_.Report(Decl->getLocStart(), diag_id) << table.leaf_desc;
}
} else if (A->getName() == "maps/prog") {
- struct utsname un;
- if (uname(&un) == 0) {
- int major = 0, minor = 0;
- // release format: <major>.<minor>.<revision>[-<othertag>]
- sscanf(un.release, "%d.%d.", &major, &minor);
- if (KERNEL_VERSION(major,minor,0) >= KERNEL_VERSION(4,2,0))
- map_type = BPF_MAP_TYPE_PROG_ARRAY;
- }
- if (map_type == BPF_MAP_TYPE_UNSPEC) {
- C.getDiagnostics().Report(Decl->getLocStart(), diag::err_expected)
- << "kernel supporting maps/prog";
- return false;
- }
+ if (KERNEL_VERSION(major,minor,0) >= KERNEL_VERSION(4,2,0))
+ map_type = BPF_MAP_TYPE_PROG_ARRAY;
+ } else if (A->getName() == "maps/perf_array") {
+ if (KERNEL_VERSION(major,minor,0) >= KERNEL_VERSION(4,3,0))
+ map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}
+
+ if (map_type == BPF_MAP_TYPE_UNSPEC) {
+ unsigned diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error,
+ "unsupported map type: %0");
+ C.getDiagnostics().Report(Decl->getLocStart(), diag_id) << A->getName();
+ return false;
+ }
+
table.type = map_type;
table.fd = bpf_create_map(map_type, table.key_size, table.leaf_size, table.max_entries);
if (table.fd < 0) {
static int bpf_attach_tracing_event(int progfd, const char *event_path,
struct perf_reader *reader, int pid, int cpu, int group_fd) {
- int efd = -1, rc = -1, pfd = -1;
- ssize_t bytes = -1;
+ int efd = -1, rc = -1, pfd;
+ ssize_t bytes;
char buf[256];
struct perf_event_attr attr = {};
perror("perf_event_open");
goto cleanup;
}
+ perf_reader_set_fd(reader, pfd);
- if (perf_reader_mmap(reader, pfd, attr.sample_type) < 0)
+ if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
goto cleanup;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
goto cleanup;
}
- rc = pfd;
- pfd = -1;
+ rc = 0;
cleanup:
if (efd >= 0)
close(efd);
- if (pfd >= 0)
- close(pfd);
return rc;
}
char buf[256];
struct perf_reader *reader = NULL;
- reader = perf_reader_new(-1, 8, cb, cb_cookie);
+ reader = perf_reader_new(cb, NULL, cb_cookie);
if (!reader)
goto cleanup;
return rc;
}
+/*
+ * Open a PERF_COUNT_SW_BPF_OUTPUT software event (pid -1, cpu 0) and wrap it
+ * in a perf_reader whose raw samples are handed to raw_cb with cb_cookie.
+ *
+ * Returns the new reader, or NULL on failure. A reader that was allocated
+ * but could not be fully set up is released with perf_reader_free() first.
+ */
+void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie) {
+  struct perf_event_attr attr = {};
+  struct perf_reader *reader;
+  int pfd;
+
+  reader = perf_reader_new(NULL, raw_cb, cb_cookie);
+  if (!reader)
+    return NULL;
+
+  attr.type = PERF_TYPE_SOFTWARE;
+  attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+  attr.sample_type = PERF_SAMPLE_RAW;
+
+  pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+  if (pfd < 0) {
+    perror("perf_event_open");
+    perf_reader_free(reader);
+    return NULL;
+  }
+  // the reader owns the fd from here on; perf_reader_mmap() maps it
+  perf_reader_set_fd(reader, pfd);
+
+  if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0) {
+    perf_reader_free(reader);
+    return NULL;
+  }
+
+  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
+    perf_reader_free(reader);
+    return NULL;
+  }
+
+  return reader;
+}
#include "libbpf.h"
#include "perf_reader.h"
+int perf_reader_page_cnt = 8;
+
struct perf_reader {
perf_reader_cb cb;
+ perf_reader_raw_cb raw_cb;
void *cb_cookie; // to be returned in the cb
void *buf; // for keeping segmented data
size_t buf_size;
int page_size;
int page_cnt;
int fd;
+ uint32_t type;
uint64_t sample_type;
};
-struct perf_reader * perf_reader_new(int fd, int page_cnt, perf_reader_cb cb, void *cb_cookie) {
+struct perf_reader * perf_reader_new(perf_reader_cb cb, perf_reader_raw_cb raw_cb, void *cb_cookie) {  // allocate a zeroed reader; returns NULL if calloc fails
struct perf_reader *reader = calloc(1, sizeof(struct perf_reader));
if (!reader)
return NULL;
reader->cb = cb;
+ reader->raw_cb = raw_cb;  // callback used by parse_sw() for raw samples
reader->cb_cookie = cb_cookie;
- reader->fd = fd;
+ reader->fd = -1;  // no fd yet; it is attached later with perf_reader_set_fd()
reader->page_size = getpagesize();
- reader->page_cnt = page_cnt;
+ reader->page_cnt = perf_reader_page_cnt;  // ring size in pages comes from the global setting
return reader;
}
}
}
-int perf_reader_mmap(struct perf_reader *reader, int fd, uint64_t sample_type) {
+int perf_reader_mmap(struct perf_reader *reader, unsigned type, unsigned long sample_type) {
int mmap_size = reader->page_size * (reader->page_cnt + 1);
- if (!reader->cb)
- return 0;
+ if (reader->fd < 0) {
+ fprintf(stderr, "%s: reader fd is not set\n", __FUNCTION__);
+ return -1;
+ }
- reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE , MAP_SHARED, fd, 0);
+ reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE , MAP_SHARED, reader->fd, 0);
if (reader->base == MAP_FAILED) {
perror("mmap");
return -1;
}
- reader->fd = fd;
+ reader->type = type;
reader->sample_type = sample_type;
return 0;
uint64_t ip;
};
-static void sample_parse(struct perf_reader *reader, void *data, int size) {
+static void parse_tracepoint(struct perf_reader *reader, void *data, int size) {
uint8_t *ptr = data;
struct perf_event_header *header = (void *)data;
reader->cb(reader->cb_cookie, tk ? tk->common.pid : -1, num_callchain, callchain);
}
+/*
+ * Decode one PERF_RECORD_SAMPLE coming from a software event and pass its
+ * raw payload to the reader's raw_cb.
+ *
+ * data/size describe the whole record, starting at its perf_event_header.
+ * When PERF_SAMPLE_RAW is part of the reader's sample_type, the header is
+ * followed by a u32 length and that many payload bytes. A record that does
+ * not match this layout exactly is reported on stderr and dropped. Without
+ * PERF_SAMPLE_RAW there is no payload, so the callback is not invoked.
+ */
+static void parse_sw(struct perf_reader *reader, void *data, int size) {
+  uint8_t *ptr = data;
+  uint8_t *end = (uint8_t *)data + size;
+  struct perf_event_header *header = (void *)data;
+
+  struct {
+    uint32_t size;
+    char data[0];
+  } *raw = NULL;
+
+  ptr += sizeof(*header);
+  if (ptr > end) {
+    fprintf(stderr, "%s: corrupt sample header\n", __FUNCTION__);
+    return;
+  }
+
+  if (reader->sample_type & PERF_SAMPLE_RAW) {
+    // the length word must itself lie inside the record before it is read
+    if ((size_t)(end - ptr) < sizeof(raw->size)) {
+      fprintf(stderr, "%s: corrupt raw sample\n", __FUNCTION__);
+      return;
+    }
+    raw = (void *)ptr;
+    ptr += sizeof(raw->size);
+    // compare against the remaining bytes so a huge length cannot wrap ptr
+    if (raw->size > (size_t)(end - ptr)) {
+      fprintf(stderr, "%s: corrupt raw sample\n", __FUNCTION__);
+      return;
+    }
+    ptr += raw->size;
+  }
+
+  // sanity check
+  if (ptr != end) {
+    fprintf(stderr, "%s: extra data at end of sample\n", __FUNCTION__);
+    return;
+  }
+
+  // raw stays NULL when the sample carries no raw section; never dereference it
+  if (reader->raw_cb && raw)
+    reader->raw_cb(reader->cb_cookie, raw->data, raw->size);
+}
+
static uint64_t read_data_head(struct perf_event_mmap_page *perf_header) {
uint64_t data_head = *((volatile uint64_t *)&perf_header->data_head);
asm volatile("" ::: "memory");
ptr = reader->buf;
}
- if (e->type == PERF_RECORD_LOST)
+ if (e->type == PERF_RECORD_LOST) {
fprintf(stderr, "Lost %lu samples\n", *(uint64_t *)(ptr + sizeof(*e)));
- else if (e->type == PERF_RECORD_SAMPLE)
- sample_parse(reader, ptr, e->size);
- else
+ } else if (e->type == PERF_RECORD_SAMPLE) {
+ if (reader->type == PERF_TYPE_TRACEPOINT)
+ parse_tracepoint(reader, ptr, e->size);
+ else if (reader->type == PERF_TYPE_SOFTWARE)
+ parse_sw(reader, ptr, e->size);
+ } else {
fprintf(stderr, "%s: unknown sample type %d\n", __FUNCTION__, e->type);
+ }
write_data_tail(perf_header, perf_header->data_tail + e->size);
}
return 0;
}
+void perf_reader_set_fd(struct perf_reader *reader, int fd) {  // attach the perf event fd that perf_reader_mmap() will map
+ reader->fd = fd;
+}
+
+int perf_reader_fd(struct perf_reader *reader) {  // fd stored by perf_reader_set_fd(), or -1 as left by perf_reader_new()
+ return reader->fd;
+}
struct perf_reader;
-struct perf_reader * perf_reader_new(int fd, int page_cnt, perf_reader_cb cb, void *cb_cookie);
+struct perf_reader * perf_reader_new(perf_reader_cb cb, perf_reader_raw_cb raw_cb, void *cb_cookie);
void perf_reader_free(void *ptr);
-int perf_reader_mmap(struct perf_reader *reader, int fd, unsigned long sample_type);
+int perf_reader_mmap(struct perf_reader *reader, unsigned type, unsigned long sample_type);
int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout);
+int perf_reader_fd(struct perf_reader *reader);
+void perf_reader_set_fd(struct perf_reader *reader, int fd);
typedef void (*perf_reader_cb)(void *cb_cookie, int pid, uint64_t callchain_num,
void *callchain);
+typedef void (*perf_reader_raw_cb)(void *cb_cookie, void *raw, int raw_size);
void * bpf_attach_kprobe(int progfd, const char *event, const char *event_desc,
int pid, int cpu, int group_fd, perf_reader_cb cb,
lib.bpf_attach_kprobe.restype = ct.c_void_p
_CB_TYPE = ct.CFUNCTYPE(None, ct.py_object, ct.c_int,
ct.c_ulonglong, ct.POINTER(ct.c_ulonglong))
+_RAW_CB_TYPE = ct.CFUNCTYPE(None, ct.py_object, ct.c_void_p, ct.c_int)
lib.bpf_attach_kprobe.argtypes = [ct.c_int, ct.c_char_p, ct.c_char_p, ct.c_int,
ct.c_int, ct.c_int, _CB_TYPE, ct.py_object]
lib.bpf_detach_kprobe.restype = ct.c_int
lib.bpf_detach_kprobe.argtypes = [ct.c_char_p]
+lib.bpf_open_perf_buffer.restype = ct.c_void_p
+lib.bpf_open_perf_buffer.argtypes = [_RAW_CB_TYPE, ct.py_object]
lib.perf_reader_poll.restype = ct.c_int
lib.perf_reader_poll.argtypes = [ct.c_int, ct.POINTER(ct.c_void_p), ct.c_int]
lib.perf_reader_free.restype = None
lib.perf_reader_free.argtypes = [ct.c_void_p]
+lib.perf_reader_fd.restype = int
+lib.perf_reader_fd.argtypes = [ct.c_void_p]
open_kprobes = {}
tracefile = None
def cleanup_kprobes():
for k, v in open_kprobes.items():
lib.perf_reader_free(v)
- desc = "-:kprobes/%s" % k
- lib.bpf_detach_kprobe(desc.encode("ascii"))
+ if isinstance(k, str):
+ desc = "-:kprobes/%s" % k
+ lib.bpf_detach_kprobe(desc.encode("ascii"))
open_kprobes.clear()
if tracefile:
tracefile.close()
HASH = 1
ARRAY = 2
PROG_ARRAY = 3
+ PERF_EVENT_ARRAY = 4
class Function(object):
def __init__(self, bpf, name, fd):
raise Exception("Could not scanf leaf")
return leaf
+ def open_perf_buffer(self, key, cb, cookie):
+ reader = lib.bpf_open_perf_buffer(_RAW_CB_TYPE(cb),
+ ct.cast(id(cookie), ct.py_object))
+ if not reader:
+ raise Exception("Could not open perf buffer")
+ fd = lib.perf_reader_fd(reader)
+ self[self.Key(key)] = self.Leaf(fd)
+ open_kprobes[(id(self), key)] = reader
+
+ def close_perf_buffer(self, key):
+ reader = open_kprobes.get((id(self), key))
+ if reader:
+ lib.perf_reader_free(reader)
+ del(open_kprobes[(id(self), key)])
+
def __getitem__(self, key):
key_p = ct.pointer(key)
leaf = self.Leaf()
ttype = lib.bpf_table_type_id(self.bpf.module, self.map_id)
# Deleting from array type maps does not have an effect, so
# zero out the entry instead.
- if ttype in (BPF.ARRAY, BPF.PROG_ARRAY):
+ if ttype in (BPF.ARRAY, BPF.PROG_ARRAY, BPF.PERF_EVENT_ARRAY):
leaf = self.Leaf()
leaf_p = ct.pointer(leaf)
res = lib.bpf_update_elem(self.map_fd,
ct.cast(leaf_p, ct.c_void_p), 0)
if res < 0:
raise Exception("Could not clear item")
+ if ttype == BPF.PERF_EVENT_ARRAY:
+ self.close_perf_buffer(key)
else:
res = lib.bpf_delete_elem(self.map_fd,
ct.cast(key_p, ct.c_void_p))
try:
lib.perf_reader_poll(len(open_kprobes), readers, timeout)
except KeyboardInterrupt:
- pass
+ exit()