From aeca6bfc56e3408947bc40fdecc734d6d8b1a825 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Fri, 6 Nov 2015 13:03:05 -0800 Subject: [PATCH] Let perf_output be always per-cpu Incorporating feedback from bgregg: Make per-cpu nature of output buffer be hidden and implicit. As such, incorporate some rewriting into the definition of the PERF_OUTPUT. Create two different macros to distinguish the perf_array (hardware counters) from the perf_output (ring buffer) use cases. Also, rename perf_output to perf_submit. Signed-off-by: Brenden Blanco --- examples/tracing/trace_perf_output.py | 11 ++++------ src/cc/export/helpers.h | 16 +++++++++++++-- src/cc/frontends/clang/b_frontend_action.cc | 21 ++++++++++++++++--- src/python/bcc/__init__.py | 32 +++++++++++------------------ 4 files changed, 48 insertions(+), 32 deletions(-) diff --git a/examples/tracing/trace_perf_output.py b/examples/tracing/trace_perf_output.py index 3d56e9e..009bdf8 100755 --- a/examples/tracing/trace_perf_output.py +++ b/examples/tracing/trace_perf_output.py @@ -9,22 +9,21 @@ import atexit from bcc import BPF import ctypes -import multiprocessing counter = 0 -def cb(cookie, data, size): +def cb(cpu, data, size): global counter counter += 1 prog = """ -BPF_PERF_ARRAY(events, NUMCPU); +BPF_PERF_OUTPUT(events); BPF_TABLE("array", int, u64, counters, 10); int kprobe__sys_write(void *ctx) { struct { u64 ts; } data = {bpf_ktime_get_ns()}; int rc; - if ((rc = events.perf_output(ctx, bpf_get_smp_processor_id(), &data, sizeof(data))) < 0) + if ((rc = events.perf_submit(ctx, &data, sizeof(data))) < 0) bpf_trace_printk("perf_output failed: %d\\n", rc); int zero = 0; u64 *val = counters.lookup(&zero); @@ -32,10 +31,8 @@ int kprobe__sys_write(void *ctx) { return 0; } """ -numcpu = multiprocessing.cpu_count() -prog = prog.replace("NUMCPU", str(numcpu)) b = BPF(text=prog) -b["events"].open_perf_buffers(cb, None) +b["events"].open_perf_buffer(cb) @atexit.register def print_counter(): diff --git 
a/src/cc/export/helpers.h b/src/cc/export/helpers.h index 5dcb61b..cbf54af 100644 --- a/src/cc/export/helpers.h +++ b/src/cc/export/helpers.h @@ -42,14 +42,25 @@ struct _name##_table_t { \ __attribute__((section("maps/" _table_type))) \ struct _name##_table_t _name +// Table for pushing custom events to userspace via ring buffer +#define BPF_PERF_OUTPUT(_name) \ +struct _name##_table_t { \ + int key; \ + u32 leaf; \ + /* map.perf_submit(ctx, data, data_size) */ \ + int (*perf_submit) (void *, void *, u32); \ + u32 data[0]; \ +}; \ +__attribute__((section("maps/perf_output"))) \ +struct _name##_table_t _name + +// Table for reading hw perf cpu counters #define BPF_PERF_ARRAY(_name, _max_entries) \ struct _name##_table_t { \ int key; \ u32 leaf; \ /* counter = map.perf_read(index) */ \ u64 (*perf_read) (int); \ - /* map.perf_ouput(ctx, index, data, data_size) */ \ - int (*perf_output) (void *, int, void *, u32); \ u32 data[_max_entries]; \ }; \ __attribute__((section("maps/perf_array"))) \ @@ -364,6 +375,7 @@ int bpf_l4_csum_replace_(void *ctx, u64 off, u64 from, u64 to, u64 flags) { int incr_cksum_l3(void *off, u64 oldval, u64 newval) asm("llvm.bpf.extra"); int incr_cksum_l4(void *off, u64 oldval, u64 newval, u64 flags) asm("llvm.bpf.extra"); +int bpf_num_cpus() asm("llvm.bpf.extra"); #define lock_xadd(ptr, val) ((void)__sync_fetch_and_add(ptr, val)) diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc index 1f5bff3..a719775 100644 --- a/src/cc/frontends/clang/b_frontend_action.cc +++ b/src/cc/frontends/clang/b_frontend_action.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -332,13 +333,14 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) { } txt += "typeof(" + name + ".leaf) *_leaf = " + lookup + ", &_key); "; txt += "if (_leaf) (*_leaf)++; })"; - } else if (memb_name == "perf_output") { + } else if (memb_name == "perf_submit") { string name = Ref->getDecl()->getName(); string arg0 = 
rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(), Call->getArg(0)->getLocEnd())); string args_other = rewriter_.getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(), - Call->getArg(3)->getLocEnd())); - txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + "), " + args_other + ")"; + Call->getArg(2)->getLocEnd())); + txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + ")"; + txt += ", bpf_get_smp_processor_id(), " + args_other + ")"; } else { if (memb_name == "lookup") { prefix = "bpf_map_lookup_elem"; @@ -413,6 +415,12 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) { rewriter_.ReplaceText(SourceRange(Call->getLocStart(), Call->getArg(0)->getLocEnd()), text); rewriter_.InsertTextAfter(Call->getLocEnd(), "); }"); } + } else if (Decl->getName() == "bpf_num_cpus") { + int numcpu = sysconf(_SC_NPROCESSORS_ONLN); + if (numcpu <= 0) + numcpu = 1; + text = to_string(numcpu); + rewriter_.ReplaceText(SourceRange(Call->getLocStart(), Call->getLocEnd()), text); } } } @@ -538,6 +546,13 @@ bool BTypeVisitor::VisitVarDecl(VarDecl *Decl) { } else if (A->getName() == "maps/prog") { if (KERNEL_VERSION(major,minor,0) >= KERNEL_VERSION(4,2,0)) map_type = BPF_MAP_TYPE_PROG_ARRAY; + } else if (A->getName() == "maps/perf_output") { + if (KERNEL_VERSION(major,minor,0) >= KERNEL_VERSION(4,3,0)) + map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY; + int numcpu = sysconf(_SC_NPROCESSORS_ONLN); + if (numcpu <= 0) + numcpu = 1; + table.max_entries = numcpu; } else if (A->getName() == "maps/perf_array") { if (KERNEL_VERSION(major,minor,0) >= KERNEL_VERSION(4,3,0)) map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY; diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py index 5ab826b..27b7508 100644 --- a/src/python/bcc/__init__.py +++ b/src/python/bcc/__init__.py @@ -187,35 +187,27 @@ class BPF(object): raise Exception("Could not scanf leaf") return leaf - def open_perf_buffers(self, cb, cookie): - """open_perf_buffers(cb, cookie) + 
def open_perf_buffer(self, callback): + """open_perf_buffer(callback) - Opens ring buffers, one for each cpu, to receive custom perf event - data from the bpf program. The program is expected to use the cpu-id - as the key of the perf_output call. + Opens a set of per-cpu ring buffers to receive custom perf event + data from the bpf program. The callback will be invoked for each + event submitted from the kernel, up to millions per second. """ for i in range(0, multiprocessing.cpu_count()): - self.open_perf_buffer(i, cb, cookie, cpu=i) + self._open_perf_buffer(i, callback) - def open_perf_buffer(self, key, cb, cookie, pid=-1, cpu=0): - """open_perf_buffer(key, cb, cookie, pid=-1, cpu=0) - - Open a ring buffer to receive custom perf event data from the bpf - program. The callback cb is invoked for each event submitted, which - can be up to millions of events per second. The signature of cb - should be cb(cookie, data, data_size). - """ - - fn = _RAW_CB_TYPE(lambda x, data, size: cb(cookie, data, size)) - reader = lib.bpf_open_perf_buffer(fn, None, pid, cpu) + def _open_perf_buffer(self, cpu, callback): + fn = _RAW_CB_TYPE(lambda _, data, size: callback(cpu, data, size)) + reader = lib.bpf_open_perf_buffer(fn, None, -1, cpu) if not reader: raise Exception("Could not open perf buffer") fd = lib.perf_reader_fd(reader) - self[self.Key(key)] = self.Leaf(fd) - open_kprobes[(id(self), key)] = reader + self[self.Key(cpu)] = self.Leaf(fd) + open_kprobes[(id(self), cpu)] = reader # keep a refcnt - self._cbs[key] = (fn, cookie) + self._cbs[cpu] = fn def close_perf_buffer(self, key): reader = open_kprobes.get((id(self), key)) -- 2.7.4