- [2. BPF_HASH](#2-bpf_hash)
- [3. BPF_HISTOGRAM](#3-bpf_histogram)
- [4. BPF_STACK_TRACE](#4-bpf_stack_trace)
- - [5. map.lookup()](#5-maplookup)
- - [6. map.lookup_or_init()](#6-maplookup_or_init)
- - [7. map.delete()](#7-mapdelete)
- - [8. map.update()](#8-mapupdate)
- - [9. map.increment()](#9-mapincrement)
- - [10. map.get_stackid()](#10-mapget_stackid)
+ - [5. BPF_PERF_ARRAY](#5-bpf_perf_array)
+ - [6. map.lookup()](#6-maplookup)
+ - [7. map.lookup_or_init()](#7-maplookup_or_init)
+ - [8. map.delete()](#8-mapdelete)
+ - [9. map.update()](#9-mapupdate)
+ - [10. map.increment()](#10-mapincrement)
+ - [11. map.get_stackid()](#11-mapget_stackid)
+ - [12. map.perf_read()](#12-mapperf_read)
- [bcc Python](#bcc-python)
- [Initialization](#initialization)
[search /examples](https://github.com/iovisor/bcc/search?q=BPF_STACK_TRACE+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=BPF_STACK_TRACE+path%3Atools&type=Code)
-### 5. map.lookup()
+### 5. BPF_PERF_ARRAY
+
+Syntax: ```BPF_PERF_ARRAY(name, max_entries)```
+
+Creates a perf array named ```name```, with a maximum entry count provided, which must be equal to the number of system CPUs. These maps are used to fetch hardware performance counters.
+
+For example:
+
+```C
+text="""
+BPF_PERF_ARRAY(cpu_cycles, NUM_CPUS);
+"""
+b = bcc.BPF(text=text, cflags=["-DNUM_CPUS=%d" % multiprocessing.cpu_count()])
+b["cpu_cycles"].open_perf_event(b["cpu_cycles"].HW_CPU_CYCLES)
+```
+
+This creates a perf array named ```cpu_cycles```, with number of entries equal to the number of cpus/cores. The array is configured so that later calling map.perf_read() will return a hardware-calculated counter of the number of cycles elapsed from some point in the past. Only one type of hardware counter may be configured per table at a time.
+
+Methods (covered later): map.perf_read().
+
+Examples in situ:
+[search /tests](https://github.com/iovisor/bcc/search?q=BPF_PERF_ARRAY+path%3Atests&type=Code)
+
+### 6. map.lookup()
Syntax: ```*val map.lookup(&key)```
[search /examples](https://github.com/iovisor/bcc/search?q=lookup+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=lookup+path%3Atools&type=Code)
-### 6. map.lookup_or_init()
+### 7. map.lookup_or_init()
Syntax: ```*val map.lookup_or_init(&key, &zero)```
[search /examples](https://github.com/iovisor/bcc/search?q=lookup_or_init+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=lookup_or_init+path%3Atools&type=Code)
-### 7. map.delete()
+### 8. map.delete()
Syntax: ```map.delete(&key)```
[search /examples](https://github.com/iovisor/bcc/search?q=delete+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=delete+path%3Atools&type=Code)
-### 8. map.update()
+### 9. map.update()
Syntax: ```map.update(&key, &val)```
[search /examples](https://github.com/iovisor/bcc/search?q=update+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=update+path%3Atools&type=Code)
-### 9. map.increment()
+### 10. map.increment()
Syntax: ```map.increment(&key)```
[search /examples](https://github.com/iovisor/bcc/search?q=increment+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=increment+path%3Atools&type=Code)
-### 10. map.get_stackid()
+### 11. map.get_stackid()
Syntax: ```int map.get_stackid(void *ctx, u64 flags)```
[search /examples](https://github.com/iovisor/bcc/search?q=get_stackid+path%3Aexamples&type=Code),
[search /tools](https://github.com/iovisor/bcc/search?q=get_stackid+path%3Atools&type=Code)
+### 12. map.perf_read()
+
+Syntax: ```u64 map.perf_read(u32 cpu)```
+
+This returns the hardware performance counter as configured in [5. BPF_PERF_ARRAY](#5-bpf_perf_array).
+
+Examples in situ:
+[search /tests](https://github.com/iovisor/bcc/search?q=perf_read+path%3Atests&type=Code)
+
# bcc Python
## Initialization
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
+#include <limits.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
#include <linux/pkt_cls.h>
return NULL;
}
+int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
+ int fd;
+ struct perf_event_attr attr = {};
+
+ attr.sample_period = LONG_MAX;
+ attr.type = type;
+ attr.config = config;
+
+ fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+ if (fd < 0) {
+ fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
+ return -1;
+ }
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+ perror("ioctl(PERF_EVENT_IOC_ENABLE)");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
int bpf_attach_xdp(const char *dev_name, int progfd) {
struct sockaddr_nl sa;
lib.bpf_detach_tracepoint.argtypes = [ct.c_char_p, ct.c_char_p]
lib.bpf_open_perf_buffer.restype = ct.c_void_p
lib.bpf_open_perf_buffer.argtypes = [_RAW_CB_TYPE, ct.py_object, ct.c_int, ct.c_int]
+lib.bpf_open_perf_event.restype = ct.c_int
+lib.bpf_open_perf_event.argtypes = [ct.c_uint, ct.c_ulonglong, ct.c_int, ct.c_int]
lib.perf_reader_poll.restype = ct.c_int
lib.perf_reader_poll.argtypes = [ct.c_int, ct.POINTER(ct.c_void_p), ct.c_int]
lib.perf_reader_free.restype = None
PERF_TYPE_HARDWARE = 0
PERF_TYPE_SOFTWARE = 1
PERF_TYPE_TRACEPOINT = 2
+ PERF_TYPE_HW_CACHE = 3
# perf_event_sample_format
PERF_SAMPLE_RAW = 1024 # it's a u32; could also try zero args
from collections import MutableMapping
import ctypes as ct
import multiprocessing
+import os
from .libbcc import lib, _RAW_CB_TYPE
+from .perf import Perf
from subprocess import check_output
BPF_MAP_TYPE_HASH = 1
ct.cast(key_p, ct.c_void_p),
ct.cast(leaf_p, ct.c_void_p), 0)
if res < 0:
- raise Exception("Could not update table")
+ errstr = os.strerror(ct.get_errno())
+ raise Exception("Could not update table: %s" % errstr)
# override the MutableMapping's implementation of these since they
# don't handle KeyError nicely
leaf = self.Leaf(leaf.fd)
super(ProgArray, self).__setitem__(key, leaf)
-
class PerfEventArray(ArrayBase):
+ class Event(object):
+ def __init__(self, typ, config):
+ self.typ = typ
+ self.config = config
+
+ HW_CPU_CYCLES = Event(Perf.PERF_TYPE_HARDWARE, 0)
+ HW_INSTRUCTIONS = Event(Perf.PERF_TYPE_HARDWARE, 1)
+ HW_CACHE_REFERENCES = Event(Perf.PERF_TYPE_HARDWARE, 2)
+ HW_CACHE_MISSES = Event(Perf.PERF_TYPE_HARDWARE, 3)
+ HW_BRANCH_INSTRUCTIONS = Event(Perf.PERF_TYPE_HARDWARE, 4)
+ HW_BRANCH_MISSES = Event(Perf.PERF_TYPE_HARDWARE, 5)
+ HW_BUS_CYCLES = Event(Perf.PERF_TYPE_HARDWARE, 6)
+ HW_STALLED_CYCLES_FRONTEND = Event(Perf.PERF_TYPE_HARDWARE, 7)
+ HW_STALLED_CYCLES_BACKEND = Event(Perf.PERF_TYPE_HARDWARE, 8)
+ HW_REF_CPU_CYCLES = Event(Perf.PERF_TYPE_HARDWARE, 9)
+
+ # not yet supported, wip
+ #HW_CACHE_L1D_READ = Event(Perf.PERF_TYPE_HW_CACHE, 0<<0|0<<8|0<<16)
+ #HW_CACHE_L1D_READ_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 0<<0|0<<8|1<<16)
+ #HW_CACHE_L1D_WRITE = Event(Perf.PERF_TYPE_HW_CACHE, 0<<0|1<<8|0<<16)
+ #HW_CACHE_L1D_WRITE_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 0<<0|1<<8|1<<16)
+ #HW_CACHE_L1D_PREF = Event(Perf.PERF_TYPE_HW_CACHE, 0<<0|2<<8|0<<16)
+ #HW_CACHE_L1D_PREF_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 0<<0|2<<8|1<<16)
+ #HW_CACHE_L1I_READ = Event(Perf.PERF_TYPE_HW_CACHE, 1<<0|0<<8|0<<16)
+ #HW_CACHE_L1I_READ_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 1<<0|0<<8|1<<16)
+ #HW_CACHE_L1I_WRITE = Event(Perf.PERF_TYPE_HW_CACHE, 1<<0|1<<8|0<<16)
+ #HW_CACHE_L1I_WRITE_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 1<<0|1<<8|1<<16)
+ #HW_CACHE_L1I_PREF = Event(Perf.PERF_TYPE_HW_CACHE, 1<<0|2<<8|0<<16)
+ #HW_CACHE_L1I_PREF_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 1<<0|2<<8|1<<16)
+ #HW_CACHE_LL_READ = Event(Perf.PERF_TYPE_HW_CACHE, 2<<0|0<<8|0<<16)
+ #HW_CACHE_LL_READ_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 2<<0|0<<8|1<<16)
+ #HW_CACHE_LL_WRITE = Event(Perf.PERF_TYPE_HW_CACHE, 2<<0|1<<8|0<<16)
+ #HW_CACHE_LL_WRITE_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 2<<0|1<<8|1<<16)
+ #HW_CACHE_LL_PREF = Event(Perf.PERF_TYPE_HW_CACHE, 2<<0|2<<8|0<<16)
+ #HW_CACHE_LL_PREF_MISS = Event(Perf.PERF_TYPE_HW_CACHE, 2<<0|2<<8|1<<16)
+
def __init__(self, *args, **kwargs):
super(PerfEventArray, self).__init__(*args, **kwargs)
self.bpf._del_kprobe((id(self), key))
del self._cbs[key]
+ def _open_perf_event(self, cpu, typ, config):
+ fd = lib.bpf_open_perf_event(typ, config, -1, cpu)
+ if fd < 0:
+ raise Exception("bpf_open_perf_event failed")
+ try:
+ self[self.Key(cpu)] = self.Leaf(fd)
+ finally:
+ # the fd is kept open in the map itself by the kernel
+ os.close(fd)
+
+ def open_perf_event(self, ev):
+ """open_perf_event(ev)
+
+ Configures the table such that calls from the bpf program to
+ table.perf_read(bpf_get_smp_processor_id()) will return the hardware
+ counter denoted by event ev on the local cpu.
+ """
+ if not isinstance(ev, self.Event):
+            raise Exception("argument must be an Event, got %s" % type(ev))
+
+ for i in range(0, multiprocessing.cpu_count()):
+ self._open_perf_event(i, ev.typ, ev.config)
+
+
class PerCpuHash(HashTable):
def __init__(self, *args, **kwargs):
self.reducer = kwargs.pop("reducer", None)
COMMAND ${TEST_WRAPPER} py_stackid sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_stackid.py)
add_test(NAME py_test_tracepoint WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_tracepoint sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tracepoint.py)
+add_test(NAME py_test_perf_event WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_perf_event sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_perf_event.py)
add_test(NAME py_test_dump_func WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_dump_func simple ${CMAKE_CURRENT_SOURCE_DIR}/test_dump_func.py)
--- /dev/null
+#!/usr/bin/env python
+# Copyright (c) 2016 PLUMgrid
+# Licensed under the Apache License, Version 2.0 (the "License")
+
+import bcc
+import ctypes
+import multiprocessing
+import os
+import time
+import unittest
+
+class TestPerfCounter(unittest.TestCase):
+ def test_cycles(self):
+ text = """
+BPF_PERF_ARRAY(cnt1, NUM_CPUS);
+BPF_TABLE("array", u32, u64, prev, NUM_CPUS);
+BPF_HISTOGRAM(dist);
+int kprobe__sys_getuid(void *ctx) {
+ u32 cpu = bpf_get_smp_processor_id();
+ u64 val = cnt1.perf_read(cpu);
+ prev.update(&cpu, &val);
+ return 0;
+}
+int kretprobe__sys_getuid(void *ctx) {
+ u32 cpu = bpf_get_smp_processor_id();
+ u64 val = cnt1.perf_read(cpu);
+ u64 *prevp = prev.lookup(&cpu);
+ if (prevp)
+ dist.increment(bpf_log2l(val - *prevp));
+ return 0;
+}
+"""
+ b = bcc.BPF(text=text, debug=0,
+ cflags=["-DNUM_CPUS=%d" % multiprocessing.cpu_count()])
+ cnt1 = b["cnt1"]
+ try:
+ cnt1.open_perf_event(cnt1.HW_CPU_CYCLES)
+        except Exception:
+            # errno 2 (ENOENT): this hardware event is unavailable here
+            if ctypes.get_errno() == 2:
+                self.skipTest("hardware events unsupported")
+            raise
+ for i in range(0, 100):
+ os.getuid()
+ b["dist"].print_log2_hist()
+
+if __name__ == "__main__":
+ unittest.main()