Add USDT sample (#1229)
authorbveldhoen <bram@adiplus.nl>
Tue, 11 Jul 2017 18:09:35 +0000 (20:09 +0200)
committerSasha Goldshtein <goldshtn@gmail.com>
Tue, 11 Jul 2017 18:09:35 +0000 (21:09 +0300)
This sample contains:
    - A library with an operation that uses usdt probes.
    - A console application that calls the operation.
    - Scripts to trace the latency of the operation.
    - Corresponding cmake files.

14 files changed:
examples/usdt_sample/CMakeLists.txt [new file with mode: 0755]
examples/usdt_sample/scripts/bpf_text_shared.c [new file with mode: 0644]
examples/usdt_sample/scripts/lat_avg.py [new file with mode: 0755]
examples/usdt_sample/scripts/lat_dist.py [new file with mode: 0755]
examples/usdt_sample/scripts/latency.py [new file with mode: 0755]
examples/usdt_sample/usdt_sample.md [new file with mode: 0644]
examples/usdt_sample/usdt_sample_app1/CMakeLists.txt [new file with mode: 0755]
examples/usdt_sample/usdt_sample_app1/main.cpp [new file with mode: 0644]
examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt [new file with mode: 0755]
examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h [new file with mode: 0644]
examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h [new file with mode: 0644]
examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp [new file with mode: 0644]
examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d [new file with mode: 0644]
tests/python/test_usdt.py

diff --git a/examples/usdt_sample/CMakeLists.txt b/examples/usdt_sample/CMakeLists.txt
new file mode 100755 (executable)
index 0000000..04e5092
--- /dev/null
@@ -0,0 +1,7 @@
+cmake_minimum_required(VERSION 3.0)
+
+# This sample requires C++11 enabled.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Weffc++")
+
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/usdt_sample_lib1)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/usdt_sample_app1)
diff --git a/examples/usdt_sample/scripts/bpf_text_shared.c b/examples/usdt_sample/scripts/bpf_text_shared.c
new file mode 100644 (file)
index 0000000..d8e7464
--- /dev/null
@@ -0,0 +1,55 @@
+#include <linux/blkdev.h>
+#include <uapi/linux/ptrace.h>
+
+/**
+ * @brief Helper method to filter based on the specified inputString.
+ * @param inputString The operation input string to check against the filter.
+ * @return True if the specified inputString starts with the hard-coded FILTER_STRING; otherwise, false.
+ */
+static inline bool filter(char const* inputString)
+{
+    char needle[] = "FILTER_STRING"; ///< The FILTER STRING is replaced by python code.
+    char haystack[sizeof(needle)] = {};
+    bpf_probe_read(&haystack, sizeof(haystack), (void*)inputString);
+    for (int i = 0; i < sizeof(needle) - 1; ++i) {
+        if (needle[i] != haystack[i]) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * @brief Contains the operation start data to trace.
+ */
+struct start_data_t
+{
+    u64 operation_id; ///< The id of the operation.
+    char input[64];   ///< The input string of the request.
+    u64 start;        ///< Timestamp of the start operation (start timestamp).
+};
+
+/**
+ * @brief Contains the operation start data.
+ * key: the operation id.
+ * value: The operation start latency data.
+ */
+BPF_HASH(start_hash, u64, struct start_data_t);
+
+/**
+ * @brief Reads the operation request arguments and stores the start data in the hash.
+ * @param ctx The BPF context.
+ */
+int trace_operation_start(struct pt_regs* ctx)
+{
+    struct start_data_t start_data = {};
+    bpf_usdt_readarg_p(2, ctx, &start_data.input, sizeof(start_data.input));
+
+    FILTER ///< Replaced by python code.
+
+    bpf_usdt_readarg(1, ctx, &start_data.operation_id);
+
+    start_data.start = bpf_ktime_get_ns();
+    start_hash.update(&start_data.operation_id, &start_data);
+    return 0;
+}
diff --git a/examples/usdt_sample/scripts/lat_avg.py b/examples/usdt_sample/scripts/lat_avg.py
new file mode 100755 (executable)
index 0000000..be473d1
--- /dev/null
@@ -0,0 +1,126 @@
+import argparse
+from time import sleep, strftime
+from sys import argv
+import ctypes as ct
+from bcc import BPF, USDT
+import inspect
+import os
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description="Trace the moving average of the latency of an operation using usdt probes.",
+    formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
+parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.")
+parser.add_argument("-c", "--count", type=int, default=16, help="The count of samples over which to calculate the moving average.")
+parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
+parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
+parser.set_defaults(verbose=False)
+args = parser.parse_args()
+this_pid = int(args.pid)
+this_interval = int(args.interval)
+this_count = int(args.count)
+this_filter = str(args.filterstr)
+
+if this_interval < 1:
+    print("Invalid value for interval, using 1.")
+    this_interval = 1
+
+if this_count < 1:
+    print("Invalid value for count, using 1.")
+    this_count = 1
+
+debugLevel=0
+if args.verbose:
+    debugLevel=4
+
+# BPF program
+bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+bpf_text = open(bpf_text_shared, 'r').read()
+bpf_text += """
+
+const u32 MAX_SAMPLES = SAMPLE_COUNT;
+
+struct hash_key_t
+{
+    char input[64];
+};
+
+struct hash_leaf_t
+{
+    u32 count;
+    u64 total;
+    u64 average;
+};
+
+/**
+ * @brief Contains the averages for the operation latencies by operation input.
+ */
+BPF_HASH(lat_hash, struct hash_key_t, struct hash_leaf_t, 512);
+
+/**
+ * @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram.
+ * @param ctx The BPF context.
+ */
+int trace_operation_end(struct pt_regs* ctx)
+{
+    u64 operation_id;
+    bpf_usdt_readarg(1, ctx, &operation_id);
+
+    struct start_data_t* start_data = start_hash.lookup(&operation_id);
+    if (0 == start_data) {
+        return 0;
+    }
+
+    u64 duration = bpf_ktime_get_ns() - start_data->start;
+    struct hash_key_t hash_key = {};
+    __builtin_memcpy(&hash_key.input, start_data->input, sizeof(hash_key.input));
+    start_hash.delete(&operation_id);
+
+    struct hash_leaf_t zero = {};
+    struct hash_leaf_t* hash_leaf = lat_hash.lookup_or_init(&hash_key, &zero);
+    if (0 == hash_leaf) {
+        return 0;
+    }
+
+    if (hash_leaf->count < MAX_SAMPLES) {
+        hash_leaf->count++;
+    } else {
+        hash_leaf->total -= hash_leaf->average;
+    }
+
+    hash_leaf->total += duration;
+    hash_leaf->average = hash_leaf->total / hash_leaf->count;
+
+    return 0;
+}
+"""
+
+bpf_text = bpf_text.replace("SAMPLE_COUNT", str(this_count))
+bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
+if this_filter:
+    bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
+else:
+    bpf_text = bpf_text.replace("FILTER", "")
+
+# Create USDT context
+print("Attaching probes to pid %d" % this_pid)
+usdt_ctx = USDT(pid=this_pid)
+usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
+usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
+
+# Create BPF context, load BPF program
+bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
+
+print("Tracing... Hit Ctrl-C to end.")
+
+lat_hash = bpf_ctx.get_table("lat_hash")
+while (1):
+    try:
+        sleep(this_interval)
+    except KeyboardInterrupt:
+        exit()
+
+    print("[%s]" % strftime("%H:%M:%S"))
+    print("%-64s %8s %16s" % ("input", "count", "latency (us)"))
+    for k, v in lat_hash.items():
+        print("%-64s %8d %16d" % (k.input, v.count, v.average / 1000))
diff --git a/examples/usdt_sample/scripts/lat_dist.py b/examples/usdt_sample/scripts/lat_dist.py
new file mode 100755 (executable)
index 0000000..af13e89
--- /dev/null
@@ -0,0 +1,98 @@
+import argparse
+from time import sleep, strftime
+from sys import argv
+import ctypes as ct
+from bcc import BPF, USDT
+import inspect
+import os
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description="Trace the latency distribution of an operation using usdt probes.",
+    formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
+parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.")
+parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
+parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
+parser.set_defaults(verbose=False)
+args = parser.parse_args()
+this_pid = int(args.pid)
+this_interval = int(args.interval)
+this_filter = str(args.filterstr)
+
+if this_interval < 1:
+    print("Invalid value for interval, using 1.")
+    this_interval = 1
+
+debugLevel=0
+if args.verbose:
+    debugLevel=4
+
+# BPF program
+bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+bpf_text = open(bpf_text_shared, 'r').read()
+bpf_text += """
+
+/**
+ * @brief The key to use for the latency histogram.
+ */
+struct dist_key_t
+{
+    char input[64];   ///< The input string of the request.
+    u64 slot;         ///< The histogram slot.
+};
+
+/**
+ * @brief Contains the histogram for the operation latencies.
+ */
+BPF_HISTOGRAM(dist, struct dist_key_t);
+
+/**
+ * @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram.
+ * @param ctx The BPF context.
+ */
+int trace_operation_end(struct pt_regs* ctx)
+{
+    u64 operation_id;
+    bpf_usdt_readarg(1, ctx, &operation_id);
+
+    struct start_data_t* start_data = start_hash.lookup(&operation_id);
+    if (0 == start_data) {
+        return 0;
+    }
+
+    u64 duration = bpf_ktime_get_ns() - start_data->start;
+    struct dist_key_t dist_key = {};
+    __builtin_memcpy(&dist_key.input, start_data->input, sizeof(dist_key.input));
+    dist_key.slot = bpf_log2l(duration / 1000);
+    start_hash.delete(&operation_id);
+
+    dist.increment(dist_key);
+    return 0;
+}
+"""
+
+bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
+if this_filter:
+    bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
+else:
+    bpf_text = bpf_text.replace("FILTER", "")
+
+# Create USDT context
+print("Attaching probes to pid %d" % this_pid)
+usdt_ctx = USDT(pid=this_pid)
+usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
+usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
+
+# Create BPF context, load BPF program
+bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
+
+start = 0
+dist = bpf_ctx.get_table("dist")
+while (1):
+    try:
+        sleep(this_interval)
+    except KeyboardInterrupt:
+        exit()
+
+    print("[%s]" % strftime("%H:%M:%S"))
+    dist.print_log2_hist("latency (us)")
diff --git a/examples/usdt_sample/scripts/latency.py b/examples/usdt_sample/scripts/latency.py
new file mode 100755 (executable)
index 0000000..7b033ed
--- /dev/null
@@ -0,0 +1,117 @@
+import argparse
+from time import sleep
+from sys import argv
+import ctypes as ct
+from bcc import BPF, USDT
+import inspect
+import os
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description="Trace the latency of an operation using usdt probes.",
+    formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
+parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
+parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
+parser.set_defaults(verbose=False)
+args = parser.parse_args()
+this_pid = int(args.pid)
+this_filter = str(args.filterstr)
+
+debugLevel=0
+if args.verbose:
+    debugLevel=4
+
+# BPF program
+bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+bpf_text = open(bpf_text_shared, 'r').read()
+bpf_text += """
+
+/**
+ * @brief Contains the latency data w.r.t. the complete operation from request to response.
+ */
+struct end_data_t
+{
+    u64 operation_id; ///< The id of the operation.
+    char input[64];   ///< The request (input) string.
+    char output[64];  ///< The response (output) string.
+    u64 start;        ///< The start timestamp of the operation.
+    u64 end;          ///< The end timestamp of the operation.
+    u64 duration;     ///< The duration of the operation.
+};
+
+/**
+ * The output buffer, which will be used to push the latency event data to user space.
+ */
+BPF_PERF_OUTPUT(operation_event);
+
+/**
+ * @brief Reads the operation response arguments, calculates the latency event data, and writes it to the user output buffer.
+ * @param ctx The BPF context.
+ */
+int trace_operation_end(struct pt_regs* ctx)
+{
+    u64 operation_id;
+    bpf_usdt_readarg(1, ctx, &operation_id);
+
+    struct start_data_t* start_data = start_hash.lookup(&operation_id);
+    if (0 == start_data) {
+        return 0;
+    }
+
+    struct end_data_t end_data = {};
+    end_data.operation_id = operation_id;
+    bpf_usdt_readarg_p(2, ctx, &end_data.output, sizeof(end_data.output));
+    end_data.end = bpf_ktime_get_ns();
+    end_data.start = start_data->start;
+    end_data.duration = end_data.end - end_data.start;
+    __builtin_memcpy(&end_data.input, start_data->input, sizeof(end_data.input));
+
+    start_hash.delete(&end_data.operation_id);
+
+    operation_event.perf_submit(ctx, &end_data, sizeof(end_data));
+    return 0;
+}
+"""
+
+bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
+if this_filter:
+    bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
+else:
+    bpf_text = bpf_text.replace("FILTER", "")
+
+# Create USDT context
+print("Attaching probes to pid %d" % this_pid)
+usdt_ctx = USDT(pid=this_pid)
+usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
+usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
+
+# Create BPF context, load BPF program
+bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
+
+# Define latency event and print function
+class OperationEventData(ct.Structure):
+  _fields_ = [("operation_id", ct.c_ulonglong),
+              ("input", ct.c_char * 64),
+              ("output", ct.c_char * 64),
+              ("start", ct.c_ulonglong),
+              ("end", ct.c_ulonglong),
+              ("duration", ct.c_ulonglong)]
+
+start = 0
+def print_event(cpu, data, size):
+    global start
+    event = ct.cast(data, ct.POINTER(OperationEventData)).contents
+    if start == 0:
+        start = event.start
+    time_s = (float(event.start - start)) / 1000000000
+    latency = (float(event.duration) / 1000)
+    print("%-18.9f %-10d %-32s %-32s %16d %16d %16d" % (time_s, event.operation_id, event.input, event.output, event.start, event.end, latency))
+
+# Print header
+print("Tracing... Hit Ctrl-C to end.")
+print("%-18s %-10s %-32s %-32s %16s %16s %16s" % ("time(s)", "id", "input", "output", "start (ns)", "end (ns)", "duration (us)"))
+
+# Output latency events
+bpf_ctx["operation_event"].open_perf_buffer(print_event)
+while 1:
+    bpf_ctx.kprobe_poll()
diff --git a/examples/usdt_sample/usdt_sample.md b/examples/usdt_sample/usdt_sample.md
new file mode 100644 (file)
index 0000000..c6b5a07
--- /dev/null
@@ -0,0 +1,163 @@
+Tested on Fedora25 4.11.3-200.fc25.x86_64, gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)
+
+As an alternative to using ...bcc/tests/python/include/folly/tracing/StaticTracepoint.h,
+it's possible to use systemtap-sdt-devel.
+However, this is *not* required for this sample.
+```bash
+$ sudo dnf install systemtap-sdt-devel  # For Fedora25, other distro's might have differently named packages.
+```
+
+If using systemtap-sdt-devel, the following commands can be used to generate the corresponding header and object files:
+Also see the CMakeLists.txt file for an example how to do this using cmake.
+```bash
+$ dtrace -h -s usdt_sample_lib1/src/lib1_sdt.d -o usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h
+$ dtrace -G -s usdt_sample_lib1/src/lib1_sdt.d -o lib1_sdt.o
+```
+
+Build the sample:
+```bash
+$ pwd
+~/src/bcc
+$ mkdir -p examples/usdt_sample/build && pushd examples/usdt_sample/build
+$ cmake .. && make
+$ popd
+```
+
+After building, you should see the available probes:
+```bash
+$ python tools/tplist.py -l examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so
+examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so usdt_sample_lib1:operation_end
+examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so usdt_sample_lib1:operation_start
+$ readelf -n examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so
+
+Displaying notes found at file offset 0x000001c8 with length 0x00000024:
+  Owner                 Data size      Description
+  GNU                  0x00000014      NT_GNU_BUILD_ID (unique build ID bitstring)
+    Build ID: 3930c19f654990159563394669f2ed5281513302
+
+Displaying notes found at file offset 0x0001b9ec with length 0x000000c0:
+  Owner                 Data size      Description
+  stapsdt              0x00000047      NT_STAPSDT (SystemTap probe descriptors)
+    Provider: usdt_sample_lib1
+    Name: operation_end
+    Location: 0x000000000000ed6d, Base: 0x0000000000000000, Semaphore: 0x0000000000000000
+    Arguments: -8@%rbx -8@%rax
+  stapsdt              0x0000004e      NT_STAPSDT (SystemTap probe descriptors)
+    Provider: usdt_sample_lib1
+    Name: operation_start
+    Location: 0x000000000000ee2c, Base: 0x0000000000000000, Semaphore: 0x0000000000000000
+    Arguments: -8@-24(%rbp) -8@%rax
+```
+
+Start the usdt sample application:
+```bash
+$ examples/usdt_sample/build/usdt_sample_app1/usdt_sample_app1 "pf" 1 30 10 1 50
+Applying the following parameters:
+Input prefix: pf.
+Input range: [1, 30].
+Calls Per Second: 10.
+Latency range: [1, 50] ms.
+You can now run the bcc scripts, see usdt_sample.md for examples.
+pid: 25433
+Press ctrl-c to exit.
+```
+
+Use argdist.py on the individual probes:
+```bash
+$ sudo python tools/argdist.py -p 25433 -i 5 -C 'u:usdt_sample_lib1:operation_start():char*:arg2#input' -z 32
+[11:18:29]
+input
+       COUNT      EVENT
+       1          arg2 = pf_10
+       1          arg2 = pf_5
+       1          arg2 = pf_12
+       1          arg2 = pf_1
+       1          arg2 = pf_11
+       1          arg2 = pf_28
+       1          arg2 = pf_16
+       1          arg2 = pf_19
+       1          arg2 = pf_15
+       1          arg2 = pf_2
+       2          arg2 = pf_17
+       2          arg2 = pf_3
+       2          arg2 = pf_25
+       2          arg2 = pf_30
+       2          arg2 = pf_13
+       2          arg2 = pf_18
+       2          arg2 = pf_7
+       2          arg2 = pf_29
+       2          arg2 = pf_26
+       3          arg2 = pf_8
+       3          arg2 = pf_21
+       3          arg2 = pf_14
+       4          arg2 = pf_6
+       4          arg2 = pf_23
+       5          arg2 = pf_24
+```
+
+Use latency.py to trace the operation latencies:
+```bash
+$ sudo python examples/usdt_sample/scripts/latency.py -p=25433 -f="pf_2"
+Attaching probes to pid 25433
+Tracing... Hit Ctrl-C to end.
+time(s)            id         input                            output                                 start (ns)         end (ns)    duration (us)
+0.000000000        7204       pf_28                            resp_pf_28                         11949439999644   11949489234565            49234
+0.100211886        7205       pf_28                            resp_pf_28                         11949540211530   11949574403064            34191
+0.300586675        7207       pf_21                            resp_pf_21                         11949740586319   11949742773571             2187
+0.400774366        7208       pf_28                            resp_pf_28                         11949840774010   11949859965498            19191
+0.701365719        7211       pf_21                            resp_pf_21                         11950141365363   11950152551131            11185
+0.901736620        7213       pf_25                            resp_pf_25                         11950341736264   11950347924333             6188
+1.102162217        7215       pf_21                            resp_pf_21                         11950542161861   11950567484183            25322
+1.302595998        7217       pf_23                            resp_pf_23                         11950742595642   11950761841242            19245
+1.503047601        7219       pf_2                             resp_pf_2                          11950943047245   11950951213474             8166
+1.703371457        7221       pf_27                            resp_pf_27                         11951143371101   11951176568051            33196
+2.104228899        7225       pf_24                            resp_pf_24                         11951544228543   11951588432769            44204
+2.304608175        7227       pf_21                            resp_pf_21                         11951744607819   11951790796068            46188
+2.404796703        7228       pf_21                            resp_pf_21                         11951844796347   11951877984160            33187
+2.605134923        7230       pf_27                            resp_pf_27                         11952045134567   11952065327660            20193
+3.206291642        7236       pf_29                            resp_pf_29                         11952646291286   11952660443343            14152
+3.506887492        7239       pf_21                            resp_pf_21                         11952946887136   11952995060987            48173
+```
+
+Use lat_dist.py to trace the latency distribution:
+```bash
+$ sudo python examples/usdt_sample/scripts/lat_dist.py -p=25433 -i=30 -f="pf_20"
+Attaching probes to pid 25433
+[11:23:47]
+
+Bucket ptr = 'pf_20'
+     latency (us)        : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 0        |                                        |
+      1024 -> 2047       : 1        |**********                              |
+      2048 -> 4095       : 1        |**********                              |
+      4096 -> 8191       : 0        |                                        |
+      8192 -> 16383      : 1        |**********                              |
+     16384 -> 32767      : 4        |****************************************|
+     32768 -> 65535      : 3        |******************************          |
+```
+
+Use lat_avg.py to trace the moving average of the latencies:
+```bash
+$ sudo python examples/usdt_sample/scripts/lat_avg.py -p=25433 -i=5 -c=10 -f="pf_2"
+Attaching probes to pid 25433
+Tracing... Hit Ctrl-C to end.
+[11:28:32]
+input                                                               count     latency (us)
+pf_22                                                                   3             7807
+pf_23                                                                   4            36914
+pf_25                                                                   3            31473
+pf_28                                                                   2            10627
+pf_27                                                                   1            47174
+pf_29                                                                   1             8138
+pf_26                                                                   1            49121
+pf_20                                                                   2            29158
+```
diff --git a/examples/usdt_sample/usdt_sample_app1/CMakeLists.txt b/examples/usdt_sample/usdt_sample_app1/CMakeLists.txt
new file mode 100755 (executable)
index 0000000..b447e21
--- /dev/null
@@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 3.0)
+project(usdt_sample_app1)
+
+include_directories(
+    ${USDT_SAMPLE_LIB1_INCLUDE_DIR}
+)
+
+link_directories(
+    ${USDT_SAMPLE_LIB1_LINK_DIR}
+)
+
+add_executable( ${PROJECT_NAME}
+    ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
+)
+
+target_link_libraries( ${PROJECT_NAME}
+    ${USDT_SAMPLE_LIB1_LIB}
+    pthread
+)
diff --git a/examples/usdt_sample/usdt_sample_app1/main.cpp b/examples/usdt_sample/usdt_sample_app1/main.cpp
new file mode 100644 (file)
index 0000000..c75c783
--- /dev/null
@@ -0,0 +1,145 @@
+// std
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+#include <mutex>
+#include <queue>
+#include <random>
+#include <sstream>
+#include <string>
+#include <thread>
+
+// gnu-c
+#include <sys/types.h>
+#include <unistd.h>
+
+// usdt_sample_lib1
+#include "usdt_sample_lib1/lib1.h"
+
+void print_usage(int argc, char** argv)
+{
+    std::cout << "Usage:" << std::endl;
+    std::cout << argv[0]
+              << " <InputPrefix> <InputMinimum (1-50)> <InputMaximum (1-50)> <CallsPerSec (1-50)> <MinimumLatencyMs (1-50)> <MaximumLatencyMs (1-50)>"
+              << std::endl;
+    std::cout << "InputPrefix: Prefix of the input string to the operation. Default: dummy" << std::endl;
+    std::cout << "InputMinimum: Minimum number to make the input string to the operation somewhat unique. Default: 1" << std::endl;
+    std::cout << "InputMaximum: Maximum number to make the input string to the operation somewhat unique. Default: 50" << std::endl;
+    std::cout << "CallsPerSec: Rate of calls to the operation. Default: 10" << std::endl;
+    std::cout << "MinimumLatencyMs: Minimum latency to apply to the operation. Default: 20" << std::endl;
+    std::cout << "MaximumLatencyMs: Maximum latency to apply to the operation. Default: 40" << std::endl;
+}
+
+int main(int argc, char** argv)
+{
+    std::string inputPrefix("dummy");
+    std::uint32_t inputMinimum = 1;
+    std::uint32_t inputMaximum = 50;
+    std::uint32_t callsPerSec = 10;
+    std::uint32_t minLatMs = 20;
+    std::uint32_t maxLatMs = 40;
+
+    try {
+        if (argc > 1) {
+            inputPrefix = argv[1];
+        }
+
+        if (argc > 2) {
+            inputMinimum = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[2]))));
+        }
+
+        if (argc > 3) {
+            inputMaximum = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[3]))));
+        }
+
+        if (argc > 4) {
+            callsPerSec = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[4]))));
+        }
+
+        if (argc > 5) {
+            minLatMs = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[5]))));
+        }
+
+        if (argc > 6) {
+            maxLatMs = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[6]))));
+        }
+    }
+    catch (const std::exception& exc) {
+        std::cout << "Exception while reading arguments: " << exc.what() << std::endl;
+        print_usage(argc, argv);
+        return -1;
+    }
+    catch (...) {
+        std::cout << "Unknown exception while reading arguments." << std::endl;
+        print_usage(argc, argv);
+        return -1;
+    }
+
+    if (inputMinimum > inputMaximum) {
+        std::cout << "InputMinimum must be smaller than InputMaximum." << std::endl;
+        print_usage(argc, argv);
+        return -1;
+    }
+
+    if (minLatMs > maxLatMs) {
+        std::cout << "MinimumLatencyMs must be smaller than MaximumLatencyMs." << std::endl;
+        print_usage(argc, argv);
+        return -1;
+    }
+
+    std::cout << "Applying the following parameters:" << std::endl
+              << "Input prefix: " << inputPrefix << "." << std::endl
+              << "Input range: [" << inputMinimum << ", " << inputMaximum << "]." << std::endl
+              << "Calls Per Second: " << callsPerSec << "." << std::endl
+              << "Latency range: [" << minLatMs << ", " << maxLatMs << "] ms." << std::endl;
+
+    const int sleepTimeMs = 1000 / callsPerSec;
+    OperationProvider op(minLatMs, maxLatMs);
+
+    std::mutex queueMutex;
+    std::queue<std::shared_future<OperationResponse>> responseQueue;
+
+    auto dequeueFuture = std::async(std::launch::async, [&]() {
+        while (true) {
+            bool empty = false;
+            {
+                std::lock_guard<std::mutex> lg(queueMutex);
+                empty = responseQueue.empty();
+            }
+
+            if (empty) {
+                std::this_thread::sleep_for(std::chrono::milliseconds(sleepTimeMs));
+                continue;
+            }
+
+            responseQueue.front().get();
+
+            // std::cout << "Removing item from queue." << std::endl;
+            std::lock_guard<std::mutex> lg(queueMutex);
+            responseQueue.pop();
+        }
+    });
+
+    std::random_device rd;
+    std::uniform_int_distribution<> dis(inputMinimum, inputMaximum);
+
+    std::cout << "You can now run the bcc scripts, see usdt_sample.md for examples." << std::endl;
+    std::cout << "pid: " << ::getpid() << std::endl;
+    std::cout << "Press ctrl-c to exit." << std::endl;
+    while (true) {
+        std::ostringstream inputOss;
+        inputOss << inputPrefix << "_" << dis(rd);
+        auto responseFuture = op.executeAsync(OperationRequest(inputOss.str()));
+
+        {
+            std::lock_guard<std::mutex> lg(queueMutex);
+            responseQueue.push(responseFuture);
+        }
+
+        // For a sample application, this is good enough to simulate callsPerSec.
+        std::this_thread::sleep_for(std::chrono::milliseconds(sleepTimeMs));
+    }
+
+    dequeueFuture.get();
+    return 0;
+}
diff --git a/examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt b/examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt
new file mode 100755 (executable)
index 0000000..3f1c7b2
--- /dev/null
@@ -0,0 +1,45 @@
+cmake_minimum_required(VERSION 3.0)
+project(usdt_sample_lib1)
+
+# Define variables.
+set(USDT_SAMPLE_LIB1_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE STRING "USDT_SAMPLE_LIB1_INCLUDE_DIR" FORCE)
+set(USDT_SAMPLE_LIB1_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src CACHE STRING "USDT_SAMPLE_LIB1_SRC_DIR" FORCE)
+set(USDT_SAMPLE_LIB1_LINK_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE STRING "USDT_SAMPLE_LIB1_LINK_DIR" FORCE)
+set(USDT_SAMPLE_LIB1_LIB ${PROJECT_NAME} CACHE STRING "USDT_SAMPLE_LIB1_LIB" FORCE)
+set(USDT_SAMPLE_LIB1_GENERATED ${CMAKE_CURRENT_BINARY_DIR}/generated)
+
+## Start - N.B. Following section only relevant when using systemtap-sdt-devel.
+
+# Create usdt header file.
+# N.B. ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h must be removed manually in order for it to be (re-)created.
+#  i.e. after making changes to libt_sdt.d
+#add_custom_command(
+#    OUTPUT ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h
+#    PRE_BUILD
+#    COMMAND dtrace -h -s ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1_sdt.d -o ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h
+#    COMMENT "Create usdt probes header file"
+#)
+
+# Create usdt object file.
+#file(MAKE_DIRECTORY ${USDT_SAMPLE_LIB1_GENERATED})
+#add_custom_command(
+#    OUTPUT ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o
+#    PRE_BUILD
+#    COMMAND dtrace -G -s ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1_sdt.d -o ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o
+#    COMMENT "Create usdt probes object file"
+#)
+
+## End
+
+include_directories(
+    ${USDT_SAMPLE_LIB1_INCLUDE_DIR}
+    # For folly StaticTracepoint.h:
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../tests/python/include
+)
+
+add_library( ${PROJECT_NAME} SHARED
+## Only relevant when using systemtap-sdt-devel
+#    ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h
+#    ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o
+    ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1.cpp
+)
diff --git a/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h b/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h
new file mode 100644 (file)
index 0000000..1a9d13e
--- /dev/null
@@ -0,0 +1,58 @@
+#pragma once
+
+// std
+#include <cstdint>
+#include <future>
+#include <random>
+#include <string>
+
+/**
+ * @brief Contains the operation request data.
+ */
+class OperationRequest
+{
+public:
+    OperationRequest(const std::string& input);
+    const std::string& input() const { return _input; }
+
+private:
+    std::string _input;
+};
+
+/**
+ * @brief Contains the operation response data.
+ */
+class OperationResponse
+{
+public:
+    OperationResponse(const std::string& output);
+    const std::string& output() const { return _output; }
+
+private:
+    std::string _output;
+};
+
+/**
+ * @brief Provides the operation.
+ */
+class OperationProvider
+{
+public:
+    /**
+     * @brief Constructs an instance of OperationProvider.
+     * @param minLatencyMs The minimum latency to simulate for the operation.
+     * @param maxLatencyMs The maximum latency to simulate for the operation.
+     */
+    OperationProvider(std::uint32_t minLatencyMs, std::uint32_t maxLatencyMs);
+
+    /**
+     * @brief Asynchronously executes the operation.
+     * @param request The request input data for the operation.
+     * @return A shared_future of the response of the operation.
+     */
+    std::shared_future<OperationResponse> executeAsync(const OperationRequest& request);
+
+private:
+    std::mt19937 _gen;                    ///< Used randomly determine an operation latency to simulate.
+    std::uniform_int_distribution<> _dis; ///< Used randomly determine an operation latency to simulate.
+};
diff --git a/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h b/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h
new file mode 100644 (file)
index 0000000..6b8a51a
--- /dev/null
@@ -0,0 +1,36 @@
+// N.B. This file is not used by this usdt_sample. Instead, the StaticTracepoint.h file from folly is used.
+//  It is here only for demonstration purposes.
+
+/* Generated by the Systemtap dtrace wrapper */
+
+
+#define _SDT_HAS_SEMAPHORES 1
+
+
+#define STAP_HAS_SEMAPHORES 1 /* deprecated */
+
+
+#include <sys/sdt.h>
+
+/* USDT_SAMPLE_LIB1_OPERATION_START ( uint64_t operation_id, const char * input ) */
+#if defined STAP_SDT_V1
+#define USDT_SAMPLE_LIB1_OPERATION_START_ENABLED() __builtin_expect (operation_start_semaphore, 0)
+#define usdt_sample_lib1_operation_start_semaphore operation_start_semaphore
+#else
+#define USDT_SAMPLE_LIB1_OPERATION_START_ENABLED() __builtin_expect (usdt_sample_lib1_operation_start_semaphore, 0)
+#endif
+__extension__ extern unsigned short usdt_sample_lib1_operation_start_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+#define USDT_SAMPLE_LIB1_OPERATION_START(arg1, arg2) \
+DTRACE_PROBE2 (usdt_sample_lib1, operation_start, arg1, arg2)
+
+/* USDT_SAMPLE_LIB1_OPERATION_END ( uint64_t operation_id, const char * output ) */
+#if defined STAP_SDT_V1
+#define USDT_SAMPLE_LIB1_OPERATION_END_ENABLED() __builtin_expect (operation_end_semaphore, 0)
+#define usdt_sample_lib1_operation_end_semaphore operation_end_semaphore
+#else
+#define USDT_SAMPLE_LIB1_OPERATION_END_ENABLED() __builtin_expect (usdt_sample_lib1_operation_end_semaphore, 0)
+#endif
+__extension__ extern unsigned short usdt_sample_lib1_operation_end_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+#define USDT_SAMPLE_LIB1_OPERATION_END(arg1, arg2) \
+DTRACE_PROBE2 (usdt_sample_lib1, operation_end, arg1, arg2)
+
diff --git a/examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp b/examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp
new file mode 100644 (file)
index 0000000..f19a7ea
--- /dev/null
@@ -0,0 +1,64 @@
+#include "usdt_sample_lib1/lib1.h"
+
+// std
+#include <atomic>
+#include <chrono>
+#include <iostream>
+#include <thread>
+
+// usdt_sample_lib1
+#include "folly/tracing/StaticTracepoint.h"
+
+// When using systemtap-sdt-devel, the following file should be included:
+// #include "usdt_sample_lib1/lib1_sdt.h"
+
+OperationRequest::OperationRequest(const std::string& input_)
+    : _input(input_)
+{
+}
+
+OperationResponse::OperationResponse(const std::string& output_)
+    : _output(output_)
+{
+}
+
+OperationProvider::OperationProvider(std::uint32_t minLatencyMs_, std::uint32_t maxLatencyMs_)
+    : _gen(std::random_device()())
+    , _dis(minLatencyMs_, maxLatencyMs_)
+{
+}
+
+std::shared_future<OperationResponse> OperationProvider::executeAsync(const OperationRequest& request)
+{
+    static std::atomic<std::uint64_t> operationIdCounter(0);
+    std::uint64_t operationId = operationIdCounter++;
+
+    FOLLY_SDT(usdt_sample_lib1, operation_start, operationId, request.input().c_str());
+
+/* Below an example of how to use this sample with systemtap-sdt-devel:
+    if (USDT_SAMPLE_LIB1_OPERATION_START_ENABLED()) {
+        //std::cout << "operation_start probe enabled." << std::endl;
+        USDT_SAMPLE_LIB1_OPERATION_START(operationId, &inputBuf);
+    }
+*/
+
+    auto latencyMs = _dis(_gen);
+
+    return std::async(std::launch::async, [latencyMs, operationId, request]() {
+        std::this_thread::sleep_for(std::chrono::milliseconds(latencyMs));
+
+        auto output = std::string("resp_") + request.input();
+        OperationResponse response(output);
+
+        FOLLY_SDT(usdt_sample_lib1, operation_end, operationId, response.output().c_str());
+
+/* Below an example of how to use this sample with systemtap-sdt-devel:
+        if (USDT_SAMPLE_LIB1_OPERATION_END_ENABLED()) {
+            //std::cout << "operation_end probe enabled." << std::endl;
+            USDT_SAMPLE_LIB1_OPERATION_END(operationId, &outputBuf);
+        }
+*/
+
+        return response;
+    });
+}
diff --git a/examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d b/examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d
new file mode 100644 (file)
index 0000000..4f7129e
--- /dev/null
@@ -0,0 +1,7 @@
+# This file is only relevant when using systemtap-sdt-devel (see usdt_sample.md).
+#  This usdt_sample uses the StaticTracepoint.h header file (from folly) instead.
+provider usdt_sample_lib1
+{
+    probe operation_start(uint64_t operation_id, const char* input);
+    probe operation_end(uint64_t operation_id, const char* output);
+};
index 0aa4c95..d1f0ca0 100755 (executable)
@@ -11,6 +11,7 @@ from unittest import main, TestCase
 from subprocess import Popen, PIPE
 from tempfile import NamedTemporaryFile
 import ctypes as ct
+import inspect
 import os
 import signal
 
@@ -19,18 +20,36 @@ class TestUDST(TestCase):
         # Application, minimum, to define three trace points
         app_text = b"""
 #include <unistd.h>
-#include <folly/tracing/StaticTracepoint.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "folly/tracing/StaticTracepoint.h"
 
 int main() {
   char s[100];
   int i, a = 200, b = 40;
   for (i = 0; i < 100; i++) s[i] = (i & 7) + (i & 6);
+  uint64_t j = 0;
+  char s1[64];
+  const char* str = "str";
+  size_t len = strlen(str);
   while (1) {
     FOLLY_SDT(test, probe_point_1, s[7], b);
     FOLLY_SDT(test, probe_point_3, a, b);
     FOLLY_SDT(test, probe_point_1, s[4], a);
     FOLLY_SDT(test, probe_point_2, 5, s[10]);
     FOLLY_SDT(test, probe_point_3, s[4], s[7]);
+
+    memset(&s1, '\0', sizeof(s1));
+    strncpy(s1, str, len);
+    snprintf(s1 + len, sizeof(s1) - len, "%d", j);
+    FOLLY_SDT(test, probe_point_4, j++, &s1);
+
+    memset(&s1, '\0', sizeof(s1));
+    strncpy(s1, str, len);
+    snprintf(s1 + len, sizeof(s1) - len, "%d", j);
+    FOLLY_SDT(test, probe_point_5, &s1, j++);
+
     sleep(1);
   }
   return 1;
@@ -38,6 +57,7 @@ int main() {
 """
         # BPF program
         self.bpf_text = """
+#include <linux/blkdev.h>
 #include <uapi/linux/ptrace.h>
 
 struct probe_result_t1 {
@@ -55,10 +75,23 @@ struct probe_result_t3 {
   int v2;
 };
 
+struct probe_result_t4 {
+  u64  v1;
+  char v2[8];
+};
+
+struct probe_result_t5 {
+  char v1[8];
+  u64  v2;
+};
+
 BPF_PERF_OUTPUT(event1);
 BPF_PERF_OUTPUT(event2);
 BPF_PERF_OUTPUT(event3);
-int do_trace(struct pt_regs *ctx) {
+BPF_PERF_OUTPUT(event4);
+BPF_PERF_OUTPUT(event5);
+
+int do_trace1(struct pt_regs *ctx) {
     struct probe_result_t1 result = {};
     bpf_usdt_readarg(1, ctx, &result.v1);
     bpf_usdt_readarg(2, ctx, &result.v2);
@@ -79,12 +112,26 @@ int do_trace3(struct pt_regs *ctx) {
     event3.perf_submit(ctx, &result, sizeof(result));
     return 0;
 }
+int do_trace4(struct pt_regs *ctx) {
+    struct probe_result_t4 result = {};
+    bpf_usdt_readarg(1, ctx, &result.v1);
+    bpf_usdt_readarg_p(2, ctx, &result.v2, sizeof(result.v2));
+    event4.perf_submit(ctx, &result, sizeof(result));
+    return 0;
+}
+int do_trace5(struct pt_regs *ctx) {
+    struct probe_result_t5 result = {};
+    bpf_usdt_readarg_p(1, ctx, &result.v1, sizeof(result.v1));
+    bpf_usdt_readarg(2, ctx, &result.v2);
+    event5.perf_submit(ctx, &result, sizeof(result));
+    return 0;
+}
 """
 
         # Compile and run the application
         self.ftemp = NamedTemporaryFile(delete=False)
         self.ftemp.close()
-        comp = Popen(["gcc", "-I", "%s/include" % os.getcwd(),
+        comp = Popen(["gcc", "-I", "%s/include" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
                       "-x", "c", "-o", self.ftemp.name, "-"],
                      stdin=PIPE)
         comp.stdin.write(app_text)
@@ -95,10 +142,12 @@ int do_trace3(struct pt_regs *ctx) {
     def test_attach1(self):
         # enable USDT probe from given PID and verifier generated BPF programs
         u = USDT(pid=int(self.app.pid))
-        u.enable_probe(probe="probe_point_1", fn_name="do_trace")
+        u.enable_probe(probe="probe_point_1", fn_name="do_trace1")
         u.enable_probe(probe="probe_point_2", fn_name="do_trace2")
         u.enable_probe(probe="probe_point_3", fn_name="do_trace3")
-        b = BPF(text=self.bpf_text, usdt_contexts=[u])
+        u.enable_probe(probe="probe_point_4", fn_name="do_trace4")
+        u.enable_probe(probe="probe_point_5", fn_name="do_trace5")
+        b = BPF(text=self.bpf_text, usdt_contexts=[u], debug=4)
 
         # Event states for each event:
         # 0 - probe not caught, 1 - probe caught with correct value,
@@ -120,6 +169,14 @@ int do_trace3(struct pt_regs *ctx) {
             _fields_ = [("v1", ct.c_int),
                         ("v2", ct.c_int)]
 
+        class Data4(ct.Structure):
+            _fields_ = [("v1", ct.c_ulonglong),
+                        ("v2", ct.c_char * 64)]
+
+        class Data5(ct.Structure):
+            _fields_ = [("v1", ct.c_char * 64),
+                        ("v2", ct.c_ulonglong)]
+
         def check_event_val(event, event_state, v1, v2, v3, v4):
             if ((event.v1 == v1 and event.v2 == v2) or (event.v1 == v3 and event.v2 == v4)):
                 if (event_state == 0 or event_state == 1):
@@ -139,10 +196,20 @@ int do_trace3(struct pt_regs *ctx) {
             event = ct.cast(data, ct.POINTER(Data3)).contents
             self.evt_st_3 = check_event_val(event, self.evt_st_3, 200, 40, 8, 13)
 
+        def print_event4(cpu, data, size):
+            event = ct.cast(data, ct.POINTER(Data4)).contents
+            print("%s" % event.v2)
+
+        def print_event5(cpu, data, size):
+            event = ct.cast(data, ct.POINTER(Data5)).contents
+            print("%s" % event.v1)
+
         # loop with callback to print_event
         b["event1"].open_perf_buffer(print_event1)
         b["event2"].open_perf_buffer(print_event2)
         b["event3"].open_perf_buffer(print_event3)
+        b["event4"].open_perf_buffer(print_event4)
+        b["event5"].open_perf_buffer(print_event5)
 
         # three iterations to make sure we get some probes and have time to process them
         for i in range(3):