--- /dev/null
+cmake_minimum_required(VERSION 3.0)
+
+# This sample requires C++11 enabled.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Weffc++")
+
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/usdt_sample_lib1)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/usdt_sample_app1)
--- /dev/null
+#include <linux/blkdev.h>
+#include <uapi/linux/ptrace.h>
+
+/**
+ * @brief Helper method to filter based on the specified inputString.
+ * @param inputString The operation input string to check against the filter.
+ * @return True if the specified inputString starts with the hard-coded FILTER_STRING; otherwise, false.
+ */
+static inline bool filter(char const* inputString)
+{
+ char needle[] = "FILTER_STRING"; ///< The FILTER STRING is replaced by python code.
+ char haystack[sizeof(needle)] = {};
+ bpf_probe_read(&haystack, sizeof(haystack), (void*)inputString);
+ for (int i = 0; i < sizeof(needle) - 1; ++i) {
+ if (needle[i] != haystack[i]) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * @brief Contains the operation start data to trace.
+ */
+struct start_data_t
+{
+ u64 operation_id; ///< The id of the operation.
+ char input[64]; ///< The input string of the request.
+ u64 start; ///< Timestamp of the start operation (start timestamp).
+};
+
+/**
+ * @brief Contains the operation start data.
+ * key: the operation id.
+ * value: The operation start latency data.
+ */
+BPF_HASH(start_hash, u64, struct start_data_t);
+
+/**
+ * @brief Reads the operation request arguments and stores the start data in the hash.
+ * @param ctx The BPF context.
+ */
+int trace_operation_start(struct pt_regs* ctx)
+{
+ struct start_data_t start_data = {};
+ bpf_usdt_readarg_p(2, ctx, &start_data.input, sizeof(start_data.input));
+
+ FILTER ///< Replaced by python code.
+
+ bpf_usdt_readarg(1, ctx, &start_data.operation_id);
+
+ start_data.start = bpf_ktime_get_ns();
+ start_hash.update(&start_data.operation_id, &start_data);
+ return 0;
+}
--- /dev/null
+import argparse
+from time import sleep, strftime
+from sys import argv
+import ctypes as ct
+from bcc import BPF, USDT
+import inspect
+import os
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description="Trace the moving average of the latency of an operation using usdt probes.",
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
+parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.")
+parser.add_argument("-c", "--count", type=int, default=16, help="The count of samples over which to calculate the moving average.")
+parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
+parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
+parser.set_defaults(verbose=False)
+args = parser.parse_args()
+this_pid = int(args.pid)
+this_interval = int(args.interval)
+this_count = int(args.count)
+this_filter = str(args.filterstr)
+
+if this_interval < 1:
+ print("Invalid value for interval, using 1.")
+ this_interval = 1
+
+if this_count < 1:
+ print("Invalid value for count, using 1.")
+ this_count = 1
+
+debugLevel=0
+if args.verbose:
+ debugLevel=4
+
+# BPF program
+bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+bpf_text = open(bpf_text_shared, 'r').read()
+bpf_text += """
+
+const u32 MAX_SAMPLES = SAMPLE_COUNT;
+
+struct hash_key_t
+{
+ char input[64];
+};
+
+struct hash_leaf_t
+{
+ u32 count;
+ u64 total;
+ u64 average;
+};
+
+/**
+ * @brief Contains the averages for the operation latencies by operation input.
+ */
+BPF_HASH(lat_hash, struct hash_key_t, struct hash_leaf_t, 512);
+
+/**
+ * @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram.
+ * @param ctx The BPF context.
+ */
+int trace_operation_end(struct pt_regs* ctx)
+{
+ u64 operation_id;
+ bpf_usdt_readarg(1, ctx, &operation_id);
+
+ struct start_data_t* start_data = start_hash.lookup(&operation_id);
+ if (0 == start_data) {
+ return 0;
+ }
+
+ u64 duration = bpf_ktime_get_ns() - start_data->start;
+ struct hash_key_t hash_key = {};
+ __builtin_memcpy(&hash_key.input, start_data->input, sizeof(hash_key.input));
+ start_hash.delete(&operation_id);
+
+ struct hash_leaf_t zero = {};
+ struct hash_leaf_t* hash_leaf = lat_hash.lookup_or_init(&hash_key, &zero);
+ if (0 == hash_leaf) {
+ return 0;
+ }
+
+ if (hash_leaf->count < MAX_SAMPLES) {
+ hash_leaf->count++;
+ } else {
+ hash_leaf->total -= hash_leaf->average;
+ }
+
+ hash_leaf->total += duration;
+ hash_leaf->average = hash_leaf->total / hash_leaf->count;
+
+ return 0;
+}
+"""
+
+bpf_text = bpf_text.replace("SAMPLE_COUNT", str(this_count))
+bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
+if this_filter:
+ bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
+else:
+ bpf_text = bpf_text.replace("FILTER", "")
+
+# Create USDT context
+print("Attaching probes to pid %d" % this_pid)
+usdt_ctx = USDT(pid=this_pid)
+usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
+usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
+
+# Create BPF context, load BPF program
+bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
+
+print("Tracing... Hit Ctrl-C to end.")
+
+lat_hash = bpf_ctx.get_table("lat_hash")
+while (1):
+ try:
+ sleep(this_interval)
+ except KeyboardInterrupt:
+ exit()
+
+ print("[%s]" % strftime("%H:%M:%S"))
+ print("%-64s %8s %16s" % ("input", "count", "latency (us)"))
+ for k, v in lat_hash.items():
+ print("%-64s %8d %16d" % (k.input, v.count, v.average / 1000))
--- /dev/null
+import argparse
+from time import sleep, strftime
+from sys import argv
+import ctypes as ct
+from bcc import BPF, USDT
+import inspect
+import os
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description="Trace the latency distribution of an operation using usdt probes.",
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
+parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.")
+parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
+parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
+parser.set_defaults(verbose=False)
+args = parser.parse_args()
+this_pid = int(args.pid)
+this_interval = int(args.interval)
+this_filter = str(args.filterstr)
+
+if this_interval < 1:
+ print("Invalid value for interval, using 1.")
+ this_interval = 1
+
+debugLevel=0
+if args.verbose:
+ debugLevel=4
+
+# BPF program
+bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+bpf_text = open(bpf_text_shared, 'r').read()
+bpf_text += """
+
+/**
+ * @brief The key to use for the latency histogram.
+ */
+struct dist_key_t
+{
+ char input[64]; ///< The input string of the request.
+ u64 slot; ///< The histogram slot.
+};
+
+/**
+ * @brief Contains the histogram for the operation latencies.
+ */
+BPF_HISTOGRAM(dist, struct dist_key_t);
+
+/**
+ * @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram.
+ * @param ctx The BPF context.
+ */
+int trace_operation_end(struct pt_regs* ctx)
+{
+ u64 operation_id;
+ bpf_usdt_readarg(1, ctx, &operation_id);
+
+ struct start_data_t* start_data = start_hash.lookup(&operation_id);
+ if (0 == start_data) {
+ return 0;
+ }
+
+ u64 duration = bpf_ktime_get_ns() - start_data->start;
+ struct dist_key_t dist_key = {};
+ __builtin_memcpy(&dist_key.input, start_data->input, sizeof(dist_key.input));
+ dist_key.slot = bpf_log2l(duration / 1000);
+ start_hash.delete(&operation_id);
+
+ dist.increment(dist_key);
+ return 0;
+}
+"""
+
+bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
+if this_filter:
+ bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
+else:
+ bpf_text = bpf_text.replace("FILTER", "")
+
+# Create USDT context
+print("Attaching probes to pid %d" % this_pid)
+usdt_ctx = USDT(pid=this_pid)
+usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
+usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
+
+# Create BPF context, load BPF program
+bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
+
+start = 0
+dist = bpf_ctx.get_table("dist")
+while (1):
+ try:
+ sleep(this_interval)
+ except KeyboardInterrupt:
+ exit()
+
+ print("[%s]" % strftime("%H:%M:%S"))
+ dist.print_log2_hist("latency (us)")
--- /dev/null
+import argparse
+from time import sleep
+from sys import argv
+import ctypes as ct
+from bcc import BPF, USDT
+import inspect
+import os
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description="Trace the latency of an operation using usdt probes.",
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
+parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
+parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
+parser.set_defaults(verbose=False)
+args = parser.parse_args()
+this_pid = int(args.pid)
+this_filter = str(args.filterstr)
+
+debugLevel=0
+if args.verbose:
+ debugLevel=4
+
+# BPF program
+bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+bpf_text = open(bpf_text_shared, 'r').read()
+bpf_text += """
+
+/**
+ * @brief Contains the latency data w.r.t. the complete operation from request to response.
+ */
+struct end_data_t
+{
+ u64 operation_id; ///< The id of the operation.
+ char input[64]; ///< The request (input) string.
+ char output[64]; ///< The response (output) string.
+ u64 start; ///< The start timestamp of the operation.
+ u64 end; ///< The end timestamp of the operation.
+ u64 duration; ///< The duration of the operation.
+};
+
+/**
+ * The output buffer, which will be used to push the latency event data to user space.
+ */
+BPF_PERF_OUTPUT(operation_event);
+
+/**
+ * @brief Reads the operation response arguments, calculates the latency event data, and writes it to the user output buffer.
+ * @param ctx The BPF context.
+ */
+int trace_operation_end(struct pt_regs* ctx)
+{
+ u64 operation_id;
+ bpf_usdt_readarg(1, ctx, &operation_id);
+
+ struct start_data_t* start_data = start_hash.lookup(&operation_id);
+ if (0 == start_data) {
+ return 0;
+ }
+
+ struct end_data_t end_data = {};
+ end_data.operation_id = operation_id;
+ bpf_usdt_readarg_p(2, ctx, &end_data.output, sizeof(end_data.output));
+ end_data.end = bpf_ktime_get_ns();
+ end_data.start = start_data->start;
+ end_data.duration = end_data.end - end_data.start;
+ __builtin_memcpy(&end_data.input, start_data->input, sizeof(end_data.input));
+
+ start_hash.delete(&end_data.operation_id);
+
+ operation_event.perf_submit(ctx, &end_data, sizeof(end_data));
+ return 0;
+}
+"""
+
+bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
+if this_filter:
+ bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
+else:
+ bpf_text = bpf_text.replace("FILTER", "")
+
+# Create USDT context
+print("Attaching probes to pid %d" % this_pid)
+usdt_ctx = USDT(pid=this_pid)
+usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
+usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
+
+# Create BPF context, load BPF program
+bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
+
+# Define latency event and print function
+class OperationEventData(ct.Structure):
+ _fields_ = [("operation_id", ct.c_ulonglong),
+ ("input", ct.c_char * 64),
+ ("output", ct.c_char * 64),
+ ("start", ct.c_ulonglong),
+ ("end", ct.c_ulonglong),
+ ("duration", ct.c_ulonglong)]
+
+start = 0
+def print_event(cpu, data, size):
+ global start
+ event = ct.cast(data, ct.POINTER(OperationEventData)).contents
+ if start == 0:
+ start = event.start
+ time_s = (float(event.start - start)) / 1000000000
+ latency = (float(event.duration) / 1000)
+ print("%-18.9f %-10d %-32s %-32s %16d %16d %16d" % (time_s, event.operation_id, event.input, event.output, event.start, event.end, latency))
+
+# Print header
+print("Tracing... Hit Ctrl-C to end.")
+print("%-18s %-10s %-32s %-32s %16s %16s %16s" % ("time(s)", "id", "input", "output", "start (ns)", "end (ns)", "duration (us)"))
+
+# Output latency events
+bpf_ctx["operation_event"].open_perf_buffer(print_event)
+while 1:
+ bpf_ctx.kprobe_poll()
--- /dev/null
+Tested on Fedora25 4.11.3-200.fc25.x86_64, gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)
+
+As an alternative to using ...bcc/tests/python/include/folly/tracing/StaticTracepoint.h,
+it's possible to use systemtap-sdt-devel.
+However, this is *not* required for this sample.
+```bash
+$ sudo dnf install systemtap-sdt-devel # For Fedora25, other distro's might have differently named packages.
+```
+
+If using systemtap-sdt-devel, the following commands can be used to generate the corresponding header and object files:
+Also see the CMakeLists.txt file for an example how to do this using cmake.
+```bash
+$ dtrace -h -s usdt_sample_lib1/src/lib1_sdt.d -o usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h
+$ dtrace -G -s usdt_sample_lib1/src/lib1_sdt.d -o lib1_sdt.o
+```
+
+Build the sample:
+```bash
+$ pwd
+~/src/bcc
+$ mkdir -p examples/usdt_sample/build && pushd examples/usdt_sample/build
+$ cmake .. && make
+$ popd
+```
+
+After building, you should see the available probes:
+```bash
+$ python tools/tplist.py -l examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so
+examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so usdt_sample_lib1:operation_end
+examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so usdt_sample_lib1:operation_start
+$ readelf -n examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so
+
+Displaying notes found at file offset 0x000001c8 with length 0x00000024:
+ Owner Data size Description
+ GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring)
+ Build ID: 3930c19f654990159563394669f2ed5281513302
+
+Displaying notes found at file offset 0x0001b9ec with length 0x000000c0:
+ Owner Data size Description
+ stapsdt 0x00000047 NT_STAPSDT (SystemTap probe descriptors)
+ Provider: usdt_sample_lib1
+ Name: operation_end
+ Location: 0x000000000000ed6d, Base: 0x0000000000000000, Semaphore: 0x0000000000000000
+ Arguments: -8@%rbx -8@%rax
+ stapsdt 0x0000004e NT_STAPSDT (SystemTap probe descriptors)
+ Provider: usdt_sample_lib1
+ Name: operation_start
+ Location: 0x000000000000ee2c, Base: 0x0000000000000000, Semaphore: 0x0000000000000000
+ Arguments: -8@-24(%rbp) -8@%rax
+```
+
+Start the usdt sample application:
+```bash
+$ examples/usdt_sample/build/usdt_sample_app1/usdt_sample_app1 "pf" 1 30 10 1 50
+Applying the following parameters:
+Input prefix: pf.
+Input range: [1, 30].
+Calls Per Second: 10.
+Latency range: [1, 50] ms.
+You can now run the bcc scripts, see usdt_sample.md for examples.
+pid: 25433
+Press ctrl-c to exit.
+```
+
+Use argdist.py on the individual probes:
+```bash
+$ sudo python tools/argdist.py -p 25433 -i 5 -C 'u:usdt_sample_lib1:operation_start():char*:arg2#input' -z 32
+[11:18:29]
+input
+ COUNT EVENT
+ 1 arg2 = pf_10
+ 1 arg2 = pf_5
+ 1 arg2 = pf_12
+ 1 arg2 = pf_1
+ 1 arg2 = pf_11
+ 1 arg2 = pf_28
+ 1 arg2 = pf_16
+ 1 arg2 = pf_19
+ 1 arg2 = pf_15
+ 1 arg2 = pf_2
+ 2 arg2 = pf_17
+ 2 arg2 = pf_3
+ 2 arg2 = pf_25
+ 2 arg2 = pf_30
+ 2 arg2 = pf_13
+ 2 arg2 = pf_18
+ 2 arg2 = pf_7
+ 2 arg2 = pf_29
+ 2 arg2 = pf_26
+ 3 arg2 = pf_8
+ 3 arg2 = pf_21
+ 3 arg2 = pf_14
+ 4 arg2 = pf_6
+ 4 arg2 = pf_23
+ 5 arg2 = pf_24
+```
+
+Use latency.py to trace the operation latencies:
+```bash
+$ sudo python examples/usdt_sample/scripts/latency.py -p=25433 -f="pf_2"
+Attaching probes to pid 25433
+Tracing... Hit Ctrl-C to end.
+time(s) id input output start (ns) end (ns) duration (us)
+0.000000000 7204 pf_28 resp_pf_28 11949439999644 11949489234565 49234
+0.100211886 7205 pf_28 resp_pf_28 11949540211530 11949574403064 34191
+0.300586675 7207 pf_21 resp_pf_21 11949740586319 11949742773571 2187
+0.400774366 7208 pf_28 resp_pf_28 11949840774010 11949859965498 19191
+0.701365719 7211 pf_21 resp_pf_21 11950141365363 11950152551131 11185
+0.901736620 7213 pf_25 resp_pf_25 11950341736264 11950347924333 6188
+1.102162217 7215 pf_21 resp_pf_21 11950542161861 11950567484183 25322
+1.302595998 7217 pf_23 resp_pf_23 11950742595642 11950761841242 19245
+1.503047601 7219 pf_2 resp_pf_2 11950943047245 11950951213474 8166
+1.703371457 7221 pf_27 resp_pf_27 11951143371101 11951176568051 33196
+2.104228899 7225 pf_24 resp_pf_24 11951544228543 11951588432769 44204
+2.304608175 7227 pf_21 resp_pf_21 11951744607819 11951790796068 46188
+2.404796703 7228 pf_21 resp_pf_21 11951844796347 11951877984160 33187
+2.605134923 7230 pf_27 resp_pf_27 11952045134567 11952065327660 20193
+3.206291642 7236 pf_29 resp_pf_29 11952646291286 11952660443343 14152
+3.506887492 7239 pf_21 resp_pf_21 11952946887136 11952995060987 48173
+```
+
+Use lat_dist.py to trace the latency distribution:
+```bash
+$ sudo python examples/usdt_sample/scripts/lat_dist.py -p=25433 -i=30 -f="pf_20"
+Attaching probes to pid 25433
+[11:23:47]
+
+Bucket ptr = 'pf_20'
+ latency (us) : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 0 | |
+ 1024 -> 2047 : 1 |********** |
+ 2048 -> 4095 : 1 |********** |
+ 4096 -> 8191 : 0 | |
+ 8192 -> 16383 : 1 |********** |
+ 16384 -> 32767 : 4 |****************************************|
+ 32768 -> 65535 : 3 |****************************** |
+```
+
+Use lat_avg.py to trace the moving average of the latencies:
+```bash
+$ sudo python examples/usdt_sample/scripts/lat_avg.py -p=25433 -i=5 -c=10 -f="pf_2"
+Attaching probes to pid 25433
+Tracing... Hit Ctrl-C to end.
+[11:28:32]
+input count latency (us)
+pf_22 3 7807
+pf_23 4 36914
+pf_25 3 31473
+pf_28 2 10627
+pf_27 1 47174
+pf_29 1 8138
+pf_26 1 49121
+pf_20 2 29158
+```
--- /dev/null
+cmake_minimum_required(VERSION 3.0)
+project(usdt_sample_app1)
+
+include_directories(
+ ${USDT_SAMPLE_LIB1_INCLUDE_DIR}
+)
+
+link_directories(
+ ${USDT_SAMPLE_LIB1_LINK_DIR}
+)
+
+add_executable( ${PROJECT_NAME}
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
+)
+
+target_link_libraries( ${PROJECT_NAME}
+ ${USDT_SAMPLE_LIB1_LIB}
+ pthread
+)
--- /dev/null
+// std
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+#include <mutex>
+#include <queue>
+#include <random>
+#include <sstream>
+#include <string>
+#include <thread>
+
+// gnu-c
+#include <sys/types.h>
+#include <unistd.h>
+
+// usdt_sample_lib1
+#include "usdt_sample_lib1/lib1.h"
+
+void print_usage(int argc, char** argv)
+{
+ std::cout << "Usage:" << std::endl;
+ std::cout << argv[0]
+ << " <InputPrefix> <InputMinimum (1-50)> <InputMaximum (1-50)> <CallsPerSec (1-50)> <MinimumLatencyMs (1-50)> <MaximumLatencyMs (1-50)>"
+ << std::endl;
+ std::cout << "InputPrefix: Prefix of the input string to the operation. Default: dummy" << std::endl;
+ std::cout << "InputMinimum: Minimum number to make the input string to the operation somewhat unique. Default: 1" << std::endl;
+ std::cout << "InputMaximum: Maximum number to make the input string to the operation somewhat unique. Default: 50" << std::endl;
+ std::cout << "CallsPerSec: Rate of calls to the operation. Default: 10" << std::endl;
+ std::cout << "MinimumLatencyMs: Minimum latency to apply to the operation. Default: 20" << std::endl;
+ std::cout << "MaximumLatencyMs: Maximum latency to apply to the operation. Default: 40" << std::endl;
+}
+
+int main(int argc, char** argv)
+{
+ std::string inputPrefix("dummy");
+ std::uint32_t inputMinimum = 1;
+ std::uint32_t inputMaximum = 50;
+ std::uint32_t callsPerSec = 10;
+ std::uint32_t minLatMs = 20;
+ std::uint32_t maxLatMs = 40;
+
+ try {
+ if (argc > 1) {
+ inputPrefix = argv[1];
+ }
+
+ if (argc > 2) {
+ inputMinimum = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[2]))));
+ }
+
+ if (argc > 3) {
+ inputMaximum = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[3]))));
+ }
+
+ if (argc > 4) {
+ callsPerSec = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[4]))));
+ }
+
+ if (argc > 5) {
+ minLatMs = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[5]))));
+ }
+
+ if (argc > 6) {
+ maxLatMs = static_cast<std::uint32_t>(std::max(1, std::min(50, std::atoi(argv[6]))));
+ }
+ }
+ catch (const std::exception& exc) {
+ std::cout << "Exception while reading arguments: " << exc.what() << std::endl;
+ print_usage(argc, argv);
+ return -1;
+ }
+ catch (...) {
+ std::cout << "Unknown exception while reading arguments." << std::endl;
+ print_usage(argc, argv);
+ return -1;
+ }
+
+ if (inputMinimum > inputMaximum) {
+ std::cout << "InputMinimum must be smaller than InputMaximum." << std::endl;
+ print_usage(argc, argv);
+ return -1;
+ }
+
+ if (minLatMs > maxLatMs) {
+ std::cout << "MinimumLatencyMs must be smaller than MaximumLatencyMs." << std::endl;
+ print_usage(argc, argv);
+ return -1;
+ }
+
+ std::cout << "Applying the following parameters:" << std::endl
+ << "Input prefix: " << inputPrefix << "." << std::endl
+ << "Input range: [" << inputMinimum << ", " << inputMaximum << "]." << std::endl
+ << "Calls Per Second: " << callsPerSec << "." << std::endl
+ << "Latency range: [" << minLatMs << ", " << maxLatMs << "] ms." << std::endl;
+
+ const int sleepTimeMs = 1000 / callsPerSec;
+ OperationProvider op(minLatMs, maxLatMs);
+
+ std::mutex queueMutex;
+ std::queue<std::shared_future<OperationResponse>> responseQueue;
+
+ auto dequeueFuture = std::async(std::launch::async, [&]() {
+ while (true) {
+ bool empty = false;
+ {
+ std::lock_guard<std::mutex> lg(queueMutex);
+ empty = responseQueue.empty();
+ }
+
+ if (empty) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(sleepTimeMs));
+ continue;
+ }
+
+ responseQueue.front().get();
+
+ // std::cout << "Removing item from queue." << std::endl;
+ std::lock_guard<std::mutex> lg(queueMutex);
+ responseQueue.pop();
+ }
+ });
+
+ std::random_device rd;
+ std::uniform_int_distribution<> dis(inputMinimum, inputMaximum);
+
+ std::cout << "You can now run the bcc scripts, see usdt_sample.md for examples." << std::endl;
+ std::cout << "pid: " << ::getpid() << std::endl;
+ std::cout << "Press ctrl-c to exit." << std::endl;
+ while (true) {
+ std::ostringstream inputOss;
+ inputOss << inputPrefix << "_" << dis(rd);
+ auto responseFuture = op.executeAsync(OperationRequest(inputOss.str()));
+
+ {
+ std::lock_guard<std::mutex> lg(queueMutex);
+ responseQueue.push(responseFuture);
+ }
+
+ // For a sample application, this is good enough to simulate callsPerSec.
+ std::this_thread::sleep_for(std::chrono::milliseconds(sleepTimeMs));
+ }
+
+ dequeueFuture.get();
+ return 0;
+}
--- /dev/null
+cmake_minimum_required(VERSION 3.0)
+project(usdt_sample_lib1)
+
+# Define variables.
+set(USDT_SAMPLE_LIB1_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE STRING "USDT_SAMPLE_LIB1_INCLUDE_DIR" FORCE)
+set(USDT_SAMPLE_LIB1_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src CACHE STRING "USDT_SAMPLE_LIB1_SRC_DIR" FORCE)
+set(USDT_SAMPLE_LIB1_LINK_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE STRING "USDT_SAMPLE_LIB1_LINK_DIR" FORCE)
+set(USDT_SAMPLE_LIB1_LIB ${PROJECT_NAME} CACHE STRING "USDT_SAMPLE_LIB1_LIB" FORCE)
+set(USDT_SAMPLE_LIB1_GENERATED ${CMAKE_CURRENT_BINARY_DIR}/generated)
+
+## Start - N.B. Following section only relevant when using systemtap-sdt-devel.
+
+# Create usdt header file.
+# N.B. ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h must be removed manually in order for it to be (re-)created.
+# i.e. after making changes to libt_sdt.d
+#add_custom_command(
+# OUTPUT ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h
+# PRE_BUILD
+# COMMAND dtrace -h -s ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1_sdt.d -o ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h
+# COMMENT "Create usdt probes header file"
+#)
+
+# Create usdt object file.
+#file(MAKE_DIRECTORY ${USDT_SAMPLE_LIB1_GENERATED})
+#add_custom_command(
+# OUTPUT ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o
+# PRE_BUILD
+# COMMAND dtrace -G -s ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1_sdt.d -o ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o
+# COMMENT "Create usdt probes object file"
+#)
+
+## End
+
+include_directories(
+ ${USDT_SAMPLE_LIB1_INCLUDE_DIR}
+ # For folly StaticTracepoint.h:
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../../tests/python/include
+)
+
+add_library( ${PROJECT_NAME} SHARED
+## Only relevant when using systemtap-sdt-devel
+# ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h
+# ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o
+ ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1.cpp
+)
--- /dev/null
+#pragma once
+
+// std
+#include <cstdint>
+#include <future>
+#include <random>
+#include <string>
+
+/**
+ * @brief Contains the operation request data.
+ */
+class OperationRequest
+{
+public:
+ OperationRequest(const std::string& input);
+ const std::string& input() const { return _input; }
+
+private:
+ std::string _input;
+};
+
+/**
+ * @brief Contains the operation response data.
+ */
+class OperationResponse
+{
+public:
+ OperationResponse(const std::string& output);
+ const std::string& output() const { return _output; }
+
+private:
+ std::string _output;
+};
+
+/**
+ * @brief Provides the operation.
+ */
+class OperationProvider
+{
+public:
+ /**
+ * @brief Constructs an instance of OperationProvider.
+ * @param minLatencyMs The minimum latency to simulate for the operation.
+ * @param maxLatencyMs The maximum latency to simulate for the operation.
+ */
+ OperationProvider(std::uint32_t minLatencyMs, std::uint32_t maxLatencyMs);
+
+ /**
+ * @brief Asynchronously executes the operation.
+ * @param request The request input data for the operation.
+ * @return A shared_future of the response of the operation.
+ */
+ std::shared_future<OperationResponse> executeAsync(const OperationRequest& request);
+
+private:
+ std::mt19937 _gen; ///< Used randomly determine an operation latency to simulate.
+ std::uniform_int_distribution<> _dis; ///< Used randomly determine an operation latency to simulate.
+};
--- /dev/null
+// N.B. This file is not used by this usdt_sample. Instead, the StaticTracepoint.h file from folly is used.
+// It is here only for demonstration purposes.
+
+/* Generated by the Systemtap dtrace wrapper */
+
+
+#define _SDT_HAS_SEMAPHORES 1
+
+
+#define STAP_HAS_SEMAPHORES 1 /* deprecated */
+
+
+#include <sys/sdt.h>
+
+/* USDT_SAMPLE_LIB1_OPERATION_START ( uint64_t operation_id, const char * input ) */
+#if defined STAP_SDT_V1
+#define USDT_SAMPLE_LIB1_OPERATION_START_ENABLED() __builtin_expect (operation_start_semaphore, 0)
+#define usdt_sample_lib1_operation_start_semaphore operation_start_semaphore
+#else
+#define USDT_SAMPLE_LIB1_OPERATION_START_ENABLED() __builtin_expect (usdt_sample_lib1_operation_start_semaphore, 0)
+#endif
+__extension__ extern unsigned short usdt_sample_lib1_operation_start_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+#define USDT_SAMPLE_LIB1_OPERATION_START(arg1, arg2) \
+DTRACE_PROBE2 (usdt_sample_lib1, operation_start, arg1, arg2)
+
+/* USDT_SAMPLE_LIB1_OPERATION_END ( uint64_t operation_id, const char * output ) */
+#if defined STAP_SDT_V1
+#define USDT_SAMPLE_LIB1_OPERATION_END_ENABLED() __builtin_expect (operation_end_semaphore, 0)
+#define usdt_sample_lib1_operation_end_semaphore operation_end_semaphore
+#else
+#define USDT_SAMPLE_LIB1_OPERATION_END_ENABLED() __builtin_expect (usdt_sample_lib1_operation_end_semaphore, 0)
+#endif
+__extension__ extern unsigned short usdt_sample_lib1_operation_end_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+#define USDT_SAMPLE_LIB1_OPERATION_END(arg1, arg2) \
+DTRACE_PROBE2 (usdt_sample_lib1, operation_end, arg1, arg2)
+
--- /dev/null
+#include "usdt_sample_lib1/lib1.h"
+
+// std
+#include <atomic>
+#include <chrono>
+#include <iostream>
+#include <thread>
+
+// usdt_sample_lib1
+#include "folly/tracing/StaticTracepoint.h"
+
+// When using systemtap-sdt-devel, the following file should be included:
+// #include "usdt_sample_lib1/lib1_sdt.h"
+
+OperationRequest::OperationRequest(const std::string& input_)
+ : _input(input_)
+{
+}
+
+OperationResponse::OperationResponse(const std::string& output_)
+ : _output(output_)
+{
+}
+
+OperationProvider::OperationProvider(std::uint32_t minLatencyMs_, std::uint32_t maxLatencyMs_)
+ : _gen(std::random_device()())
+ , _dis(minLatencyMs_, maxLatencyMs_)
+{
+}
+
+std::shared_future<OperationResponse> OperationProvider::executeAsync(const OperationRequest& request)
+{
+ static std::atomic<std::uint64_t> operationIdCounter(0);
+ std::uint64_t operationId = operationIdCounter++;
+
+ FOLLY_SDT(usdt_sample_lib1, operation_start, operationId, request.input().c_str());
+
+/* Below an example of how to use this sample with systemtap-sdt-devel:
+ if (USDT_SAMPLE_LIB1_OPERATION_START_ENABLED()) {
+ //std::cout << "operation_start probe enabled." << std::endl;
+ USDT_SAMPLE_LIB1_OPERATION_START(operationId, &inputBuf);
+ }
+*/
+
+ auto latencyMs = _dis(_gen);
+
+ return std::async(std::launch::async, [latencyMs, operationId, request]() {
+ std::this_thread::sleep_for(std::chrono::milliseconds(latencyMs));
+
+ auto output = std::string("resp_") + request.input();
+ OperationResponse response(output);
+
+ FOLLY_SDT(usdt_sample_lib1, operation_end, operationId, response.output().c_str());
+
+/* Below an example of how to use this sample with systemtap-sdt-devel:
+ if (USDT_SAMPLE_LIB1_OPERATION_END_ENABLED()) {
+ //std::cout << "operation_end probe enabled." << std::endl;
+ USDT_SAMPLE_LIB1_OPERATION_END(operationId, &outputBuf);
+ }
+*/
+
+ return response;
+ });
+}
--- /dev/null
+# This file is only relevant when using systemtap-sdt-devel (see usdt_sample.md).
+# This usdt_sample uses the StaticTracepoint.h header file (from folly) instead.
+provider usdt_sample_lib1
+{
+ probe operation_start(uint64_t operation_id, const char* input);
+ probe operation_end(uint64_t operation_id, const char* output);
+};
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
import ctypes as ct
+import inspect
import os
import signal
# Application, minimum, to define three trace points
app_text = b"""
#include <unistd.h>
-#include <folly/tracing/StaticTracepoint.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "folly/tracing/StaticTracepoint.h"
int main() {
char s[100];
int i, a = 200, b = 40;
for (i = 0; i < 100; i++) s[i] = (i & 7) + (i & 6);
+ uint64_t j = 0;
+ char s1[64];
+ const char* str = "str";
+ size_t len = strlen(str);
while (1) {
FOLLY_SDT(test, probe_point_1, s[7], b);
FOLLY_SDT(test, probe_point_3, a, b);
FOLLY_SDT(test, probe_point_1, s[4], a);
FOLLY_SDT(test, probe_point_2, 5, s[10]);
FOLLY_SDT(test, probe_point_3, s[4], s[7]);
+
+ memset(&s1, '\0', sizeof(s1));
+ strncpy(s1, str, len);
+ snprintf(s1 + len, sizeof(s1) - len, "%d", j);
+ FOLLY_SDT(test, probe_point_4, j++, &s1);
+
+ memset(&s1, '\0', sizeof(s1));
+ strncpy(s1, str, len);
+ snprintf(s1 + len, sizeof(s1) - len, "%d", j);
+ FOLLY_SDT(test, probe_point_5, &s1, j++);
+
sleep(1);
}
return 1;
"""
# BPF program
self.bpf_text = """
+#include <linux/blkdev.h>
#include <uapi/linux/ptrace.h>
struct probe_result_t1 {
int v2;
};
+struct probe_result_t4 {
+ u64 v1;
+ char v2[8];
+};
+
+struct probe_result_t5 {
+ char v1[8];
+ u64 v2;
+};
+
BPF_PERF_OUTPUT(event1);
BPF_PERF_OUTPUT(event2);
BPF_PERF_OUTPUT(event3);
-int do_trace(struct pt_regs *ctx) {
+BPF_PERF_OUTPUT(event4);
+BPF_PERF_OUTPUT(event5);
+
+int do_trace1(struct pt_regs *ctx) {
struct probe_result_t1 result = {};
bpf_usdt_readarg(1, ctx, &result.v1);
bpf_usdt_readarg(2, ctx, &result.v2);
event3.perf_submit(ctx, &result, sizeof(result));
return 0;
}
+int do_trace4(struct pt_regs *ctx) {
+ struct probe_result_t4 result = {};
+ bpf_usdt_readarg(1, ctx, &result.v1);
+ bpf_usdt_readarg_p(2, ctx, &result.v2, sizeof(result.v2));
+ event4.perf_submit(ctx, &result, sizeof(result));
+ return 0;
+}
+int do_trace5(struct pt_regs *ctx) {
+ struct probe_result_t5 result = {};
+ bpf_usdt_readarg_p(1, ctx, &result.v1, sizeof(result.v1));
+ bpf_usdt_readarg(2, ctx, &result.v2);
+ event5.perf_submit(ctx, &result, sizeof(result));
+ return 0;
+}
"""
# Compile and run the application
self.ftemp = NamedTemporaryFile(delete=False)
self.ftemp.close()
- comp = Popen(["gcc", "-I", "%s/include" % os.getcwd(),
+ comp = Popen(["gcc", "-I", "%s/include" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
"-x", "c", "-o", self.ftemp.name, "-"],
stdin=PIPE)
comp.stdin.write(app_text)
def test_attach1(self):
# enable USDT probe from given PID and verifier generated BPF programs
u = USDT(pid=int(self.app.pid))
- u.enable_probe(probe="probe_point_1", fn_name="do_trace")
+ u.enable_probe(probe="probe_point_1", fn_name="do_trace1")
u.enable_probe(probe="probe_point_2", fn_name="do_trace2")
u.enable_probe(probe="probe_point_3", fn_name="do_trace3")
- b = BPF(text=self.bpf_text, usdt_contexts=[u])
+ u.enable_probe(probe="probe_point_4", fn_name="do_trace4")
+ u.enable_probe(probe="probe_point_5", fn_name="do_trace5")
+ b = BPF(text=self.bpf_text, usdt_contexts=[u], debug=4)
# Event states for each event:
# 0 - probe not caught, 1 - probe caught with correct value,
_fields_ = [("v1", ct.c_int),
("v2", ct.c_int)]
+ class Data4(ct.Structure):
+ _fields_ = [("v1", ct.c_ulonglong),
+ ("v2", ct.c_char * 64)]
+
+ class Data5(ct.Structure):
+ _fields_ = [("v1", ct.c_char * 64),
+ ("v2", ct.c_ulonglong)]
+
def check_event_val(event, event_state, v1, v2, v3, v4):
if ((event.v1 == v1 and event.v2 == v2) or (event.v1 == v3 and event.v2 == v4)):
if (event_state == 0 or event_state == 1):
event = ct.cast(data, ct.POINTER(Data3)).contents
self.evt_st_3 = check_event_val(event, self.evt_st_3, 200, 40, 8, 13)
+ def print_event4(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(Data4)).contents
+ print("%s" % event.v2)
+
+ def print_event5(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(Data5)).contents
+ print("%s" % event.v1)
+
# loop with callback to print_event
b["event1"].open_perf_buffer(print_event1)
b["event2"].open_perf_buffer(print_event2)
b["event3"].open_perf_buffer(print_event3)
+ b["event4"].open_perf_buffer(print_event4)
+ b["event5"].open_perf_buffer(print_event5)
# three iterations to make sure we get some probes and have time to process them
for i in range(3):