From b4691fbaea4ce10bce124c350db648abb49f1c5c Mon Sep 17 00:00:00 2001 From: bveldhoen Date: Tue, 11 Jul 2017 20:09:35 +0200 Subject: [PATCH] Add USDT sample (#1229) This sample contains: - A library with an operation that uses usdt probes. - A console application that calls the operation. - Scripts to trace the latency of the operation. - Corresponding cmake files. --- examples/usdt_sample/CMakeLists.txt | 7 + examples/usdt_sample/scripts/bpf_text_shared.c | 55 +++++++ examples/usdt_sample/scripts/lat_avg.py | 126 ++++++++++++++++ examples/usdt_sample/scripts/lat_dist.py | 98 +++++++++++++ examples/usdt_sample/scripts/latency.py | 117 +++++++++++++++ examples/usdt_sample/usdt_sample.md | 163 +++++++++++++++++++++ .../usdt_sample/usdt_sample_app1/CMakeLists.txt | 19 +++ examples/usdt_sample/usdt_sample_app1/main.cpp | 145 ++++++++++++++++++ .../usdt_sample/usdt_sample_lib1/CMakeLists.txt | 45 ++++++ .../include/usdt_sample_lib1/lib1.h | 58 ++++++++ .../include/usdt_sample_lib1/lib1_sdt.h | 36 +++++ examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp | 64 ++++++++ .../usdt_sample/usdt_sample_lib1/src/lib1_sdt.d | 7 + tests/python/test_usdt.py | 77 +++++++++- 14 files changed, 1012 insertions(+), 5 deletions(-) create mode 100755 examples/usdt_sample/CMakeLists.txt create mode 100644 examples/usdt_sample/scripts/bpf_text_shared.c create mode 100755 examples/usdt_sample/scripts/lat_avg.py create mode 100755 examples/usdt_sample/scripts/lat_dist.py create mode 100755 examples/usdt_sample/scripts/latency.py create mode 100644 examples/usdt_sample/usdt_sample.md create mode 100755 examples/usdt_sample/usdt_sample_app1/CMakeLists.txt create mode 100644 examples/usdt_sample/usdt_sample_app1/main.cpp create mode 100755 examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt create mode 100644 examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h create mode 100644 examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h create mode 100644 examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp create mode 100644 examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d diff --git a/examples/usdt_sample/CMakeLists.txt b/examples/usdt_sample/CMakeLists.txt new file mode 100755 index 0000000..04e5092 --- /dev/null +++ b/examples/usdt_sample/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.0) + +# This sample requires C++11 enabled. +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Weffc++") + +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/usdt_sample_lib1) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/usdt_sample_app1) diff --git a/examples/usdt_sample/scripts/bpf_text_shared.c b/examples/usdt_sample/scripts/bpf_text_shared.c new file mode 100644 index 0000000..d8e7464 --- /dev/null +++ b/examples/usdt_sample/scripts/bpf_text_shared.c @@ -0,0 +1,55 @@ +#include +#include + +/** + * @brief Helper method to filter based on the specified inputString. + * @param inputString The operation input string to check against the filter. + * @return True if the specified inputString starts with the hard-coded FILTER_STRING; otherwise, false. + */ +static inline bool filter(char const* inputString) +{ + char needle[] = "FILTER_STRING"; ///< The FILTER STRING is replaced by python code. + char haystack[sizeof(needle)] = {}; + bpf_probe_read(&haystack, sizeof(haystack), (void*)inputString); + for (int i = 0; i < sizeof(needle) - 1; ++i) { + if (needle[i] != haystack[i]) { + return false; + } + } + return true; +} + +/** + * @brief Contains the operation start data to trace. + */ +struct start_data_t +{ + u64 operation_id; ///< The id of the operation. + char input[64]; ///< The input string of the request. + u64 start; ///< Timestamp of the start operation (start timestamp). +}; + +/** + * @brief Contains the operation start data. + * key: the operation id. + * value: The operation start latency data. + */ +BPF_HASH(start_hash, u64, struct start_data_t); + +/** + * @brief Reads the operation request arguments and stores the start data in the hash. + * @param ctx The BPF context. + */ +int trace_operation_start(struct pt_regs* ctx) +{ + struct start_data_t start_data = {}; + bpf_usdt_readarg_p(2, ctx, &start_data.input, sizeof(start_data.input)); + + FILTER ///< Replaced by python code. + + bpf_usdt_readarg(1, ctx, &start_data.operation_id); + + start_data.start = bpf_ktime_get_ns(); + start_hash.update(&start_data.operation_id, &start_data); + return 0; +} diff --git a/examples/usdt_sample/scripts/lat_avg.py b/examples/usdt_sample/scripts/lat_avg.py new file mode 100755 index 0000000..be473d1 --- /dev/null +++ b/examples/usdt_sample/scripts/lat_avg.py @@ -0,0 +1,126 @@ +import argparse +from time import sleep, strftime +from sys import argv +import ctypes as ct +from bcc import BPF, USDT +import inspect +import os + +# Parse command line arguments +parser = argparse.ArgumentParser(description="Trace the moving average of the latency of an operation using usdt probes.", + formatter_class=argparse.RawDescriptionHelpFormatter) +parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.") +parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.") +parser.add_argument("-c", "--count", type=int, default=16, help="The count of samples over which to calculate the moving average.") +parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.") +parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.") +parser.set_defaults(verbose=False) +args = parser.parse_args() +this_pid = int(args.pid) +this_interval = int(args.interval) +this_count = int(args.count) +this_filter = str(args.filterstr) + +if this_interval < 1: + print("Invalid value for interval, using 1.") + this_interval = 1 + +if this_count < 1: + print("Invalid value for count, using 1.") + this_count = 1 + +debugLevel=0 +if args.verbose: + debugLevel=4 + +# BPF program +bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +bpf_text = open(bpf_text_shared, 'r').read() +bpf_text += """ + +const u32 MAX_SAMPLES = SAMPLE_COUNT; + +struct hash_key_t +{ + char input[64]; +}; + +struct hash_leaf_t +{ + u32 count; + u64 total; + u64 average; +}; + +/** + * @brief Contains the averages for the operation latencies by operation input. + */ +BPF_HASH(lat_hash, struct hash_key_t, struct hash_leaf_t, 512); + +/** + * @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram. + * @param ctx The BPF context. + */ +int trace_operation_end(struct pt_regs* ctx) +{ + u64 operation_id; + bpf_usdt_readarg(1, ctx, &operation_id); + + struct start_data_t* start_data = start_hash.lookup(&operation_id); + if (0 == start_data) { + return 0; + } + + u64 duration = bpf_ktime_get_ns() - start_data->start; + struct hash_key_t hash_key = {}; + __builtin_memcpy(&hash_key.input, start_data->input, sizeof(hash_key.input)); + start_hash.delete(&operation_id); + + struct hash_leaf_t zero = {}; + struct hash_leaf_t* hash_leaf = lat_hash.lookup_or_init(&hash_key, &zero); + if (0 == hash_leaf) { + return 0; + } + + if (hash_leaf->count < MAX_SAMPLES) { + hash_leaf->count++; + } else { + hash_leaf->total -= hash_leaf->average; + } + + hash_leaf->total += duration; + hash_leaf->average = hash_leaf->total / hash_leaf->count; + + return 0; +} +""" + +bpf_text = bpf_text.replace("SAMPLE_COUNT", str(this_count)) +bpf_text = bpf_text.replace("FILTER_STRING", this_filter) +if this_filter: + bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }") +else: + bpf_text = bpf_text.replace("FILTER", "") + +# Create USDT context +print("Attaching probes to pid %d" % this_pid) +usdt_ctx = USDT(pid=this_pid) +usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start") +usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end") + +# Create BPF context, load BPF program +bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel) + +print("Tracing... Hit Ctrl-C to end.") + +lat_hash = bpf_ctx.get_table("lat_hash") +while (1): + try: + sleep(this_interval) + except KeyboardInterrupt: + exit() + + print("[%s]" % strftime("%H:%M:%S")) + print("%-64s %8s %16s" % ("input", "count", "latency (us)")) + for k, v in lat_hash.items(): + print("%-64s %8d %16d" % (k.input, v.count, v.average / 1000)) diff --git a/examples/usdt_sample/scripts/lat_dist.py b/examples/usdt_sample/scripts/lat_dist.py new file mode 100755 index 0000000..af13e89 --- /dev/null +++ b/examples/usdt_sample/scripts/lat_dist.py @@ -0,0 +1,98 @@ +import argparse +from time import sleep, strftime +from sys import argv +import ctypes as ct +from bcc import BPF, USDT +import inspect +import os + +# Parse command line arguments +parser = argparse.ArgumentParser(description="Trace the latency distribution of an operation using usdt probes.", + formatter_class=argparse.RawDescriptionHelpFormatter) +parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.") +parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.") +parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.") +parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.") +parser.set_defaults(verbose=False) +args = parser.parse_args() +this_pid = int(args.pid) +this_interval = int(args.interval) +this_filter = str(args.filterstr) + +if this_interval < 1: + print("Invalid value for interval, using 1.") + this_interval = 1 + +debugLevel=0 +if args.verbose: + debugLevel=4 + +# BPF program +bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +bpf_text = open(bpf_text_shared, 'r').read() +bpf_text += """ + +/** + * @brief The key to use for the latency histogram. + */ +struct dist_key_t +{ + char input[64]; ///< The input string of the request. + u64 slot; ///< The histogram slot. +}; + +/** + * @brief Contains the histogram for the operation latencies. + */ +BPF_HISTOGRAM(dist, struct dist_key_t); + +/** + * @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram. + * @param ctx The BPF context. + */ +int trace_operation_end(struct pt_regs* ctx) +{ + u64 operation_id; + bpf_usdt_readarg(1, ctx, &operation_id); + + struct start_data_t* start_data = start_hash.lookup(&operation_id); + if (0 == start_data) { + return 0; + } + + u64 duration = bpf_ktime_get_ns() - start_data->start; + struct dist_key_t dist_key = {}; + __builtin_memcpy(&dist_key.input, start_data->input, sizeof(dist_key.input)); + dist_key.slot = bpf_log2l(duration / 1000); + start_hash.delete(&operation_id); + + dist.increment(dist_key); + return 0; +} +""" + +bpf_text = bpf_text.replace("FILTER_STRING", this_filter) +if this_filter: + bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }") +else: + bpf_text = bpf_text.replace("FILTER", "") + +# Create USDT context +print("Attaching probes to pid %d" % this_pid) +usdt_ctx = USDT(pid=this_pid) +usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start") +usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end") + +# Create BPF context, load BPF program +bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel) + +start = 0 +dist = bpf_ctx.get_table("dist") +while (1): + try: + sleep(this_interval) + except KeyboardInterrupt: + exit() + + print("[%s]" % strftime("%H:%M:%S")) + dist.print_log2_hist("latency (us)") diff --git a/examples/usdt_sample/scripts/latency.py b/examples/usdt_sample/scripts/latency.py new file mode 100755 index 0000000..7b033ed --- /dev/null +++ b/examples/usdt_sample/scripts/latency.py @@ -0,0 +1,117 @@ +import argparse +from time import sleep +from sys import argv +import ctypes as ct +from bcc import BPF, USDT +import inspect +import os + +# Parse command line arguments +parser = argparse.ArgumentParser(description="Trace the latency of an operation using usdt probes.", + formatter_class=argparse.RawDescriptionHelpFormatter) +parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.") +parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.") +parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.") +parser.set_defaults(verbose=False) +args = parser.parse_args() +this_pid = int(args.pid) +this_filter = str(args.filterstr) + +debugLevel=0 +if args.verbose: + debugLevel=4 + +# BPF program +bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +bpf_text = open(bpf_text_shared, 'r').read() +bpf_text += """ + +/** + * @brief Contains the latency data w.r.t. the complete operation from request to response. + */ +struct end_data_t +{ + u64 operation_id; ///< The id of the operation. + char input[64]; ///< The request (input) string. + char output[64]; ///< The response (output) string. + u64 start; ///< The start timestamp of the operation. + u64 end; ///< The end timestamp of the operation. + u64 duration; ///< The duration of the operation. +}; + +/** + * The output buffer, which will be used to push the latency event data to user space. + */ +BPF_PERF_OUTPUT(operation_event); + +/** + * @brief Reads the operation response arguments, calculates the latency event data, and writes it to the user output buffer. + * @param ctx The BPF context. + */ +int trace_operation_end(struct pt_regs* ctx) +{ + u64 operation_id; + bpf_usdt_readarg(1, ctx, &operation_id); + + struct start_data_t* start_data = start_hash.lookup(&operation_id); + if (0 == start_data) { + return 0; + } + + struct end_data_t end_data = {}; + end_data.operation_id = operation_id; + bpf_usdt_readarg_p(2, ctx, &end_data.output, sizeof(end_data.output)); + end_data.end = bpf_ktime_get_ns(); + end_data.start = start_data->start; + end_data.duration = end_data.end - end_data.start; + __builtin_memcpy(&end_data.input, start_data->input, sizeof(end_data.input)); + + start_hash.delete(&end_data.operation_id); + + operation_event.perf_submit(ctx, &end_data, sizeof(end_data)); + return 0; +} +""" + +bpf_text = bpf_text.replace("FILTER_STRING", this_filter) +if this_filter: + bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }") +else: + bpf_text = bpf_text.replace("FILTER", "") + +# Create USDT context +print("Attaching probes to pid %d" % this_pid) +usdt_ctx = USDT(pid=this_pid) +usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start") +usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end") + +# Create BPF context, load BPF program +bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel) + +# Define latency event and print function +class OperationEventData(ct.Structure): + _fields_ = [("operation_id", ct.c_ulonglong), + ("input", ct.c_char * 64), + ("output", ct.c_char * 64), + ("start", ct.c_ulonglong), + ("end", ct.c_ulonglong), + ("duration", ct.c_ulonglong)] + +start = 0 +def print_event(cpu, data, size): + global start + event = ct.cast(data, ct.POINTER(OperationEventData)).contents + if start == 0: + start = event.start + time_s = (float(event.start - start)) / 1000000000 + latency = (float(event.duration) / 1000) + print("%-18.9f %-10d %-32s %-32s %16d %16d %16d" % (time_s, event.operation_id, event.input, event.output, event.start, event.end, latency)) + +# Print header +print("Tracing... Hit Ctrl-C to end.") +print("%-18s %-10s %-32s %-32s %16s %16s %16s" % ("time(s)", "id", "input", "output", "start (ns)", "end (ns)", "duration (us)")) + +# Output latency events +bpf_ctx["operation_event"].open_perf_buffer(print_event) +while 1: + bpf_ctx.kprobe_poll() diff --git a/examples/usdt_sample/usdt_sample.md b/examples/usdt_sample/usdt_sample.md new file mode 100644 index 0000000..c6b5a07 --- /dev/null +++ b/examples/usdt_sample/usdt_sample.md @@ -0,0 +1,163 @@ +Tested on Fedora25 4.11.3-200.fc25.x86_64, gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1) + +As an alternative to using ...bcc/tests/python/include/folly/tracing/StaticTracepoint.h, +it's possible to use systemtap-sdt-devel. +However, this is *not* required for this sample. +```bash +$ sudo dnf install systemtap-sdt-devel # For Fedora25, other distro's might have differently named packages. +``` + +If using systemtap-sdt-devel, the following commands can be used to generate the corresponding header and object files: +Also see the CMakeLists.txt file for an example how to do this using cmake. +```bash +$ dtrace -h -s usdt_sample_lib1/src/lib1_sdt.d -o usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h +$ dtrace -G -s usdt_sample_lib1/src/lib1_sdt.d -o lib1_sdt.o +``` + +Build the sample: +```bash +$ pwd +~/src/bcc +$ mkdir -p examples/usdt_sample/build && pushd examples/usdt_sample/build +$ cmake .. && make +$ popd +``` + +After building, you should see the available probes: +```bash +$ python tools/tplist.py -l examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so +examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so usdt_sample_lib1:operation_end +examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so usdt_sample_lib1:operation_start +$ readelf -n examples/usdt_sample/build/usdt_sample_lib1/libusdt_sample_lib1.so + +Displaying notes found at file offset 0x000001c8 with length 0x00000024: + Owner Data size Description + GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) + Build ID: 3930c19f654990159563394669f2ed5281513302 + +Displaying notes found at file offset 0x0001b9ec with length 0x000000c0: + Owner Data size Description + stapsdt 0x00000047 NT_STAPSDT (SystemTap probe descriptors) + Provider: usdt_sample_lib1 + Name: operation_end + Location: 0x000000000000ed6d, Base: 0x0000000000000000, Semaphore: 0x0000000000000000 + Arguments: -8@%rbx -8@%rax + stapsdt 0x0000004e NT_STAPSDT (SystemTap probe descriptors) + Provider: usdt_sample_lib1 + Name: operation_start + Location: 0x000000000000ee2c, Base: 0x0000000000000000, Semaphore: 0x0000000000000000 + Arguments: -8@-24(%rbp) -8@%rax +``` + +Start the usdt sample application: +```bash +$ examples/usdt_sample/build/usdt_sample_app1/usdt_sample_app1 "pf" 1 30 10 1 50 +Applying the following parameters: +Input prefix: pf. +Input range: [1, 30]. +Calls Per Second: 10. +Latency range: [1, 50] ms. +You can now run the bcc scripts, see usdt_sample.md for examples. +pid: 25433 +Press ctrl-c to exit. +``` + +Use argdist.py on the individual probes: +```bash +$ sudo python tools/argdist.py -p 25433 -i 5 -C 'u:usdt_sample_lib1:operation_start():char*:arg2#input' -z 32 +[11:18:29] +input + COUNT EVENT + 1 arg2 = pf_10 + 1 arg2 = pf_5 + 1 arg2 = pf_12 + 1 arg2 = pf_1 + 1 arg2 = pf_11 + 1 arg2 = pf_28 + 1 arg2 = pf_16 + 1 arg2 = pf_19 + 1 arg2 = pf_15 + 1 arg2 = pf_2 + 2 arg2 = pf_17 + 2 arg2 = pf_3 + 2 arg2 = pf_25 + 2 arg2 = pf_30 + 2 arg2 = pf_13 + 2 arg2 = pf_18 + 2 arg2 = pf_7 + 2 arg2 = pf_29 + 2 arg2 = pf_26 + 3 arg2 = pf_8 + 3 arg2 = pf_21 + 3 arg2 = pf_14 + 4 arg2 = pf_6 + 4 arg2 = pf_23 + 5 arg2 = pf_24 +``` + +Use latency.py to trace the operation latencies: +```bash +$ sudo python examples/usdt_sample/scripts/latency.py -p=25433 -f="pf_2" +Attaching probes to pid 25433 +Tracing... Hit Ctrl-C to end. +time(s) id input output start (ns) end (ns) duration (us) +0.000000000 7204 pf_28 resp_pf_28 11949439999644 11949489234565 49234 +0.100211886 7205 pf_28 resp_pf_28 11949540211530 11949574403064 34191 +0.300586675 7207 pf_21 resp_pf_21 11949740586319 11949742773571 2187 +0.400774366 7208 pf_28 resp_pf_28 11949840774010 11949859965498 19191 +0.701365719 7211 pf_21 resp_pf_21 11950141365363 11950152551131 11185 +0.901736620 7213 pf_25 resp_pf_25 11950341736264 11950347924333 6188 +1.102162217 7215 pf_21 resp_pf_21 11950542161861 11950567484183 25322 +1.302595998 7217 pf_23 resp_pf_23 11950742595642 11950761841242 19245 +1.503047601 7219 pf_2 resp_pf_2 11950943047245 11950951213474 8166 +1.703371457 7221 pf_27 resp_pf_27 11951143371101 11951176568051 33196 +2.104228899 7225 pf_24 resp_pf_24 11951544228543 11951588432769 44204 +2.304608175 7227 pf_21 resp_pf_21 11951744607819 11951790796068 46188 +2.404796703 7228 pf_21 resp_pf_21 11951844796347 11951877984160 33187 +2.605134923 7230 pf_27 resp_pf_27 11952045134567 11952065327660 20193 +3.206291642 7236 pf_29 resp_pf_29 11952646291286 11952660443343 14152 +3.506887492 7239 pf_21 resp_pf_21 11952946887136 11952995060987 48173 +``` + +Use lat_dist.py to trace the latency distribution: +```bash +$ sudo python examples/usdt_sample/scripts/lat_dist.py -p=25433 -i=30 -f="pf_20" +Attaching probes to pid 25433 +[11:23:47] + +Bucket ptr = 'pf_20' + latency (us) : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 0 | | + 1024 -> 2047 : 1 |********** | + 2048 -> 4095 : 1 |********** | + 4096 -> 8191 : 0 | | + 8192 -> 16383 : 1 |********** | + 16384 -> 32767 : 4 |****************************************| + 32768 -> 65535 : 3 |****************************** | +``` + +Use lat_avg.py to trace the moving average of the latencies: +```bash +$ sudo python examples/usdt_sample/scripts/lat_avg.py -p=25433 -i=5 -c=10 -f="pf_2" +Attaching probes to pid 25433 +Tracing... Hit Ctrl-C to end. +[11:28:32] +input count latency (us) +pf_22 3 7807 +pf_23 4 36914 +pf_25 3 31473 +pf_28 2 10627 +pf_27 1 47174 +pf_29 1 8138 +pf_26 1 49121 +pf_20 2 29158 +``` diff --git a/examples/usdt_sample/usdt_sample_app1/CMakeLists.txt b/examples/usdt_sample/usdt_sample_app1/CMakeLists.txt new file mode 100755 index 0000000..b447e21 --- /dev/null +++ b/examples/usdt_sample/usdt_sample_app1/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.0) +project(usdt_sample_app1) + +include_directories( + ${USDT_SAMPLE_LIB1_INCLUDE_DIR} +) + +link_directories( + ${USDT_SAMPLE_LIB1_LINK_DIR} +) + +add_executable( ${PROJECT_NAME} + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp +) + +target_link_libraries( ${PROJECT_NAME} + ${USDT_SAMPLE_LIB1_LIB} + pthread +) diff --git a/examples/usdt_sample/usdt_sample_app1/main.cpp b/examples/usdt_sample/usdt_sample_app1/main.cpp new file mode 100644 index 0000000..c75c783 --- /dev/null +++ b/examples/usdt_sample/usdt_sample_app1/main.cpp @@ -0,0 +1,145 @@ +// std +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// gnu-c +#include +#include + +// usdt_sample_lib1 +#include "usdt_sample_lib1/lib1.h" + +void print_usage(int argc, char** argv) +{ + std::cout << "Usage:" << std::endl; + std::cout << argv[0] + << " " + << std::endl; + std::cout << "InputPrefix: Prefix of the input string to the operation. Default: dummy" << std::endl; + std::cout << "InputMinimum: Minimum number to make the input string to the operation somewhat unique. Default: 1" << std::endl; + std::cout << "InputMaximum: Maximum number to make the input string to the operation somewhat unique. Default: 50" << std::endl; + std::cout << "CallsPerSec: Rate of calls to the operation. Default: 10" << std::endl; + std::cout << "MinimumLatencyMs: Minimum latency to apply to the operation. Default: 20" << std::endl; + std::cout << "MaximumLatencyMs: Maximum latency to apply to the operation. Default: 40" << std::endl; +} + +int main(int argc, char** argv) +{ + std::string inputPrefix("dummy"); + std::uint32_t inputMinimum = 1; + std::uint32_t inputMaximum = 50; + std::uint32_t callsPerSec = 10; + std::uint32_t minLatMs = 20; + std::uint32_t maxLatMs = 40; + + try { + if (argc > 1) { + inputPrefix = argv[1]; + } + + if (argc > 2) { + inputMinimum = static_cast(std::max(1, std::min(50, std::atoi(argv[2])))); + } + + if (argc > 3) { + inputMaximum = static_cast(std::max(1, std::min(50, std::atoi(argv[3])))); + } + + if (argc > 4) { + callsPerSec = static_cast(std::max(1, std::min(50, std::atoi(argv[4])))); + } + + if (argc > 5) { + minLatMs = static_cast(std::max(1, std::min(50, std::atoi(argv[5])))); + } + + if (argc > 6) { + maxLatMs = static_cast(std::max(1, std::min(50, std::atoi(argv[6])))); + } + } + catch (const std::exception& exc) { + std::cout << "Exception while reading arguments: " << exc.what() << std::endl; + print_usage(argc, argv); + return -1; + } + catch (...) { + std::cout << "Unknown exception while reading arguments." << std::endl; + print_usage(argc, argv); + return -1; + } + + if (inputMinimum > inputMaximum) { + std::cout << "InputMinimum must be smaller than InputMaximum." << std::endl; + print_usage(argc, argv); + return -1; + } + + if (minLatMs > maxLatMs) { + std::cout << "MinimumLatencyMs must be smaller than MaximumLatencyMs." << std::endl; + print_usage(argc, argv); + return -1; + } + + std::cout << "Applying the following parameters:" << std::endl + << "Input prefix: " << inputPrefix << "." << std::endl + << "Input range: [" << inputMinimum << ", " << inputMaximum << "]." << std::endl + << "Calls Per Second: " << callsPerSec << "." << std::endl + << "Latency range: [" << minLatMs << ", " << maxLatMs << "] ms." << std::endl; + + const int sleepTimeMs = 1000 / callsPerSec; + OperationProvider op(minLatMs, maxLatMs); + + std::mutex queueMutex; + std::queue> responseQueue; + + auto dequeueFuture = std::async(std::launch::async, [&]() { + while (true) { + bool empty = false; + { + std::lock_guard lg(queueMutex); + empty = responseQueue.empty(); + } + + if (empty) { + std::this_thread::sleep_for(std::chrono::milliseconds(sleepTimeMs)); + continue; + } + + responseQueue.front().get(); + + // std::cout << "Removing item from queue." << std::endl; + std::lock_guard lg(queueMutex); + responseQueue.pop(); + } + }); + + std::random_device rd; + std::uniform_int_distribution<> dis(inputMinimum, inputMaximum); + + std::cout << "You can now run the bcc scripts, see usdt_sample.md for examples." << std::endl; + std::cout << "pid: " << ::getpid() << std::endl; + std::cout << "Press ctrl-c to exit." << std::endl; + while (true) { + std::ostringstream inputOss; + inputOss << inputPrefix << "_" << dis(rd); + auto responseFuture = op.executeAsync(OperationRequest(inputOss.str())); + + { + std::lock_guard lg(queueMutex); + responseQueue.push(responseFuture); + } + + // For a sample application, this is good enough to simulate callsPerSec. + std::this_thread::sleep_for(std::chrono::milliseconds(sleepTimeMs)); + } + + dequeueFuture.get(); + return 0; +} diff --git a/examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt b/examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt new file mode 100755 index 0000000..3f1c7b2 --- /dev/null +++ b/examples/usdt_sample/usdt_sample_lib1/CMakeLists.txt @@ -0,0 +1,45 @@ +cmake_minimum_required(VERSION 3.0) +project(usdt_sample_lib1) + +# Define variables. +set(USDT_SAMPLE_LIB1_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE STRING "USDT_SAMPLE_LIB1_INCLUDE_DIR" FORCE) +set(USDT_SAMPLE_LIB1_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src CACHE STRING "USDT_SAMPLE_LIB1_SRC_DIR" FORCE) +set(USDT_SAMPLE_LIB1_LINK_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE STRING "USDT_SAMPLE_LIB1_LINK_DIR" FORCE) +set(USDT_SAMPLE_LIB1_LIB ${PROJECT_NAME} CACHE STRING "USDT_SAMPLE_LIB1_LIB" FORCE) +set(USDT_SAMPLE_LIB1_GENERATED ${CMAKE_CURRENT_BINARY_DIR}/generated) + +## Start - N.B. Following section only relevant when using systemtap-sdt-devel. + +# Create usdt header file. +# N.B. ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h must be removed manually in order for it to be (re-)created. +# i.e. after making changes to libt_sdt.d +#add_custom_command( +# OUTPUT ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h +# PRE_BUILD +# COMMAND dtrace -h -s ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1_sdt.d -o ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h +# COMMENT "Create usdt probes header file" +#) + +# Create usdt object file. +#file(MAKE_DIRECTORY ${USDT_SAMPLE_LIB1_GENERATED}) +#add_custom_command( +# OUTPUT ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o +# PRE_BUILD +# COMMAND dtrace -G -s ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1_sdt.d -o ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o +# COMMENT "Create usdt probes object file" +#) + +## End + +include_directories( + ${USDT_SAMPLE_LIB1_INCLUDE_DIR} + # For folly StaticTracepoint.h: + ${CMAKE_CURRENT_SOURCE_DIR}/../../../tests/python/include +) + +add_library( ${PROJECT_NAME} SHARED +## Only relevant when using systemtap-sdt-devel +# ${USDT_SAMPLE_LIB1_INCLUDE_DIR}/usdt_sample_lib1/lib1_sdt.h +# ${USDT_SAMPLE_LIB1_GENERATED}/lib1_sdt.o + ${USDT_SAMPLE_LIB1_SRC_DIR}/lib1.cpp +) diff --git a/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h b/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h new file mode 100644 index 0000000..1a9d13e --- /dev/null +++ b/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1.h @@ -0,0 +1,58 @@ +#pragma once + +// std +#include +#include +#include +#include + +/** + * @brief Contains the operation request data. + */ +class OperationRequest +{ +public: + OperationRequest(const std::string& input); + const std::string& input() const { return _input; } + +private: + std::string _input; +}; + +/** + * @brief Contains the operation response data. + */ +class OperationResponse +{ +public: + OperationResponse(const std::string& output); + const std::string& output() const { return _output; } + +private: + std::string _output; +}; + +/** + * @brief Provides the operation. + */ +class OperationProvider +{ +public: + /** + * @brief Constructs an instance of OperationProvider. + * @param minLatencyMs The minimum latency to simulate for the operation. + * @param maxLatencyMs The maximum latency to simulate for the operation. + */ + OperationProvider(std::uint32_t minLatencyMs, std::uint32_t maxLatencyMs); + + /** + * @brief Asynchronously executes the operation. + * @param request The request input data for the operation. + * @return A shared_future of the response of the operation. + */ + std::shared_future executeAsync(const OperationRequest& request); + +private: + std::mt19937 _gen; ///< Used randomly determine an operation latency to simulate. + std::uniform_int_distribution<> _dis; ///< Used randomly determine an operation latency to simulate. +}; diff --git a/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h b/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h new file mode 100644 index 0000000..6b8a51a --- /dev/null +++ b/examples/usdt_sample/usdt_sample_lib1/include/usdt_sample_lib1/lib1_sdt.h @@ -0,0 +1,36 @@ +// N.B. This file is not used by this usdt_sample. Instead, the StaticTracepoint.h file from folly is used. +// It is here only for demonstration purposes. + +/* Generated by the Systemtap dtrace wrapper */ + + +#define _SDT_HAS_SEMAPHORES 1 + + +#define STAP_HAS_SEMAPHORES 1 /* deprecated */ + + +#include + +/* USDT_SAMPLE_LIB1_OPERATION_START ( uint64_t operation_id, const char * input ) */ +#if defined STAP_SDT_V1 +#define USDT_SAMPLE_LIB1_OPERATION_START_ENABLED() __builtin_expect (operation_start_semaphore, 0) +#define usdt_sample_lib1_operation_start_semaphore operation_start_semaphore +#else +#define USDT_SAMPLE_LIB1_OPERATION_START_ENABLED() __builtin_expect (usdt_sample_lib1_operation_start_semaphore, 0) +#endif +__extension__ extern unsigned short usdt_sample_lib1_operation_start_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes"))); +#define USDT_SAMPLE_LIB1_OPERATION_START(arg1, arg2) \ +DTRACE_PROBE2 (usdt_sample_lib1, operation_start, arg1, arg2) + +/* USDT_SAMPLE_LIB1_OPERATION_END ( uint64_t operation_id, const char * output ) */ +#if defined STAP_SDT_V1 +#define USDT_SAMPLE_LIB1_OPERATION_END_ENABLED() __builtin_expect (operation_end_semaphore, 0) +#define usdt_sample_lib1_operation_end_semaphore operation_end_semaphore +#else +#define USDT_SAMPLE_LIB1_OPERATION_END_ENABLED() __builtin_expect (usdt_sample_lib1_operation_end_semaphore, 0) +#endif +__extension__ extern unsigned short usdt_sample_lib1_operation_end_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes"))); +#define USDT_SAMPLE_LIB1_OPERATION_END(arg1, arg2) \ +DTRACE_PROBE2 (usdt_sample_lib1, operation_end, arg1, arg2) + diff --git a/examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp b/examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp new file mode 100644 index 0000000..f19a7ea --- /dev/null +++ b/examples/usdt_sample/usdt_sample_lib1/src/lib1.cpp @@ -0,0 +1,64 @@ +#include "usdt_sample_lib1/lib1.h" + +// std +#include +#include +#include +#include + +// usdt_sample_lib1 +#include "folly/tracing/StaticTracepoint.h" + +// When using systemtap-sdt-devel, the following file should be included: +// #include "usdt_sample_lib1/lib1_sdt.h" + +OperationRequest::OperationRequest(const std::string& input_) + : _input(input_) +{ +} + +OperationResponse::OperationResponse(const std::string& output_) + : _output(output_) +{ +} + +OperationProvider::OperationProvider(std::uint32_t minLatencyMs_, std::uint32_t maxLatencyMs_) + : _gen(std::random_device()()) + , _dis(minLatencyMs_, maxLatencyMs_) +{ +} + +std::shared_future OperationProvider::executeAsync(const OperationRequest& request) +{ + static std::atomic operationIdCounter(0); + std::uint64_t operationId = operationIdCounter++; + + FOLLY_SDT(usdt_sample_lib1, operation_start, operationId, request.input().c_str()); + +/* Below an example of how to use this sample with systemtap-sdt-devel: + if (USDT_SAMPLE_LIB1_OPERATION_START_ENABLED()) { + //std::cout << "operation_start probe enabled." << std::endl; + USDT_SAMPLE_LIB1_OPERATION_START(operationId, &inputBuf); + } +*/ + + auto latencyMs = _dis(_gen); + + return std::async(std::launch::async, [latencyMs, operationId, request]() { + std::this_thread::sleep_for(std::chrono::milliseconds(latencyMs)); + + auto output = std::string("resp_") + request.input(); + OperationResponse response(output); + + FOLLY_SDT(usdt_sample_lib1, operation_end, operationId, response.output().c_str()); + +/* Below an example of how to use this sample with systemtap-sdt-devel: + if (USDT_SAMPLE_LIB1_OPERATION_END_ENABLED()) { + //std::cout << "operation_end probe enabled." << std::endl; + USDT_SAMPLE_LIB1_OPERATION_END(operationId, &outputBuf); + } +*/ + + return response; + }); +} diff --git a/examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d b/examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d new file mode 100644 index 0000000..4f7129e --- /dev/null +++ b/examples/usdt_sample/usdt_sample_lib1/src/lib1_sdt.d @@ -0,0 +1,7 @@ +# This file is only relevant when using systemtap-sdt-devel (see usdt_sample.md). +# This usdt_sample uses the StaticTracepoint.h header file (from folly) instead. +provider usdt_sample_lib1 +{ + probe operation_start(uint64_t operation_id, const char* input); + probe operation_end(uint64_t operation_id, const char* output); +}; diff --git a/tests/python/test_usdt.py b/tests/python/test_usdt.py index 0aa4c95..d1f0ca0 100755 --- a/tests/python/test_usdt.py +++ b/tests/python/test_usdt.py @@ -11,6 +11,7 @@ from unittest import main, TestCase from subprocess import Popen, PIPE from tempfile import NamedTemporaryFile import ctypes as ct +import inspect import os import signal @@ -19,18 +20,36 @@ class TestUDST(TestCase): # Application, minimum, to define three trace points app_text = b""" #include -#include +#include +#include +#include +#include "folly/tracing/StaticTracepoint.h" int main() { char s[100]; int i, a = 200, b = 40; for (i = 0; i < 100; i++) s[i] = (i & 7) + (i & 6); + uint64_t j = 0; + char s1[64]; + const char* str = "str"; + size_t len = strlen(str); while (1) { FOLLY_SDT(test, probe_point_1, s[7], b); FOLLY_SDT(test, probe_point_3, a, b); FOLLY_SDT(test, probe_point_1, s[4], a); FOLLY_SDT(test, probe_point_2, 5, s[10]); FOLLY_SDT(test, probe_point_3, s[4], s[7]); + + memset(&s1, '\0', sizeof(s1)); + strncpy(s1, str, len); + snprintf(s1 + len, sizeof(s1) - len, "%d", j); + FOLLY_SDT(test, probe_point_4, j++, &s1); + + memset(&s1, '\0', sizeof(s1)); + strncpy(s1, str, len); + snprintf(s1 + len, sizeof(s1) - len, "%d", j); + FOLLY_SDT(test, probe_point_5, &s1, j++); + sleep(1); } return 1; @@ -38,6 +57,7 @@ int main() { """ # BPF program self.bpf_text = """ +#include #include struct probe_result_t1 { @@ -55,10 +75,23 @@ struct probe_result_t3 { int v2; }; +struct probe_result_t4 { + u64 v1; + char v2[8]; +}; + +struct probe_result_t5 { + char v1[8]; + u64 v2; +}; + BPF_PERF_OUTPUT(event1); BPF_PERF_OUTPUT(event2); BPF_PERF_OUTPUT(event3); -int do_trace(struct pt_regs *ctx) { +BPF_PERF_OUTPUT(event4); +BPF_PERF_OUTPUT(event5); + +int do_trace1(struct pt_regs *ctx) { struct probe_result_t1 result = {}; bpf_usdt_readarg(1, ctx, &result.v1); bpf_usdt_readarg(2, ctx, &result.v2); @@ -79,12 +112,26 @@ int do_trace3(struct pt_regs *ctx) { event3.perf_submit(ctx, &result, sizeof(result)); return 0; } +int do_trace4(struct pt_regs *ctx) { + struct probe_result_t4 result = {}; + bpf_usdt_readarg(1, ctx, &result.v1); + bpf_usdt_readarg_p(2, ctx, &result.v2, sizeof(result.v2)); + event4.perf_submit(ctx, &result, sizeof(result)); + return 0; +} +int do_trace5(struct pt_regs *ctx) { + struct probe_result_t5 result = {}; + bpf_usdt_readarg_p(1, ctx, &result.v1, sizeof(result.v1)); + bpf_usdt_readarg(2, ctx, &result.v2); + event5.perf_submit(ctx, &result, sizeof(result)); + return 0; +} """ # Compile and run the application self.ftemp = NamedTemporaryFile(delete=False) self.ftemp.close() - comp = Popen(["gcc", "-I", "%s/include" % os.getcwd(), + comp = Popen(["gcc", "-I", "%s/include" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))), "-x", "c", "-o", self.ftemp.name, "-"], stdin=PIPE) comp.stdin.write(app_text) @@ -95,10 +142,12 @@ int do_trace3(struct pt_regs *ctx) { def test_attach1(self): # enable USDT probe from given PID and verifier generated BPF programs u = USDT(pid=int(self.app.pid)) - u.enable_probe(probe="probe_point_1", fn_name="do_trace") + u.enable_probe(probe="probe_point_1", fn_name="do_trace1") u.enable_probe(probe="probe_point_2", fn_name="do_trace2") u.enable_probe(probe="probe_point_3", fn_name="do_trace3") - b = BPF(text=self.bpf_text, usdt_contexts=[u]) + u.enable_probe(probe="probe_point_4", fn_name="do_trace4") + u.enable_probe(probe="probe_point_5", fn_name="do_trace5") + b = BPF(text=self.bpf_text, usdt_contexts=[u], debug=4) # Event states for each event: # 0 - probe not caught, 1 - probe caught with correct value, @@ -120,6 +169,14 @@ int do_trace3(struct pt_regs *ctx) { _fields_ = [("v1", ct.c_int), ("v2", ct.c_int)] + class Data4(ct.Structure): + _fields_ = [("v1", ct.c_ulonglong), + ("v2", ct.c_char * 64)] + + class Data5(ct.Structure): + _fields_ = [("v1", ct.c_char * 64), + ("v2", ct.c_ulonglong)] + def check_event_val(event, event_state, v1, v2, v3, v4): if ((event.v1 == v1 and event.v2 == v2) or (event.v1 == v3 and event.v2 == v4)): if (event_state == 0 or event_state == 1): @@ -139,10 +196,20 @@ int do_trace3(struct pt_regs *ctx) { event = ct.cast(data, ct.POINTER(Data3)).contents self.evt_st_3 = check_event_val(event, self.evt_st_3, 200, 40, 8, 13) + def print_event4(cpu, data, size): + event = ct.cast(data, ct.POINTER(Data4)).contents + print("%s" % event.v2) + + def print_event5(cpu, data, size): + event = ct.cast(data, ct.POINTER(Data5)).contents + print("%s" % event.v1) + # loop with callback to print_event b["event1"].open_perf_buffer(print_event1) b["event2"].open_perf_buffer(print_event2) b["event3"].open_perf_buffer(print_event3) + b["event4"].open_perf_buffer(print_event4) + b["event5"].open_perf_buffer(print_event5) # three iterations to make sure we get some probes and have time to process them for i in range(3): -- 2.7.4