From 0ad095bb0ed8b3b92c536cc5b1eb96296f6fde23 Mon Sep 17 00:00:00 2001 From: Adrian Szyndela Date: Tue, 19 Oct 2021 16:25:56 +0200 Subject: [PATCH] dbus-latency: make it work Apparently, dbus-latency was measuring something, but it was definitely NOT latency. This commit fixes numerous bugs and misdesigned parts: - enums are 32-bit also on 64-bit platforms; - uprobes for gio were set on g_dbus_message_lock, which is called not only during sending and receiving messages, but also in other cases; now, there are two uprobes for glib: one for sending, one for receiving; - uprobes for libdbus were set on _dbus_connection_message_sent_unlocked() which is called _after_ sending and dbus_connection_dispatch() which is not called for replies at all; - intervals were mislabeled as "ms" instead of "us"; - dbus-daemon sends and receives messages with the same set of functions as libdbus clients; if measuring starts on sending and ends on receiving it was only a half of the job when we counted dbus-daemon; it is now excluded - lots of messages were lost, for examples some signals could be never received; this clogged up the maps up to 10240 elements, and then measuring stopped; we only count method calls now; - message about Ctrl-C was issued too soon, even before compiling; now, it is in the waiting loop. This part still needs fixing: - identifying messages by their serial number. It works quite well with randomized serial numbers in the libraries; - sd_bus is not supported. Change-Id: Ibc42375402c0d832e95c9b10742443839092171e --- src/cc/export/host32.h | 2 + src/cc/export/host64.h | 2 + src/cc/hostcompat/dbus/gio.h | 8 +- src/cc/hostcompat/dbus/libdbus.h | 3 + tools/dbus-latency.c | 168 ++++++++++++++++++++++++++----- tools/dbus-latency.py | 15 +-- 6 files changed, 164 insertions(+), 34 deletions(-) diff --git a/src/cc/export/host32.h b/src/cc/export/host32.h index 7ad59128..42daadcd 100644 --- a/src/cc/export/host32.h +++ b/src/cc/export/host32.h @@ -6,5 +6,7 @@ typedef int32_t int_t; typedef uint32_t ptr_t; typedef uint32_t ulong_t; typedef int32_t long_t; +typedef int32_t enum_t; +typedef int32_t bool_t; #endif // HOST32_H)*****" diff --git a/src/cc/export/host64.h b/src/cc/export/host64.h index 213a9bb1..b582c55f 100644 --- a/src/cc/export/host64.h +++ b/src/cc/export/host64.h @@ -6,5 +6,7 @@ typedef int64_t int_t; typedef uint64_t ptr_t; typedef uint64_t ulong_t; typedef int64_t long_t; +typedef int32_t enum_t; +typedef int32_t bool_t; #endif // HOST64_H)*****" diff --git a/src/cc/hostcompat/dbus/gio.h b/src/cc/hostcompat/dbus/gio.h index e2db5c81..eadf65d2 100644 --- a/src/cc/hostcompat/dbus/gio.h +++ b/src/cc/hostcompat/dbus/gio.h @@ -72,10 +72,10 @@ struct GVariant { struct GDBusMessage { struct GObject parent_instance; - uint_t type; // GDBusMessageType type; - uint_t flags; // GDBusMessageFlags flags; - int_t locked; // gboolean - uint_t byte_order; // GDBusMessageByteOrder byte_order; + enum_t type; // GDBusMessageType type; + enum_t flags; // GDBusMessageFlags flags; + bool_t locked; // gboolean + enum_t byte_order; // GDBusMessageByteOrder byte_order; unsigned char major_protocol_version; uint32_t serial; ptr_t headers; // GHashTable *headers; diff --git a/src/cc/hostcompat/dbus/libdbus.h b/src/cc/hostcompat/dbus/libdbus.h index ee353020..f492890e 100644 --- a/src/cc/hostcompat/dbus/libdbus.h +++ b/src/cc/hostcompat/dbus/libdbus.h @@ -1,6 +1,9 @@ #ifndef HOSTCOMPAT_LIBDBUS_H #define HOSTCOMPAT_LIBDBUS_H +#define DBUS_HEADER_FIELD_DESTINATION 6 +#define DBUS_MESSAGE_TYPE_METHOD_CALL 1 + struct DBusString { ptr_t str; int_t len; diff --git a/tools/dbus-latency.c b/tools/dbus-latency.c index 91b4cb47..608552d2 100644 --- a/tools/dbus-latency.c +++ b/tools/dbus-latency.c @@ -36,24 +36,133 @@ static void get_process_data(struct data_t *data) { data->ts = bpf_ktime_get_ns() / 1000; } -static int get_delay(int serial, struct data_t receiver_data) { - int delta; +static bool exclude_dbus_daemon(struct data_t *data) { + char name[] = "dbus-daemon"; + int i; + // manual strcmp as we don't have it in eBPF code + for (i = 0; i < sizeof(name); i++) + if (data->comm[i] != name[i]) + return false; + return true; +} + +static bool check_field(const char *header, size_t pos, char field_id) +{ + return header[pos] == field_id; +} + +static size_t next_field(const char *header, size_t pos) +{ + // Fields are made of meta data and value. + // Meta data is 32-bit long, with value's type at 3rd byte. + char field_type = header[pos + 2]; + unsigned field_size; + + // Fields are strings (type 's', 'g' and 'g') or uint32. + // Strings are made of string length (uint32) and actual string. + // Let's read the string size. + if (field_type == 's' || field_type == 'o' || field_type == 'g') + field_size = *(uint32_t *)(header + pos + sizeof(uint32_t)); + else + field_size = sizeof(uint32_t); + + pos += field_size + sizeof(uint32_t); // skip meta data and value + pos = (pos + 7) & ~7; // align to 8 for next field + return pos; +} + +static const char *get_field_ptr(const char *header, size_t pos) +{ + // The field's type is at 3rd byte + char field_type = header[pos + 2]; + const char *ret = header + pos + sizeof(uint32_t); // skip meta data + + if (field_type == 's' || field_type == 'o' || field_type == 'g') + ret += sizeof(uint32_t); // skip string length + return ret; +} + +static const char *get_field_value(struct DBusMessage *msg, char field_id) +{ + // DBUS1 marshalling + // TODO GVariant, for kdbus if needed + const char *header = msg->header.data.str; + size_t pos = 16; // start of the fields + + // loop for 9 possible headers, unrolled manually + // kernel does not want to accept this loop: + // + // for (i = 0; i < 9; i++) { + // if (check_field(header, pos, field_id) + // return get_field_ptr(header, pos); + // pos = next_field(header, pos); + // } + // + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + pos = next_field(header, pos); + if (check_field(header, pos, field_id)) + return get_field_ptr(header, pos); + return NULL; +} + +static bool exclude_destination_org_freedesktop_DBus_and_empty(struct DBusMessage *msg) +{ + char name[] = "org.freedesktop.DBus"; + + const char *destination = get_field_value(msg, DBUS_HEADER_FIELD_DESTINATION); + if (destination) { + int i; + // manual strcmp as we don't have it in eBPF code + for (i = 0; i < sizeof(name); i++) + if (destination[i] != name[i]) + return false; + return true; + } + return true; +} + +static int get_delay(int serial, struct data_t *receiver_data) { + int64_t delta; struct stats_t *stat; struct data_t *sender_data = msg_sent_addr.lookup(&serial); - if (sender_data != 0 && sender_data->pid != receiver_data.pid) { + if (sender_data != 0) { + struct data_t s_data = *sender_data; msg_sent_addr.delete(&serial); - delta = receiver_data.ts - sender_data->ts; - if (delta > 5000 || delta <= 0) { + delta = receiver_data->ts - s_data.ts; + if (delta <= 0) { + bpf_trace_printk("wrong delta: %lld\n", delta); return 0; } latency_histo.increment(bpf_log2l(delta)); - receiver_data.pid = sender_data->pid; - receiver_data.ts = 0; - bpf_probe_read(&receiver_data.comm, sizeof(receiver_data.comm), sender_data->comm); - stat = msg_latency.lookup(&receiver_data); + receiver_data->pid = s_data.pid; + receiver_data->ts = 0; + bpf_probe_read(&receiver_data->comm, sizeof(receiver_data->comm), s_data.comm); + stat = msg_latency.lookup(receiver_data); if (stat == 0) { struct stats_t stats = {delta, delta, 1, delta}; - msg_latency.insert(&receiver_data, &stats); + msg_latency.insert(receiver_data, &stats); return 0; } struct stats_t stats = {stat->min, stat->avg, stat->count, stat->max}; @@ -73,26 +182,27 @@ static int get_delay(int serial, struct data_t receiver_data) { } avg += delta; stats.avg = (avg / stats.count); - msg_latency.update(&receiver_data, &stats); + msg_latency.update(receiver_data, &stats); } return 0; } -int get_msg_addr(struct pt_regs *ctx, struct DBusConnection *conn, struct DBusMessage *msg){ +int get_msg_addr(struct pt_regs *ctx, struct DBusMessage *msg, const struct DBusString **h, const struct DBusString **b){ struct data_t data = {}; get_process_data(&data); struct header_data *header = msg->header.data.str; u32 serial = header->serial; + if (exclude_dbus_daemon(&data)) + return 0; msg_sent_addr.update(&serial,&data); return 0; } -int get_msg_latency(struct pt_regs *ctx, struct DBusConnection *conn) { +int get_msg_latency(struct pt_regs *ctx, struct DBusConnection *conn, struct DBusList *link) { struct data_t receiver_data = {}; struct DBusMessage *msg = 0; struct header_data *header = 0; int serial = 0; - struct DBusList *link = conn->incoming_messages; if (link == 0) { return 0; @@ -102,23 +212,33 @@ int get_msg_latency(struct pt_regs *ctx, struct DBusConnection *conn) { msg = link->data; header = msg->header.data.str; serial = header->serial; - get_delay(serial, receiver_data); + + if (exclude_dbus_daemon(&receiver_data)) { + // check if message destination is org.freedesktop.DBus + // we don't take into account messages directed to dbus-daemon + if (exclude_destination_org_freedesktop_DBus_and_empty(msg)) + msg_sent_addr.delete(&serial); + return 0; + } + + get_delay(serial, &receiver_data); return 0; } -int g_get_latency(struct pt_regs *ctx, struct GDBusMessage *msg) { +int g_get_msg_addr(struct pt_regs *ctx, struct GDBusMessage *msg, void *user_data) { struct data_t *receiver_data; u32 serial = msg->serial; - receiver_data = msg_sent_addr.lookup(&serial); struct data_t data = {}; get_process_data(&data); - data.ts = 0; - if ((receiver_data == 0) || (receiver_data->pid == data.pid)){ - msg_sent_addr.delete(&serial); - get_delay(serial, data); - return 0; - } msg_sent_addr.update(&serial, &data); - bpf_trace_printk("sender: %d\n", serial); + return 0; +} + +int g_get_msg_latency(struct pt_regs *ctx, struct GDBusMessage *msg, void *user_data) { + struct data_t *receiver_data; + u32 serial = msg->serial; + struct data_t data = {}; + get_process_data(&data); + get_delay(serial, &data); return 0; } diff --git a/tools/dbus-latency.py b/tools/dbus-latency.py index ea1aae33..d846ca1e 100755 --- a/tools/dbus-latency.py +++ b/tools/dbus-latency.py @@ -9,9 +9,8 @@ def usage(): print("USAGE: %s [--process|histogram]" % argv[0]) exit() -print("Hit Ctrl-C to end.") count = 100 -interval = 5 +interval = 2 loop = 0 mode = "histogram" @@ -26,20 +25,24 @@ else: b = BPF(src_file="dbus-latency.c") -b.attach_uprobe(name="dbus-1", sym="_dbus_connection_message_sent_unlocked", fn_name="get_msg_addr") -b.attach_uprobe(name="dbus-1", sym="dbus_connection_dispatch", fn_name="get_msg_latency") -b.attach_uprobe(name="gio-2.0", sym="g_dbus_message_lock", fn_name="g_get_latency") +b.attach_uprobe(name="dbus-1", sym="_dbus_message_get_network_data", fn_name="get_msg_addr") +b.attach_uprobe(name="dbus-1", sym="_dbus_connection_queue_received_message_link", fn_name="get_msg_latency") +b.attach_uprobe(name="gio-2.0", sym="on_worker_message_about_to_be_sent", fn_name="g_get_msg_addr") +b.attach_uprobe(name="gio-2.0", sym="on_worker_message_received", fn_name="g_get_msg_latency") +print("Hit Ctrl-C to end.") while (1): sleep(interval) loop = loop + 1 print("\n%d:" % loop) if mode =="histogram": - b["latency_histo"].print_log2_hist("ms") + b["latency_histo"].print_log2_hist("us") else: print ("%10s %20s %10s %10s %10s" % ("PID", "COMM", "MIN", "AVG", "MAX")) latency = b["msg_latency"] for v, p in sorted(latency.items(), key=lambda latency: latency[1].avg, reverse=True): print("%10d %20s %10d %10d %10d" % (v.pid, v.comm.encode('string-escape'), int(p.min), int(p.avg), int(p.max))) + print(" messages being transferred: %d" % len(b["msg_sent_addr"].items())) + -- 2.34.1