From a1ac2f90d9cc9912721208c219e0361f50ddebbb Mon Sep 17 00:00:00 2001 From: Jean-Tiare Le Bigot Date: Fri, 4 Mar 2016 21:45:32 +0100 Subject: [PATCH] add solisten tool. Currently TCP only. --- README.md | 1 + tools/solisten.py | 217 +++++++++++++++++++++++++++++++++++++++++++++ tools/solisten_example.txt | 40 +++++++++ 3 files changed, 258 insertions(+) create mode 100644 tools/solisten.py create mode 100644 tools/solisten_example.txt diff --git a/README.md b/README.md index 29c0f0e..244f33c 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ Examples: - tools/[pidpersec](tools/pidpersec.py): Count new processes (via fork). [Examples](tools/pidpersec_example.txt). - tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt). - tools/[softirqs](tools/softirqs.py): Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt). +- tools/[solisten](tools/solisten.py): Trace TCP socket listen. [Examples](tools/solisten_example.txt). - tools/[stackcount](tools/stackcount.py): Count kernel function calls and their stack traces. [Examples](tools/stackcount_example.txt). - tools/[stacksnoop](tools/stacksnoop.py): Trace a kernel function and print all kernel stack traces. [Examples](tools/stacksnoop_example.txt). - tools/[statsnoop](tools/statsnoop.py): Trace stat() syscalls. [Examples](tools/statsnoop_example.txt). diff --git a/tools/solisten.py b/tools/solisten.py new file mode 100644 index 0000000..6ff3d4d --- /dev/null +++ b/tools/solisten.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +# +# solisten Trace TCP listen events +# For Linux, uses BCC, eBPF. Embedded C. +# +# USAGE: solisten.py [-h] [-p PID] [--show-netns] +# +# This is provided as a basic example of TCP connection & socket tracing. +# It could be usefull in scenarios where load balancers needs to be updated +# dynamically as application is fully initialized. +# +# All IPv4 listen attempts are traced, even if they ultimately fail or the +# the listening program is not willing to accept(). +# +# Copyright (c) 2016 Jean-Tiare Le Bigot. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 04-Mar-2016 Jean-Tiare Le Bigot Created this. + +import os +import socket +import netaddr +import argparse +from bcc import BPF +import ctypes as ct + +# Arguments +examples = """Examples: + ./solisten.py # Stream socket listen + ./solisten.py -p 1234 # Stream socket listen for specified PID only + ./solisten.py --netns 4242 # Stream socket listen for specified network namespace ID only + ./solisten.py --show-netns # Show network namespace ID. Probably usefull if you run containers +""" + +parser = argparse.ArgumentParser( + description="Stream sockets listen", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("--show-netns", action="store_true", + help="show network namespace") +parser.add_argument("-p", "--pid", default=0, type=int, + help="trace this PID only") +parser.add_argument("-n", "--netns", default=0, type=int, + help="trace this Network Namespace only") + + +# BPF Program +bpf_text = """ +#include +#include +#include +#include + +// Endian conversion. We can't use kernel version here as it uses inline +// assembly, neither libc version as we can't import it here. Adapted from both. +#if defined(__LITTLE_ENDIAN) +#define bcc_be32_to_cpu(x) ((u32)(__builtin_bswap32)((x))) +#define bcc_be64_to_cpu(x) ((u64)(__builtin_bswap64)((x))) +#elif defined(__BIG_ENDIAN) +#define bcc_be32_to_cpu(x) (x) +#define bcc_be64_to_cpu(x) (x) +#else +#error Host endianness not defined +#endif + +// Common structure for UDP/TCP IPv4/IPv6 +struct listen_evt_t { + u64 ts_us; + u64 pid_tgid; + u64 backlog; + u64 netns; + u64 proto; // familiy << 16 | type + u64 lport; // use only 16 bits + u64 laddr[2]; // IPv4: store in laddr[0] + char task[TASK_COMM_LEN]; +}; +BPF_PERF_OUTPUT(listen_evt); + +// Send an event for each IPv4 listen with PID, bound address and port +int kprobe__inet_listen(struct pt_regs *ctx, struct socket *sock, int backlog) +{ + // cast types. Intermediate cast not needed, kept for readability + struct sock *sk = sock->sk; + struct inet_sock *inet = inet_sk(sk); + + // Built event for userland + struct listen_evt_t evt = { + .ts_us = bpf_ktime_get_ns() / 1000, + .backlog = backlog, + }; + + // Get process comm. Needs LLVM >= 3.7.1 see https://github.com/iovisor/bcc/issues/393 + bpf_get_current_comm(evt.task, TASK_COMM_LEN); + + // Get socket IP family + u16 family = sk->__sk_common.skc_family; + evt.proto = family << 16 | SOCK_STREAM; + + // Get PID + evt.pid_tgid = bpf_get_current_pid_tgid(); + + ##FILTER_PID## + + // Get port + bpf_probe_read(&evt.lport, sizeof(u16), &(inet->inet_sport)); + evt.lport = ntohs(evt.lport); + + // Get network namespace id, if kernel supports it +#ifdef CONFIG_NET_NS + evt.netns = sk->__sk_common.skc_net.net->ns.inum; +#else + evt.netns = 0; +#endif + + ##FILTER_NETNS## + + // Get IP + if (family == AF_INET) { + bpf_probe_read(evt.laddr, sizeof(u32), &(inet->inet_rcv_saddr)); + evt.laddr[0] = bcc_be32_to_cpu(evt.laddr[0]); + } else if (family == AF_INET6) { + bpf_probe_read(evt.laddr, sizeof(evt.laddr), sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + evt.laddr[0] = bcc_be64_to_cpu(evt.laddr[0]); + evt.laddr[1] = bcc_be64_to_cpu(evt.laddr[1]); + } + + // Send event to userland + listen_evt.perf_submit(ctx, &evt, sizeof(evt)); + + return 0; +}; +""" + +# event data +TASK_COMM_LEN = 16 # linux/sched.h +class ListenEvt(ct.Structure): + _fields_ = [ + ("ts_us", ct.c_ulonglong), + ("pid_tgid", ct.c_ulonglong), + ("backlog", ct.c_ulonglong), + ("netns", ct.c_ulonglong), + ("proto", ct.c_ulonglong), + ("lport", ct.c_ulonglong), + ("laddr", ct.c_ulonglong * 2), + ("task", ct.c_char * TASK_COMM_LEN) + ] + + # TODO: properties to unpack protocol / ip / pid / tgid ... + +# Format output +def event_printer(show_netns): + def print_event(cpu, data, size): + # Decode event + event = ct.cast(data, ct.POINTER(ListenEvt)).contents + + pid = event.pid_tgid & 0xffffffff + proto_family = event.proto & 0xff + proto_type = event.proto >> 16 & 0xff + + if proto_family == socket.SOCK_STREAM: + protocol = "TCP" + elif proto_family == socket.SOCK_DGRAM: + protocol = "UDP" + else: + protocol = "UNK" + + address = "" + if proto_type == socket.AF_INET: + protocol += "v4" + address = netaddr.IPAddress(event.laddr[0]) + elif proto_type == socket.AF_INET6: + address = netaddr.IPAddress(event.laddr[0]<<64 | event.laddr[1], version=6) + protocol += "v6" + + # Display + if show_netns: + print("%-6d %-12.12s %-12s %-6s %-8s %-5s %-39s" % ( + pid, event.task, event.netns, protocol, event.backlog, + event.lport, address, + )) + else: + print("%-6d %-12.12s %-6s %-8s %-5s %-39s" % ( + pid, event.task, protocol, event.backlog, + event.lport, address, + )) + + return print_event + +if __name__ == "__main__": + # Parse arguments + args = parser.parse_args() + + pid_filter = "" + netns_filter = "" + + if args.pid: + pid_filter = "if (evt.pid_tgid != %d) return 0;" % args.pid + if args.netns: + netns_filter = "if (evt.netns != %d) return 0;" % args.netns + + bpf_text = bpf_text.replace("##FILTER_PID##", pid_filter) + bpf_text = bpf_text.replace("##FILTER_NETNS##", netns_filter) + + # Initialize BPF + b = BPF(text=bpf_text) + b["listen_evt"].open_perf_buffer(event_printer(args.show_netns)) + + # Print headers + if args.show_netns: + print("%-6s %-12s %-12s %-6s %-8s %-5s %-39s" % ("PID", "COMM", "NETNS", "PROTO", "BACKLOG", "PORT", "ADDR")) + else: + print("%-6s %-12s %-6s %-8s %-5s %-39s" % ("PID", "COMM", "PROTO", "BACKLOG", "PORT", "ADDR")) + + # Read events + while 1: + b.kprobe_poll() + diff --git a/tools/solisten_example.txt b/tools/solisten_example.txt new file mode 100644 index 0000000..f7ace8c --- /dev/null +++ b/tools/solisten_example.txt @@ -0,0 +1,40 @@ +Demonstrations of solisten.py, the Linux eBPF/bcc version. + + +This tool traces the kernel function called when a program wants to listen +for TCP connections. It will not see UDP neither UNIX domain sockets. + +It can be used to dynamically update a load balancer as a program is actually +ready to accept connexion, hence avoiding the "downtime" while it is initializing. + +# ./solisten.py --show-netns +PID COMM NETNS PROTO BACKLOG ADDR PORT +3643 nc 4026531957 TCPv4 1 0.0.0.0 4242 +3659 nc 4026531957 TCPv6 1 2001:f0d0:1002:51::4 4242 +4221 redis-server 4026532165 TCPv6 128 :: 6379 +4221 redis-server 4026532165 TCPv4 128 0.0.0.0 6379 +6067 nginx 4026531957 TCPv4 128 0.0.0.0 80 +6067 nginx 4026531957 TCPv6 128 :: 80 +6069 nginx 4026531957 TCPv4 128 0.0.0.0 80 +6069 nginx 4026531957 TCPv6 128 :: 80 +6069 nginx 4026531957 TCPv4 128 0.0.0.0 80 +6069 nginx 4026531957 TCPv6 128 :: 80 + +This output show the listen event from 3 programs. Netcat was started twice as +shown by the 2 different PIDs. The first time on the wilcard IPv4, the second +time on an IPv6. Netcat being a "one shot" program. It can accept a single +connection, hence the backlog of "1". + +The next program is redis-server. As the netns column shows, it is in a +different network namespace than netcat and nginx. In this specific case +it was launched in a docker container. It listens both on IPv4 and IPv4 +with up to 128 pending connections. + +Determining the actual container is out if the scope of this tool. It could +be derived by scrapping /proc//cgroup. Note that this is racy. + +The overhead of this tool is negligeable as it traces listen() calls which are +invoked in the initialization path of a program. The operation part will remain +unaffected. In particular, accept() calls will not be affected. Neither +individual read() and write(). + -- 2.7.4