From 2c1799c9f901ec68b1bf421f89e93340c33c620e Mon Sep 17 00:00:00 2001 From: Rinat Ibragimov Date: Tue, 11 Jul 2017 21:14:08 +0300 Subject: [PATCH] memleak: expand allocator coverage (#1214) * memleak: handle libc allocation functions other than malloc * memleak: use tracepoints to track kernel allocations * memleak: add combined-only mode With large number of outstanding allocations, amount of data passed from kernel becomes large, which slows everything down. This patch calculates allocation statistics inside kernel, allowing user- space part to pull combined statistics data only, thus significantly reducing amount of passed data. * memleak: increase hashtable capacities There are a lot of allocations happen in kernel. Default values are not enough to keep up. * test: add a test for the memleak tool --- man/man8/memleak.8 | 31 +- tests/python/CMakeLists.txt | 2 + tests/python/test_tools_memleak.py | 121 ++++++++ tests/python/test_tools_memleak_leaker_app.c | 88 ++++++ tools/memleak.py | 300 +++++++++++++++++-- tools/memleak_example.txt | 26 +- 6 files changed, 522 insertions(+), 46 deletions(-) create mode 100755 tests/python/test_tools_memleak.py create mode 100644 tests/python/test_tools_memleak_leaker_app.c diff --git a/man/man8/memleak.8 b/man/man8/memleak.8 index ddce0e4a..8191b387 100644 --- a/man/man8/memleak.8 +++ b/man/man8/memleak.8 @@ -2,21 +2,29 @@ .SH NAME memleak \- Print a summary of outstanding allocations and their call stacks to detect memory leaks. Uses Linux eBPF/bcc. .SH SYNOPSIS -.B memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND] [-s SAMPLE_RATE] -[-T TOP] [-z MIN_SIZE] [-Z MAX_SIZE] [-O OBJ] [INTERVAL] [COUNT] +.B memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND] [--combined-only] +[-s SAMPLE_RATE] [-T TOP] [-z MIN_SIZE] [-Z MAX_SIZE] [-O OBJ] [INTERVAL] +[COUNT] .SH DESCRIPTION memleak traces and matches memory allocation and deallocation requests, and collects call stacks for each allocation. 
memleak can then print a summary of which call stacks performed allocations that weren't subsequently freed. -When tracing a specific process, memleak instruments malloc and free from libc. -When tracing all processes, memleak instruments kmalloc and kfree. +When tracing a specific process, memleak instruments a list of allocation +functions from libc, specifically: malloc, calloc, realloc, posix_memalign, +valloc, memalign, pvalloc, aligned_alloc, and free. +When tracing all processes, memleak instruments kmalloc/kfree, +kmem_cache_alloc/kmem_cache_free, and also page allocations made by +get_free_pages/free_pages. memleak may introduce significant overhead when tracing processes that allocate and free many blocks very quickly. See the OVERHEAD section below. This tool only works on Linux 4.6+. Stack traces are obtained using the new BPF_STACK_TRACE` APIs. For kernels older than 4.6, see the version under tools/old. +Kernel memory allocations are intercepted through tracepoints, which are +available on Linux 4.7+. + .SH REQUIREMENTS CONFIG_BPF and bcc. .SH OPTIONS @@ -25,7 +33,7 @@ CONFIG_BPF and bcc. Print usage message. .TP \-p PID -Trace this process ID only (filtered in-kernel). This traces malloc and free from libc. +Trace this process ID only (filtered in-kernel). This traces libc allocator. .TP \-t Print a trace of all allocation and free requests and results. @@ -38,7 +46,12 @@ Print only allocations older than OLDER milliseconds. Useful to remove false pos The default value is 500 milliseconds. .TP \-c COMMAND -Run the specified command and trace its allocations only. This traces malloc and free from libc. +Run the specified command and trace its allocations only. This traces libc allocator. +.TP +\-\-combined-only +Use statistics precalculated in kernel space. Amount of data to be pulled from +kernel significantly decreases, at the cost of losing capabilities of time-based +false positives filtering (\-o). 
.TP \-s SAMPLE_RATE Record roughly every SAMPLE_RATE-th allocation to reduce overhead. @@ -54,7 +67,7 @@ Capture only allocations that are larger than or equal to MIN_SIZE bytes. Capture only allocations that are smaller than or equal to MAX_SIZE bytes. .TP \-O OBJ -Attach to malloc and free in specified object instead of resolving libc. Ignored when kernel allocations are profiled. +Attach to allocation functions in specified object instead of resolving libc. Ignored when kernel allocations are profiled. .TP INTERVAL Print a summary of oustanding allocations and their call stacks every INTERVAL seconds. @@ -92,6 +105,10 @@ a significant slowdown. You can use the \-s switch to reduce the overhead further by capturing only every N-th allocation. The \-z and \-Z switches can also reduce overhead by capturing only allocations of specific sizes. +Additionally, option \-\-combined-only saves processing time by reusing already +calculated allocation statistics from kernel. It's faster, but lacks information +about particular allocations. + To determine the rate at which your application is calling malloc/free, or the rate at which your kernel is calling kmalloc/kfree, place a probe with perf and collect statistics. 
For example, to determine how many calls to __kmalloc are diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt index 7db58cab..a4495e7b 100644 --- a/tests/python/CMakeLists.txt +++ b/tests/python/CMakeLists.txt @@ -66,5 +66,7 @@ add_test(NAME py_test_dump_func WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${TEST_WRAPPER} py_dump_func simple ${CMAKE_CURRENT_SOURCE_DIR}/test_dump_func.py) add_test(NAME py_test_tools_smoke WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${TEST_WRAPPER} py_test_tools_smoke sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_smoke.py) +add_test(NAME py_test_tools_memleak WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${TEST_WRAPPER} py_test_tools_memleak sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_memleak.py) add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py) diff --git a/tests/python/test_tools_memleak.py b/tests/python/test_tools_memleak.py new file mode 100755 index 00000000..acdc6f6c --- /dev/null +++ b/tests/python/test_tools_memleak.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +from unittest import main, skipUnless, TestCase +import distutils.version +import os +import subprocess +import sys +import tempfile + +TOOLS_DIR = "../../tools/" + + +class cfg: + cmd_format = "" + + # Amount of memory to leak. Note, that test application allocates memory + # for its own needs in libc, so this amount should be large enough to be + # the biggest allocation. + leaking_amount = 30000 + + +def kernel_version_ge(major, minor): + # True if running kernel is >= X.Y + version = distutils.version.LooseVersion(os.uname()[2]).version + if version[0] > major: + return True + if version[0] < major: + return False + if minor and version[1] < minor: + return False + return True + + +def setUpModule(): + # Build the memory leaking application. 
+    c_src = 'test_tools_memleak_leaker_app.c'
+    tmp_dir = tempfile.mkdtemp(prefix='bcc-test-memleak-')
+    c_src_full = os.path.dirname(sys.argv[0]) + os.path.sep + c_src
+    exec_dst = tmp_dir + os.path.sep + 'leaker_app'
+
+    if subprocess.call(['gcc', '-g', '-O0', '-o', exec_dst, c_src_full]) != 0:
+        print("can't compile the leaking application")
+        raise Exception
+
+    # Taking two snapshots with a one second interval. Getting the largest
+    # allocation. Since attaching to a program happens with a delay, we wait
+    # for the first snapshot, then issue the command to the app. Finally,
+    # the second snapshot is used to extract the information.
+    # Helper utilities "timeout" and "stdbuf" are used to limit overall
+    # running time, and to disable buffering.
+    cfg.cmd_format = (
+        'stdbuf -o 0 -i 0 timeout -s KILL 10s ' + TOOLS_DIR +
+        'memleak.py -c "{} {{}} {}" -T 1 1 2'.format(exec_dst,
+                                                     cfg.leaking_amount))
+
+
+@skipUnless(kernel_version_ge(4, 6), "requires kernel >= 4.6")
+class MemleakToolTests(TestCase):
+    def run_leaker(self, leak_kind):
+        # Starting memleak.py, which in turn launches the leaking application.
+        p = subprocess.Popen(cfg.cmd_format.format(leak_kind),
+                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                             shell=True)
+
+        # Waiting for the first report.
+        while True:
+            p.poll()
+            if p.returncode is not None:
+                break
+            line = p.stdout.readline()
+            if "with outstanding allocations" in line:
+                break
+
+        # At this point, memleak.py has already launched the application and
+        # set probes. Sending command to the leaking application to make its
+        # allocations.
+        out = p.communicate(input="\n")[0]
+
+        # If there were memory leaks, they are in the output. Filter the lines
+        # containing "byte" substring. Every interesting line is expected to
+        # start with "N bytes from"
+        x = [x for x in out.split('\n') if 'byte' in x]
+
+        self.assertTrue(len(x) >= 1,
+                        msg="At least one line should have 'byte' substring.")
+
+        # Taking last report.
+ x = x[-1].split() + self.assertTrue(len(x) >= 1, + msg="There should be at least one word in the line.") + + # First word is the leak amount in bytes. + return int(x[0]) + + def test_malloc(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("malloc")) + + def test_calloc(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("calloc")) + + def test_realloc(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("realloc")) + + def test_posix_memalign(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("posix_memalign")) + + def test_valloc(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("valloc")) + + def test_memalign(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("memalign")) + + def test_pvalloc(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc")) + + def test_aligned_alloc(self): + self.assertEqual(cfg.leaking_amount, self.run_leaker("aligned_alloc")) + + +if __name__ == "__main__": + main() diff --git a/tests/python/test_tools_memleak_leaker_app.c b/tests/python/test_tools_memleak_leaker_app.c new file mode 100644 index 00000000..617dc5aa --- /dev/null +++ b/tests/python/test_tools_memleak_leaker_app.c @@ -0,0 +1,88 @@ +// This is a program that leaks memory, used for memory leak detector testing. 
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+static void generate_leak(const char *kind, int amount) {
+  void *ptr = NULL;
+
+  if (strcmp(kind, "malloc") == 0) {
+    printf("leaking via malloc, %p\n", malloc(amount));
+    return;
+  }
+
+  if (strcmp(kind, "calloc") == 0) {
+    printf("leaking via calloc, %p\n", calloc(amount, 1));
+    return;
+  }
+
+  if (strcmp(kind, "realloc") == 0) {
+    printf("leaking via realloc, %p\n", realloc(malloc(10), amount));
+    return;
+  }
+
+  if (strcmp(kind, "posix_memalign") == 0) {
+    posix_memalign(&ptr, 512, amount);
+    printf("leaking via posix_memalign, %p\n", ptr);
+    return;
+  }
+
+  if (strcmp(kind, "valloc") == 0) {
+    printf("leaking via valloc, %p\n", valloc(amount));
+    return;
+  }
+
+  if (strcmp(kind, "memalign") == 0) {
+    printf("leaking via memalign, %p\n", memalign(512, amount));
+    return;
+  }
+
+  if (strcmp(kind, "pvalloc") == 0) {
+    printf("leaking via pvalloc, %p\n", pvalloc(amount));
+    return;
+  }
+
+  if (strcmp(kind, "aligned_alloc") == 0) {
+    printf("leaking via aligned_alloc, %p\n", aligned_alloc(512, amount));
+    return;
+  }
+
+  if (strcmp(kind, "no_leak") == 0) {
+    void *ptr = malloc(amount);
+    printf("ptr = %p\n", ptr);
+    free(ptr);
+    return;
+  }
+
+  printf("unknown leak type '%s'\n", kind);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    printf("usage: leak-userspace <kind> [amount]\n");
+    return EXIT_SUCCESS;
+  }
+
+  const char *kind = argv[1];
+
+  int amount = 30;
+  if (argc > 2) {
+    amount = atoi(argv[2]);
+    if (amount < 1)
+      amount = 1;
+  }
+
+  // Wait for something in stdin to give external detector time to attach.
+  char c;
+  read(0, &c, sizeof(c));
+
+  // Do the work.
+  generate_leak(kind, amount);
+  return EXIT_SUCCESS;
+}
diff --git a/tools/memleak.py b/tools/memleak.py
index 80a1dc3d..484c3864 100755
--- a/tools/memleak.py
+++ b/tools/memleak.py
@@ -4,8 +4,8 @@
 # memory leaks in user-mode processes and the kernel.
# # USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND] -# [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE] -# [-Z MAX_SIZE] +# [--combined-only] [-s SAMPLE_RATE] [-T TOP] [-z MIN_SIZE] +# [-Z MAX_SIZE] [-O OBJ] # [interval] [count] # # Licensed under the Apache License, Version 2.0 (the "License") @@ -44,7 +44,7 @@ EXAMPLES: Trace allocations and display a summary of "leaked" (outstanding) allocations every 5 seconds ./memleak -p $(pidof allocs) -t - Trace allocations and display each individual call to malloc/free + Trace allocations and display each individual allocator function call ./memleak -ap $(pidof allocs) 10 Trace allocations and display allocated addresses, sizes, and stacks every 10 seconds for outstanding allocations @@ -62,8 +62,9 @@ EXAMPLES: description = """ Trace outstanding memory allocations that weren't freed. -Supports both user-mode allocations made with malloc/free and kernel-mode -allocations made with kmalloc/kfree. +Supports both user-mode allocations made with libc functions and kernel-mode +allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding +memory release functions. 
""" parser = argparse.ArgumentParser(description=description, @@ -83,6 +84,8 @@ parser.add_argument("-o", "--older", default=500, type=int, help="prune allocations younger than this age in milliseconds") parser.add_argument("-c", "--command", help="execute and trace the specified command") +parser.add_argument("--combined-only", default=False, action="store_true", + help="show combined allocation statistics only") parser.add_argument("-s", "--sample-rate", default=1, type=int, help="sample every N-th allocation to decrease the overhead") parser.add_argument("-T", "--top", type=int, default=10, @@ -92,7 +95,7 @@ parser.add_argument("-z", "--min-size", type=int, parser.add_argument("-Z", "--max-size", type=int, help="capture only allocations smaller than this size") parser.add_argument("-O", "--obj", type=str, default="c", - help="attach to malloc & free in the specified object") + help="attach to allocator functions in the specified object") args = parser.parse_args() @@ -126,12 +129,51 @@ struct alloc_info_t { int stack_id; }; +struct combined_alloc_info_t { + u64 total_size; + u64 number_of_allocs; +}; + BPF_HASH(sizes, u64); -BPF_HASH(allocs, u64, struct alloc_info_t); -BPF_STACK_TRACE(stack_traces, 1024) +BPF_TABLE("hash", u64, struct alloc_info_t, allocs, 1000000); +BPF_HASH(memptrs, u64, u64); +BPF_STACK_TRACE(stack_traces, 10240) +BPF_TABLE("hash", u64, struct combined_alloc_info_t, combined_allocs, 10240); + +static inline void update_statistics_add(u64 stack_id, u64 sz) { + struct combined_alloc_info_t *existing_cinfo; + struct combined_alloc_info_t cinfo = {0}; + + existing_cinfo = combined_allocs.lookup(&stack_id); + if (existing_cinfo != 0) + cinfo = *existing_cinfo; + + cinfo.total_size += sz; + cinfo.number_of_allocs += 1; + + combined_allocs.update(&stack_id, &cinfo); +} + +static inline void update_statistics_del(u64 stack_id, u64 sz) { + struct combined_alloc_info_t *existing_cinfo; + struct combined_alloc_info_t cinfo = {0}; + + existing_cinfo = 
combined_allocs.lookup(&stack_id); + if (existing_cinfo != 0) + cinfo = *existing_cinfo; -int alloc_enter(struct pt_regs *ctx, size_t size) -{ + if (sz >= cinfo.total_size) + cinfo.total_size = 0; + else + cinfo.total_size -= sz; + + if (cinfo.number_of_allocs > 0) + cinfo.number_of_allocs -= 1; + + combined_allocs.update(&stack_id, &cinfo); +} + +static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) { SIZE_FILTER if (SAMPLE_EVERY_N > 1) { u64 ts = bpf_ktime_get_ns(); @@ -148,9 +190,7 @@ int alloc_enter(struct pt_regs *ctx, size_t size) return 0; } -int alloc_exit(struct pt_regs *ctx) -{ - u64 address = PT_REGS_RC(ctx); +static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) { u64 pid = bpf_get_current_pid_tgid(); u64* size64 = sizes.lookup(&pid); struct alloc_info_t info = {0}; @@ -164,6 +204,7 @@ int alloc_exit(struct pt_regs *ctx) info.timestamp_ns = bpf_ktime_get_ns(); info.stack_id = stack_traces.get_stackid(ctx, STACK_FLAGS); allocs.update(&address, &info); + update_statistics_add(info.stack_id, info.size); if (SHOULD_PRINT) { bpf_trace_printk("alloc exited, size = %lu, result = %lx\\n", @@ -172,14 +213,18 @@ int alloc_exit(struct pt_regs *ctx) return 0; } -int free_enter(struct pt_regs *ctx, void *address) -{ +static inline int gen_alloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit2(ctx, PT_REGS_RC(ctx)); +} + +static inline int gen_free_enter(struct pt_regs *ctx, void *address) { u64 addr = (u64)address; struct alloc_info_t *info = allocs.lookup(&addr); if (info == 0) return 0; allocs.delete(&addr); + update_statistics_del(info->stack_id, info->size); if (SHOULD_PRINT) { bpf_trace_printk("free entered, address = %lx, size = %lu\\n", @@ -187,7 +232,138 @@ int free_enter(struct pt_regs *ctx, void *address) } return 0; } + +int malloc_enter(struct pt_regs *ctx, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int malloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} + +int free_enter(struct pt_regs *ctx, 
void *address) { + return gen_free_enter(ctx, address); +} + +int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) { + return gen_alloc_enter(ctx, nmemb * size); +} + +int calloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} + +int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) { + gen_free_enter(ctx, ptr); + return gen_alloc_enter(ctx, size); +} + +int realloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} + +int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment, + size_t size) { + u64 memptr64 = (u64)(size_t)memptr; + u64 pid = bpf_get_current_pid_tgid(); + + memptrs.update(&pid, &memptr64); + return gen_alloc_enter(ctx, size); +} + +int posix_memalign_exit(struct pt_regs *ctx) { + u64 pid = bpf_get_current_pid_tgid(); + u64 *memptr64 = memptrs.lookup(&pid); + void *addr; + + if (memptr64 == 0) + return 0; + + memptrs.delete(&pid); + + if (bpf_probe_read(&addr, sizeof(void*), (void*)(size_t)*memptr64) != 0) + return 0; + + u64 addr64 = (u64)(size_t)addr; + return gen_alloc_exit2(ctx, addr64); +} + +int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int aligned_alloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} + +int valloc_enter(struct pt_regs *ctx, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int valloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} + +int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int memalign_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} + +int pvalloc_enter(struct pt_regs *ctx, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int pvalloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit(ctx); +} """ + +bpf_source_kernel = """ + +TRACEPOINT_PROBE(kmem, kmalloc) { + gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc); + return gen_alloc_exit2((struct 
pt_regs *)args, (size_t)args->ptr); +} + +TRACEPOINT_PROBE(kmem, kmalloc_node) { + gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr); +} + +TRACEPOINT_PROBE(kmem, kfree) { + return gen_free_enter((struct pt_regs *)args, (void *)args->ptr); +} + +TRACEPOINT_PROBE(kmem, kmem_cache_alloc) { + gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr); +} + +TRACEPOINT_PROBE(kmem, kmem_cache_alloc_node) { + gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr); +} + +TRACEPOINT_PROBE(kmem, kmem_cache_free) { + return gen_free_enter((struct pt_regs *)args, (void *)args->ptr); +} + +TRACEPOINT_PROBE(kmem, mm_page_alloc) { + gen_alloc_enter((struct pt_regs *)args, PAGE_SIZE << args->order); + return gen_alloc_exit2((struct pt_regs *)args, args->pfn); +} + +TRACEPOINT_PROBE(kmem, mm_page_free) { + return gen_free_enter((struct pt_regs *)args, (void *)args->pfn); +} +""" + +if kernel_trace: + bpf_source += bpf_source_kernel + bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0") bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n)) @@ -209,18 +385,54 @@ bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags) bpf_program = BPF(text=bpf_source) if not kernel_trace: - print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid) - bpf_program.attach_uprobe(name=obj, sym="malloc", - fn_name="alloc_enter", pid=pid) - bpf_program.attach_uretprobe(name=obj, sym="malloc", - fn_name="alloc_exit", pid=pid) - bpf_program.attach_uprobe(name=obj, sym="free", - fn_name="free_enter", pid=pid) + print("Attaching to pid %d, Ctrl+C to quit." 
% pid)
+
+    def attach_probes(sym, fn_prefix=None, can_fail=False):
+        if fn_prefix is None:
+            fn_prefix = sym
+
+        try:
+            bpf_program.attach_uprobe(name=obj, sym=sym,
+                                      fn_name=fn_prefix + "_enter",
+                                      pid=pid)
+            bpf_program.attach_uretprobe(name=obj, sym=sym,
+                                         fn_name=fn_prefix + "_exit",
+                                         pid=pid)
+        except Exception:
+            if can_fail:
+                return
+            else:
+                raise
+
+    attach_probes("malloc")
+    attach_probes("calloc")
+    attach_probes("realloc")
+    attach_probes("posix_memalign")
+    attach_probes("valloc")
+    attach_probes("memalign")
+    attach_probes("pvalloc")
+    attach_probes("aligned_alloc", can_fail=True)  # added in C11
+    bpf_program.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
+                              pid=pid)
+
 else:
-    print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
-    bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
-    bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
-    bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")
+    print("Attaching to kernel allocators, Ctrl+C to quit.")
+
+    # No probe attaching here. Allocations are counted by attaching to
+    # tracepoints.
+    #
+    # Memory allocations in the Linux kernel are not limited to malloc/free
+    # equivalents. It's also common to allocate a memory page or multiple
+    # pages. The page allocator has two interfaces, one working with page
+    # frame numbers (PFN), while the other works with page addresses. It's
+    # possible to allocate pages with one kind of functions, and free them
+    # with another. Code in the kernel can easily convert PFNs to addresses
+    # and back, but it's hard to do the same in eBPF kprobe without fragile
+    # hacks.
+    #
+    # Fortunately, Linux exposes tracepoints for memory allocations, which
+    # can be instrumented by eBPF programs. The tracepoint for page
+    # allocations gives access to PFNs for both allocator interfaces. So
+    # there is no need to guess which allocation corresponds to which free.
def print_outstanding(): print("[%s] Top %d stacks with outstanding allocations:" % @@ -252,6 +464,37 @@ def print_outstanding(): print("\t%d bytes in %d allocations from stack\n\t\t%s" % (alloc.size, alloc.count, "\n\t\t".join(alloc.stack))) +def print_outstanding_combined(): + stack_traces = bpf_program["stack_traces"] + stacks = sorted(bpf_program["combined_allocs"].items(), + key=lambda a: -a[1].total_size) + cnt = 1 + entries = [] + for stack_id, info in stacks: + try: + trace = [] + for addr in stack_traces.walk(stack_id.value): + sym = bpf_program.sym(addr, pid, + show_module=True, + show_offset=True) + trace.append(sym) + trace = "\n\t\t".join(trace) + except KeyError: + trace = "stack information lost" + + entry = ("\t%d bytes in %d allocations from stack\n\t\t%s" % + (info.total_size, info.number_of_allocs, trace)) + entries.append(entry) + + cnt += 1 + if cnt > top_stacks: + break + + print("[%s] Top %d stacks with outstanding allocations:" % + (datetime.now().strftime("%H:%M:%S"), top_stacks)) + + print('\n'.join(reversed(entries))) + count_so_far = 0 while True: if trace_all: @@ -261,7 +504,10 @@ while True: sleep(interval) except KeyboardInterrupt: exit() - print_outstanding() + if args.combined_only: + print_outstanding_combined() + else: + print_outstanding() count_so_far += 1 if num_prints is not None and count_so_far >= num_prints: exit() diff --git a/tools/memleak_example.txt b/tools/memleak_example.txt index accc74f1..307a9fa5 100644 --- a/tools/memleak_example.txt +++ b/tools/memleak_example.txt @@ -7,7 +7,7 @@ of which call stacks performed allocations that weren't subsequently freed. For example: # ./memleak -p $(pidof allocs) -Attaching to malloc and free in pid 5193, Ctrl+C to quit. +Attaching to pid 5193, Ctrl+C to quit. [11:16:33] Top 2 stacks with outstanding allocations: 80 bytes in 5 allocations from stack main+0x6d [allocs] @@ -33,7 +33,7 @@ stack is allocating various sizes and you want to confirm which sizes are prevalent. 
Use the -a switch: # ./memleak -p $(pidof allocs) -a -Attaching to malloc and free in pid 5193, Ctrl+C to quit. +Attaching to pid 5193, Ctrl+C to quit. [11:16:33] Top 2 stacks with outstanding allocations: addr = 948cd0 size = 16 addr = 948d10 size = 16 @@ -59,12 +59,12 @@ Attaching to malloc and free in pid 5193, Ctrl+C to quit. __libc_start_main+0xf0 [libc-2.21.so] -When using the -p switch, memleak traces the allocations of a particular -process. Without this switch, kernel allocations (kmalloc) are traced instead. +When using the -p switch, memleak traces the libc allocations of a particular +process. Without this switch, kernel allocations are traced instead. For example: # ./memleak -Attaching to kmalloc and kfree, Ctrl+C to quit. +Attaching to kernel allocators, Ctrl+C to quit. ... 248 bytes in 4 allocations from stack bpf_prog_load [kernel] @@ -126,7 +126,7 @@ roughly 10% of the allocations and print the outstanding allocations every 5 seconds, 3 times before quitting: # ./memleak -p $(pidof allocs) -s 10 5 3 -Attaching to malloc and free in pid 2614, Ctrl+C to quit. +Attaching to pid 2614, Ctrl+C to quit. [11:16:33] Top 2 stacks with outstanding allocations: 16 bytes in 1 allocations from stack main+0x6d [allocs] @@ -151,13 +151,14 @@ USAGE message: # ./memleak -h usage: memleak.py [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND] - [-s SAMPLE_RATE] [-T TOP] [-z MIN_SIZE] [-Z MAX_SIZE] - [-O OBJ] + [--combined-only] [-s SAMPLE_RATE] [-T TOP] [-z MIN_SIZE] + [-Z MAX_SIZE] [-O OBJ] [interval] [count] Trace outstanding memory allocations that weren't freed. -Supports both user-mode allocations made with malloc/free and kernel-mode -allocations made with kmalloc/kfree. +Supports both user-mode allocations made with libc functions and kernel-mode +allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding +memory release functions. 
positional arguments: interval interval in seconds to print outstanding allocations @@ -175,6 +176,7 @@ optional arguments: milliseconds -c COMMAND, --command COMMAND execute and trace the specified command + --combined-only show combined allocation statistics only -s SAMPLE_RATE, --sample-rate SAMPLE_RATE sample every N-th allocation to decrease the overhead -T TOP, --top TOP display only this many top allocating stacks (by size) @@ -182,7 +184,7 @@ optional arguments: capture only allocations larger than this size -Z MAX_SIZE, --max-size MAX_SIZE capture only allocations smaller than this size - -O OBJ, --obj OBJ attach to malloc & free in the specified object + -O OBJ, --obj OBJ attach to allocator functions in the specified object EXAMPLES: @@ -190,7 +192,7 @@ EXAMPLES: Trace allocations and display a summary of "leaked" (outstanding) allocations every 5 seconds ./memleak -p $(pidof allocs) -t - Trace allocations and display each individual call to malloc/free + Trace allocations and display each individual allocator function call ./memleak -ap $(pidof allocs) 10 Trace allocations and display allocated addresses, sizes, and stacks every 10 seconds for outstanding allocations -- 2.34.1