From 01bb02c7e2beecb0b73fd61063f8c38d013745b8 Mon Sep 17 00:00:00 2001 From: Sasha Goldshtein Date: Thu, 23 Feb 2017 10:17:08 +0000 Subject: [PATCH] cc: Handle nested functions correctly when resolving symbols `ProcSyms::Module::find_addr` incorrectly resolves symbols when functions are nested in each other. Specifically, this was discovered with libpthread, where there are multiple symbols for `write`, where `write_nocancel` is strictly nested inside `write`. Fix by explicitly going backward until we reach a matching symbol -- see details in `ProcSyms::Module::find_addr` comments. --- src/cc/bcc_syms.cc | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/cc/bcc_syms.cc b/src/cc/bcc_syms.cc index 54e1c0d..49f33b6 100644 --- a/src/cc/bcc_syms.cc +++ b/src/cc/bcc_syms.cc @@ -190,16 +190,31 @@ bool ProcSyms::Module::find_addr(uint64_t addr, struct bcc_symbol *sym) { sym->offset = offset; auto it = std::upper_bound(syms_.begin(), syms_.end(), Symbol(nullptr, offset, 0)); - if (it != syms_.begin()) - --it; - else - it = syms_.end(); + if (it == syms_.begin()) + return false; - if (it != syms_.end() - && offset >= it->start && offset < it->start + it->size) { - sym->name = it->name->c_str(); - sym->offset = (offset - it->start); - return true; + // 'it' points to the symbol whose start address is strictly greater than + // the address we're looking for. Start stepping backwards as long as the + // current symbol is still below the desired address, and see if the end + // of the current symbol (start + size) is above the desired address. Once + // we have a matching symbol, return it. Note that simply looking at '--it' + // is not enough, because symbols can be nested. For example, we could be + // looking for offset 0x12 with the following symbols available: + // SYMBOL START SIZE END + // goo 0x0 0x6 0x0 + 0x6 = 0x6 + // foo 0x6 0x10 0x6 + 0x10 = 0x16 + // bar 0x8 0x4 0x8 + 0x4 = 0xc + // baz 0x16 0x10 0x16 + 0x10 = 0x26 + // The upper_bound lookup will return baz, and then going one symbol back + // brings us to bar, which does not contain offset 0x12 and is nested inside + // foo. Going back one more symbol brings us to foo, which contains 0x12 + // and is a match. + for (--it; offset >= it->start; --it) { + if (offset < it->start + it->size) { + sym->name = it->name->c_str(); + sym->offset = (offset - it->start); + return true; + } } return false; -- 2.7.4