From 6d766c8bf9df3c22590a78c77879080736ad55ae Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 8 Feb 2021 12:29:11 -0800 Subject: [PATCH] DebugInfo/Symbolize: Allow STT_NOTYPE/STT_GNU_IFUNC symbols for .symtab symbolization In assembly files, omitting `.type foo,@function` is common. Such functions have type `STT_NOTYPE` and llvm-symbolizer reports `??` for them. An ifunc symbol usually has an associated resolver symbol which is defined at the same address. Returning either one is fine for symbolization. The resolver symbol may not end up in the symbol table if (object file) `.L` is used, or (linked image) .symtab is stripped while .dynsym is retained. This patch allows ELF STT_NOTYPE/STT_GNU_IFUNC symbols for .symtab symbolization. I have left TODOs in the test files for an unimplemented STT_FILE heuristic. Differential Revision: https://reviews.llvm.org/D95916 --- .../DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 33 +++++++++---- llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s | 27 +++++++++++ llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s | 28 +++++++++++ llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s | 19 ++++++++ llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s | 55 ++++++++++++++++++++++ 5 files changed, 154 insertions(+), 8 deletions(-) create mode 100644 llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s create mode 100644 llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s create mode 100644 llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s create mode 100644 llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 93d05e4..b260cd9d 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -149,16 +149,27 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor, uint64_t OpdAddress) { // Avoid adding symbols from 
an unknown/undefined section. - const ObjectFile *Obj = Symbol.getObject(); + const ObjectFile &Obj = *Symbol.getObject(); Expected Sec = Symbol.getSection(); - if (!Sec || (Obj && Obj->section_end() == *Sec)) + if (!Sec || Obj.section_end() == *Sec) return Error::success(); + Expected SymbolTypeOrErr = Symbol.getType(); if (!SymbolTypeOrErr) return SymbolTypeOrErr.takeError(); SymbolRef::Type SymbolType = *SymbolTypeOrErr; - if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) + if (Obj.isELF()) { + // Allow function and data symbols. Additionally allow STT_NOTYPE, which is + // common for functions defined in assembly. + uint8_t Type = ELFSymbolRef(Symbol).getELFType(); + if (Type != ELF::STT_NOTYPE && Type != ELF::STT_FUNC && + Type != ELF::STT_OBJECT && Type != ELF::STT_GNU_IFUNC) + return Error::success(); + } else if (SymbolType != SymbolRef::ST_Function && + SymbolType != SymbolRef::ST_Data) { return Error::success(); + } + Expected SymbolAddressOrErr = Symbol.getAddress(); if (!SymbolAddressOrErr) return SymbolAddressOrErr.takeError(); @@ -186,11 +197,17 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, // Mach-O symbol table names have leading underscore, skip it. if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_') SymbolName = SymbolName.drop_front(); - // FIXME: If a function has alias, there are two entries in symbol table - // with same address size. Make sure we choose the correct one. - auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; - SymbolDesc SD = { SymbolAddress, SymbolSize }; - M.emplace_back(SD, SymbolName); + + SymbolDesc SD = {SymbolAddress, SymbolSize}; + + // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an + // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and + // STT_GNU_IFUNC) as function symbols which can be used to symbolize + // addresses. 
+ if (SymbolType == SymbolRef::ST_Data) + Objects.emplace_back(SD, SymbolName); + else + Functions.emplace_back(SD, SymbolName); return Error::success(); } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s new file mode 100644 index 0000000..485345e --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s @@ -0,0 +1,27 @@ +## When locating a local symbol, we can obtain the filename according to the +## preceding STT_FILE symbol. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 1 2 | FileCheck %s + +## TODO Find the preceding STT_FILE symbol as the filename of a local symbol. +# CHECK: local1 +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: local2 +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: local3 +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +.file "1.c" +local1: + nop + +.file "2.c" +local2: + nop + +.file "3.c" +local3: + nop diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s new file mode 100644 index 0000000..382cbaf8 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s @@ -0,0 +1,28 @@ +## Test we can symbolize STT_GNU_IFUNC symbols. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 1 + +# CHECK: g_ifunc +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: l_ifunc +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## TODO Find the preceding STT_FILE symbol as the filename of l_ifunc. 
+.file "symtab-ifunc.s" + +.Lg_resolver: + ret +.size .Lg_resolver, 1 + +.globl g_ifunc +.set g_ifunc, .Lg_resolver +.type g_ifunc, @gnu_indirect_function + +.Ll_resolver: + ret +.size .Ll_resolver, 1 + +.set l_ifunc, .Ll_resolver +.type l_ifunc, @gnu_indirect_function diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s new file mode 100644 index 0000000..57454c0 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s @@ -0,0 +1,19 @@ +## Ignore STT_SECTION and STT_TLS symbols for .symtab symbolization. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 | FileCheck %s + +# CHECK: b +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +.file "1.c" + +.section a,"a",@progbits +b: + .reloc ., R_X86_64_NONE, a +.section c,"a",@progbits + .reloc ., R_X86_64_NONE, c + +.section .tbss,"awT",@nobits +.globl tls +tls: diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s new file mode 100644 index 0000000..91ce807 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s @@ -0,0 +1,55 @@ +## STT_NOTYPE symbols are common in assembly files. Test we can symbolize them. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t --inlines 0 1 2 3 4 5 6 7 | FileCheck %s +# RUN: llvm-symbolizer --obj=%t --no-inlines 0 1 2 3 4 5 6 7 | FileCheck %s + +# CHECK: _start +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: g_notype +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: g_notype +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## This is a gap. +# CHECK-NEXT: ?? +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +# CHECK-NEXT: l_notype +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## TODO addr2line does not symbolize the last two out-of-bounds addresses. 
+# CHECK-NEXT: l_notype_nosize +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: l_notype_nosize +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: l_notype_nosize +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## TODO Find the preceding STT_FILE symbol as the filename of a local symbol. +.file "symtab-notype.s" + +.globl _start, g_notype +_start: + retq + +g_notype: + nop + nop +.size g_notype, . - g_notype + + nop + +l_notype: + nop +.size l_notype, . - l_notype + +l_notype_nosize: + nop -- 2.7.4