From 51629abce0e2f9d1376eb0b5070532a2bbec6766 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Tue, 1 Dec 2020 14:45:09 -0800 Subject: [PATCH] [lld-macho] Emit local symbols in symtab; record metadata in LC_DYSYMTAB Symbols of the same type must be laid out contiguously: following ld64's lead, we choose to emit all local symbols first, then external symbols, and finally undefined symbols. For each symbol type, the LC_DYSYMTAB load command will record the range (start index and total number) of those symbols in the symbol table. This work was motivated by the fact that LLDB won't search for debug info if LC_DYSYMTAB says there are no local symbols (since STABS symbols are all local symbols). With this change, LLDB is now able to display the source lines at a given breakpoint when debugging our binaries. Some tests had to be updated due to local symbol names now appearing in `llvm-objdump`'s output. Reviewed By: #lld-macho, smeenai, clayborg Differential Revision: https://reviews.llvm.org/D89285 --- lld/MachO/SyntheticSections.cpp | 72 ++++++++++++++++------ lld/MachO/SyntheticSections.h | 17 +++++- lld/MachO/Writer.cpp | 19 ++++-- lld/test/MachO/stabs.s | 8 +-- lld/test/MachO/subsections-symbol-relocs.s | 4 +- lld/test/MachO/symtab.s | 98 +++++++++++++++++++++++------- lld/test/MachO/tlv.s | 15 +++-- 7 files changed, 173 insertions(+), 60 deletions(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index feaa4b5..56a095c 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -19,6 +19,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" @@ -630,14 +631,35 @@ void SymtabSection::emitFunStabs(Defined *defined) { } void SymtabSection::finalizeContents() { - InputFile *lastFile = nullptr; + // Local symbols aren't in the SymbolTable, so we walk the list of object + // files to gather them. + for (InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast(file)) { + for (Symbol *sym : objFile->symbols) { + // TODO: when we implement -dead_strip, we should filter out symbols + // that belong to dead sections. + if (auto *defined = dyn_cast(sym)) { + if (!defined->isExternal()) { + uint32_t strx = stringTableSection.addString(sym->getName()); + localSymbols.push_back({sym, strx}); + } + } + } + } + } + for (Symbol *sym : symtab->getSymbols()) { - // TODO support other symbol types - if (isa(sym) || sym->isInGot() || sym->isInStubs()) { - sym->symtabIndex = symbols.size(); - symbols.push_back({sym, stringTableSection.addString(sym->getName())}); + uint32_t strx = stringTableSection.addString(sym->getName()); + if (auto *defined = dyn_cast(sym)) { + assert(defined->isExternal()); + externalSymbols.push_back({sym, strx}); + } else if (sym->isInGot() || sym->isInStubs()) { + undefinedSymbols.push_back({sym, strx}); } + } + InputFile *lastFile = nullptr; + for (Symbol *sym : symtab->getSymbols()) { // Emit STABS symbols so that dsymutil and/or the debugger can map address // regions in the final binary to the source and object files from which // they originated. @@ -670,13 +692,35 @@ void SymtabSection::finalizeContents() { if (!stabs.empty()) emitEndSourceStab(); + + uint32_t symtabIndex = stabs.size(); + for (const SymtabEntry &entry : + concat(localSymbols, externalSymbols, undefinedSymbols)) { + entry.sym->symtabIndex = symtabIndex++; + } +} + +uint32_t SymtabSection::getNumSymbols() const { + return stabs.size() + localSymbols.size() + externalSymbols.size() + + undefinedSymbols.size(); } void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast(buf); - for (const SymtabEntry &entry : symbols) { + // Emit the stabs entries before the "real" symbols. We cannot emit them + // after as that would render Symbol::symtabIndex inaccurate. + for (const StabsEntry &entry : stabs) { + nList->n_strx = entry.strx; + nList->n_type = entry.type; + nList->n_sect = entry.sect; + nList->n_desc = entry.desc; + nList->n_value = entry.value; + ++nList; + } + + for (const SymtabEntry &entry : concat( + localSymbols, externalSymbols, undefinedSymbols)) { nList->n_strx = entry.strx; - // TODO support other symbol types // TODO populate n_desc with more flags if (auto *defined = dyn_cast(entry.sym)) { if (defined->isAbsolute()) { @@ -684,7 +728,8 @@ void SymtabSection::writeTo(uint8_t *buf) const { nList->n_sect = MachO::NO_SECT; nList->n_value = defined->value; } else { - nList->n_type = MachO::N_EXT | MachO::N_SECT; + nList->n_type = + (defined->isExternal() ? MachO::N_EXT : 0) | MachO::N_SECT; nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol nList->n_value = defined->getVA(); @@ -693,17 +738,6 @@ void SymtabSection::writeTo(uint8_t *buf) const { } ++nList; } - - // Emit the stabs entries after the "real" symbols. We cannot emit them - // before as that would render Symbol::symtabIndex inaccurate. - for (const StabsEntry &entry : stabs) { - nList->n_strx = entry.strx; - nList->n_type = entry.type; - nList->n_sect = entry.sect; - nList->n_desc = entry.desc; - nList->n_value = entry.value; - ++nList; - } } IndirectSymtabSection::IndirectSymtabSection() diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 4a8820a..e52bc48 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -420,11 +420,20 @@ struct StabsEntry { explicit StabsEntry(uint8_t type) : type(type) {} }; +// Symbols of the same type must be laid out contiguously: we choose to emit +// all local symbols first, then external symbols, and finally undefined +// symbols. For each symbol type, the LC_DYSYMTAB load command will record the +// range (start index and total number) of those symbols in the symbol table. class SymtabSection : public LinkEditSection { public: SymtabSection(StringTableSection &); void finalizeContents(); - size_t getNumSymbols() const { return stabs.size() + symbols.size(); } + uint32_t getNumSymbols() const; + uint32_t getNumLocalSymbols() const { + return stabs.size() + localSymbols.size(); + } + uint32_t getNumExternalSymbols() const { return externalSymbols.size(); } + uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } uint64_t getRawSize() const override; void writeTo(uint8_t *buf) const override; @@ -435,8 +444,12 @@ private: void emitFunStabs(Defined *); StringTableSection &stringTableSection; + // STABS symbols are always local symbols, but we represent them with special + // entries because they may use fields like n_sect and n_desc differently. std::vector stabs; - std::vector symbols; + std::vector localSymbols; + std::vector externalSymbols; + std::vector undefinedSymbols; }; // The indirect symbol table is a list of 32-bit integers that serve as indices diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index e9d88af..39de749 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -113,8 +113,10 @@ public: class LCDysymtab : public LoadCommand { public: - LCDysymtab(IndirectSymtabSection *indirectSymtabSection) - : indirectSymtabSection(indirectSymtabSection) {} + LCDysymtab(SymtabSection *symtabSection, + IndirectSymtabSection *indirectSymtabSection) + : symtabSection(symtabSection), + indirectSymtabSection(indirectSymtabSection) {} uint32_t getSize() const override { return sizeof(dysymtab_command); } @@ -122,11 +124,19 @@ public: auto *c = reinterpret_cast(buf); c->cmd = LC_DYSYMTAB; c->cmdsize = getSize(); + + c->ilocalsym = 0; + c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); + c->nextdefsym = symtabSection->getNumExternalSymbols(); + c->iundefsym = c->iextdefsym + c->nextdefsym; + c->nundefsym = symtabSection->getNumUndefinedSymbols(); + c->indirectsymoff = indirectSymtabSection->fileOff; c->nindirectsyms = indirectSymtabSection->getNumSymbols(); } - IndirectSymtabSection *indirectSymtabSection = nullptr; + SymtabSection *symtabSection; + IndirectSymtabSection *indirectSymtabSection; }; class LCSegment : public LoadCommand { @@ -396,7 +406,8 @@ void Writer::createLoadCommands() { in.header->addLoadCommand(make( in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); - in.header->addLoadCommand(make(indirectSymtabSection)); + in.header->addLoadCommand( + make(symtabSection, indirectSymtabSection)); for (StringRef path : config->runtimePaths) in.header->addLoadCommand(make(path)); diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s index 5e85ccc..2c49453 100644 --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -12,18 +12,18 @@ # RUN: cd %t && %lld -lSystem test.o foo.o -o test # RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -# CHECK-DAG: [[#%x, MAIN:]] T _main -# CHECK-DAG: [[#%x, FOO: ]] T _foo # CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp # CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/test.o -# CHECK-NEXT: [[#MAIN]] - 01 0000 FUN _main +# CHECK-NEXT: [[#%x, MAIN:]] - 01 0000 FUN _main # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO # CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp # CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/foo.o -# CHECK-NEXT: [[#FOO]] - 01 0000 FUN _foo +# CHECK-NEXT: [[#%x, FOO:]] - 01 0000 FUN _foo # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO +# CHECK-NEXT: [[#MAIN]] T _main +# CHECK-NEXT: [[#FOO]] T _foo #--- test.s .text diff --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s index bb54705..8010a50 100644 --- a/lld/test/MachO/subsections-symbol-relocs.s +++ b/lld/test/MachO/subsections-symbol-relocs.s @@ -22,7 +22,7 @@ # RUN: %lld -o %t/test-2 %t/test.o -order_file %t/order-file-2 # RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s # CHECK-LABEL: Disassembly of section __TEXT,__text: -# CHECK: <_bar>: +# CHECK: <_ba{{r|z}}>: # CHECK-NEXT: callq {{.*}} <_foo> # CHECK-EMPTY: # CHECK-NEXT: <_qux>: @@ -30,7 +30,7 @@ # CHECK: <_foo>: # CHECK-NEXT: retq # CHECK: <_main>: -# CHECK-NEXT: callq {{.*}} <_bar> +# CHECK-NEXT: callq {{.*}} <_ba{{r|z}}> # CHECK-NEXT: movq $0, %rax # CHECK-NEXT: retq diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index 1e9ec15..22dc80a 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -1,56 +1,108 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: %lld -o %t %t.o -# RUN: llvm-readobj -symbols %t | FileCheck %s +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: %lld -dylib %t/libfoo.o -o %t/libfoo.dylib +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test +# RUN: llvm-readobj --syms --macho-dysymtab %t/test | FileCheck %s # CHECK: Symbols [ # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _main +# CHECK-NEXT: Name: _local (1) +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main (8) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __text (0x1) -# CHECK-NEXT: RefType: +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _external (54) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: bar +# CHECK-NEXT: Name: _external_weak (64) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __text (0x1) -# CHECK-NEXT: RefType: +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x80) # CHECK-NEXT: WeakDef (0x80) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: foo +# CHECK-NEXT: Name: __dyld_private (102) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data -# CHECK-NEXT: RefType: +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: dyld_stub_binder (14) +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _dynamic (79) +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 # CHECK-NEXT: } # CHECK-NEXT: ] +# CHECK-NEXT: Dysymtab { +# CHECK-NEXT: ilocalsym: 0 +# CHECK-NEXT: nlocalsym: 1 +# CHECK-NEXT: iextdefsym: 1 +# CHECK-NEXT: nextdefsym: 4 +# CHECK-NEXT: iundefsym: 5 +# CHECK-NEXT: nundefsym: 2 + +#--- libfoo.s +.globl _dynamic +_dynamic: + +#--- test.s +.globl _main, _external, _external_weak .data -.global foo -foo: - .asciz "Hello world!\n" +_external: + .space 0 +_local: + .space 0 .text -.weak_definition bar -.global bar -.global _main +.weak_definition _external_weak +_external_weak: + .space 0 _main: + callq _dynamic mov $0, %rax ret - -bar: - mov $2, %rax - ret diff --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s index cf472bc..bf0dfcd 100644 --- a/lld/test/MachO/tlv.s +++ b/lld/test/MachO/tlv.s @@ -16,10 +16,13 @@ # CHECK-EMPTY: # CHECK-NEXT: Disassembly of section __DATA,__thread_data: # CHECK-EMPTY: -# CHECK-NEXT: <__thread_data>: -# CHECK-NEXT: ef -# CHECK-NEXT: be ad de be ba -# CHECK-NEXT: fe ca +# CHECK-NEXT: <_foo$tlv$init>: +# CHECK-NEXT: 00 00 +# CHECK-NEXT: 00 00 +# CHECK-EMPTY: +# CHECK-NEXT: <_bar$tlv$init>: +# CHECK-NEXT: 00 00 +# CHECK-NEXT: 00 00 # CHECK-EMPTY: # CHECK-NEXT: Disassembly of section __DATA,__thread_vars: # CHECK-EMPTY: @@ -41,9 +44,9 @@ _main: .section __DATA,__thread_data,thread_local_regular _foo$tlv$init: - .long 0xdeadbeef + .space 4 _bar$tlv$init: - .long 0xcafebabe + .space 4 .section __DATA,__thread_vars,thread_local_variables .globl _foo, _bar -- 2.7.4