From 4f0cccdd7a06ff60d3271638f47082b65f3793f1 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Tue, 28 Apr 2020 16:58:19 -0700 Subject: [PATCH] [lld-macho][reland] Add basic symbol table output This diff implements basic support for writing a symbol table. Attributes are loosely supported for extern symbols and not at all for other types. Initial version by Kellie Medlin Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to UBSAN erroring over unaligned writes. That has been fixed in the current diff with the following changes: ``` diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection) : stringTableSection(stringTableSection) { segname = segment_names::linkEdit; name = section_names::symbolTable; + // TODO: When we introduce the SyntheticSections superclass, we should make + // all synthetic sections aligned to WordSize by default. + align = WordSize; } size_t SymtabSection::getSize() const { diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) { ArrayRef sections = p.second; for (InputSection *isec : sections) { addr = alignTo(addr, isec->align); + // We must align the file offsets too to avoid misaligned writes of + // structs. 
+ fileOff = alignTo(fileOff, isec->align); isec->addr = addr; addr += isec->getSize(); fileOff += isec->getFileSize(); @@ -396,6 +397,7 @@ void Writer::writeSections() { uint64_t fileOff = seg->fileOff; for (auto &sect : seg->getSections()) { for (InputSection *isec : sect.second) { + fileOff = alignTo(fileOff, isec->align); isec->writeTo(buf + fileOff); fileOff += isec->getFileSize(); } ``` I don't think it's easy to write a test for alignment (that doesn't involve brittly hard-coding file offsets), so there isn't one... but UBSAN builds pass now. Differential Revision: https://reviews.llvm.org/D79050 --- lld/MachO/SyntheticSections.cpp | 57 +++++++++++++++++++++++++++++++++++++++++ lld/MachO/SyntheticSections.h | 45 ++++++++++++++++++++++++++++++++ lld/MachO/Writer.cpp | 29 +++++++++++++++++++-- lld/test/MachO/symtab.s | 54 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 lld/test/MachO/symtab.s diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index d4af5a7..df963e2 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -9,6 +9,7 @@ #include "SyntheticSections.h" #include "InputFiles.h" #include "OutputSegment.h" +#include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" @@ -128,6 +129,62 @@ void BindingSection::writeTo(uint8_t *buf) { memcpy(buf, contents.data(), contents.size()); } +SymtabSection::SymtabSection(StringTableSection &stringTableSection) + : stringTableSection(stringTableSection) { + segname = segment_names::linkEdit; + name = section_names::symbolTable; + // TODO: When we introduce the SyntheticSections superclass, we should make + // all synthetic sections aligned to WordSize by default. + align = WordSize; +} + +size_t SymtabSection::getSize() const { + return symbols.size() * sizeof(nlist_64); +} + +void SymtabSection::finalizeContents() { + // TODO: We should filter out some symbols. 
+ for (Symbol *sym : symtab->getSymbols()) + symbols.push_back({sym, stringTableSection.addString(sym->getName())}); +} + +void SymtabSection::writeTo(uint8_t *buf) { + auto *nList = reinterpret_cast(buf); + for (const SymtabEntry &entry : symbols) { + // TODO support other symbol types + // TODO populate n_desc + if (auto defined = dyn_cast(entry.sym)) { + nList->n_strx = entry.strx; + nList->n_type = N_EXT | N_SECT; + nList->n_sect = defined->isec->sectionIndex; + // For the N_SECT symbol type, n_value is the address of the symbol + nList->n_value = defined->value + defined->isec->addr; + } + + ++nList; + } +} + +StringTableSection::StringTableSection() { + segname = segment_names::linkEdit; + name = section_names::stringTable; +} + +uint32_t StringTableSection::addString(StringRef str) { + uint32_t strx = size; + strings.push_back(str); + size += str.size() + 1; // account for null terminator + return strx; +} + +void StringTableSection::writeTo(uint8_t *buf) { + uint32_t off = 0; + for (StringRef str : strings) { + memcpy(buf + off, str.data(), str.size()); + off += str.size() + 1; // account for null terminator + } +} + InStruct in; } // namespace macho diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 2adc575..3988772 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -23,6 +23,8 @@ namespace section_names { constexpr const char *pageZero = "__pagezero"; constexpr const char *header = "__mach_header"; constexpr const char *binding = "__binding"; +constexpr const char *symbolTable = "__symbol_table"; +constexpr const char *stringTable = "__string_table"; } // namespace section_names @@ -93,6 +95,49 @@ public: SmallVector contents; }; +// Stores the strings referenced by the symbol table. +class StringTableSection : public InputSection { +public: + StringTableSection(); + // Returns the start offset of the added string. 
+ uint32_t addString(StringRef); + size_t getSize() const override { return size; } + // Like other sections in __LINKEDIT, the string table section is special: its + // offsets are recorded in the LC_SYMTAB load command, instead of in section + // headers. + bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) override; + +private: + // An n_strx value of 0 always indicates the empty string, so we must locate + // our non-empty string values at positive offsets in the string table. + // Therefore we insert a dummy value at position zero. + std::vector strings{"\0"}; + size_t size = 1; +}; + +struct SymtabEntry { + Symbol *sym; + size_t strx; +}; + +class SymtabSection : public InputSection { +public: + SymtabSection(StringTableSection &); + void finalizeContents(); + size_t getNumSymbols() const { return symbols.size(); } + size_t getSize() const override; + // Like other sections in __LINKEDIT, the symtab section is special: its + // offsets are recorded in the LC_SYMTAB load command, instead of in section + // headers. + bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) override; + +private: + StringTableSection &stringTableSection; + std::vector symbols; +}; + struct InStruct { GotSection *got = nullptr; }; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index b6e5ed0..1b8ce5f 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -52,6 +52,8 @@ public: uint64_t fileOff = 0; MachHeaderSection *headerSection = nullptr; BindingSection *bindingSection = nullptr; + SymtabSection *symtabSection = nullptr; + StringTableSection *stringTableSection = nullptr; }; // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. 
@@ -163,13 +165,23 @@ class LCMain : public LoadCommand { class LCSymtab : public LoadCommand { public: + LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) + : symtabSection(symtabSection), stringTableSection(stringTableSection) {} + uint32_t getSize() const override { return sizeof(symtab_command); } void writeTo(uint8_t *buf) const override { auto *c = reinterpret_cast(buf); c->cmd = LC_SYMTAB; c->cmdsize = getSize(); + c->symoff = symtabSection->getFileOffset(); + c->nsyms = symtabSection->getNumSymbols(); + c->stroff = stringTableSection->getFileOffset(); + c->strsize = stringTableSection->getFileSize(); } + + SymtabSection *symtabSection = nullptr; + StringTableSection *stringTableSection = nullptr; }; class LCLoadDylib : public LoadCommand { @@ -238,7 +250,12 @@ public: {defaultPosition, {}}, // Make sure __LINKEDIT is the last segment (i.e. all its hidden // sections must be ordered after other sections). - {segment_names::linkEdit, {section_names::binding}}, + {segment_names::linkEdit, + { + section_names::binding, + section_names::symbolTable, + section_names::stringTable, + }}, }; for (uint32_t i = 0, n = ordering.size(); i < n; ++i) { @@ -294,7 +311,8 @@ void Writer::scanRelocations() { void Writer::createLoadCommands() { headerSection->addLoadCommand(make(bindingSection)); headerSection->addLoadCommand(make()); - headerSection->addLoadCommand(make()); + headerSection->addLoadCommand( + make(symtabSection, stringTableSection)); headerSection->addLoadCommand(make()); headerSection->addLoadCommand(make()); @@ -323,6 +341,8 @@ void Writer::createLoadCommands() { void Writer::createHiddenSections() { headerSection = createInputSection(); bindingSection = createInputSection(); + stringTableSection = createInputSection(); + symtabSection = createInputSection(*stringTableSection); createInputSection(); } @@ -351,6 +371,9 @@ void Writer::assignAddresses(OutputSegment *seg) { ArrayRef sections = p.second; for (InputSection *isec : 
sections) { addr = alignTo(addr, isec->align); + // We must align the file offsets too to avoid misaligned writes of + // structs. + fileOff = alignTo(fileOff, isec->align); isec->addr = addr; addr += isec->getSize(); fileOff += isec->getFileSize(); @@ -376,6 +399,7 @@ void Writer::writeSections() { uint64_t fileOff = seg->fileOff; for (auto &sect : seg->getSections()) { for (InputSection *isec : sect.second) { + fileOff = alignTo(fileOff, isec->align); isec->writeTo(buf + fileOff); fileOff += isec->getFileSize(); } @@ -405,6 +429,7 @@ void Writer::run() { // Fill __LINKEDIT contents. bindingSection->finalizeContents(); + symtabSection->finalizeContents(); // Now that __LINKEDIT is filled out, do a proper calculation of its // addresses and offsets. We don't have to recalculate the other segments diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s new file mode 100644 index 0000000..44a0169 --- /dev/null +++ b/lld/test/MachO/symtab.s @@ -0,0 +1,54 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj -symbols %t | FileCheck %s + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: bar +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: ] + +.data +.global foo +foo: + .asciz "Hello world!\n" + 
+.text +.global bar +.global _main + +_main: + mov $0, %rax + ret + +bar: + mov $2, %rax + ret -- 2.7.4