From 79a5e6b1b77efe7770503ebce2a808f0b89d9e50 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 9 Dec 2016 21:55:24 +0000 Subject: [PATCH] COFF: New symbol table design. This ports the ELF linker's symbol table design, introduced in r268178, to the COFF linker. Differential Revision: http://reviews.llvm.org/D21166 llvm-svn: 289280 --- lld/COFF/Chunks.cpp | 4 +- lld/COFF/Chunks.h | 1 - lld/COFF/Config.h | 15 +- lld/COFF/DLL.cpp | 2 +- lld/COFF/Driver.cpp | 14 +- lld/COFF/Driver.h | 3 +- lld/COFF/DriverUtils.cpp | 10 +- lld/COFF/ICF.cpp | 8 +- lld/COFF/InputFiles.cpp | 96 +++++++---- lld/COFF/InputFiles.h | 44 ++--- lld/COFF/MarkLive.cpp | 6 +- lld/COFF/SymbolTable.cpp | 412 +++++++++++++++++++++++--------------------- lld/COFF/SymbolTable.h | 48 ++++-- lld/COFF/Symbols.cpp | 142 +-------------- lld/COFF/Symbols.h | 114 +++++++----- lld/COFF/Writer.cpp | 32 ++-- lld/test/COFF/include2.test | 2 +- lld/test/COFF/order.test | 2 +- lld/test/COFF/symtab.test | 52 ++---- 19 files changed, 465 insertions(+), 542 deletions(-) diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index dc50df7..7f0dfa9 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -150,7 +150,7 @@ void SectionChunk::writeTo(uint8_t *Buf) const { // Apply relocations. for (const coff_relocation &Rel : Relocs) { uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); Defined *Sym = cast(Body); uint64_t P = RVA + Rel.VirtualAddress; switch (Config->Machine) { @@ -203,7 +203,7 @@ void SectionChunk::getBaserels(std::vector *Res) { uint8_t Ty = getBaserelType(Rel); if (Ty == IMAGE_REL_BASED_ABSOLUTE) continue; - SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); if (isa(Body)) continue; Res->emplace_back(RVA + Rel.VirtualAddress, Ty); diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 53ccde3..59e36b8 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -28,7 +28,6 @@ using llvm::object::COFFSymbolRef; using llvm::object::SectionRef; using llvm::object::coff_relocation; using llvm::object::coff_section; -using llvm::sys::fs::file_magic; class Baserel; class Defined; diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 7fd8fec..56b2b51 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -26,7 +26,8 @@ using llvm::StringRef; class DefinedAbsolute; class DefinedRelative; class StringChunk; -class Undefined; +struct Symbol; +class SymbolBody; // Short aliases. static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; @@ -37,7 +38,7 @@ static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386; struct Export { StringRef Name; // N in /export:N or /export:E=N StringRef ExtName; // E in /export:E=N - Undefined *Sym = nullptr; + SymbolBody *Sym = nullptr; uint16_t Ordinal = 0; bool Noname = false; bool Data = false; @@ -76,7 +77,7 @@ struct Configuration { llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; bool Verbose = false; WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; - Undefined *Entry = nullptr; + SymbolBody *Entry = nullptr; bool NoEntry = false; std::string OutputFile; bool DoGC = true; @@ -89,7 +90,7 @@ struct Configuration { StringRef PDBPath; // Symbols in this set are considered as live by the garbage collector. - std::set GCRoot; + std::set GCRoot; std::set NoDefaultLibs; bool NoDefaultLibAll = false; @@ -100,11 +101,11 @@ struct Configuration { std::vector Exports; std::set DelayLoads; std::map DLLOrder; - Undefined *DelayLoadHelper = nullptr; + SymbolBody *DelayLoadHelper = nullptr; // Used for SafeSEH. - DefinedRelative *SEHTable = nullptr; - DefinedAbsolute *SEHCount = nullptr; + Symbol *SEHTable = nullptr; + Symbol *SEHCount = nullptr; // Used for /opt:lldlto=N unsigned LTOOptLevel = 2; diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 9ac370c..f93dc5c 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -324,7 +324,7 @@ public: if (E.ForwardChunk) { write32le(P, E.ForwardChunk->getRVA()); } else { - write32le(P, cast(E.Sym->repl())->getRVA()); + write32le(P, cast(E.Sym)->getRVA()); } } } diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 36b64c8..63841a4 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -207,10 +207,10 @@ void LinkerDriver::addLibSearchPaths() { } } -Undefined *LinkerDriver::addUndefined(StringRef Name) { - Undefined *U = Symtab.addUndefined(Name); - Config->GCRoot.insert(U); - return U; +SymbolBody *LinkerDriver::addUndefined(StringRef Name) { + SymbolBody *B = Symtab.addUndefined(Name); + Config->GCRoot.insert(B); + return B; } // Symbol names are mangled by appending "_" prefix on x86. @@ -232,7 +232,7 @@ StringRef LinkerDriver::findDefaultEntry() { }; for (auto E : Entries) { StringRef Entry = Symtab.findMangle(mangle(E[0])); - if (!Entry.empty() && !isa(Symtab.find(Entry)->Body)) + if (!Entry.empty() && !isa(Symtab.find(Entry)->body())) return mangle(E[1]); } return ""; @@ -715,7 +715,7 @@ void LinkerDriver::link(ArrayRef ArgsArr) { Symbol *Sym = Symtab.find(From); if (!Sym) continue; - if (auto *U = dyn_cast(Sym->Body)) + if (auto *U = dyn_cast(Sym->body())) if (!U->WeakAlias) U->WeakAlias = Symtab.addUndefined(To); } @@ -734,7 +734,7 @@ void LinkerDriver::link(ArrayRef ArgsArr) { Symtab.addCombinedLTOObjects(); // Make sure we have resolved all symbols. - Symtab.reportRemainingUndefines(/*Resolve=*/true); + Symtab.reportRemainingUndefines(); // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h index cd60790..e1edc7d 100644 --- a/lld/COFF/Driver.h +++ b/lld/COFF/Driver.h @@ -59,6 +59,7 @@ private: class LinkerDriver { public: + LinkerDriver() { coff::Symtab = &Symtab; } void link(llvm::ArrayRef Args); // Used by the resolver to parse .drectve section contents. @@ -86,7 +87,7 @@ private: std::vector SearchPaths; std::set VisitedFiles; - Undefined *addUndefined(StringRef Sym); + SymbolBody *addUndefined(StringRef Sym); StringRef mangle(StringRef Sym); // Windows specific -- "main" is not the only main function in Windows. diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index f8572fa..9dc6a80 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -510,13 +510,13 @@ void fixupExports() { } for (Export &E : Config->Exports) { + SymbolBody *Sym = E.Sym; if (!E.ForwardTo.empty()) { E.SymbolName = E.Name; - } else if (Undefined *U = cast_or_null(E.Sym->WeakAlias)) { - E.SymbolName = U->getName(); - } else { - E.SymbolName = E.Sym->getName(); - } + } else if (auto *U = dyn_cast(Sym)) + if (U->WeakAlias) + Sym = U->WeakAlias; + E.SymbolName = Sym->getName(); } for (Export &E : Config->Exports) { diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index da608cf..196fbe2 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -115,8 +115,8 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { R1.VirtualAddress != R2.VirtualAddress) { return false; } - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast(B1)) @@ -141,8 +141,8 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { // Compare relocations. auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { - SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); - SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex); if (B1 == B2) return true; if (auto *D1 = dyn_cast(B1)) diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index d2146c2..37789c9 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -12,7 +12,9 @@ #include "Driver.h" #include "Error.h" #include "InputFiles.h" +#include "SymbolTable.h" #include "Symbols.h" +#include "lld/Support/Memory.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -39,13 +41,13 @@ using namespace llvm::support::endian; using llvm::Triple; using llvm::support::ulittle32_t; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; namespace lld { namespace coff { -int InputFile::NextIndex = 0; LLVMContext BitcodeFile::Context; -std::mutex BitcodeFile::Mu; ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} @@ -53,14 +55,6 @@ void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. File = check(Archive::create(MB), toString(this)); - // Allocate a buffer for Lazy objects. - size_t NumSyms = File->getNumberOfSymbols(); - LazySymbols.reserve(NumSyms); - - // Read the symbol table to construct Lazy objects. - for (const Archive::Symbol &Sym : File->symbols()) - LazySymbols.emplace_back(this, Sym); - // Seen is a map from member files to boolean values. Initially // all members are mapped to false, which indicates all these files // are not read yet. @@ -69,18 +63,22 @@ void ArchiveFile::parse() { Seen[Child.getChildOffset()].clear(); if (Err) fatal(Err, toString(this)); + + // Read the symbol table to construct Lazy objects. + for (const Archive::Symbol &Sym : File->symbols()) + Symtab->addLazy(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. // This function is thread-safe. -MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { +InputFile *ArchiveFile::getMember(const Archive::Symbol *Sym) { const Archive::Child &C = check(Sym->getMember(), "could not get the member for symbol " + Sym->getName()); // Return an empty buffer if we have already returned the same buffer. if (Seen[C.getChildOffset()].test_and_set()) - return MemoryBufferRef(); + return nullptr; MemoryBufferRef MB = check(C.getMemoryBufferRef(), @@ -90,10 +88,21 @@ MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { Driver->Cpio->append(relativeToRoot(check(C.getFullName())), MB.getBuffer()); - return MB; -} + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::coff_import_library) + return make(MB); + + InputFile *Obj; + if (Magic == file_magic::coff_object) + Obj = make(MB); + else if (Magic == file_magic::bitcode) + Obj = make(MB); + else + fatal("unknown file type: " + MB.getBufferIdentifier()); -MutableArrayRef ArchiveFile::getLazySymbols() { return LazySymbols; } + Obj->ParentName = getName(); + return Obj; +} void ObjectFile::parse() { // Parse a memory buffer as a COFF file. @@ -167,7 +176,7 @@ void ObjectFile::initializeSymbols() { uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); SymbolBodies.reserve(NumSymbols); SparseSymbolBodies.resize(NumSymbols); - SmallVector, 8> WeakAliases; + SmallVector, 8> WeakAliases; int32_t LastSectionNumber = 0; for (uint32_t I = 0; I < NumSymbols; ++I) { // Get a COFFSymbolRef object. @@ -188,7 +197,7 @@ void ObjectFile::initializeSymbols() { Body = createUndefined(Sym); uint32_t TagIndex = static_cast(AuxP)->TagIndex; - WeakAliases.emplace_back((Undefined *)Body, TagIndex); + WeakAliases.emplace_back(Body, TagIndex); } else { Body = createDefined(Sym, AuxP, IsFirst); } @@ -199,23 +208,30 @@ void ObjectFile::initializeSymbols() { I += Sym.getNumberOfAuxSymbols(); LastSectionNumber = Sym.getSectionNumber(); } - for (auto WeakAlias : WeakAliases) - WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + for (auto WeakAlias : WeakAliases) { + auto *U = dyn_cast(WeakAlias.first); + if (!U) + continue; + // Report an error if two undefined symbols have different weak aliases. + if (U->WeakAlias && U->WeakAlias != SparseSymbolBodies[WeakAlias.second]) + Symtab->reportDuplicate(U->symbol(), this); + U->WeakAlias = SparseSymbolBodies[WeakAlias.second]; + } } -Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { +SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) { StringRef Name; COFFObj->getSymbolName(Sym, Name); - return new (Alloc) Undefined(Name); + return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body(); } -Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, - bool IsFirst) { +SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, + bool IsFirst) { StringRef Name; if (Sym.isCommon()) { auto *C = new (Alloc) CommonChunk(Sym); Chunks.push_back(C); - return new (Alloc) DefinedCommon(this, Sym, C); + return Symtab->addCommon(this, Sym, C)->body(); } if (Sym.isAbsolute()) { COFFObj->getSymbolName(Sym, Name); @@ -228,7 +244,10 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, SEHCompat = true; return nullptr; } - return new (Alloc) DefinedAbsolute(Name, Sym); + if (Sym.isExternal()) + return Symtab->addAbsolute(Name, Sym)->body(); + else + return new (Alloc) DefinedAbsolute(Name, Sym); } int32_t SectionNumber = Sym.getSectionNumber(); if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) @@ -258,7 +277,11 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, SC->Checksum = Aux->CheckSum; } - auto *B = new (Alloc) DefinedRegular(this, Sym, SC); + DefinedRegular *B; + if (Sym.isExternal()) + B = cast(Symtab->addRegular(this, Sym, SC)->body()); + else + B = new (Alloc) DefinedRegular(this, Sym, SC); if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP) SC->setSymbol(B); @@ -320,22 +343,23 @@ void ImportFile::parse() { ExtName = ExtName.substr(0, ExtName.find('@')); break; } - ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr); - SymbolBodies.push_back(ImpSym); + + this->Hdr = Hdr; + ExternalName = ExtName; + + ImpSym = cast( + Symtab->addImportData(ImpName, this)->body()); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (Hdr->getType() != llvm::COFF::IMPORT_CODE) return; - ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine); - SymbolBodies.push_back(ThunkSym); + ThunkSym = cast( + Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body()); } void BitcodeFile::parse() { - // Usually parse() is thread-safe, but bitcode file is an exception. - std::lock_guard Lock(Mu); - Context.enableDebugTypeODRUniquing(); ErrorOr> ModOrErr = LTOModule::createFromBuffer( Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); @@ -350,15 +374,15 @@ void BitcodeFile::parse() { StringRef SymName = Saver.save(M->getSymbolName(I)); int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { - SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + SymbolBodies.push_back(Symtab->addUndefined(SymName, this, false)->body()); } else { bool Replaceable = (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common (Attrs & LTO_SYMBOL_COMDAT) || // comdat (SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external (Attrs & LTO_SYMBOL_ALIAS))); - SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName, - Replaceable)); + SymbolBodies.push_back( + Symtab->addBitcode(this, SymName, Replaceable)->body()); } } diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 14aecd4..3f199bd 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -31,6 +31,7 @@ using llvm::COFF::MachineTypes; using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; +using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; @@ -39,6 +40,7 @@ class DefinedImportData; class DefinedImportThunk; class Lazy; class SectionChunk; +struct Symbol; class SymbolBody; class Undefined; @@ -52,9 +54,6 @@ public: // Returns the filename. StringRef getName() { return MB.getBufferIdentifier(); } - // Returns symbols defined by this file. - virtual std::vector &getSymbols() = 0; - // Reads a file (the constructor doesn't do that). virtual void parse() = 0; @@ -67,14 +66,8 @@ public: // Returns .drectve section contents if exist. StringRef getDirectives() { return StringRef(Directives).trim(); } - // Each file has a unique index. The index number is used to - // resolve ties in symbol resolution. - int Index; - static int NextIndex; - protected: - InputFile(Kind K, MemoryBufferRef M) - : Index(NextIndex++), MB(M), FileKind(K) {} + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} MemoryBufferRef MB; std::string Directives; @@ -90,22 +83,14 @@ public: static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } void parse() override; - // Returns a memory buffer for a given symbol. An empty memory buffer - // is returned if we have already returned the same memory buffer. - // (So that we don't instantiate same members more than once.) - MemoryBufferRef getMember(const Archive::Symbol *Sym); - - llvm::MutableArrayRef getLazySymbols(); - - // All symbols returned by ArchiveFiles are of Lazy type. - std::vector &getSymbols() override { - llvm_unreachable("internal fatal"); - } + // Returns an input file for a given symbol. A null pointer is returned if we + // have already returned the same input file. (So that we don't instantiate + // the same member more than once.) + InputFile *getMember(const Archive::Symbol *Sym); private: std::unique_ptr File; std::string Filename; - std::vector LazySymbols; std::map Seen; }; @@ -118,7 +103,7 @@ public: MachineTypes getMachineType() override; std::vector &getChunks() { return Chunks; } std::vector &getDebugChunks() { return DebugChunks; } - std::vector &getSymbols() override { return SymbolBodies; } + std::vector &getSymbols() { return SymbolBodies; } // Returns a SymbolBody object for the SymbolIndex'th symbol in the // underlying object file. @@ -142,8 +127,8 @@ private: void initializeSymbols(); void initializeSEH(); - Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); - Undefined *createUndefined(COFFSymbolRef Sym); + SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); + SymbolBody *createUndefined(COFFSymbolRef Sym); std::unique_ptr COFFObj; llvm::BumpPtrAllocator Alloc; @@ -181,7 +166,6 @@ public: explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M), StringAlloc(StringAllocAux) {} static bool classof(const InputFile *F) { return F->kind() == ImportKind; } - std::vector &getSymbols() override { return SymbolBodies; } DefinedImportData *ImpSym = nullptr; DefinedImportThunk *ThunkSym = nullptr; @@ -190,10 +174,14 @@ public: private: void parse() override; - std::vector SymbolBodies; llvm::BumpPtrAllocator Alloc; llvm::BumpPtrAllocator StringAllocAux; llvm::StringSaver StringAlloc; + +public: + StringRef ExternalName; + const coff_import_header *Hdr; + Chunk *Location = nullptr; }; // Used for LTO. @@ -201,7 +189,7 @@ class BitcodeFile : public InputFile { public: explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } - std::vector &getSymbols() override { return SymbolBodies; } + std::vector &getSymbols() { return SymbolBodies; } MachineTypes getMachineType() override; std::unique_ptr takeModule() { return std::move(M); } diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp index 0870986..0156d23 100644 --- a/lld/COFF/MarkLive.cpp +++ b/lld/COFF/MarkLive.cpp @@ -38,8 +38,8 @@ void markLive(const std::vector &Chunks) { }; // Add GC root chunks. - for (Undefined *U : Config->GCRoot) - if (auto *D = dyn_cast(U->repl())) + for (SymbolBody *B : Config->GCRoot) + if (auto *D = dyn_cast(B)) Enqueue(D->getChunk()); while (!Worklist.empty()) { @@ -48,7 +48,7 @@ void markLive(const std::vector &Chunks) { // Mark all symbols listed in the relocation table for this section. for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast(S->repl())) + if (auto *D = dyn_cast(S)) Enqueue(D->getChunk()); // Mark associative sections if any. diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 9bc705e7..2f7add2 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -12,7 +12,6 @@ #include "Driver.h" #include "Error.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "lld/Support/Memory.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/legacy/LTOCodeGenerator.h" @@ -25,21 +24,15 @@ using namespace llvm; namespace lld { namespace coff { -void SymbolTable::addFile(InputFile *File) { -#if LLVM_ENABLE_THREADS - std::launch Policy = std::launch::async; -#else - std::launch Policy = std::launch::deferred; -#endif +SymbolTable *Symtab; +void SymbolTable::addFile(InputFile *File) { Files.push_back(File); if (auto *F = dyn_cast(File)) { - ArchiveQueue.push_back( - std::async(Policy, [=]() { F->parse(); return F; })); + ArchiveQueue.push_back(F); return; } - ObjectQueue.push_back( - std::async(Policy, [=]() { File->parse(); return File; })); + ObjectQueue.push_back(File); if (auto *F = dyn_cast(File)) { ObjectFiles.push_back(F); } else if (auto *F = dyn_cast(File)) { @@ -53,7 +46,7 @@ void SymbolTable::step() { if (queueEmpty()) return; readObjects(); - readArchives(); + readArchive(); } void SymbolTable::run() { @@ -61,26 +54,17 @@ void SymbolTable::run() { step(); } -void SymbolTable::readArchives() { +void SymbolTable::readArchive() { if (ArchiveQueue.empty()) return; // Add lazy symbols to the symbol table. Lazy symbols that conflict // with existing undefined symbols are accumulated in LazySyms. - std::vector LazySyms; - for (std::future &Future : ArchiveQueue) { - ArchiveFile *File = Future.get(); - if (Config->Verbose) - outs() << "Reading " << toString(File) << "\n"; - for (Lazy &Sym : File->getLazySymbols()) - addLazy(&Sym, &LazySyms); - } - ArchiveQueue.clear(); - - // Add archive member files to ObjectQueue that should resolve - // existing undefined symbols. - for (Symbol *Sym : LazySyms) - addMemberFile(cast(Sym->Body)); + ArchiveFile *File = ArchiveQueue.front(); + ArchiveQueue.pop_front(); + if (Config->Verbose) + outs() << "Reading " << toString(File) << "\n"; + File->parse(); } void SymbolTable::readObjects() { @@ -90,14 +74,12 @@ void SymbolTable::readObjects() { // Add defined and undefined symbols to the symbol table. std::vector Directives; for (size_t I = 0; I < ObjectQueue.size(); ++I) { - InputFile *File = ObjectQueue[I].get(); + InputFile *File = ObjectQueue[I]; if (Config->Verbose) outs() << "Reading " << toString(File) << "\n"; + File->parse(); // Adding symbols may add more files to ObjectQueue // (but not to ArchiveQueue). - for (SymbolBody *Sym : File->getSymbols()) - if (Sym->isExternal()) - addSymbol(Sym); StringRef S = File->getDirectives(); if (!S.empty()) { Directives.push_back(S); @@ -117,127 +99,239 @@ bool SymbolTable::queueEmpty() { return ArchiveQueue.empty() && ObjectQueue.empty(); } -void SymbolTable::reportRemainingUndefines(bool Resolve) { +void SymbolTable::reportRemainingUndefines() { SmallPtrSet Undefs; for (auto &I : Symtab) { Symbol *Sym = I.second; - auto *Undef = dyn_cast(Sym->Body); + auto *Undef = dyn_cast(Sym->body()); if (!Undef) continue; + if (!Sym->IsUsedInRegularObj) + continue; StringRef Name = Undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *D = Undef->getWeakAlias()) { - if (Resolve) - Sym->Body = D; + // We resolve weak aliases by replacing the alias's SymbolBody with the + // target's SymbolBody. This causes all SymbolBody pointers referring to + // the old symbol to instead refer to the new symbol. However, we can't + // just blindly copy sizeof(Symbol::Body) bytes from D to Sym->Body + // because D may be an internal symbol, and internal symbols are stored as + // "unparented" SymbolBodies. For that reason we need to check which type + // of symbol we are dealing with and copy the correct number of bytes. + if (isa(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedRegular)); + else if (isa(D)) + memcpy(Sym->Body.buffer, D, sizeof(DefinedAbsolute)); + else + // No other internal symbols are possible. + Sym->Body = D->symbol()->Body; continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. if (Name.startswith("__imp_")) { Symbol *Imp = find(Name.substr(strlen("__imp_"))); - if (Imp && isa(Imp->Body)) { - if (!Resolve) - continue; - auto *D = cast(Imp->Body); - auto *S = make(Name, D); - LocalImportChunks.push_back(S->getChunk()); - Sym->Body = S; + if (Imp && isa(Imp->body())) { + auto *D = cast(Imp->body()); + replaceBody(Sym, Name, D); + LocalImportChunks.push_back( + cast(Sym->body())->getChunk()); continue; } } // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. - if (Config->Force && Resolve) - Sym->Body = make(Name, 0); - Undefs.insert(Sym->Body); + if (Config->Force) + replaceBody(Sym, Name, 0); + Undefs.insert(Sym->body()); } if (Undefs.empty()) return; - for (Undefined *U : Config->GCRoot) - if (Undefs.count(U->repl())) - errs() << ": undefined symbol: " << U->getName() << "\n"; - for (InputFile *File : Files) - if (!isa(File)) - for (SymbolBody *Sym : File->getSymbols()) - if (Undefs.count(Sym->repl())) - errs() << toString(File) << ": undefined symbol: " << Sym->getName() - << "\n"; + for (SymbolBody *B : Config->GCRoot) + if (Undefs.count(B)) + errs() << ": undefined symbol: " << B->getName() << "\n"; + for (ObjectFile *File : ObjectFiles) + for (SymbolBody *Sym : File->getSymbols()) + if (Undefs.count(Sym)) + errs() << toString(File) << ": undefined symbol: " << Sym->getName() + << "\n"; if (!Config->Force) fatal("link failed"); } -void SymbolTable::addLazy(Lazy *New, std::vector *Accum) { - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - if (isa(Existing)) - return; - if (Lazy *L = dyn_cast(Existing)) - if (L->getFileIndex() < New->getFileIndex()) - return; - Sym->Body = New; - New->setBackref(Sym); - if (isa(Existing)) - Accum->push_back(Sym); +std::pair SymbolTable::insert(StringRef Name) { + Symbol *&Sym = Symtab[Name]; + if (Sym) + return {Sym, false}; + Sym = make(); + Sym->IsUsedInRegularObj = false; + return {Sym, true}; } -void SymbolTable::addSymbol(SymbolBody *New) { - // Find an existing symbol or create and insert a new one. - assert(isa(New) || isa(New)); - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - SymbolBody *Existing = Sym->Body; - - // If we have an undefined symbol and a lazy symbol, - // let the lazy symbol to read a member file. - if (auto *L = dyn_cast(Existing)) { - // Undefined symbols with weak aliases need not to be resolved, - // since they would be replaced with weak aliases if they remain - // undefined. - if (auto *U = dyn_cast(New)) { - if (!U->WeakAlias) { - addMemberFile(L); - return; - } - } - Sym->Body = New; +Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, + bool IsWeakAlias) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (!F || !isa(F)) + S->IsUsedInRegularObj = true; + if (WasInserted || (isa(S->body()) && IsWeakAlias)) { + replaceBody(S, Name); + return S; + } + if (auto *L = dyn_cast(S->body())) + addMemberFile(L->File, L->Sym); + return S; +} + +void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { + StringRef Name = Sym.getName(); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody(S, F, Sym); return; } + auto *U = dyn_cast(S->body()); + if (!U || U->WeakAlias) + return; + addMemberFile(F, Sym); +} + +void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { + fatal("duplicate symbol: " + toString(*Existing->body()) + " in " + + toString(Existing->body()->getFile()) + " and in " + + (NewFile ? toString(NewFile) : "(internal)")); +} + +Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, Sym); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; +} - // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, - // equivalent (conflicting), or more preferable, respectively. - int Comp = Existing->compare(New); - if (Comp == 0) - fatal("duplicate symbol: " + toString(*Existing) + " in " + - toString(Existing->getFile()) + " and in " + - toString(New->getFile())); - if (Comp < 0) - Sym->Body = New; +Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, VA); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; } -Symbol *SymbolTable::insert(SymbolBody *New) { - Symbol *&Sym = Symtab[New->getName()]; - if (Sym) { - New->setBackref(Sym); - return Sym; +Symbol *SymbolTable::addRelative(StringRef N, uint64_t VA) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, VA); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addRegular(ObjectFile *F, COFFSymbolRef Sym, + SectionChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, F, Sym, C); + else if (auto *R = dyn_cast(S->body())) { + if (!C->isCOMDAT() || !R->isCOMDAT()) + reportDuplicate(S, F); + } else if (auto *B = dyn_cast(S->body())) { + if (B->IsReplaceable) + replaceBody(S, F, Sym, C); + else if (!C->isCOMDAT()) + reportDuplicate(S, F); + } else + replaceBody(S, F, Sym, C); + return S; +} + +Symbol *SymbolTable::addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + if (WasInserted || isa(S->body()) || isa(S->body())) { + replaceBody(S, F, N, IsReplaceable); + return S; } - Sym = make(New); - New->setBackref(Sym); - return Sym; + if (isa(S->body())) + return S; + if (IsReplaceable) + if (isa(S->body()) || isa(S->body())) + return S; + reportDuplicate(S, F); + return S; +} + +Symbol *SymbolTable::addCommon(ObjectFile *F, COFFSymbolRef Sym, + CommonChunk *C) { + StringRef Name; + F->getCOFFObj()->getSymbolName(Sym, Name); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || !isa(S->body())) + replaceBody(S, F, Sym, C); + else if (auto *DC = dyn_cast(S->body())) + if (Sym.getValue() > DC->getSize()) + replaceBody(S, F, Sym, C); + return S; +} + +Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(N); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, N, F); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; +} + +Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, + uint16_t Machine) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + S->IsUsedInRegularObj = true; + if (WasInserted || isa(S->body()) || isa(S->body())) + replaceBody(S, Name, ID, Machine); + else if (!isa(S->body())) + reportDuplicate(S, nullptr); + return S; } // Reads an archive member file pointed by a given symbol. -void SymbolTable::addMemberFile(Lazy *Body) { - InputFile *File = Body->getMember(); +void SymbolTable::addMemberFile(ArchiveFile *F, const Archive::Symbol Sym) { + InputFile *File = F->getMember(&Sym); // getMember returns an empty buffer if the member was already // read from the library. if (!File) return; if (Config->Verbose) - outs() << "Loaded " << toString(File) << " for " << Body->getName() << "\n"; + outs() << "Loaded " << toString(File) << " for " << Sym.getName() << "\n"; addFile(File); } @@ -274,7 +368,7 @@ StringRef SymbolTable::findByPrefix(StringRef Prefix) { StringRef SymbolTable::findMangle(StringRef Name) { if (Symbol *Sym = find(Name)) - if (!isa(Sym->Body)) + if (!isa(Sym->body())) return Name; if (Config->Machine != I386) return findByPrefix(("?" + Name + "@@Y").str()); @@ -288,34 +382,17 @@ StringRef SymbolTable::findMangle(StringRef Name) { return findByPrefix(("?" + Name.substr(1) + "@@Y").str()); } -void SymbolTable::mangleMaybe(Undefined *U) { - if (U->WeakAlias) - return; - if (!isa(U->repl())) +void SymbolTable::mangleMaybe(SymbolBody *B) { + auto *U = dyn_cast(B); + if (!U || U->WeakAlias) return; StringRef Alias = findMangle(U->getName()); if (!Alias.empty()) U->WeakAlias = addUndefined(Alias); } -Undefined *SymbolTable::addUndefined(StringRef Name) { - auto *New = make(Name); - addSymbol(New); - if (auto *U = dyn_cast(New->repl())) - return U; - return New; -} - -DefinedRelative *SymbolTable::addRelative(StringRef Name, uint64_t VA) { - auto *New = make(Name, VA); - addSymbol(New); - return New; -} - -DefinedAbsolute *SymbolTable::addAbsolute(StringRef Name, uint64_t VA) { - auto *New = make(Name, VA); - addSymbol(New); - return New; +SymbolBody *SymbolTable::addUndefined(StringRef Name) { + return addUndefined(Name, nullptr, false)->body(); } void SymbolTable::printMap(llvm::raw_ostream &OS) { @@ -329,59 +406,19 @@ void SymbolTable::printMap(llvm::raw_ostream &OS) { } } -void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) { - for (SymbolBody *Body : Obj->getSymbols()) { - if (!Body->isExternal()) - continue; - // We should not see any new undefined symbols at this point, but we'll - // diagnose them later in reportRemainingUndefines(). - StringRef Name = Body->getName(); - Symbol *Sym = insert(Body); - SymbolBody *Existing = Sym->Body; - - if (Existing == Body) - continue; - - if (isa(Existing)) { - Sym->Body = Body; - continue; - } - if (isa(Body)) { - if (auto *L = dyn_cast(Existing)) { - // We may see new references to runtime library symbols such as __chkstk - // here. These symbols must be wholly defined in non-bitcode files. - addMemberFile(L); - continue; - } - } - - int Comp = Existing->compare(Body); - if (Comp == 0) - fatal("LTO: unexpected duplicate symbol: " + Name); - if (Comp < 0) - Sym->Body = Body; - } -} - void SymbolTable::addCombinedLTOObjects() { if (BitcodeFiles.empty()) return; - // Diagnose any undefined symbols early, but do not resolve weak externals, - // as resolution breaks the invariant that each Symbol points to a unique - // SymbolBody, which we rely on to replace DefinedBitcode symbols correctly. - reportRemainingUndefines(/*Resolve=*/false); - // Create an object file and add it to the symbol table by replacing any // DefinedBitcode symbols with the definitions in the object file. LTOCodeGenerator CG(BitcodeFile::Context); CG.setOptLevel(Config->LTOOptLevel); std::vector Objs = createLTOObjects(&CG); - for (ObjectFile *Obj : Objs) - addCombinedLTOObject(Obj); - size_t NumBitcodeFiles = BitcodeFiles.size(); + for (ObjectFile *Obj : Objs) + Obj->parse(); run(); if (BitcodeFiles.size() != NumBitcodeFiles) fatal("LTO: late loaded symbol created new bitcode reference"); @@ -390,25 +427,17 @@ void SymbolTable::addCombinedLTOObjects() { // Combine and compile bitcode files and then return the result // as a vector of regular COFF object files. std::vector SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { - // All symbols referenced by non-bitcode objects must be preserved. - for (ObjectFile *File : ObjectFiles) - for (SymbolBody *Body : File->getSymbols()) - if (auto *S = dyn_cast(Body->repl())) - CG->addMustPreserveSymbol(S->getName()); - - // Likewise for bitcode symbols which we initially resolved to non-bitcode. + // All symbols referenced by non-bitcode objects, including GC roots, must be + // preserved. We must also replace bitcode symbols with undefined symbols so + // that they may be replaced with real definitions without conflicting. for (BitcodeFile *File : BitcodeFiles) - for (SymbolBody *Body : File->getSymbols()) - if (isa(Body) && !isa(Body->repl())) + for (SymbolBody *Body : File->getSymbols()) { + if (!isa(Body)) + continue; + if (Body->symbol()->IsUsedInRegularObj) CG->addMustPreserveSymbol(Body->getName()); - - // Likewise for other symbols that must be preserved. - for (Undefined *U : Config->GCRoot) { - if (auto *S = dyn_cast(U->repl())) - CG->addMustPreserveSymbol(S->getName()); - else if (auto *S = dyn_cast_or_null(U->getWeakAlias())) - CG->addMustPreserveSymbol(S->getName()); - } + replaceBody(Body->symbol(), Body->getName()); + } CG->setModule(BitcodeFiles[0]->takeModule()); for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I) @@ -438,7 +467,6 @@ std::vector SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "")); Files.emplace_back(ObjFile); ObjectFiles.push_back(ObjFile); - ObjFile->parse(); ObjFiles.push_back(ObjFile); } diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 1d11fa7..a7f27af 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -22,6 +22,7 @@ #endif #include +#include namespace llvm { struct LTOCodeGenerator; @@ -31,6 +32,7 @@ namespace lld { namespace coff { class Chunk; +class CommonChunk; class Defined; class DefinedAbsolute; class DefinedRelative; @@ -48,7 +50,9 @@ struct Symbol; // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition -// to replace the lazy symbol. The logic is implemented in resolve(). +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. +// There is one add* function per symbol type. class SymbolTable { public: void addFile(InputFile *File); @@ -57,9 +61,10 @@ public: void run(); bool queueEmpty(); - // Print an error message on undefined symbols. If Resolve is true, try to - // resolve any undefined symbols and update the symbol table accordingly. - void reportRemainingUndefines(bool Resolve); + // Try to resolve any undefined symbols and update the symbol table + // accordingly, then print an error message for any remaining undefined + // symbols. + void reportRemainingUndefines(); // Returns a list of chunks of selected symbols. std::vector getChunks(); @@ -72,7 +77,7 @@ public: // mangled symbol. This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. - void mangleMaybe(Undefined *U); + void mangleMaybe(SymbolBody *B); StringRef findMangle(StringRef Name); // Print a layout map to OS. @@ -91,36 +96,49 @@ public: std::vector ObjectFiles; // Creates an Undefined symbol for a given name. - Undefined *addUndefined(StringRef Name); - DefinedRelative *addRelative(StringRef Name, uint64_t VA); - DefinedAbsolute *addAbsolute(StringRef Name, uint64_t VA); + SymbolBody *addUndefined(StringRef Name); + + Symbol *addRelative(StringRef N, uint64_t VA); + Symbol *addAbsolute(StringRef N, uint64_t VA); + + Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias); + void addLazy(ArchiveFile *F, const Archive::Symbol Sym); + Symbol *addAbsolute(StringRef N, COFFSymbolRef S); + Symbol *addRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C); + Symbol *addBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable); + Symbol *addCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C); + Symbol *addImportData(StringRef N, ImportFile *F); + Symbol *addImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine); + + void reportDuplicate(Symbol *Existing, InputFile *NewFile); // A list of chunks which to be added to .rdata. std::vector LocalImportChunks; private: - void readArchives(); + void readArchive(); void readObjects(); - void addSymbol(SymbolBody *New); - void addLazy(Lazy *New, std::vector *Accum); - Symbol *insert(SymbolBody *New); + std::pair insert(StringRef Name); StringRef findByPrefix(StringRef Prefix); - void addMemberFile(Lazy *Body); + void addMemberFile(ArchiveFile *F, const Archive::Symbol Sym); void addCombinedLTOObject(ObjectFile *Obj); std::vector createLTOObjects(llvm::LTOCodeGenerator *CG); llvm::DenseMap Symtab; std::vector Files; - std::vector> ArchiveQueue; - std::vector> ObjectQueue; + std::list ArchiveQueue; + std::vector ObjectQueue; std::vector BitcodeFiles; std::vector> Objs; }; +extern SymbolTable *Symtab; + } // namespace coff } // namespace lld diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 2987911..d714ac9 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -18,8 +18,6 @@ using namespace llvm; using namespace llvm::object; -using llvm::sys::fs::identify_magic; -using llvm::sys::fs::file_magic; namespace lld { namespace coff { @@ -49,120 +47,6 @@ InputFile *SymbolBody::getFile() { return nullptr; } -// Returns 1, 0 or -1 if this symbol should take precedence -// over the Other, tie or lose, respectively. -int SymbolBody::compare(SymbolBody *Other) { - Kind LK = kind(), RK = Other->kind(); - - // Normalize so that the smaller kind is on the left. - if (LK > RK) - return -Other->compare(this); - - // First handle comparisons between two different kinds. - if (LK != RK) { - if (RK > LastDefinedKind) { - if (LK == LazyKind && cast(Other)->WeakAlias) - return -1; - - // The LHS is either defined or lazy and so it wins. - assert((LK <= LastDefinedKind || LK == LazyKind) && "Bad kind!"); - return 1; - } - - // Bitcode has special complexities. - if (RK == DefinedBitcodeKind) { - auto *RHS = cast(Other); - - switch (LK) { - case DefinedCommonKind: - return 1; - - case DefinedRegularKind: - // As an approximation, regular symbols win over bitcode symbols, - // but we definitely have a conflict if the regular symbol is not - // replaceable and neither is the bitcode symbol. We do not - // replicate the rest of the symbol resolution logic here; symbol - // resolution will be done accurately after lowering bitcode symbols - // to regular symbols in addCombinedLTOObject(). - if (cast(this)->isCOMDAT() || RHS->IsReplaceable) - return 1; - - // Fallthrough to the default of a tie otherwise. - default: - return 0; - } - } - - // Either of the object file kind will trump a higher kind. - if (LK <= LastDefinedCOFFKind) - return 1; - - // The remaining kind pairs are ties amongst defined symbols. - return 0; - } - - // Now handle the case where the kinds are the same. - switch (LK) { - case DefinedRegularKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - if (LHS->isCOMDAT() && RHS->isCOMDAT()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return 0; - } - - case DefinedCommonKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - if (LHS->getSize() == RHS->getSize()) - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - return LHS->getSize() > RHS->getSize() ? 1 : -1; - } - - case DefinedBitcodeKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - // If both are non-replaceable, we have a tie. - if (!LHS->IsReplaceable && !RHS->IsReplaceable) - return 0; - - // Non-replaceable symbols win, but even two replaceable symboles don't - // tie. If both symbols are replaceable, choice is arbitrary. - if (RHS->IsReplaceable && LHS->IsReplaceable) - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - return LHS->IsReplaceable ? -1 : 1; - } - - case LazyKind: { - // Don't tie, pick the earliest. - auto *LHS = cast(this); - auto *RHS = cast(Other); - return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; - } - - case UndefinedKind: { - auto *LHS = cast(this); - auto *RHS = cast(Other); - // Tie if both undefined symbols have different weak aliases. - if (LHS->WeakAlias && RHS->WeakAlias) { - if (LHS->WeakAlias->getName() != RHS->WeakAlias->getName()) - return 0; - return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; - } - return LHS->WeakAlias ? 1 : -1; - } - - case DefinedLocalImportKind: - case DefinedImportThunkKind: - case DefinedImportDataKind: - case DefinedAbsoluteKind: - case DefinedRelativeKind: - // These all simply tie. - return 0; - } - llvm_unreachable("unknown symbol kind"); -} - COFFSymbolRef DefinedCOFF::getCOFFSymbol() { size_t SymSize = File->getCOFFObj()->getSymbolTableEntrySize(); if (SymSize == sizeof(coff_symbol16)) @@ -182,34 +66,10 @@ DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, } } -InputFile *Lazy::getMember() { - MemoryBufferRef MBRef = File->getMember(&Sym); - - // getMember returns an empty buffer if the member was already - // read from the library. - if (MBRef.getBuffer().empty()) - return nullptr; - - file_magic Magic = identify_magic(MBRef.getBuffer()); - if (Magic == file_magic::coff_import_library) - return make(MBRef); - - InputFile *Obj; - if (Magic == file_magic::coff_object) - Obj = make(MBRef); - else if (Magic == file_magic::bitcode) - Obj = make(MBRef); - else - fatal("unknown file type: " + File->getName()); - - Obj->ParentName = File->getName(); - return Obj; -} - Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. for (SymbolBody *A = WeakAlias; A; A = cast(A)->WeakAlias) - if (auto *D = dyn_cast(A->repl())) + if (auto *D = dyn_cast(A)) return D; return nullptr; } diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 1089fdc..240a67a 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -32,15 +32,8 @@ class ArchiveFile; class BitcodeFile; class InputFile; class ObjectFile; -class SymbolBody; - -// A real symbol object, SymbolBody, is usually accessed indirectly -// through a Symbol. There's always one Symbol for each symbol name. -// The resolver updates SymbolBody pointers as it resolves symbols. -struct Symbol { - explicit Symbol(SymbolBody *P) : Body(P) {} - SymbolBody *Body; -}; +struct Symbol; +class SymbolTable; // The base class for real symbol classes. class SymbolBody { @@ -78,21 +71,13 @@ public: // Returns the file from which this symbol was created. InputFile *getFile(); - // A SymbolBody has a backreference to a Symbol. Originally they are - // doubly-linked. A backreference will never change. But the pointer - // in the Symbol may be mutated by the resolver. If you have a - // pointer P to a SymbolBody and are not sure whether the resolver - // has chosen the object among other objects having the same name, - // you can access P->Backref->Body to get the resolver's result. - void setBackref(Symbol *P) { Backref = P; } - SymbolBody *repl() { return Backref ? Backref->Body : this; } - - // Decides which symbol should "win" in the symbol table, this or - // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if - // they are duplicate (conflicting) symbols. - int compare(SymbolBody *Other); + Symbol *symbol(); + const Symbol *symbol() const { + return const_cast(this)->symbol(); + } protected: + friend SymbolTable; explicit SymbolBody(Kind K, StringRef N = "") : SymbolKind(K), IsExternal(true), IsCOMDAT(false), IsReplaceable(false), Name(N) {} @@ -107,7 +92,6 @@ protected: unsigned IsReplaceable : 1; StringRef Name; - Symbol *Backref = nullptr; }; // The base class for any defined symbols, including absolute symbols, @@ -149,7 +133,6 @@ public: } ObjectFile *getFile() { return File; } - int getFileIndex() { return File->Index; } COFFSymbolRef getCOFFSymbol(); @@ -195,7 +178,7 @@ public: uint64_t getRVA() { return Data->getRVA(); } private: - friend SymbolBody; + friend SymbolTable; uint64_t getSize() { return Sym->Value; } CommonChunk *Data; }; @@ -254,15 +237,12 @@ public: static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } - // Returns an object file for this symbol, or a nullptr if the file - // was already returned. - InputFile *getMember(); - - int getFileIndex() { return File->Index; } - ArchiveFile *File; private: + friend SymbolTable; + +private: const Archive::Symbol Sym; }; @@ -295,26 +275,22 @@ public: // table in an output. The former has "__imp_" prefix. class DefinedImportData : public Defined { public: - DefinedImportData(StringRef D, StringRef N, StringRef E, - const coff_import_header *H) - : Defined(DefinedImportDataKind, N), DLLName(D), ExternalName(E), Hdr(H) { + DefinedImportData(StringRef N, ImportFile *F) + : Defined(DefinedImportDataKind, N), File(F) { } static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } - uint64_t getRVA() { return Location->getRVA(); } - StringRef getDLLName() { return DLLName; } - StringRef getExternalName() { return ExternalName; } - void setLocation(Chunk *AddressTable) { Location = AddressTable; } - uint16_t getOrdinal() { return Hdr->OrdinalHint; } + uint64_t getRVA() { return File->Location->getRVA(); } + StringRef getDLLName() { return File->DLLName; } + StringRef getExternalName() { return File->ExternalName; } + void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } + uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } private: - StringRef DLLName; - StringRef ExternalName; - const coff_import_header *Hdr; - Chunk *Location = nullptr; + ImportFile *File; }; // This class represents a symbol for a jump table entry which jumps @@ -345,17 +321,17 @@ private: class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) - : Defined(DefinedLocalImportKind, N), Data(S) {} + : Defined(DefinedLocalImportKind, N), Data(new LocalImportChunk(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; } - uint64_t getRVA() { return Data.getRVA(); } - Chunk *getChunk() { return &Data; } + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data.get(); } private: - LocalImportChunk Data; + std::unique_ptr Data; }; class DefinedBitcode : public Defined { @@ -363,6 +339,11 @@ class DefinedBitcode : public Defined { public: DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) : Defined(DefinedBitcodeKind, N), File(F) { + // IsReplaceable tracks whether the bitcode symbol may be replaced with some + // other (defined, common or bitcode) symbol. This is the case for common, + // comdat and weak external symbols. We try to replace bitcode symbols with + // "real" symbols (see SymbolTable::add{Regular,Bitcode}), and resolve the + // result against the real symbol from the combined LTO object. this->IsReplaceable = IsReplaceable; } @@ -398,6 +379,45 @@ inline uint64_t Defined::getRVA() { llvm_unreachable("unknown symbol kind"); } +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + // True if this symbol was referenced by a regular (non-bitcode) object. + unsigned IsUsedInRegularObj : 1; + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. + llvm::AlignedCharArrayUnion + Body; + + SymbolBody *body() { + return reinterpret_cast(Body.buffer); + } + const SymbolBody *body() const { return const_cast(this)->body(); } +}; + +template +void replaceBody(Symbol *S, ArgT &&... Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(alignof(T) <= alignof(decltype(S->Body)), + "Body not aligned enough"); + assert(static_cast(static_cast(nullptr)) == nullptr && + "Not a SymbolBody"); + new (S->Body.buffer) T(std::forward(Arg)...); +} + +inline Symbol *SymbolBody::symbol() { + assert(isExternal()); + return reinterpret_cast(reinterpret_cast(this) - + offsetof(Symbol, Body)); +} + std::string toString(SymbolBody &B); } // namespace coff diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 4c5eb22..13d95c6 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -388,7 +388,7 @@ void Writer::createMiscChunks() { if (!File->SEHCompat) return; for (SymbolBody *B : File->SEHandlers) - Handlers.insert(cast(B->repl())); + Handlers.insert(cast(B)); } SEHTable.reset(new SEHTableChunk(Handlers)); @@ -428,7 +428,7 @@ void Writer::createImportTables() { Sec->addChunk(C); } if (!DelayIdata.empty()) { - Defined *Helper = cast(Config->DelayLoadHelper->repl()); + Defined *Helper = cast(Config->DelayLoadHelper); DelayIdata.create(Helper); OutputSection *Sec = createSection(".didat"); for (Chunk *C : DelayIdata.getChunks()) @@ -471,6 +471,10 @@ size_t Writer::addEntryToStringTable(StringRef Str) { } Optional Writer::createSymbol(Defined *Def) { + // Relative symbols are unrepresentable in a COFF symbol table. + if (isa(Def)) + return None; + if (auto *D = dyn_cast(Def)) if (!D->getChunk()->isLive()) return None; @@ -497,7 +501,6 @@ Optional Writer::createSymbol(Defined *Def) { switch (Def->kind()) { case SymbolBody::DefinedAbsoluteKind: - case SymbolBody::DefinedRelativeKind: Sym.Value = Def->getRVA(); Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; @@ -530,16 +533,13 @@ void Writer::createSymbolAndStringTable() { Sec->setStringTableOff(addEntryToStringTable(Name)); } + std::set SeenSymbols; for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) for (SymbolBody *B : File->getSymbols()) if (auto *D = dyn_cast(B)) - if (Optional Sym = createSymbol(D)) - OutputSymtab.push_back(*Sym); - - for (ImportFile *File : Symtab->ImportFiles) - for (SymbolBody *B : File->getSymbols()) - if (Optional Sym = createSymbol(cast(B))) - OutputSymtab.push_back(*Sym); + if (SeenSymbols.insert(D).second) + if (Optional Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. @@ -630,7 +630,7 @@ template void Writer::writeHeader() { PE->SizeOfImage = SizeOfImage; PE->SizeOfHeaders = SizeOfHeaders; if (!Config->NoEntry) { - Defined *Entry = cast(Config->Entry->repl()); + Defined *Entry = cast(Config->Entry); PE->AddressOfEntryPoint = Entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. if (Config->Machine == ARMNT) @@ -685,7 +685,7 @@ template void Writer::writeHeader() { Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); } if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { - if (Defined *B = dyn_cast(Sym->Body)) { + if (Defined *B = dyn_cast(Sym->body())) { Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); Dir[TLS_TABLE].Size = Config->is64() ? sizeof(object::coff_tls_directory64) @@ -697,7 +697,7 @@ template void Writer::writeHeader() { Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize(); } if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { - if (auto *B = dyn_cast(Sym->Body)) { + if (auto *B = dyn_cast(Sym->body())) { SectionChunk *SC = B->getChunk(); assert(B->getRVA() >= SC->getRVA()); uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); @@ -754,8 +754,10 @@ void Writer::openFile(StringRef Path) { void Writer::fixSafeSEHSymbols() { if (!SEHTable) return; - Config->SEHTable->setRVA(SEHTable->getRVA()); - Config->SEHCount->setVA(SEHTable->getSize() / 4); + if (auto *T = dyn_cast(Config->SEHTable->body())) + T->setRVA(SEHTable->getRVA()); + if (auto *C = dyn_cast(Config->SEHCount->body())) + C->setVA(SEHTable->getSize() / 4); } // Handles /section options to allow users to overwrite diff --git a/lld/test/COFF/include2.test b/lld/test/COFF/include2.test index f2379ea..528a273 100644 --- a/lld/test/COFF/include2.test +++ b/lld/test/COFF/include2.test @@ -9,6 +9,6 @@ CHECK: include2.test.tmp1.obj CHECK: include2.test.tmp2.lib -CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp2.lib(include2.test.tmp2.obj) for foo +CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp3.lib(include2.test.tmp3.obj) for bar diff --git a/lld/test/COFF/order.test b/lld/test/COFF/order.test index 6a0cee8..0ce638a 100644 --- a/lld/test/COFF/order.test +++ b/lld/test/COFF/order.test @@ -11,5 +11,5 @@ CHECK: order.test.tmp1.obj CHECK: order.test.tmp3.obj CHECK: order.test.tmp2.lib -CHECK: order.test.tmp3.lib CHECK: order.test.tmp2.lib(order.test.tmp2.obj) for foo +CHECK: order.test.tmp3.lib diff --git a/lld/test/COFF/symtab.test b/lld/test/COFF/symtab.test index bf7f996..ffaca28 100644 --- a/lld/test/COFF/symtab.test +++ b/lld/test/COFF/symtab.test @@ -36,17 +36,17 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: message -# CHECK-NEXT: Value: 6 -# CHECK-NEXT: Section: .text2 (3) +# CHECK-NEXT: Name: MessageBoxA +# CHECK-NEXT: Value: 80 +# CHECK-NEXT: Section: .text (2) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: Static (0x3) +# CHECK-NEXT: StorageClass: External (0x2) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: main -# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Name: ExitProcess +# CHECK-NEXT: Value: 64 # CHECK-NEXT: Section: .text (2) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) @@ -54,8 +54,8 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: caption -# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Name: message +# CHECK-NEXT: Value: 6 # CHECK-NEXT: Section: .text2 (3) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) @@ -63,26 +63,8 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: abs_symbol -# CHECK-NEXT: Value: 2662186735 -# CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE (-1) -# CHECK-NEXT: BaseType: Null (0x0) -# CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: External (0x2) -# CHECK-NEXT: AuxSymbolCount: 0 -# CHECK-NEXT: } -# CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: __imp_ExitProcess -# CHECK-NEXT: Value: 64 -# CHECK-NEXT: Section: .idata (5) -# CHECK-NEXT: BaseType: Null (0x0) -# CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: External (0x2) -# CHECK-NEXT: AuxSymbolCount: 0 -# CHECK-NEXT: } -# CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: ExitProcess -# CHECK-NEXT: Value: 64 +# CHECK-NEXT: Name: main +# CHECK-NEXT: Value: 0 # CHECK-NEXT: Section: .text (2) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) @@ -90,18 +72,18 @@ # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: __imp_MessageBoxA -# CHECK-NEXT: Value: 72 -# CHECK-NEXT: Section: .idata (5) +# CHECK-NEXT: Name: caption +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Section: .text2 (3) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) -# CHECK-NEXT: StorageClass: External (0x2) +# CHECK-NEXT: StorageClass: Static (0x3) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: MessageBoxA -# CHECK-NEXT: Value: 80 -# CHECK-NEXT: Section: .text (2) +# CHECK-NEXT: Name: abs_symbol +# CHECK-NEXT: Value: 2662186735 +# CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE (-1) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) # CHECK-NEXT: StorageClass: External (0x2) -- 2.7.4