From 02df7a6cf1874ecbf0f1ee50846cf8cb2e648b63 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 17 May 2017 00:35:50 +0000 Subject: [PATCH] Garbage collect dllimported symbols. Summary: Previously, the garbage collector (enabled by default or by explicitly passing /opt:ref) did not kill dllimported symbols. As a result, dllimported symbols could be added to resulting executables' dllimport list even if no one was actually using them. This patch implements dllexported symbol garbage collection. Just like COMDAT sections, dllimported symbols now have Live bits to manage their liveness, and MarkLive marks reachable dllimported symbols. Fixes https://bugs.llvm.org/show_bug.cgi?id=32950 Reviewers: pcc Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D33264 llvm-svn: 303225 --- lld/COFF/InputFiles.cpp | 11 ++++++++- lld/COFF/MarkLive.cpp | 38 ++++++++++++++++++++++-------- lld/COFF/Symbols.cpp | 3 ++- lld/COFF/Symbols.h | 24 ++++++++++++++++--- lld/COFF/Writer.cpp | 12 +++++++--- lld/test/COFF/Inputs/import.yaml | 9 ++++++- lld/test/COFF/dllimport-gc.test | 51 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 lld/test/COFF/dllimport-gc.test diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index df3b6a0..4093b2f 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -325,12 +325,21 @@ void ImportFile::parse() { this->Hdr = Hdr; ExternalName = ExtName; + // Instantiate symbol objects. ImpSym = cast( Symtab->addImportData(ImpName, this)->body()); - if (Hdr->getType() == llvm::COFF::IMPORT_CONST) + + if (Hdr->getType() == llvm::COFF::IMPORT_CONST) { ConstSym = cast(Symtab->addImportData(Name, this)->body()); + // A __imp_ and non-__imp_ symbols for the same dllimport'ed symbol + // should be gc'ed as a group. Add a bidirectional edge. + // Used by MarkLive.cpp. + ImpSym->Sibling = ConstSym; + ConstSym->Sibling = ImpSym; + } + // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp index 0156d23..4c53c9a 100644 --- a/lld/COFF/MarkLive.cpp +++ b/lld/COFF/MarkLive.cpp @@ -15,16 +15,16 @@ namespace lld { namespace coff { -// Set live bit on for each reachable chunk. Unmarked (unreachable) -// COMDAT chunks will be ignored by Writer, so they will be excluded -// from the final output. +// Set live bit for each reachable COMDAT chunk and dllimported symbol. +// Unmarked (or unreachable) chunks or symbols are ignored by Writer, +// so they are not included in the final output. void markLive(const std::vector &Chunks) { // We build up a worklist of sections which have been marked as live. We only // push into the worklist when we discover an unmarked section, and we mark // as we push, so sections never appear twice in the list. SmallVector Worklist; - // COMDAT section chunks are dead by default. Add non-COMDAT chunks. + // Non-COMDAT chunks are never be gc'ed, so they are gc-root. for (Chunk *C : Chunks) if (auto *SC = dyn_cast(C)) if (SC->isLive()) @@ -37,19 +37,37 @@ void markLive(const std::vector &Chunks) { Worklist.push_back(C); }; - // Add GC root chunks. + // Mark a given symbol as reachable. + std::function AddSym = [&](SymbolBody *B) { + if (auto *Sym = dyn_cast(B)) { + Enqueue(Sym->getChunk()); + } else if (auto *Sym = dyn_cast(B)) { + if (Sym->Live) + return; + Sym->Live = true; + if (Sym->Sibling) + Sym->Sibling->Live = true; + } else if (auto *Sym = dyn_cast(B)) { + if (Sym->Live) + return; + Sym->Live = true; + AddSym(Sym->WrappedSym); + } else if (auto *Sym = dyn_cast(B)) { + AddSym(Sym->WrappedSym); + } + }; + + // Add gc-root symbols. for (SymbolBody *B : Config->GCRoot) - if (auto *D = dyn_cast(B)) - Enqueue(D->getChunk()); + AddSym(B); while (!Worklist.empty()) { SectionChunk *SC = Worklist.pop_back_val(); assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. - for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast(S)) - Enqueue(D->getChunk()); + for (SymbolBody *B : SC->symbols()) + AddSym(B); // Mark associative sections if any. for (SectionChunk *C : SC->children()) diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 993e920..f63c381 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -63,7 +63,8 @@ COFFSymbolRef DefinedCOFF::getCOFFSymbol() { DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine) - : Defined(DefinedImportThunkKind, Name) { + : Defined(DefinedImportThunkKind, Name), Live(!Config->DoGC), + WrappedSym(S) { switch (Machine) { case AMD64: Data = make(S); return; case I386: Data = make(S); return; diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 1b83f73..141b40f 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -287,19 +287,29 @@ public: class DefinedImportData : public Defined { public: DefinedImportData(StringRef N, ImportFile *F) - : Defined(DefinedImportDataKind, N), File(F) { - } + : Defined(DefinedImportDataKind, N), Live(!Config->DoGC), File(F) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } uint64_t getRVA() { return File->Location->getRVA(); } + StringRef getDLLName() { return File->DLLName; } StringRef getExternalName() { return File->ExternalName; } void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } + // If all sections referring a dllimported symbol become dead by gc, + // we want to kill the symbol as well, so that a resulting binary has + // fewer number of dependencies to DLLs. "Live" bit manages reachability. + bool Live; + + // For a dllimported data symbol, we create two symbols. + // They should be considered as a unit by gc. This pointer points + // to the other symbol. + DefinedImportData *Sibling = nullptr; + private: ImportFile *File; }; @@ -320,6 +330,10 @@ public: uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } + // For GC. + bool Live; + DefinedImportData *WrappedSym; + private: Chunk *Data; }; @@ -332,7 +346,8 @@ private: class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) - : Defined(DefinedLocalImportKind, N), Data(make(S)) {} + : Defined(DefinedLocalImportKind, N), WrappedSym(S), + Data(make(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; @@ -341,6 +356,9 @@ public: uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } + // For GC. + Defined *WrappedSym; + private: LocalImportChunk *Data; }; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 5c9c837..385af94 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -376,18 +376,24 @@ void Writer::createImportTables() { OutputSection *Text = createSection(".text"); for (ImportFile *File : Symtab->ImportFiles) { if (DefinedImportThunk *Thunk = File->ThunkSym) - Text->addChunk(Thunk->getChunk()); + if (Thunk->Live) + Text->addChunk(Thunk->getChunk()); + if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { - DelayIdata.add(File->ImpSym); + if (File->ImpSym->Live) + DelayIdata.add(File->ImpSym); } else { - Idata.add(File->ImpSym); + if (File->ImpSym->Live) + Idata.add(File->ImpSym); } } + if (!Idata.empty()) { OutputSection *Sec = createSection(".idata"); for (Chunk *C : Idata.getChunks()) Sec->addChunk(C); } + if (!DelayIdata.empty()) { Defined *Helper = cast(Config->DelayLoadHelper); DelayIdata.create(Helper); diff --git a/lld/test/COFF/Inputs/import.yaml b/lld/test/COFF/Inputs/import.yaml index 4934001..88af74a 100644 --- a/lld/test/COFF/Inputs/import.yaml +++ b/lld/test/COFF/Inputs/import.yaml @@ -7,6 +7,13 @@ sections: Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 4 SectionData: 0000000000000000 + Relocations: + - VirtualAddress: 0 + SymbolName: exportfn1 + Type: IMAGE_REL_AMD64_ADDR32NB + - VirtualAddress: 4 + SymbolName: exportfn2 + Type: IMAGE_REL_AMD64_ADDR32NB symbols: - Name: .text Value: 0 @@ -16,7 +23,7 @@ symbols: StorageClass: IMAGE_SYM_CLASS_STATIC SectionDefinition: Length: 8 - NumberOfRelocations: 0 + NumberOfRelocations: 2 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 diff --git a/lld/test/COFF/dllimport-gc.test b/lld/test/COFF/dllimport-gc.test new file mode 100644 index 0000000..acc604a --- /dev/null +++ b/lld/test/COFF/dllimport-gc.test @@ -0,0 +1,51 @@ +# REQUIRES: winres + +# RUN: yaml2obj < %p/Inputs/export.yaml > %t-lib.obj +# RUN: lld-link /out:%t.dll /dll %t-lib.obj /implib:%t.lib /export:exportfn1 + +# RUN: yaml2obj < %s > %t.obj + +# RUN: lld-link /out:%t1.exe /entry:main %t.obj %t.lib +# RUN: llvm-readobj -coff-imports %t1.exe | FileCheck -check-prefix=REF %s +# REF-NOT: Symbol: exportfn1 + +# RUN: lld-link /out:%t2.exe /entry:main %t.obj %t.lib /opt:noref +# RUN: llvm-readobj -coff-imports %t2.exe | FileCheck -check-prefix=NOREF %s +# NOREF: Symbol: exportfn1 + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: 0000000000000000 + Relocations: +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 2 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: main + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: exportfn1 + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... -- 2.7.4