From: Rui Ueyama Date: Wed, 17 May 2017 21:36:08 +0000 (+0000) Subject: Re-submit r303225: Garbage collect dllimported symbols. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cd41bc8dec216d909e6edb4357194b4797acfd5c;p=platform%2Fupstream%2Fllvm.git Re-submit r303225: Garbage collect dllimported symbols. This reverts re-submits r303225 which was reverted in r303270 because it broke the sanitizer-windows bot. The reason of the failure is that we were writing dead symbols to the symbol table. I fixed the issue. llvm-svn: 303304 --- diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index df3b6a0..4093b2f 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -325,12 +325,21 @@ void ImportFile::parse() { this->Hdr = Hdr; ExternalName = ExtName; + // Instantiate symbol objects. ImpSym = cast( Symtab->addImportData(ImpName, this)->body()); - if (Hdr->getType() == llvm::COFF::IMPORT_CONST) + + if (Hdr->getType() == llvm::COFF::IMPORT_CONST) { ConstSym = cast(Symtab->addImportData(Name, this)->body()); + // A __imp_ and non-__imp_ symbols for the same dllimport'ed symbol + // should be gc'ed as a group. Add a bidirectional edge. + // Used by MarkLive.cpp. + ImpSym->Sibling = ConstSym; + ConstSym->Sibling = ImpSym; + } + // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp index 0156d23..4c53c9a 100644 --- a/lld/COFF/MarkLive.cpp +++ b/lld/COFF/MarkLive.cpp @@ -15,16 +15,16 @@ namespace lld { namespace coff { -// Set live bit on for each reachable chunk. Unmarked (unreachable) -// COMDAT chunks will be ignored by Writer, so they will be excluded -// from the final output. +// Set live bit for each reachable COMDAT chunk and dllimported symbol. +// Unmarked (or unreachable) chunks or symbols are ignored by Writer, +// so they are not included in the final output. void markLive(const std::vector &Chunks) { // We build up a worklist of sections which have been marked as live. We only // push into the worklist when we discover an unmarked section, and we mark // as we push, so sections never appear twice in the list. SmallVector Worklist; - // COMDAT section chunks are dead by default. Add non-COMDAT chunks. + // Non-COMDAT chunks are never be gc'ed, so they are gc-root. for (Chunk *C : Chunks) if (auto *SC = dyn_cast(C)) if (SC->isLive()) @@ -37,19 +37,37 @@ void markLive(const std::vector &Chunks) { Worklist.push_back(C); }; - // Add GC root chunks. + // Mark a given symbol as reachable. + std::function AddSym = [&](SymbolBody *B) { + if (auto *Sym = dyn_cast(B)) { + Enqueue(Sym->getChunk()); + } else if (auto *Sym = dyn_cast(B)) { + if (Sym->Live) + return; + Sym->Live = true; + if (Sym->Sibling) + Sym->Sibling->Live = true; + } else if (auto *Sym = dyn_cast(B)) { + if (Sym->Live) + return; + Sym->Live = true; + AddSym(Sym->WrappedSym); + } else if (auto *Sym = dyn_cast(B)) { + AddSym(Sym->WrappedSym); + } + }; + + // Add gc-root symbols. for (SymbolBody *B : Config->GCRoot) - if (auto *D = dyn_cast(B)) - Enqueue(D->getChunk()); + AddSym(B); while (!Worklist.empty()) { SectionChunk *SC = Worklist.pop_back_val(); assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. - for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast(S)) - Enqueue(D->getChunk()); + for (SymbolBody *B : SC->symbols()) + AddSym(B); // Mark associative sections if any. for (SectionChunk *C : SC->children()) diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 993e920..f63c381 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -63,7 +63,8 @@ COFFSymbolRef DefinedCOFF::getCOFFSymbol() { DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine) - : Defined(DefinedImportThunkKind, Name) { + : Defined(DefinedImportThunkKind, Name), Live(!Config->DoGC), + WrappedSym(S) { switch (Machine) { case AMD64: Data = make(S); return; case I386: Data = make(S); return; diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 1b83f73..141b40f 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -287,19 +287,29 @@ public: class DefinedImportData : public Defined { public: DefinedImportData(StringRef N, ImportFile *F) - : Defined(DefinedImportDataKind, N), File(F) { - } + : Defined(DefinedImportDataKind, N), Live(!Config->DoGC), File(F) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } uint64_t getRVA() { return File->Location->getRVA(); } + StringRef getDLLName() { return File->DLLName; } StringRef getExternalName() { return File->ExternalName; } void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } + // If all sections referring a dllimported symbol become dead by gc, + // we want to kill the symbol as well, so that a resulting binary has + // fewer number of dependencies to DLLs. "Live" bit manages reachability. + bool Live; + + // For a dllimported data symbol, we create two symbols. + // They should be considered as a unit by gc. This pointer points + // to the other symbol. + DefinedImportData *Sibling = nullptr; + private: ImportFile *File; }; @@ -320,6 +330,10 @@ public: uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } + // For GC. + bool Live; + DefinedImportData *WrappedSym; + private: Chunk *Data; }; @@ -332,7 +346,8 @@ private: class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) - : Defined(DefinedLocalImportKind, N), Data(make(S)) {} + : Defined(DefinedLocalImportKind, N), WrappedSym(S), + Data(make(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; @@ -341,6 +356,9 @@ public: uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } + // For GC. + Defined *WrappedSym; + private: LocalImportChunk *Data; }; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 5c9c837..6b1f115 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -376,18 +376,24 @@ void Writer::createImportTables() { OutputSection *Text = createSection(".text"); for (ImportFile *File : Symtab->ImportFiles) { if (DefinedImportThunk *Thunk = File->ThunkSym) - Text->addChunk(Thunk->getChunk()); + if (Thunk->Live) + Text->addChunk(Thunk->getChunk()); + if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { - DelayIdata.add(File->ImpSym); + if (File->ImpSym->Live) + DelayIdata.add(File->ImpSym); } else { - Idata.add(File->ImpSym); + if (File->ImpSym->Live) + Idata.add(File->ImpSym); } } + if (!Idata.empty()) { OutputSection *Sec = createSection(".idata"); for (Chunk *C : Idata.getChunks()) Sec->addChunk(C); } + if (!DelayIdata.empty()) { Defined *Helper = cast(Config->DelayLoadHelper); DelayIdata.create(Helper); @@ -494,14 +500,24 @@ void Writer::createSymbolAndStringTable() { Sec->setStringTableOff(addEntryToStringTable(Name)); } - for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) - for (SymbolBody *B : File->getSymbols()) - if (auto *D = dyn_cast(B)) - if (!D->WrittenToSymtab) { - D->WrittenToSymtab = true; - if (Optional Sym = createSymbol(D)) - OutputSymtab.push_back(*Sym); - } + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { + for (SymbolBody *B : File->getSymbols()) { + auto *D = dyn_cast(B); + if (!D || D->WrittenToSymtab) + continue; + + if (auto *S = dyn_cast(D)) + if (!S->Live) + continue; + if (auto *S = dyn_cast(D)) + if (!S->Live) + continue; + + D->WrittenToSymtab = true; + if (Optional Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); + } + } OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. diff --git a/lld/test/COFF/Inputs/import.yaml b/lld/test/COFF/Inputs/import.yaml index 4934001..b7ae026 100644 --- a/lld/test/COFF/Inputs/import.yaml +++ b/lld/test/COFF/Inputs/import.yaml @@ -7,6 +7,13 @@ sections: Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 4 SectionData: 0000000000000000 + Relocations: + - VirtualAddress: 0 + SymbolName: exportfn1 + Type: IMAGE_REL_AMD64_ADDR32NB + - VirtualAddress: 4 + SymbolName: exportfn2 + Type: IMAGE_REL_AMD64_ADDR32NB symbols: - Name: .text Value: 0 @@ -16,7 +23,7 @@ symbols: StorageClass: IMAGE_SYM_CLASS_STATIC SectionDefinition: Length: 8 - NumberOfRelocations: 0 + NumberOfRelocations: 2 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 diff --git a/lld/test/COFF/dllimport-gc.test b/lld/test/COFF/dllimport-gc.test new file mode 100644 index 0000000..acc604a --- /dev/null +++ b/lld/test/COFF/dllimport-gc.test @@ -0,0 +1,51 @@ +# REQUIRES: winres + +# RUN: yaml2obj < %p/Inputs/export.yaml > %t-lib.obj +# RUN: lld-link /out:%t.dll /dll %t-lib.obj /implib:%t.lib /export:exportfn1 + +# RUN: yaml2obj < %s > %t.obj + +# RUN: lld-link /out:%t1.exe /entry:main %t.obj %t.lib +# RUN: llvm-readobj -coff-imports %t1.exe | FileCheck -check-prefix=REF %s +# REF-NOT: Symbol: exportfn1 + +# RUN: lld-link /out:%t2.exe /entry:main %t.obj %t.lib /opt:noref +# RUN: llvm-readobj -coff-imports %t2.exe | FileCheck -check-prefix=NOREF %s +# NOREF: Symbol: exportfn1 + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: 0000000000000000 + Relocations: +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 2 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: main + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: exportfn1 + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +...