From 3a8bb7cd2c75cc1bb83411c380b41e39003823de Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 10 Apr 2019 10:37:10 +0000 Subject: [PATCH] Discard debuginfo for object files empty after GC Patch by Robert O'Callahan. Rust projects tend to link in all object files from all dependent libraries and rely on --gc-sections to strip unused code and data. Unfortunately --gc-sections doesn't currently strip any debuginfo associated with GC'ed sections, so lld links in the full debuginfo from all dependencies even if almost all that code has been discarded. See https://github.com/rust-lang/rust/issues/56068 for some details. Properly stripping debuginfo for discarded sections would be difficult, but a simple approach that helps significantly is to mark debuginfo sections as live only if their associated object file has at least one live code/data section. This patch does that. In a (contrived but not totally artificial) Rust testcase linked above, it reduces the final binary size from 46MB to 5.1MB. Differential Revision: https://reviews.llvm.org/D54747 llvm-svn: 358069 --- lld/ELF/Driver.cpp | 7 ++----- lld/ELF/InputFiles.h | 4 ++++ lld/ELF/InputSection.cpp | 1 + lld/ELF/InputSection.h | 10 ++++++--- lld/ELF/MarkLive.cpp | 36 ++++++++++++++++++++++---------- lld/test/ELF/linkerscript/comdat-gc.s | 3 +++ lld/test/ELF/linkerscript/debuginfo-gc.s | 14 +++++++++++++ 7 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 lld/test/ELF/linkerscript/debuginfo-gc.s diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 49812f9..57b9146 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1613,11 +1613,8 @@ template void LinkerDriver::link(opt::InputArgList &Args) { // We do not want to emit debug sections if --strip-all // or -strip-debug are given. 
- if (Config->Strip != StripPolicy::None) { - llvm::erase_if(InputSections, [](InputSectionBase *S) { - return S->Name.startswith(".debug") || S->Name.startswith(".zdebug"); - }); - } + if (Config->Strip != StripPolicy::None) + llvm::erase_if(InputSections, [](InputSectionBase *S) { return S->Debug; }); // The Target instance handles target-specific stuff, such as applying // relocations or writing a PLT section. It also contains target-dependent diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 1b9de71..e60b63c 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -234,6 +234,10 @@ public: // but had one or more functions with the no_split_stack attribute. bool SomeNoSplitStack = false; + // True if the file has any live Regular or Merge sections that aren't + // the LSDA section. + bool HasLiveCodeOrData = false; + // Pointer to this input file's .llvm_addrsig section, if it has one. const Elf_Shdr *AddrsigSec = nullptr; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 8acaa2c..58c3513 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -71,6 +71,7 @@ InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags, NumRelocations = 0; AreRelocsRela = false; + Debug = Name.startswith(".debug") || Name.startswith(".zdebug"); // The ELF spec states that a value of 0 means the section has // no alignment constraits. 
+ unsigned Debug : 1; + // Set for sections that should not be folded by ICF. unsigned KeepUnique : 1; @@ -100,8 +103,9 @@ protected: uint64_t Entsize, uint64_t Alignment, uint32_t Type, uint32_t Info, uint32_t Link) : Name(Name), Repl(this), SectionKind(SectionKind), Live(false), - Assigned(false), Bss(false), KeepUnique(false), Alignment(Alignment), - Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), Info(Info) {} + Assigned(false), Bss(false), Debug(false), KeepUnique(false), + Alignment(Alignment), Flags(Flags), Entsize(Entsize), Type(Type), + Link(Link), Info(Info) {} }; // This corresponds to a section of an input file. diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index d040a21..08f26ae 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -47,7 +47,7 @@ public: void run(); private: - void enqueue(InputSectionBase *Sec, uint64_t Offset); + void enqueue(InputSectionBase *Sec, uint64_t Offset, bool IsLSDA); void markSymbol(Symbol *Sym); template @@ -97,7 +97,7 @@ void MarkLive::resolveReloc(InputSectionBase &Sec, RelTy &Rel, Offset += getAddend(Sec, Rel); if (!IsLSDA || !(RelSec->Flags & SHF_EXECINSTR)) - enqueue(RelSec, Offset); + enqueue(RelSec, Offset, IsLSDA); return; } @@ -106,7 +106,7 @@ void MarkLive::resolveReloc(InputSectionBase &Sec, RelTy &Rel, SS->getFile().IsNeeded = true; for (InputSectionBase *Sec : CNamedSections.lookup(Sym.getName())) - enqueue(Sec, 0); + enqueue(Sec, 0, IsLSDA); } // The .eh_frame section is an unfortunate special case. @@ -169,7 +169,8 @@ static bool isReserved(InputSectionBase *Sec) { } template -void MarkLive::enqueue(InputSectionBase *Sec, uint64_t Offset) { +void MarkLive::enqueue(InputSectionBase *Sec, uint64_t Offset, + bool IsLSDA) { // Skip over discarded sections. This in theory shouldn't happen, because // the ELF spec doesn't allow a relocation to point to a deduplicated // COMDAT section directly. Unfortunately this happens in practice (e.g. 
@@ -183,19 +184,26 @@ void MarkLive::enqueue(InputSectionBase *Sec, uint64_t Offset) { if (auto *MS = dyn_cast(Sec)) MS->getSectionPiece(Offset)->Live = true; + InputSection *S = dyn_cast(Sec); + // LSDA does not count as "live code or data" in the object file. + // The section may already have been marked live for LSDA in which + // case we still need to mark the file. + if (S && !IsLSDA && Sec->File) + Sec->getFile()->HasLiveCodeOrData = true; + if (Sec->Live) return; - Sec->Live = true; + Sec->Live = true; // Add input section to the queue. - if (InputSection *S = dyn_cast(Sec)) + if (S) Queue.push_back(S); } template void MarkLive::markSymbol(Symbol *Sym) { if (auto *D = dyn_cast_or_null(Sym)) if (auto *IS = dyn_cast_or_null(D->Section)) - enqueue(IS, D->Value); + enqueue(IS, D->Value, false); } // This is the main function of the garbage collector. @@ -239,7 +247,7 @@ template void MarkLive::run() { continue; if (isReserved(Sec) || Script->shouldKeep(Sec)) { - enqueue(Sec, 0); + enqueue(Sec, 0, false); } else if (isValidCIdentifier(Sec->Name)) { CNamedSections[Saver.save("__start_" + Sec->Name)].push_back(Sec); CNamedSections[Saver.save("__stop_" + Sec->Name)].push_back(Sec); @@ -259,7 +267,7 @@ template void MarkLive::run() { } for (InputSectionBase *IS : Sec.DependentSections) - enqueue(IS, 0); + enqueue(IS, 0, false); } } @@ -285,7 +293,7 @@ template void elf::markLive() { // The -gc-sections option works only for SHF_ALLOC sections // (sections that are memory-mapped at runtime). So we can // unconditionally make non-SHF_ALLOC sections alive except - // SHF_LINK_ORDER and SHT_REL/SHT_RELA sections. + // SHF_LINK_ORDER and SHT_REL/SHT_RELA sections and .debug sections. 
// // Usually, SHF_ALLOC sections are not removed even if they are // unreachable through relocations because reachability is not @@ -306,13 +314,19 @@ template void elf::markLive() { bool IsLinkOrder = (Sec->Flags & SHF_LINK_ORDER); bool IsRel = (Sec->Type == SHT_REL || Sec->Type == SHT_RELA); - if (!IsAlloc && !IsLinkOrder && !IsRel) + if (!IsAlloc && !IsLinkOrder && !IsRel && !Sec->Debug) Sec->Live = true; } // Follow the graph to mark all live sections. MarkLive().run(); + // Mark debug sections as live in any object file that has a live + // Regular or Merge section. + for (InputSectionBase *Sec : InputSections) + if (Sec->Debug && Sec->getFile()->HasLiveCodeOrData) + Sec->Live = true; + // Report garbage-collected sections. if (Config->PrintGcSections) for (InputSectionBase *Sec : InputSections) diff --git a/lld/test/ELF/linkerscript/comdat-gc.s b/lld/test/ELF/linkerscript/comdat-gc.s index 63dcf85..223ce93 100644 --- a/lld/test/ELF/linkerscript/comdat-gc.s +++ b/lld/test/ELF/linkerscript/comdat-gc.s @@ -8,6 +8,9 @@ # GC1: Name: .debug_line +# Add .ctors section so all debuginfo isn't GCed +.section .ctors,"ax",@progbits + .file 1 "test/ELF/linkerscript/comdat_gc.s" .section .text._Z3fooIiEvv,"axG",@progbits,_Z3fooIiEvv,comdat .loc 1 14 diff --git a/lld/test/ELF/linkerscript/debuginfo-gc.s b/lld/test/ELF/linkerscript/debuginfo-gc.s new file mode 100644 index 0000000..a991e5f --- /dev/null +++ b/lld/test/ELF/linkerscript/debuginfo-gc.s @@ -0,0 +1,14 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/comdat-gc.s -o %t1 +# RUN: echo "SECTIONS { .text : { *(.text*) } }" > %t.script +# RUN: ld.lld --gc-sections --script %t.script %t %t1 -o %t2 +# RUN: llvm-readobj -sections -symbols %t2 | FileCheck %s + +# CHECK-NOT: Name: .debug_line + +.file 1 "test/ELF/linkerscript/comdat_gc.s" +.section .text._Z3fooIiEvv,"axG",@progbits,_Z3fooIiEvv,comdat +.loc 1 14 + 
ret -- 2.7.4