From a5645513dba702216672bc31333e9c173b3a56c5 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 7 May 2021 17:10:05 -0400 Subject: [PATCH] [lld/mac] Implement -dead_strip Also adds support for live_support sections, no_dead_strip sections, .no_dead_strip symbols. Chromium Framework 345MB unstripped -> 250MB stripped (vs 290MB unstripped -> 236MB stripped with ld64). Doing dead stripping is a bit faster than not, because so much less data needs to be processed: % ministat lld_* x lld_nostrip.txt + lld_strip.txt N Min Max Median Avg Stddev x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794 + 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651 Difference at 95.0% confidence -0.144711 +/- 0.0336749 -3.60967% +/- 0.839989% (Student's t, pooled s = 0.0358398) This interacts with many parts of the linker. I tried to add test coverage for all added `isLive()` checks, so that some test will fail if any of them is removed. I checked that the test expectations for the most part match ld64's behavior (except for live-support-iterations.s, see the comment in the test). Interacts with: - debug info - export tries - import opcodes - flags like -exported_symbol(s_list) - -U / dynamic_lookup - mod_init_funcs, mod_term_funcs - weak symbol handling - unwind info - stubs - map files - -sectcreate - undefined, dylib, common, defined (both absolute and normal) symbols It's possible it interacts with more features I didn't think of, of course. I also did some manual testing: - check-llvm check-clang check-lld work with lld with this patch as host linker and -dead_strip enabled - Chromium still starts - Chromium's base_unittests still pass, including unwind tests Implementation-wise, this is InputSection-based, so it'll work for object files with .subsections_via_symbols (which includes all object files generated by clang). I first based this on the COFF implementation, but later realized that things are more similar to ELF. 
I think it'd be good to refactor MarkLive.cpp to look more like the ELF part at some point, but I'd like to get a working state checked in first. Mechanical parts: - Rename canOmitFromOutput to wasCoalesced (no behavior change) since it really is for weak coalesced symbols - Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP (`.no_dead_strip` in asm) Fixes PR49276. Differential Revision: https://reviews.llvm.org/D103324 --- lld/MachO/CMakeLists.txt | 1 + lld/MachO/ConcatOutputSection.cpp | 3 +- lld/MachO/Config.h | 1 + lld/MachO/Driver.cpp | 12 +- lld/MachO/InputFiles.cpp | 23 +- lld/MachO/InputSection.h | 13 +- lld/MachO/MapFile.cpp | 8 +- lld/MachO/MarkLive.cpp | 189 ++++++ lld/MachO/MarkLive.h | 20 + lld/MachO/Options.td | 1 - lld/MachO/SymbolTable.cpp | 11 +- lld/MachO/SymbolTable.h | 2 +- lld/MachO/Symbols.cpp | 22 + lld/MachO/Symbols.h | 23 +- lld/MachO/SyntheticSections.cpp | 23 +- lld/MachO/SyntheticSections.h | 2 +- lld/MachO/UnwindInfoSection.cpp | 22 +- lld/MachO/UnwindInfoSection.h | 8 + lld/MachO/Writer.cpp | 3 +- .../MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd | 2 +- lld/test/MachO/dead-strip.s | 737 +++++++++++++++++++++ lld/test/MachO/mh-header-link.s | 6 +- lld/test/MachO/sectcreate.s | 23 + llvm/utils/gn/secondary/lld/MachO/BUILD.gn | 1 + 24 files changed, 1098 insertions(+), 58 deletions(-) create mode 100644 lld/MachO/MarkLive.cpp create mode 100644 lld/MachO/MarkLive.h create mode 100644 lld/test/MachO/dead-strip.s diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt index 1c93a88..a805e17 100644 --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -19,6 +19,7 @@ add_lld_library(lldMachO2 InputSection.cpp LTO.cpp MapFile.cpp + MarkLive.cpp ObjC.cpp OutputSection.cpp OutputSegment.cpp diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index fa47ccf..e9e78f9 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -299,7 +299,8 @@ void 
ConcatOutputSection::finalize() { r.referent = thunkInfo.sym = symtab->addDefined( thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0, /*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true, - /*isThumb=*/false, /*isReferencedDynamically=*/false); + /*isThumb=*/false, /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); target->populateThunk(thunkInfo.isec, funcSym); finalizeOne(thunkInfo.isec); thunks.push_back(thunkInfo.isec); diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index da263ec..93e88cf 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -111,6 +111,7 @@ struct Configuration { llvm::StringRef thinLTOJobs; bool deadStripDylibs = false; bool demangle = false; + bool deadStrip = false; PlatformInfo platformInfo; NamespaceKind namespaceKind = NamespaceKind::twolevel; UndefinedSymbolTreatment undefinedSymbolTreatment = diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index b568609..7003533 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -10,6 +10,7 @@ #include "Config.h" #include "InputFiles.h" #include "LTO.h" +#include "MarkLive.h" #include "ObjC.h" #include "OutputSection.h" #include "OutputSegment.h" @@ -541,12 +542,15 @@ static void replaceCommonSymbols() { isec->flags = S_ZEROFILL; inputSections.push_back(isec); + // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip + // and pass them on here. replaceSymbol(sym, sym->getName(), isec->file, isec, /*value=*/0, /*size=*/0, /*isWeakDef=*/false, /*isExternal=*/true, common->privateExtern, /*isThumb=*/false, - /*isReferencedDynamically=*/false); + /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); } } @@ -967,6 +971,9 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, depTracker = make(args.getLastArgValue(OPT_dependency_info)); + // Must be set before any InputSections and Symbols are created. 
+ config->deadStrip = args.hasArg(OPT_dead_strip); + config->systemLibraryRoots = getSystemLibraryRoots(args); if (const char *path = getReproduceOption(args)) { // Note that --reproduce is a debug option so you can ignore it @@ -1285,6 +1292,9 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, } } + if (config->deadStrip) + markLive(); + // Write to an output file. if (target->wordSize == 8) writeResult(); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 6f71d1f..15dffab 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -488,10 +488,10 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name, if (isWeakDefCanBeHidden) isPrivateExtern = true; - return symtab->addDefined(name, isec->file, isec, value, size, - sym.n_desc & N_WEAK_DEF, isPrivateExtern, - sym.n_desc & N_ARM_THUMB_DEF, - sym.n_desc & REFERENCED_DYNAMICALLY); + return symtab->addDefined( + name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, + isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF, + sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP); } assert(!isWeakDefCanBeHidden && @@ -499,7 +499,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name, return make( name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, /*isExternal=*/false, /*isPrivateExtern=*/false, - sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY); + sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY, + sym.n_desc & N_NO_DEAD_STRIP); } // Absolute symbols are defined symbols that do not have an associated @@ -512,13 +513,15 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file, return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0, /*isWeakDef=*/false, sym.n_type & N_PEXT, sym.n_desc & N_ARM_THUMB_DEF, - /*isReferencedDynamically=*/false); + /*isReferencedDynamically=*/false, + sym.n_desc & N_NO_DEAD_STRIP); } return make(name, file, nullptr, sym.n_value, 
/*size=*/0, /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, sym.n_desc & N_ARM_THUMB_DEF, - /*isReferencedDynamically=*/false); + /*isReferencedDynamically=*/false, + sym.n_desc & N_NO_DEAD_STRIP); } template @@ -614,7 +617,7 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, auto *nextIsec = make(*isec); nextIsec->data = isec->data.slice(symbolOffset); nextIsec->numRefs = 0; - nextIsec->canOmitFromOutput = false; + nextIsec->wasCoalesced = false; isec->data = isec->data.slice(0, symbolOffset); // By construction, the symbol will be at offset zero in the new @@ -640,6 +643,7 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, isec->segname = segName.take_front(16); const auto *buf = reinterpret_cast(mb.getBufferStart()); isec->data = {buf, mb.getBufferSize()}; + isec->live = true; subsections.push_back({{0, isec}}); } @@ -1027,7 +1031,8 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym, return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0, /*size=*/0, objSym.isWeak(), isPrivateExtern, /*isThumb=*/false, - /*isReferencedDynamically=*/false); + /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); } BitcodeFile::BitcodeFile(MemoryBufferRef mbref) diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 4655e33..032e9d0 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -9,6 +9,7 @@ #ifndef LLD_MACHO_INPUT_SECTION_H #define LLD_MACHO_INPUT_SECTION_H +#include "Config.h" #include "Relocations.h" #include "lld/Common/LLVM.h" @@ -47,17 +48,17 @@ public: // How many symbols refer to this InputSection. uint32_t numRefs = 0; - // True if this InputSection could not be written to the output file. - // With subsections_via_symbols, most symbol have its own InputSection, + // With subsections_via_symbols, most symbols have their own InputSection, // and for weak symbols (e.g. 
from inline functions), only the // InputSection from one translation unit will make it to the output, // while all copies in other translation units are coalesced into the // first and not copied to the output. - bool canOmitFromOutput = false; + bool wasCoalesced = false; - bool shouldOmitFromOutput() const { - return canOmitFromOutput && numRefs == 0; - } + bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; } + bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } + + bool live = !config->deadStrip; ArrayRef data; std::vector relocs; diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index b52e509..37789e3 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -64,11 +64,9 @@ static std::vector getSymbols() { for (InputFile *file : inputFiles) if (isa(file)) for (Symbol *sym : file->symbols) { - if (sym == nullptr) - continue; - if (auto *d = dyn_cast(sym)) - if (d->isec && d->getFile() == file) { - assert(!d->isec->shouldOmitFromOutput() && + if (auto *d = dyn_cast_or_null(sym)) + if (d->isLive() && d->isec && d->getFile() == file) { + assert(!d->isec->isCoalescedWeak() && "file->symbols should store resolved symbols"); v.push_back(d); } diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp new file mode 100644 index 0000000..b425906 --- /dev/null +++ b/lld/MachO/MarkLive.cpp @@ -0,0 +1,189 @@ +//===- MarkLive.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MarkLive.h" +#include "Config.h" +#include "OutputSegment.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "UnwindInfoSection.h" +#include "mach-o/compact_unwind_encoding.h" +#include "llvm/Support/TimeProfiler.h" + +namespace lld { +namespace macho { + +using namespace llvm; +using namespace llvm::MachO; + +// Set live bit on for each reachable chunk. Unmarked (unreachable) +// InputSections will be ignored by Writer, so they will be excluded +// from the final output. +void markLive() { + TimeTraceScope timeScope("markLive"); + + // We build up a worklist of sections which have been marked as live. We only + // push into the worklist when we discover an unmarked section, and we mark + // as we push, so sections never appear twice in the list. + SmallVector worklist; + + auto enqueue = [&](InputSection *s) { + if (s->live) + return; + s->live = true; + worklist.push_back(s); + }; + + auto addSym = [&](Symbol *s) { + s->used = true; + if (auto *d = dyn_cast(s)) + if (d->isec) + enqueue(d->isec); + }; + + // Add GC roots. + if (config->entry) + addSym(config->entry); + for (Symbol *sym : symtab->getSymbols()) { + if (auto *defined = dyn_cast(sym)) { + // -exported_symbol(s_list) + if (!config->exportedSymbols.empty() && + config->exportedSymbols.match(defined->getName())) { + // FIXME: Instead of doing this here, maybe the Driver code doing + // the matching should add them to explicitUndefineds? Then the + // explicitUndefineds code below would handle this automatically. 
+ assert(!defined->privateExtern && + "should have been rejected by driver"); + addSym(defined); + continue; + } + + // public symbols explicitly marked .no_dead_strip + if (defined->referencedDynamically || defined->noDeadStrip) { + addSym(defined); + continue; + } + + // FIXME: When we implement these flags, make symbols from them GC roots: + // * -reexported_symbol(s_list) + // * -alias(-list) + // * -init + + // In dylibs and bundles, all external functions are GC roots. + // FIXME: -export_dynamic should enable this for executables too. + if (config->outputType != MH_EXECUTE && !defined->privateExtern) { + addSym(defined); + continue; + } + } + } + // -u symbols + for (Symbol *sym : config->explicitUndefineds) + if (auto *defined = dyn_cast(sym)) + addSym(defined); + // local symbols explicitly marked .no_dead_strip + for (const InputFile *file : inputFiles) + if (auto *objFile = dyn_cast(file)) + for (Symbol *sym : objFile->symbols) + if (auto *defined = dyn_cast_or_null(sym)) + if (!defined->isExternal() && defined->noDeadStrip) + addSym(defined); + if (auto *stubBinder = + dyn_cast_or_null(symtab->find("dyld_stub_binder"))) + addSym(stubBinder); + for (InputSection *isec : inputSections) { + // Sections marked no_dead_strip + if (isec->flags & S_ATTR_NO_DEAD_STRIP) { + enqueue(isec); + continue; + } + + // mod_init_funcs, mod_term_funcs sections + if (sectionType(isec->flags) == S_MOD_INIT_FUNC_POINTERS || + sectionType(isec->flags) == S_MOD_TERM_FUNC_POINTERS) { + enqueue(isec); + continue; + } + + // Dead strip runs before UnwindInfoSection handling so we need to keep + // __LD,__compact_unwind alive here. + // But that section contains absolute references to __TEXT,__text and + // keeps most code alive due to that. So we can't just enqueue() the + // section: We must skip the relocations for the functionAddress + // in each CompactUnwindEntry. + // See also scanEhFrameSection() in lld/ELF/MarkLive.cpp. 
+ if (isec->segname == segment_names::ld && + isec->name == section_names::compactUnwind) { + isec->live = true; + const int compactUnwindEntrySize = + target->wordSize == 8 ? sizeof(CompactUnwindEntry) + : sizeof(CompactUnwindEntry); + for (const Reloc &r : isec->relocs) { + // This is the relocation for the address of the function itself. + // Ignore it, else these would keep everything alive. + if (r.offset % compactUnwindEntrySize == 0) + continue; + + if (auto *s = r.referent.dyn_cast()) + addSym(s); + else { + auto *referentIsec = r.referent.get(); + assert(!referentIsec->isCoalescedWeak()); + enqueue(referentIsec); + } + } + continue; + } + } + + do { + // Mark things reachable from GC roots as live. + while (!worklist.empty()) { + InputSection *s = worklist.pop_back_val(); + assert(s->live && "We mark as live when pushing onto the worklist!"); + + // Mark all symbols listed in the relocation table for this section. + for (const Reloc &r : s->relocs) { + if (auto *s = r.referent.dyn_cast()) { + addSym(s); + } else { + auto *referentIsec = r.referent.get(); + assert(!referentIsec->isCoalescedWeak()); + enqueue(referentIsec); + } + } + } + + // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live section. + // Process them in a second pass. + for (InputSection *isec : inputSections) { + // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a + // separate vector and only walking that here is faster. + if (!(isec->flags & S_ATTR_LIVE_SUPPORT) || isec->live) + continue; + + for (const Reloc &r : isec->relocs) { + bool referentLive; + if (auto *s = r.referent.dyn_cast()) + referentLive = s->isLive(); + else + referentLive = r.referent.get()->live; + if (referentLive) + enqueue(isec); + } + } + + // S_ATTR_LIVE_SUPPORT could have marked additional sections live, + // which in turn could mark additional S_ATTR_LIVE_SUPPORT sections live. + // Iterate. 
In practice, the second iteration won't mark additional + // S_ATTR_LIVE_SUPPORT sections live. + } while (!worklist.empty()); +} + +} // namespace macho +} // namespace lld diff --git a/lld/MachO/MarkLive.h b/lld/MachO/MarkLive.h new file mode 100644 index 0000000..4db657c --- /dev/null +++ b/lld/MachO/MarkLive.h @@ -0,0 +1,20 @@ +//===- MarkLive.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_MARKLIVE_H +#define LLD_MACHO_MARKLIVE_H + +namespace lld { +namespace macho { + +void markLive(); + +} // namespace macho +} // namespace lld + +#endif // LLD_MACHO_MARKLIVE_H diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 47c6d99..31c5a44 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -218,7 +218,6 @@ def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; def dead_strip : Flag<["-"], "dead_strip">, HelpText<"Remove unreachable functions and data">, - Flags<[HelpHidden]>, Group; def order_file : Separate<["-"], "order_file">, MetaVarName<"">, diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 1a9cc4e..9851885 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -46,7 +46,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, InputSection *isec, uint64_t value, uint64_t size, bool isWeakDef, bool isPrivateExtern, bool isThumb, - bool isReferencedDynamically) { + bool isReferencedDynamically, + bool noDeadStrip) { Symbol *s; bool wasInserted; bool overridesWeakDef = false; @@ -63,6 +64,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, // If one of them isn't private extern, the merged symbol isn't. 
defined->privateExtern &= isPrivateExtern; defined->referencedDynamically |= isReferencedDynamically; + defined->noDeadStrip |= noDeadStrip; // FIXME: Handle this for bitcode files. // FIXME: We currently only do this if both symbols are weak. @@ -70,7 +72,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, // case where !isWeakDef && defined->isWeakDef() right // requires some care and testing). if (isec) - isec->canOmitFromOutput = true; + isec->wasCoalesced = true; } return defined; @@ -89,7 +91,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, Defined *defined = replaceSymbol( s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, - isPrivateExtern, isThumb, isReferencedDynamically); + isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip); defined->overridesWeakDef = overridesWeakDef; return defined; } @@ -188,7 +190,8 @@ Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, bool referencedDynamically) { Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0, /*isWeakDef=*/false, isPrivateExtern, - /*isThumb=*/false, referencedDynamically); + /*isThumb=*/false, referencedDynamically, + /*noDeadStrip=*/false); s->includeInSymtab = includeInSymtab; return s; } diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h index 462c84b..17f1ecb 100644 --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -40,7 +40,7 @@ public: Defined *addDefined(StringRef name, InputFile *, InputSection *, uint64_t value, uint64_t size, bool isWeakDef, bool isPrivateExtern, bool isThumb, - bool isReferencedDynamically); + bool isReferencedDynamically, bool noDeadStrip); Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef); diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp index 853ec74..1f28ceb 100644 --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -31,7 +31,29 @@ uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); } uint64_t 
Symbol::getGotVA() const { return in.got->getVA(gotIndex); } uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); } +bool Symbol::isLive() const { + if (isa(this) || isa(this)) + return used; + + if (auto *d = dyn_cast(this)) { + // Non-absolute symbols might be alive because their section is + // no_dead_strip or live_support. In that case, the section will know + // that it's live but `used` might be false. Non-absolute symbols always + // have to use the section's `live` bit as source of truth. + return d->isAbsolute() ? used : d->isec->live; + } + + assert(!isa(this) && + "replaceCommonSymbols() runs before dead code stripping, and isLive() " + "should only be called after dead code stripping"); + + // Assume any other kind of symbol is live. + return true; +} + uint64_t Defined::getVA() const { + assert(isLive() && "this should only be called for live symbols"); + if (isAbsolute()) return value; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index 42a9aee..29eaad8 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -51,6 +51,8 @@ public: return {nameData, nameSize}; } + bool isLive() const; + virtual uint64_t getVA() const { return 0; } virtual uint64_t getFileOffset() const { @@ -96,7 +98,8 @@ public: protected: Symbol(Kind k, StringRefZ name, InputFile *file) : symbolKind(k), nameData(name.data), nameSize(name.size), file(file), - isUsedInRegularObj(!file || isa(file)) {} + isUsedInRegularObj(!file || isa(file)), + used(!config->deadStrip) {} Kind symbolKind; const char *nameData; @@ -105,19 +108,22 @@ protected: public: // True if this symbol was referenced by a regular (non-bitcode) object. - bool isUsedInRegularObj; + bool isUsedInRegularObj : 1; + + // True if an undefined or dylib symbol is used from a live section. 
+ bool used : 1; }; class Defined : public Symbol { public: Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, - bool isThumb, bool isReferencedDynamically) + bool isThumb, bool isReferencedDynamically, bool noDeadStrip) : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size), overridesWeakDef(false), privateExtern(isPrivateExtern), includeInSymtab(true), thumb(isThumb), - referencedDynamically(isReferencedDynamically), weakDef(isWeakDef), - external(isExternal) { + referencedDynamically(isReferencedDynamically), + noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) { if (isec) isec->numRefs++; } @@ -156,7 +162,14 @@ public: // symbol table by tools like strip. In theory, this could be set on arbitrary // symbols in input object files. In practice, it's used solely for the // synthetic __mh_execute_header symbol. + // This is information for the static linker, and it's also written to the + // output file's symbol table for tools running later (such as `strip`). bool referencedDynamically : 1; + // Set on symbols that should not be removed by dead code stripping. + // Set for example on `__attribute__((used))` globals, or on some Objective-C + // metadata. This is information only for the static linker and not written + // to the output. + bool noDeadStrip : 1; private: const bool weakDef : 1; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index a0ed37b..8179de7 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -469,11 +469,15 @@ void StubHelperSection::setup() { in.got->addEntry(stubBinder); inputSections.push_back(in.imageLoaderCache); + // Since this isn't in the symbol table or in any input file, the noDeadStrip + // argument doesn't matter. It's kept alive by ImageLoaderCacheSection() + // setting `live` to true on the backing InputSection. 
dyldPrivate = make("__dyld_private", nullptr, in.imageLoaderCache, 0, 0, /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, - /*isThumb=*/false, /*isReferencedDynamically=*/false); + /*isThumb=*/false, /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); } ImageLoaderCacheSection::ImageLoaderCacheSection() { @@ -483,6 +487,7 @@ ImageLoaderCacheSection::ImageLoaderCacheSection() { memset(arr, 0, target->wordSize); data = {arr, target->wordSize}; align = target->wordSize; + live = true; } LazyPointerSection::LazyPointerSection() @@ -571,7 +576,7 @@ void ExportSection::finalizeContents() { trieBuilder.setImageBase(in.header->addr); for (const Symbol *sym : symtab->getSymbols()) { if (const auto *defined = dyn_cast(sym)) { - if (defined->privateExtern) + if (defined->privateExtern || !defined->isLive()) continue; trieBuilder.addSymbol(*defined); hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); @@ -590,7 +595,7 @@ void FunctionStartsSection::finalizeContents() { uint64_t addr = in.header->addr; for (const Symbol *sym : symtab->getSymbols()) { if (const auto *defined = dyn_cast(sym)) { - if (!defined->isec || !isCodeSection(defined->isec)) + if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive()) continue; // TODO: Add support for thumbs, in that case // the lowest bit of nextAddr needs to be set to 1. 
@@ -667,6 +672,8 @@ void SymtabSection::emitStabs() { for (const SymtabEntry &entry : concat(localSymbols, externalSymbols)) { Symbol *sym = entry.sym; + assert(sym->isLive() && + "dead symbols should not be in localSymbols, externalSymbols"); if (auto *defined = dyn_cast(sym)) { if (defined->isAbsolute()) continue; @@ -729,12 +736,8 @@ void SymtabSection::finalizeContents() { for (const InputFile *file : inputFiles) { if (auto *objFile = dyn_cast(file)) { for (Symbol *sym : objFile->symbols) { - if (sym == nullptr) - continue; - // TODO: when we implement -dead_strip, we should filter out symbols - // that belong to dead sections. - if (auto *defined = dyn_cast(sym)) { - if (!defined->isExternal()) { + if (auto *defined = dyn_cast_or_null(sym)) { + if (!defined->isExternal() && defined->isLive()) { StringRef name = defined->getName(); if (!name.startswith("l") && !name.startswith("L")) addSymbol(localSymbols, sym); @@ -750,6 +753,8 @@ void SymtabSection::finalizeContents() { addSymbol(localSymbols, dyldPrivate); for (Symbol *sym : symtab->getSymbols()) { + if (!sym->isLive()) + continue; if (auto *defined = dyn_cast(sym)) { if (!defined->includeInSymtab) continue; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 5778b9e..be77c58 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -220,7 +220,7 @@ struct WeakBindingEntry { // other dylibs should coalesce to. // // 2) Weak bindings: These tell dyld that a given symbol reference should -// coalesce to a non-weak definition if one is found. Note that unlike in the +// coalesce to a non-weak definition if one is found. Note that unlike the // entries in the BindingSection, the bindings here only refer to these // symbols by name, but do not specify which dylib to load them from. 
class WeakBindingSection : public LinkEditSection { diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index f684f76..f629556 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -89,19 +89,11 @@ using namespace lld::macho; // compact_unwind_encoding.h for an overview of the format we are encoding // here. -// TODO(gkm): prune __eh_frame entries superseded by __unwind_info +// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410 // TODO(gkm): how do we align the 2nd-level pages? using EncodingMap = llvm::DenseMap; -template struct CompactUnwindEntry { - Ptr functionAddress; - uint32_t functionLength; - compact_unwind_encoding_t encoding; - Ptr personality; - Ptr lsda; -}; - struct SecondLevelPage { uint32_t kind; size_t entryIndex; @@ -146,6 +138,11 @@ void UnwindInfoSectionImpl::prepareRelocations(InputSection *isec) { assert(!isec->shouldOmitFromOutput() && "__compact_unwind section should not be omitted"); + // FIXME: This could skip relocations for CompactUnwindEntries that + // point to dead-stripped functions. That might save some amount of + // work. But since there are usually just few personality functions + // that are referenced from many places, at least some of them likely + // live, it wouldn't reduce number of got entries. for (Reloc &r : isec->relocs) { assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); if (r.offset % sizeof(CompactUnwindEntry) != @@ -177,17 +174,20 @@ void UnwindInfoSectionImpl::prepareRelocations(InputSection *isec) { } if (auto *referentIsec = r.referent.dyn_cast()) { - assert(!referentIsec->shouldOmitFromOutput()); + assert(!referentIsec->isCoalescedWeak()); // Personality functions can be referenced via section relocations // if they live in the same object file. Create placeholder synthetic // symbols for them in the GOT. 
Symbol *&s = personalityTable[{referentIsec, r.addend}]; if (s == nullptr) { + // This runs after dead stripping, so the noDeadStrip argument does not + // matter. s = make("", /*file=*/nullptr, referentIsec, r.addend, /*size=*/0, /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, - /*isThumb=*/false, /*isReferencedDynamically=*/false); + /*isThumb=*/false, /*isReferencedDynamically=*/false, + /*noDeadStrip=*/false); in.got->addEntry(s); } r.referent = s; diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h index 3f20245..d530cdd 100644 --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -20,6 +20,14 @@ namespace lld { namespace macho { +template struct CompactUnwindEntry { + Ptr functionAddress; + uint32_t functionLength; + compact_unwind_encoding_t encoding; + Ptr personality; + Ptr lsda; +}; + class UnwindInfoSection : public SyntheticSection { public: bool isNeeded() const override { return compactUnwindSection != nullptr; } diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index da2ae69..477ccb7 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -609,9 +609,10 @@ void Writer::scanSymbols() { TimeTraceScope timeScope("Scan symbols"); for (const Symbol *sym : symtab->getSymbols()) { if (const auto *defined = dyn_cast(sym)) { - if (defined->overridesWeakDef) + if (defined->overridesWeakDef && defined->isLive()) in.weakBinding->addNonWeakDefinition(defined); } else if (const auto *dysym = dyn_cast(sym)) { + // This branch intentionally doesn't check isLive(). 
if (dysym->isDynamicLookup()) continue; dysym->getFile()->refState = diff --git a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd index 1be7f3d..2f2e79d0 100644 --- a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd +++ b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libc++abi.tbd @@ -6,5 +6,5 @@ install-name: '/usr/lib/libc++abi.dylib' current-version: 1281 exports: - archs: [ i386, x86_64, arm64 ] - symbols: [ ___gxx_personality_v0 ] + symbols: [ ___cxa_allocate_exception, ___cxa_begin_catch, ___cxa_end_catch, ___cxa_throw, ___gxx_personality_v0, __ZTIi ] ... diff --git a/lld/test/MachO/dead-strip.s b/lld/test/MachO/dead-strip.s new file mode 100644 index 0000000..c117d65 --- /dev/null +++ b/lld/test/MachO/dead-strip.s @@ -0,0 +1,737 @@ +# REQUIRES: x86 + +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/basics.s -o %t/basics.o + +## Check that .private_extern symbols are marked as local in the symbol table +## and aren't in the export trie. +## Dead-stripped symbols should also not be in a map file output. 
+# RUN: %lld -lSystem -dead_strip -map %t/map -u _ref_private_extern_u \ +# RUN: %t/basics.o -o %t/basics +# RUN: llvm-objdump --syms --section-headers %t/basics | \ +# RUN: FileCheck --check-prefix=EXEC --implicit-check-not _unref %s +# RUN: llvm-objdump --macho --section=__DATA,__ref_section \ +# RUN: --exports-trie --indirect-symbols %t/basics | \ +# RUN: FileCheck --check-prefix=EXECDATA --implicit-check-not _unref %s +# RUN: llvm-otool -l %t/basics | grep -q 'segname __PAGEZERO' +# RUN: FileCheck --check-prefix=MAP --implicit-check-not _unref %s < %t/map +# EXEC-LABEL: Sections: +# EXEC-LABEL: Name +# EXEC-NEXT: __text +# EXEC-NEXT: __got +# EXEC-NEXT: __ref_section +# EXEC-NEXT: __common +# EXEC-LABEL: SYMBOL TABLE: +# EXEC-NEXT: l {{.*}} _ref_data +# EXEC-NEXT: l {{.*}} _ref_local +# EXEC-NEXT: l {{.*}} _ref_from_no_dead_strip_globl +# EXEC-NEXT: l {{.*}} _no_dead_strip_local +# EXEC-NEXT: l {{.*}} _ref_from_no_dead_strip_local +# EXEC-NEXT: l {{.*}} _ref_private_extern_u +# EXEC-NEXT: l {{.*}} _main +# EXEC-NEXT: l {{.*}} _ref_private_extern +# EXEC-NEXT: g {{.*}} _ref_com +# EXEC-NEXT: g {{.*}} _no_dead_strip_globl +# EXEC-NEXT: g {{.*}} __mh_execute_header +# EXECDATA-LABEL: Indirect symbols +# EXECDATA-NEXT: name +# EXECDATA-NEXT: _ref_com +# EXECDATA-LABEL: Contents of (__DATA,__ref_section) section +# EXECDATA-NEXT: 04 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 +# EXECDATA-LABEL: Exports trie: +# EXECDATA-NEXT: __mh_execute_header +# EXECDATA-NEXT: _ref_com +# EXECDATA-NEXT: _no_dead_strip_globl +# MAP: _main + +# RUN: %lld -dylib -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib +# RUN: llvm-objdump --syms %t/basics.dylib | \ +# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s +# RUN: %lld -bundle -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib +# RUN: llvm-objdump --syms %t/basics.dylib | \ +# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s +# DYLIB-LABEL: SYMBOL TABLE: +# 
DYLIB-NEXT: l {{.*}} _ref_data +# DYLIB-NEXT: l {{.*}} _ref_local +# DYLIB-NEXT: l {{.*}} _ref_from_no_dead_strip_globl +# DYLIB-NEXT: l {{.*}} _no_dead_strip_local +# DYLIB-NEXT: l {{.*}} _ref_from_no_dead_strip_local +# DYLIB-NEXT: l {{.*}} _ref_private_extern_u +# DYLIB-NEXT: l {{.*}} _ref_private_extern +# DYLIB-NEXT: g {{.*}} _ref_com +# DYLIB-NEXT: g {{.*}} _unref_com +# DYLIB-NEXT: g {{.*}} _unref_extern +# DYLIB-NEXT: g {{.*}} _no_dead_strip_globl + +## Absolute symbol handling. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/abs.s -o %t/abs.o +# RUN: %lld -lSystem -dead_strip %t/abs.o -o %t/abs +# RUN: llvm-objdump --macho --syms --exports-trie %t/abs | \ +# RUN: FileCheck --check-prefix=ABS %s +#ABS-LABEL: SYMBOL TABLE: +#ABS-NEXT: g {{.*}} _main +#ABS-NEXT: g *ABS* _abs1 +#ABS-NEXT: g {{.*}} __mh_execute_header +#ABS-LABEL: Exports trie: +#ABS-NEXT: __mh_execute_header +#ABS-NEXT: _main +#ABS-NEXT: _abs1 [absolute] + +## Check that symbols from -exported_symbol(s_list) are preserved. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/exported-symbol.s -o %t/exported-symbol.o +# RUN: %lld -lSystem -dead_strip -exported_symbol _my_exported_symbol \ +# RUN: %t/exported-symbol.o -o %t/exported-symbol +# RUN: llvm-objdump --syms %t/exported-symbol | \ +# RUN: FileCheck --check-prefix=EXPORTEDSYMBOL --implicit-check-not _unref %s +# EXPORTEDSYMBOL-LABEL: SYMBOL TABLE: +# EXPORTEDSYMBOL-NEXT: l {{.*}} _main +# EXPORTEDSYMBOL-NEXT: l {{.*}} __mh_execute_header +# EXPORTEDSYMBOL-NEXT: g {{.*}} _my_exported_symbol + +## Check that mod_init_funcs and mod_term_funcs are not stripped. 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/mod-funcs.s -o %t/mod-funcs.o +# RUN: %lld -lSystem -dead_strip %t/mod-funcs.o -o %t/mod-funcs +# RUN: llvm-objdump --syms %t/mod-funcs | \ +# RUN: FileCheck --check-prefix=MODFUNCS --implicit-check-not _unref %s +# MODFUNCS-LABEL: SYMBOL TABLE: +# MODFUNCS-NEXT: l {{.*}} _ref_from_init +# MODFUNCS-NEXT: l {{.*}} _ref_init +# MODFUNCS-NEXT: l {{.*}} _ref_from_term +# MODFUNCS-NEXT: l {{.*}} _ref_term +# MODFUNCS-NEXT: g {{.*}} _main +# MODFUNCS-NEXT: g {{.*}} __mh_execute_header + +## Check that DylibSymbols in dead subsections are stripped: They should +## not be in the import table and should have no import stubs. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/dylib.s -o %t/dylib.o +# RUN: %lld -dylib -dead_strip %t/dylib.o -o %t/dylib.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/strip-dylib-ref.s -o %t/strip-dylib-ref.o +# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \ +# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun -U _unref_undef_fun +# RUN: llvm-objdump --syms --bind --lazy-bind --weak-bind %t/strip-dylib-ref | \ +# RUN: FileCheck --check-prefix=STRIPDYLIB --implicit-check-not _unref %s +# STRIPDYLIB: SYMBOL TABLE: +# STRIPDYLIB-NEXT: l {{.*}} __dyld_private +# STRIPDYLIB-NEXT: g {{.*}} _main +# STRIPDYLIB-NEXT: g {{.*}} __mh_execute_header +# STRIPDYLIB-NEXT: *UND* _ref_undef_fun +# STRIPDYLIB-NEXT: *UND* dyld_stub_binder +# STRIPDYLIB-NEXT: *UND* _ref_dylib_fun +# STRIPDYLIB: Bind table: +# STRIPDYLIB: Lazy bind table: +# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} flat-namespace _ref_undef_fun +# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} dylib _ref_dylib_fun +# STRIPDYLIB: Weak bind table: +## Stubs smoke check: There should be two stubs entries, not four, but we +## don't verify that they belong to _ref_undef_fun and _ref_dylib_fun. 
+# RUN: llvm-objdump -d --section=__stubs --section=__stub_helper \ +# RUN: %t/strip-dylib-ref |FileCheck --check-prefix=STUBS %s +# STUBS-LABEL: <__stubs>: +# STUBS-NEXT: jmpq +# STUBS-NEXT: jmpq +# STUBS-NOT: jmpq +# STUBS-LABEL: <__stub_helper>: +# STUBS: pushq $0 +# STUBS: jmp +# STUBS: jmp +# STUBS-NOT: jmp +## An undefined symbol referenced from a dead-stripped function shouldn't +## produce a diagnostic: +# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \ +# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun + +## S_ATTR_LIVE_SUPPORT tests. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/live-support.s -o %t/live-support.o +# RUN: %lld -lSystem -dead_strip %t/live-support.o %t/dylib.dylib \ +# RUN: -U _ref_undef_fun -U _unref_undef_fun -o %t/live-support +# RUN: llvm-objdump --syms %t/live-support | \ +# RUN: FileCheck --check-prefix=LIVESUPP --implicit-check-not _unref %s +# LIVESUPP-LABEL: SYMBOL TABLE: +# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_fw +# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_bw +# LIVESUPP-NEXT: l {{.*}} _ref_ls_dylib_fun +# LIVESUPP-NEXT: l {{.*}} _ref_ls_undef_fun +# LIVESUPP-NEXT: l {{.*}} __dyld_private +# LIVESUPP-NEXT: g {{.*}} _main +# LIVESUPP-NEXT: g {{.*}} _bar +# LIVESUPP-NEXT: g {{.*}} _foo +# LIVESUPP-NEXT: g {{.*}} __mh_execute_header +# LIVESUPP-NEXT: *UND* _ref_undef_fun +# LIVESUPP-NEXT: *UND* dyld_stub_binder +# LIVESUPP-NEXT: *UND* _ref_dylib_fun + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/live-support-iterations.s -o %t/live-support-iterations.o +# RUN: %lld -lSystem -dead_strip %t/live-support-iterations.o \ +# RUN: -o %t/live-support-iterations +# RUN: llvm-objdump --syms %t/live-support-iterations | \ +# RUN: FileCheck --check-prefix=LIVESUPP2 --implicit-check-not _unref %s +# LIVESUPP2-LABEL: SYMBOL TABLE: +# LIVESUPP2-NEXT: l {{.*}} _bar +# LIVESUPP2-NEXT: l {{.*}} _foo_refd +# LIVESUPP2-NEXT: l {{.*}} _bar_refd +# LIVESUPP2-NEXT: l {{.*}} _baz +# 
LIVESUPP2-NEXT: l {{.*}} _baz_refd +# LIVESUPP2-NEXT: l {{.*}} _foo +# LIVESUPP2-NEXT: g {{.*}} _main +# LIVESUPP2-NEXT: g {{.*}} __mh_execute_header + +## Dead stripping should not remove the __TEXT,__unwind_info +## and __TEXT,__gcc_except_tab sections, but it should still +## remove the unreferenced function __Z5unrefv. +## The reference to ___gxx_personality_v0 should also not be +## stripped. +## (Need to use darwin19.0.0 to make -mc emit __LD,__compact_unwind.) +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 \ +# RUN: %t/unwind.s -o %t/unwind.o +# RUN: %lld -lc++ -lSystem -dead_strip %t/unwind.o -o %t/unwind +# RUN: llvm-objdump --syms %t/unwind | \ +# RUN: FileCheck --check-prefix=UNWIND --implicit-check-not unref %s +# RUN: llvm-otool -l %t/unwind | grep -q 'sectname __unwind_info' +# RUN: llvm-otool -l %t/unwind | grep -q 'sectname __gcc_except_tab' +# UNWIND-LABEL: SYMBOL TABLE: +# UNWIND-NEXT: l O __TEXT,__gcc_except_tab GCC_except_table1 +# UNWIND-NEXT: l O __DATA,__data __dyld_private +# UNWIND-NEXT: g F __TEXT,__text _main +# UNWIND-NEXT: g F __TEXT,__text __mh_execute_header +# UNWIND-NEXT: *UND* ___cxa_allocate_exception +# UNWIND-NEXT: *UND* ___cxa_end_catch +# UNWIND-NEXT: *UND* __ZTIi +# UNWIND-NEXT: *UND* ___cxa_throw +# UNWIND-NEXT: *UND* ___gxx_personality_v0 +# UNWIND-NEXT: *UND* ___cxa_begin_catch +# UNWIND-NEXT: *UND* dyld_stub_binder + +## If a dead stripped function has a strong ref to a dylib symbol but +## a live function only a weak ref, the dylib is still not a WEAK_DYLIB. +## This matches ld64. 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/weak-ref.s -o %t/weak-ref.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/strong-dead-ref.s -o %t/strong-dead-ref.o +# RUN: %lld -lSystem -dead_strip %t/weak-ref.o %t/strong-dead-ref.o \ +# RUN: %t/dylib.dylib -o %t/weak-ref +# RUN: llvm-otool -l %t/weak-ref | FileCheck -DDIR=%t --check-prefix=WEAK %s +# WEAK: cmd LC_LOAD_DYLIB +# WEAK-NEXT: cmdsize +# WEAK-NEXT: name /usr/lib/libSystem.dylib +# WEAK: cmd LC_LOAD_DYLIB +# WEAK-NEXT: cmdsize +# WEAK-NEXT: name [[DIR]]/dylib.dylib + +## A strong symbol that would override a weak import does not emit the +## "this overrides a weak import" opcode if it is dead-stripped. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/weak-dylib.s -o %t/weak-dylib.o +# RUN: %lld -dylib -dead_strip %t/weak-dylib.o -o %t/weak-dylib.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/dead-weak-override.s -o %t/dead-weak-override.o +# RUN: %lld -dead_strip %t/dead-weak-override.o %t/weak-dylib.dylib \ +# RUN: -o %t/dead-weak-override +# RUN: llvm-objdump --macho --weak-bind --private-header \ +# RUN: %t/dead-weak-override | FileCheck --check-prefix=DEADWEAK %s +# DEADWEAK-NOT: WEAK_DEFINES +# DEADWEAK: Weak bind table: +# DEADWEAK: segment section address type addend symbol +# DEADWEAK-NOT: strong _weak_in_dylib + +## Stripped symbols should not be in the debug info stabs entries. 
+# RUN: llvm-mc -g -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/debug.s -o %t/debug.o +# RUN: %lld -lSystem -dead_strip %t/debug.o -o %t/debug +# RUN: dsymutil -s %t/debug | FileCheck --check-prefix=EXECSTABS %s +# EXECSTABS-NOT: N_FUN {{.*}} '_unref' +# EXECSTABS: N_FUN {{.*}} '_main' +# EXECSTABS-NOT: N_FUN {{.*}} '_unref' + +#--- basics.s +.comm _ref_com, 1 +.comm _unref_com, 1 + +.section __DATA,__unref_section +_unref_data: + .quad 4 + +l_unref_data: + .quad 5 + +## Referenced by no_dead_strip == S_ATTR_NO_DEAD_STRIP +.section __DATA,__ref_section,regular,no_dead_strip + +## Referenced because in no_dead_strip section. +_ref_data: + .quad 4 + +## This is a local symbol so it's not in the symbol table, but +## it is still in the section data. +l_ref_data: + .quad 5 + +.text + +# Exported symbols should not be stripped from dylibs +# or bundles, but they should be stripped from executables. +.globl _unref_extern +_unref_extern: + callq _ref_local + retq + +# Unreferenced local symbols should be stripped. +_unref_local: + retq + +# Same for unreferenced private externs. +.globl _unref_private_extern +.private_extern _unref_private_extern +_unref_private_extern: + # This shouldn't create an indirect symbol since it's + # a reference from a dead function. + movb _unref_com@GOTPCREL(%rip), %al + retq + +# Referenced local symbols should not be stripped. +_ref_local: + callq _ref_private_extern + retq + +# Same for referenced private externs. +# This one is referenced by a relocation. +.globl _ref_private_extern +.private_extern _ref_private_extern +_ref_private_extern: + retq + +# This one is referenced by a -u flag. +.globl _ref_private_extern_u +.private_extern _ref_private_extern_u +_ref_private_extern_u: + retq + +# Entry point should not be stripped for executables, even if hidden. +# For shared libraries this is stripped since it's just a regular hidden +# symbol there. 
+.globl _main +.private_extern _main +_main: + movb _ref_com@GOTPCREL(%rip), %al + callq _ref_local + retq + +# Things marked no_dead_strip should not be stripped either. +# (clang emits this e.g. for `__attribute__((used))` globals.) +# Both for .globl symbols... +.globl _no_dead_strip_globl +.no_dead_strip _no_dead_strip_globl +_no_dead_strip_globl: + callq _ref_from_no_dead_strip_globl + retq +_ref_from_no_dead_strip_globl: + retq + +# ...and for locals. +.no_dead_strip _no_dead_strip_local +_no_dead_strip_local: + callq _ref_from_no_dead_strip_local + retq +_ref_from_no_dead_strip_local: + retq + +.subsections_via_symbols + +#--- exported-symbol.s +.text + +.globl _unref_symbol +_unref_symbol: + retq + +.globl _my_exported_symbol +_my_exported_symbol: + retq + +.globl _main +_main: + retq + +.subsections_via_symbols + +#--- abs.s +.globl _abs1, _abs2, _abs3 + +.no_dead_strip _abs1 +_abs1 = 1 +_abs2 = 2 +_abs3 = 3 + +.section __DATA,__foo,regular,no_dead_strip +# Absolute symbols are not in a section, so the no_dead_strip +# on the section above has no effect. +.globl _abs4 +_abs4 = 4 + +.text +.globl _main +_main: + # This is relaxed away, so there's no relocation here and + # _abs3 isn't in the exported symbol table. + mov _abs3, %rax + retq + +.subsections_via_symbols + +#--- mod-funcs.s +## Roughly based on `clang -O2 -S` output for `struct A { A(); ~A(); }; A a;` +## for mod_init_funcs. mod_term_funcs then similar to that. 
+.section __TEXT,__StaticInit,regular,pure_instructions + +__unref: + retq + +_ref_from_init: + retq + +_ref_init: + callq _ref_from_init + retq + +_ref_from_term: + retq + +_ref_term: + callq _ref_from_term + retq + +.globl _main +_main: + retq + +.section __DATA,__mod_init_func,mod_init_funcs +.quad _ref_init + +.section __DATA,__mod_term_func,mod_term_funcs +.quad _ref_term + +.subsections_via_symbols + +#--- dylib.s +.text + +.globl _ref_dylib_fun +_ref_dylib_fun: + retq + +.globl _unref_dylib_fun +_unref_dylib_fun: + retq + +.subsections_via_symbols + +#--- strip-dylib-ref.s +.text + +_unref: + callq _ref_dylib_fun + callq _unref_dylib_fun + callq _ref_undef_fun + callq _unref_undef_fun + retq + +.globl _main +_main: + callq _ref_dylib_fun + callq _ref_undef_fun + retq + +.subsections_via_symbols + +#--- live-support.s +## In practice, live_support is used for instruction profiling +## data and asan. (Also for __eh_frame, but that needs special handling +## in the linker anyways.) +## This test isn't based on anything happening in real code though. +.section __TEXT,__ref_ls_fw,regular,live_support +_ref_ls_fun_fw: + # This is called by _main and is kept alive by normal + # forward liveness propagation. The live_support attribute + # does nothing in this case. + retq + +.section __TEXT,__unref_ls_fw,regular,live_support +_unref_ls_fun_fw: + retq + +.section __TEXT,__ref_ls_bw,regular,live_support +_ref_ls_fun_bw: + # This _calls_ something that's alive but isn't referenced itself. This is + # kept alive only due to this being in a live_support section. + callq _foo + + # _bar on the other hand is kept alive since it's called from here. + callq _bar + retq + +## Kept alive by a live symbol from a dynamic library. +_ref_ls_dylib_fun: + callq _ref_dylib_fun + retq + +## Kept alive by a live undefined symbol. +_ref_ls_undef_fun: + callq _ref_undef_fun + retq + +## All symbols in this live_support section reference dead symbols +## and are hence dead themselves. 
+.section __TEXT,__unref_ls_bw,regular,live_support +_unref_ls_fun_bw: + callq _unref + retq + +_unref_ls_dylib_fun_bw: + callq _unref_dylib_fun + retq + +_unref_ls_undef_fun_bw: + callq _unref_undef_fun + retq + +.text +.globl _unref +_unref: + retq + +.globl _bar +_bar: + retq + +.globl _foo +_foo: + callq _ref_ls_fun_fw + retq + +.globl _main +_main: + callq _ref_ls_fun_fw + callq _foo + callq _ref_dylib_fun + callq _ref_undef_fun + retq + +.subsections_via_symbols + +#--- live-support-iterations.s +.section __TEXT,_ls,regular,live_support + +## This is a live_support subsection that only becomes +## live after _foo below is processed. This means the algorithm of +## 1. mark things reachable from gc roots live +## 2. go through live sections and mark the ones live pointing to +## live symbols or sections +## needs more than one iteration, since _bar won't be live when step 2 +## runs for the first time. +## (ld64 gets this wrong -- it has different output based on if _bar is +## before _foo or after it.) +_bar: + callq _foo_refd + callq _bar_refd + retq + +## Same here. This is maybe more interesting since it references a live_support +## symbol instead of a "normal" symbol. +_baz: + callq _foo_refd + callq _baz_refd + retq + +_foo: + callq _main + callq _foo_refd + retq + +## Test no_dead_strip on a symbol in a live_support section. +## ld64 ignores this, but that doesn't look intentional. So lld honors it. 
+.no_dead_strip +_quux: + retq + + +.text +.globl _main +_main: + movq $0, %rax + retq + +_foo_refd: + retq + +_bar_refd: + retq + +_baz_refd: + retq + +.subsections_via_symbols + +#--- unwind.s +## This is the output of `clang -O2 -S throw.cc` where throw.cc +## looks like this: +## void unref() {} +## int main() { +## try { +## throw 0; +## } catch (int i) { +## return i; +## } +## } +.section __TEXT,__text,regular,pure_instructions + +.globl __Z5unrefv +.p2align 4, 0x90 +__Z5unrefv: +.cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + popq %rbp + retq + .cfi_endproc + +.globl _main +.p2align 4, 0x90 +_main: +Lfunc_begin0: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, Lexception0 + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + pushq %rbx + pushq %rax + .cfi_offset %rbx, -24 + movl $4, %edi + callq ___cxa_allocate_exception + movl $0, (%rax) +Ltmp0: + movq __ZTIi@GOTPCREL(%rip), %rsi + movq %rax, %rdi + xorl %edx, %edx + callq ___cxa_throw +Ltmp1: + ud2 +LBB1_2: +Ltmp2: + movq %rax, %rdi + callq ___cxa_begin_catch + movl (%rax), %ebx + callq ___cxa_end_catch + movl %ebx, %eax + addq $8, %rsp + popq %rbx + popq %rbp + retq +Lfunc_end0: +.cfi_endproc + +.section __TEXT,__gcc_except_tab +.p2align 2 +GCC_except_table1: +Lexception0: + .byte 255 ## @LPStart Encoding = omit + .byte 155 ## @TType Encoding = indirect pcrel sdata4 + .uleb128 Lttbase0-Lttbaseref0 +Lttbaseref0: + .byte 1 ## Call site Encoding = uleb128 + .uleb128 Lcst_end0-Lcst_begin0 +Lcst_begin0: + .uleb128 Lfunc_begin0-Lfunc_begin0 ## >> Call Site 1 << + .uleb128 Ltmp0-Lfunc_begin0 ## Call between Lfunc_begin0 and Ltmp0 + .byte 0 ## has no landing pad + .byte 0 ## On action: cleanup + .uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 2 << + .uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1 + .uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2 + 
.byte 1 ## On action: 1 + .uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 3 << + .uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0 + .byte 0 ## has no landing pad + .byte 0 ## On action: cleanup +Lcst_end0: + .byte 1 ## >> Action Record 1 << + ## Catch TypeInfo 1 + .byte 0 ## No further actions + .p2align 2 + ## >> Catch TypeInfos << + .long __ZTIi@GOTPCREL+4 ## TypeInfo 1 +Lttbase0: + .p2align 2 + ## -- End function +.subsections_via_symbols + +#--- weak-ref.s +.text +.weak_reference _ref_dylib_fun +.globl _main +_main: + callq _ref_dylib_fun + retq + +.subsections_via_symbols + +#--- strong-dead-ref.s +.text +.globl _unref_dylib_fun +_unref: + callq _unref_dylib_fun + retq + +.subsections_via_symbols + +#--- weak-dylib.s +.text +.globl _weak_in_dylib +.weak_definition _weak_in_dylib +_weak_in_dylib: + retq + +.subsections_via_symbols + +#--- dead-weak-override.s + +## Overrides the _weak_in_dylib symbol in weak-dylib, but is dead stripped. +.text + +#.no_dead_strip _weak_in_dylib +.globl _weak_in_dylib +_weak_in_dylib: + retq + +.globl _main +_main: + retq + +.subsections_via_symbols + +#--- debug.s +.text +.globl _unref +_unref: + retq + +.globl _main +_main: + retq + +.subsections_via_symbols diff --git a/lld/test/MachO/mh-header-link.s b/lld/test/MachO/mh-header-link.s index 5c313e2..80b8f47 100644 --- a/lld/test/MachO/mh-header-link.s +++ b/lld/test/MachO/mh-header-link.s @@ -9,7 +9,7 @@ ## (but not in other types of files) # RUN: llvm-mc %t/dylib.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/dylib.o -# RUN: %lld -pie -dylib %t/dylib.o -o %t/dylib.out +# RUN: %lld -pie -dylib -dead_strip %t/dylib.o -o %t/dylib.out # RUN: llvm-objdump -m --syms %t/dylib.out | FileCheck %s --check-prefix DYLIB # RUN: not %lld -pie -o /dev/null %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB @@ -21,7 +21,7 @@ ## Test that in an executable, we can link against __mh_execute_header # RUN: llvm-mc %t/main.s -triple=x86_64-apple-macos10.15 -filetype=obj -o 
%t/exec.o -# RUN: %lld -pie %t/exec.o -o %t/exec.out +# RUN: %lld -pie -dead_strip -lSystem %t/exec.o -o %t/exec.out ## But it would be an error trying to reference __mh_execute_header in a dylib # RUN: not %lld -pie -o /dev/null -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC @@ -34,6 +34,7 @@ _main: mov __mh_execute_header@GOTPCREL(%rip), %rax ret +.subsections_via_symbols #--- dylib.s .text @@ -41,3 +42,4 @@ _main: _main: mov __mh_dylib_header@GOTPCREL(%rip), %rax ret +.subsections_via_symbols diff --git a/lld/test/MachO/sectcreate.s b/lld/test/MachO/sectcreate.s index 2934bf7..03847e1 100644 --- a/lld/test/MachO/sectcreate.s +++ b/lld/test/MachO/sectcreate.s @@ -10,6 +10,16 @@ # RUN: -o %t %t.o # RUN: llvm-objdump -s %t | FileCheck %s +## -dead_strip does not strip -sectcreate sections, +## but also doesn't set S_ATTR_NO_DEAD_STRIP on them. +# RUN: %lld -dead_strip \ +# RUN: -sectcreate SEG SEC1 %t1 \ +# RUN: -segcreate SEG SEC2 %t3 \ +# RUN: -sectcreate SEG SEC1 %t2 \ +# RUN: -o %t %t.o +# RUN: llvm-objdump -s %t | FileCheck --check-prefix=STRIPPED %s +# RUN: llvm-readobj --sections %t | FileCheck --check-prefix=STRIPPEDSEC %s + # CHECK: Contents of section __TEXT,__text: # CHECK: Contents of section __DATA,__data: # CHECK: my string!. @@ -19,6 +29,17 @@ # CHECK: Contents of section SEG,SEC2: # CHECK: -sectcreate 2. +# STRIPPED: Contents of section __TEXT,__text: +# STRIPPED-NOT: Contents of section __DATA,__data: +# STRIPPED-NOT: my string!. +# STRIPPED: Contents of section SEG,SEC1: +# STRIPPED: -sectcreate 1.1. +# STRIPPED: -sectcreate 1.2. +# STRIPPED: Contents of section SEG,SEC2: +# STRIPPED: -sectcreate 2. + +# STRIPPEDSEC-NOT: NoDeadStrip + .text .global _main _main: @@ -29,3 +50,5 @@ _main: .global my_string my_string: .string "my string!" 
+ +.subsections_via_symbols diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn index 29e10d6..67cf249 100644 --- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn @@ -36,6 +36,7 @@ static_library("MachO2") { "InputSection.cpp", "LTO.cpp", "MapFile.cpp", + "MarkLive.cpp", "ObjC.cpp", "OutputSection.cpp", "OutputSegment.cpp", -- 2.7.4