From ceff23c6e39118fbbfb053e70d786fad40987c5b Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Mon, 14 Mar 2022 21:51:15 -0400 Subject: [PATCH] [lld-macho] -flat_namespace for dylibs should make all externs interposable All references to interposable symbols can be redirected at runtime to point to a different symbol definition (with the same name). For example, if both dylib A and B define symbol _foo, and we load A before B at runtime, then all references to _foo within dylib B will point to the definition in dylib A. ld64 makes all extern symbols interposable when linking with `-flat_namespace`. TODO 1: Support `-interposable` and `-interposable_list`, which should just be a matter of parsing those CLI flags and setting the `Defined::interposable` bit. TODO 2: Set Reloc::FinalDefinitionInLinkageUnit correctly with this info (we are currently not setting it at all, so we're erring on the conservative side, but we should help the LTO backend generate more optimal code.) Reviewed By: modimo, MaskRay Differential Revision: https://reviews.llvm.org/D119294 --- lld/MachO/SymbolTable.cpp | 7 +- lld/MachO/Symbols.cpp | 8 +- lld/MachO/Symbols.h | 11 ++- lld/MachO/SyntheticSections.cpp | 3 + lld/MachO/Writer.cpp | 5 +- ...at-namespace.s => flat-namespace-dysyms.s} | 0 lld/test/MachO/flat-namespace-interposable.s | 80 +++++++++++++++++++ 7 files changed, 108 insertions(+), 6 deletions(-) rename lld/test/MachO/{flat-namespace.s => flat-namespace-dysyms.s} (100%) create mode 100644 lld/test/MachO/flat-namespace-interposable.s diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 0f23bf38ee58..2b6fb77f08aa 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -96,10 +96,15 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, // of a name conflict, we fall through to the replaceSymbol() call below. } + // With -flat_namespace, all extern symbols in dylibs are interposable. + // FIXME: Add support for `-interposable` (PR53680). 
+ bool interposable = config->namespaceKind == NamespaceKind::flat && + config->outputType != MachO::MH_EXECUTE && + !isPrivateExtern; Defined *defined = replaceSymbol( s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip, - overridesWeakDef, isWeakDefCanBeHidden); + overridesWeakDef, isWeakDefCanBeHidden, interposable); return defined; } diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp index 2c3a59a40277..90575397bc12 100644 --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -44,12 +44,14 @@ Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, bool isThumb, bool isReferencedDynamically, bool noDeadStrip, - bool canOverrideWeakDef, bool isWeakDefCanBeHidden) + bool canOverrideWeakDef, bool isWeakDefCanBeHidden, + bool interposable) : Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef), privateExtern(isPrivateExtern), includeInSymtab(true), thumb(isThumb), referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip), - weakDefCanBeHidden(isWeakDefCanBeHidden), weakDef(isWeakDef), - external(isExternal), isec(isec), value(value), size(size) { + interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden), + weakDef(isWeakDef), external(isExternal), isec(isec), value(value), + size(size) { if (isec) { isec->symbols.push_back(this); // Maintain sorted order. 
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index ec00b17d2d57..a0653bef215b 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -118,7 +118,8 @@ public: Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, bool isThumb, bool isReferencedDynamically, bool noDeadStrip, - bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false); + bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false, + bool interposable = false); bool isWeakDef() const override { return weakDef; } bool isExternalWeakDef() const { @@ -158,6 +159,14 @@ public: // metadata. This is information only for the static linker and not written // to the output. bool noDeadStrip : 1; + // Whether references to this symbol can be interposed at runtime to point to + // a different symbol definition (with the same name). For example, if both + // dylib A and B define an interposable symbol _foo, and we load A before B at + // runtime, then all references to _foo within dylib B will point to the + // definition in dylib A. + // + // Only extern symbols may be interposable. 
+ bool interposable : 1; bool weakDefCanBeHidden : 1; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 6acc5c995aae..ccd4f9067610 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -235,6 +235,8 @@ void macho::addNonLazyBindingEntries(const Symbol *sym, in.rebase->addEntry(isec, offset); if (defined->isExternalWeakDef()) in.weakBinding->addEntry(sym, isec, offset, addend); + else if (defined->interposable) + in.binding->addEntry(sym, isec, offset, addend); } else { // Undefined symbols are filtered out in scanRelocations(); we should never // get here @@ -420,6 +422,7 @@ static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) { static int16_t ordinalForSymbol(const Symbol &sym) { if (const auto *dysym = dyn_cast<DylibSymbol>(&sym)) return ordinalForDylibSymbol(*dysym); + assert(cast<Defined>(&sym)->interposable); return BIND_SPECIAL_DYLIB_FLAT_LOOKUP; } diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 56a26f11e63c..b1e7d4ca06d7 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -594,6 +594,9 @@ static void prepareBranchTarget(Symbol *sym) { in.weakBinding->addEntry(sym, in.lazyPointers->isec, sym->stubsIndex * target->wordSize); } + } else if (defined->interposable) { + if (in.stubs->addEntry(sym)) + in.lazyBinding->addEntry(sym); } } else { llvm_unreachable("invalid branch target symbol type"); @@ -605,7 +608,7 @@ static bool needsBinding(const Symbol *sym) { if (isa<DylibSymbol>(sym)) return true; if (const auto *defined = dyn_cast<Defined>(sym)) - return defined->isExternalWeakDef(); + return defined->isExternalWeakDef() || defined->interposable; return false; } diff --git a/lld/test/MachO/flat-namespace.s b/lld/test/MachO/flat-namespace-dysyms.s similarity index 100% rename from lld/test/MachO/flat-namespace.s rename to lld/test/MachO/flat-namespace-dysyms.s diff --git a/lld/test/MachO/flat-namespace-interposable.s b/lld/test/MachO/flat-namespace-interposable.s new file mode 100644 index 
000000000000..e561f7b1c9be --- /dev/null +++ b/lld/test/MachO/flat-namespace-interposable.s @@ -0,0 +1,80 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t + +## With -flat_namespace, non-weak extern symbols in dylibs become interposable. +## Check that we generate the correct bindings for them. The test also includes +## other symbol types like weak externs to verify we continue to do the same +## (correct) thing even when `-flat_namespace` is enabled, instead of generating +## spurious bindings. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/foo.o %t/foo.s +# RUN: %lld -lSystem -flat_namespace -o %t/foo %t/foo.o +# RUN: %lld -lSystem -dylib -flat_namespace -o %t/foo.dylib %t/foo.o +# RUN: %lld -lSystem -bundle -flat_namespace -o %t/foo.bundle %t/foo.o +# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/foo | FileCheck \ +# RUN: %s --check-prefix=EXEC --implicit-check-not=_private_extern +# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/foo.dylib | \ +# RUN: FileCheck %s --check-prefix=DYLIB --implicit-check-not=_private_extern +# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/foo.bundle | \ +# RUN: FileCheck %s --check-prefix=DYLIB --implicit-check-not=_private_extern + +## Executables with -flat_namespace don't have interposable externs. 
+# EXEC: Bind table: +# EXEC-NEXT: segment section address type addend dylib symbol +# EXEC-EMPTY: +# EXEC-NEXT: Lazy bind table: +# EXEC-NEXT: segment section address dylib symbol +# EXEC-EMPTY: +# EXEC-NEXT: Weak bind table: +# EXEC-NEXT: segment section address type addend symbol +# EXEC-NEXT: __DATA __la_symbol_ptr {{.*}} pointer 0 _weak_extern +# EXEC-NEXT: __DATA __data {{.*}} pointer 0 _weak_extern +# EXEC-EMPTY: + +# DYLIB: Bind table: +# DYLIB-NEXT: segment section address type addend dylib symbol +# DYLIB-DAG: __DATA __data {{.*}} pointer 0 flat-namespace _extern +# DYLIB-DAG: __DATA __thread_ptrs {{.*}} pointer 0 flat-namespace _tlv +# DYLIB-DAG: __DATA_CONST __got {{.*}} pointer 0 flat-namespace dyld_stub_binder +# DYLIB-EMPTY: +# DYLIB-NEXT: Lazy bind table: +# DYLIB-NEXT: segment section address dylib symbol +# DYLIB-NEXT: __DATA __la_symbol_ptr {{.*}} flat-namespace _extern +# DYLIB-EMPTY: +# DYLIB-NEXT: Weak bind table: +# DYLIB-NEXT: segment section address type addend symbol +# DYLIB-NEXT: __DATA __la_symbol_ptr {{.*}} pointer 0 _weak_extern +# DYLIB-NEXT: __DATA __data {{.*}} pointer 0 _weak_extern + +#--- foo.s + +.globl _main, _extern, _weak_extern, _tlv +.weak_definition _weak_extern +.private_extern _private_extern + +_extern: + retq +_weak_extern: + retq +_private_extern: + retq +_local: + retq + +_main: + callq _extern + callq _weak_extern + callq _private_extern + callq _local + mov _tlv@TLVP(%rip), %rax + retq + +.data +.quad _extern +.quad _weak_extern +.quad _local + +.section __DATA,__thread_vars,thread_local_variables +_tlv: + +.subsections_via_symbols -- 2.34.1