From 718c32175b3bcced88d9ca25ab1482a682ae5bbb Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Mon, 5 Jul 2021 20:00:09 -0400 Subject: [PATCH] [lld-macho] Only emit one BIND_OPCODE_SET_SYMBOL per symbol Size-wise, BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the most expensive opcode, since it comes with an associated symbol string. We were previously emitting it once per binding, instead of once per symbol. This diff groups all bindings for a given symbol together and ensures we only emit one such opcode per symbol. This matches ld64's behavior. While this is a relatively small win on chromium_framework (-72KiB), for programs that have more dynamic bindings, the difference can be quite large. This change is perf-neutral when linking chromium_framework. Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D105075 --- lld/MachO/SyntheticSections.cpp | 104 ++++++++++++++++++++++++++-------------- lld/MachO/SyntheticSections.h | 31 +++++------- lld/test/MachO/bind-opcodes.s | 45 +++++++++++++++++ 3 files changed, 125 insertions(+), 55 deletions(-) create mode 100644 lld/test/MachO/bind-opcodes.s diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 2a409bf..0a8153f 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -273,7 +273,6 @@ struct Binding { OutputSegment *segment = nullptr; uint64_t offset = 0; int64_t addend = 0; - int16_t ordinal = 0; }; } // namespace @@ -283,9 +282,8 @@ struct Binding { // The bind opcode "interpreter" remembers the values of each binding field, so // we only need to encode the differences between bindings. Hence the use of // lastBinding. -static void encodeBinding(const Symbol *sym, const OutputSection *osec, - uint64_t outSecOff, int64_t addend, - bool isWeakBinding, Binding &lastBinding, +static void encodeBinding(const OutputSection *osec, uint64_t outSecOff, + int64_t addend, Binding &lastBinding, raw_svector_ostream &os) { OutputSegment *seg = osec->parent; uint64_t offset = osec->getSegmentOffset() + outSecOff; @@ -307,13 +305,7 @@ static void encodeBinding(const Symbol *sym, const OutputSection *osec, lastBinding.addend = addend; } - uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; - if (!isWeakBinding && sym->isWeakRef()) - flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; - - os << flags << sym->getName() << '\0' - << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) - << static_cast(BIND_OPCODE_DO_BIND); + os << static_cast(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset lastBinding.offset += target->wordSize; } @@ -345,6 +337,37 @@ static void encodeWeakOverride(const Defined *defined, << defined->getName() << '\0'; } +// Organize the bindings so we can encoded them with fewer opcodes. +// +// First, all bindings for a given symbol should be grouped together. +// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it +// has an associated symbol string), so we only want to emit it once per symbol. +// +// Within each group, we sort the bindings by address. Since bindings are +// delta-encoded, sorting them allows for a more compact result. Note that +// sorting by address alone ensures that bindings for the same segment / section +// are located together, minimizing the number of times we have to emit +// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB. +// +// Finally, we sort the symbols by the address of their first binding, again +// to facilitate the delta-encoding process. +template +std::vector>> +sortBindings(const BindingsMap &bindingsMap) { + std::vector>> bindingsVec( + bindingsMap.begin(), bindingsMap.end()); + for (auto &p : bindingsVec) { + std::vector &bindings = p.second; + llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { + return a.target.getVA() < b.target.getVA(); + }); + } + llvm::sort(bindingsVec, [](const auto &a, const auto &b) { + return a.second[0].target.getVA() < b.second[0].target.getVA(); + }); + return bindingsVec; +} + // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld // interprets to update a record with the following fields: // * segment index (of the segment to write the symbol addresses to, typically @@ -361,24 +384,30 @@ static void encodeWeakOverride(const Defined *defined, void BindingSection::finalizeContents() { raw_svector_ostream os{contents}; Binding lastBinding; - - // Since bindings are delta-encoded, sorting them allows for a more compact - // result. Note that sorting by address alone ensures that bindings for the - // same segment / section are located together. - llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { - return a.target.getVA() < b.target.getVA(); - }); - for (const BindingEntry &b : bindings) { - int16_t ordinal = ordinalForDylibSymbol(*b.dysym); - if (ordinal != lastBinding.ordinal) { + int16_t lastOrdinal = 0; + + for (auto &p : sortBindings(bindingsMap)) { + const DylibSymbol *sym = p.first; + std::vector &bindings = p.second; + llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { + return a.target.getVA() < b.target.getVA(); + }); + uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (sym->isWeakRef()) + flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; + os << flags << sym->getName() << '\0' + << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); + int16_t ordinal = ordinalForDylibSymbol(*sym); + if (ordinal != lastOrdinal) { encodeDylibOrdinal(ordinal, os); - lastBinding.ordinal = ordinal; + lastOrdinal = ordinal; } - encodeBinding(b.dysym, b.target.isec->parent, - b.target.isec->getOffset(b.target.offset), b.addend, - /*isWeakBinding=*/false, lastBinding, os); + for (const BindingEntry &b : bindings) + encodeBinding(b.target.isec->parent, + b.target.isec->getOffset(b.target.offset), b.addend, + lastBinding, os); } - if (!bindings.empty()) + if (!bindingsMap.empty()) os << static_cast(BIND_OPCODE_DONE); } @@ -396,17 +425,18 @@ void WeakBindingSection::finalizeContents() { for (const Defined *defined : definitions) encodeWeakOverride(defined, os); - // Since bindings are delta-encoded, sorting them allows for a more compact - // result. - llvm::sort(bindings, - [](const WeakBindingEntry &a, const WeakBindingEntry &b) { - return a.target.getVA() < b.target.getVA(); - }); - for (const WeakBindingEntry &b : bindings) - encodeBinding(b.symbol, b.target.isec->parent, - b.target.isec->getOffset(b.target.offset), b.addend, - /*isWeakBinding=*/true, lastBinding, os); - if (!bindings.empty() || !definitions.empty()) + for (auto &p : sortBindings(bindingsMap)) { + const Symbol *sym = p.first; + std::vector &bindings = p.second; + os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) + << sym->getName() << '\0' + << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); + for (const BindingEntry &b : bindings) + encodeBinding(b.target.isec->parent, + b.target.isec->getOffset(b.target.offset), b.addend, + lastBinding, os); + } + if (!bindingsMap.empty() || !definitions.empty()) os << static_cast(BIND_OPCODE_DONE); } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index b33d5e5..162e69e 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -16,6 +16,7 @@ #include "OutputSegment.h" #include "Target.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" #include "llvm/MC/StringTableBuilder.h" @@ -169,40 +170,34 @@ private: }; struct BindingEntry { - const DylibSymbol *dysym; int64_t addend; Location target; - BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target) - : dysym(dysym), addend(addend), target(std::move(target)) {} + BindingEntry(int64_t addend, Location target) + : addend(addend), target(std::move(target)) {} }; +template +using BindingsMap = llvm::DenseMap>; + // Stores bind opcodes for telling dyld which symbols to load non-lazily. class BindingSection final : public LinkEditSection { public: BindingSection(); void finalizeContents() override; uint64_t getRawSize() const override { return contents.size(); } - bool isNeeded() const override { return !bindings.empty(); } + bool isNeeded() const override { return !bindingsMap.empty(); } void writeTo(uint8_t *buf) const override; void addEntry(const DylibSymbol *dysym, const InputSection *isec, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(dysym, addend, Location(isec, offset)); + bindingsMap[dysym].emplace_back(addend, Location(isec, offset)); } private: - std::vector bindings; + BindingsMap bindingsMap; SmallVector contents; }; -struct WeakBindingEntry { - const Symbol *symbol; - int64_t addend; - Location target; - WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target) - : symbol(symbol), addend(addend), target(std::move(target)) {} -}; - // Stores bind opcodes for telling dyld which weak symbols need coalescing. // There are two types of entries in this section: // @@ -219,17 +214,17 @@ public: void finalizeContents() override; uint64_t getRawSize() const override { return contents.size(); } bool isNeeded() const override { - return !bindings.empty() || !definitions.empty(); + return !bindingsMap.empty() || !definitions.empty(); } void writeTo(uint8_t *buf) const override; void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset, int64_t addend = 0) { - bindings.emplace_back(symbol, addend, Location(isec, offset)); + bindingsMap[symbol].emplace_back(addend, Location(isec, offset)); } - bool hasEntry() const { return !bindings.empty(); } + bool hasEntry() const { return !bindingsMap.empty(); } void addNonWeakDefinition(const Defined *defined) { definitions.emplace_back(defined); @@ -238,7 +233,7 @@ public: bool hasNonWeakDefinition() const { return !definitions.empty(); } private: - std::vector bindings; + BindingsMap bindingsMap; std::vector definitions; SmallVector contents; }; diff --git a/lld/test/MachO/bind-opcodes.s b/lld/test/MachO/bind-opcodes.s new file mode 100644 index 0000000..b66bffb --- /dev/null +++ b/lld/test/MachO/bind-opcodes.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: %lld -dylib %t/foo.o -o %t/libfoo.dylib +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test + +## Make sure we emit exactly one BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM per +## symbol. +# RUN: obj2yaml %t/test | FileCheck %s --implicit-check-not BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + +# CHECK: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: _foo + +# CHECK: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: _bar + +# RUN: llvm-objdump --macho --bind %t/test | FileCheck %s --check-prefix=BIND +# BIND: Bind table: +# BIND-NEXT: segment section address type addend dylib symbol +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _bar +# BIND-NEXT: __DATA __data {{.*}} pointer 0 libfoo _bar +# BIND-EMPTY: + +#--- foo.s +.globl _foo, _bar +_foo: + .space 4 +_bar: + .space 4 + +#--- test.s +.data +.quad _foo +.quad _bar +.quad _foo +.quad _bar + +.globl _main +.text +_main: -- 2.7.4