From c3e4f3b231db1ee31668b2e06047cf2a1e89455e Mon Sep 17 00:00:00 2001 From: Greg McGary Date: Sat, 2 Jan 2021 10:31:55 -0800 Subject: [PATCH] [lld-macho] Fix alignment & layout to match ld64 and satisfy kernel & codesign The Mach kernel & codesign on arm64 macOS has strict requirements for alignment and sequence of segments and sections. Dyld probably is just as picky, though kernel & codesign reject malformed Mach-O files before dyld ever has a chance. I developed this diff by incrementally changing alignments & sequences to match the output of ld64. I stopped when my hello-world test program started working: `codesign --verify` succeded, and `execve(2)` didn't immediately fail with `errno == EBADMACHO` = `"Malformed Mach-O file"`. Differential Revision: https://reviews.llvm.org/D94935 --- lld/MachO/SyntheticSections.cpp | 14 +++++-- lld/MachO/SyntheticSections.h | 8 +++- lld/MachO/UnwindInfoSection.cpp | 2 +- lld/MachO/Writer.cpp | 38 ++++++++++-------- lld/test/MachO/dylib-stub.yaml | 2 +- lld/test/MachO/entry-symbol.s | 8 ++-- lld/test/MachO/indirect-symtab.s | 14 +++---- lld/test/MachO/lc-build-version.s | 1 - lld/test/MachO/load-command-sequence.s | 71 ++++++++++++++++++++++++++++++++++ lld/test/MachO/symtab.s | 6 +-- lld/test/MachO/weak-binding.s | 14 +++---- lld/test/MachO/weak-reference.s | 9 ++--- 12 files changed, 136 insertions(+), 51 deletions(-) create mode 100644 lld/test/MachO/load-command-sequence.s diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 4fa03b5..561c845 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -188,7 +188,7 @@ void RebaseSection::writeTo(uint8_t *buf) const { NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, const char *name) : SyntheticSection(segname, name) { - align = 8; + align = WordSize; // vector of pointers / mimic ld64 flags = MachO::S_NON_LAZY_SYMBOL_POINTERS; } @@ -389,7 +389,9 @@ void macho::addNonLazyBindingEntries(const Symbol *sym, StubsSection::StubsSection() : SyntheticSection(segment_names::text, "__stubs") { - flags = MachO::S_SYMBOL_STUBS; + flags = MachO::S_SYMBOL_STUBS | MachO::S_ATTR_SOME_INSTRUCTIONS | + MachO::S_ATTR_PURE_INSTRUCTIONS; + align = 4; // machine instructions / mimic ld64 reserved2 = target->stubSize; } @@ -413,7 +415,10 @@ bool StubsSection::addEntry(Symbol *sym) { } StubHelperSection::StubHelperSection() - : SyntheticSection(segment_names::text, "__stub_helper") {} + : SyntheticSection(segment_names::text, "__stub_helper") { + flags = MachO::S_ATTR_SOME_INSTRUCTIONS | MachO::S_ATTR_PURE_INSTRUCTIONS; + align = 4; // machine instructions / mimic ld64 +} uint64_t StubHelperSection::getSize() const { return target->stubHelperHeaderSize + @@ -453,11 +458,12 @@ ImageLoaderCacheSection::ImageLoaderCacheSection() { uint8_t *arr = bAlloc.Allocate(WordSize); memset(arr, 0, WordSize); data = {arr, WordSize}; + align = WordSize; // pointer / mimic ld64 } LazyPointerSection::LazyPointerSection() : SyntheticSection(segment_names::data, "__la_symbol_ptr") { - align = 8; + align = WordSize; // vector of pointers / mimic ld64 flags = MachO::S_LAZY_SYMBOL_POINTERS; } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 7bca28d..843966c 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -46,6 +46,12 @@ constexpr const char unwindInfo[] = "__unwind_info"; // these are not synthetic, but in service of synthetic __unwind_info constexpr const char compactUnwind[] = "__compact_unwind"; constexpr const char ehFrame[] = "__eh_frame"; +// these are not synthetic, but need to be sorted +constexpr const char text[] = "__text"; +constexpr const char stubs[] = "__stubs"; +constexpr const char stubHelper[] = "__stub_helper"; +constexpr const char laSymbolPtr[] = "__la_symbol_ptr"; +constexpr const char data[] = "__data"; } // namespace section_names @@ -71,7 +77,7 @@ class LinkEditSection : public SyntheticSection { public: LinkEditSection(const char *segname, const char *name) : SyntheticSection(segname, name) { - align = WordSize; + align = WordSize; // mimic ld64 } // Sections in __LINKEDIT are special: their offsets are recorded in the diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index afaa69d..30e50b3 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -87,7 +87,7 @@ using namespace lld::macho; UnwindInfoSection::UnwindInfoSection() : SyntheticSection(segment_names::text, section_names::unwindInfo) { - align = WordSize; // TODO(gkm): make this 4 KiB ? + align = 4; // mimic ld64 } bool UnwindInfoSection::isNeeded() const { diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 8f90bad..46667e1 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -463,6 +463,12 @@ void Writer::scanSymbols() { } void Writer::createLoadCommands() { + uint8_t segIndex = 0; + for (OutputSegment *seg : outputSegments) { + in.header->addLoadCommand(make(seg->name, seg)); + seg->index = segIndex++; + } + in.header->addLoadCommand(make( in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make(symtabSection, stringTableSection)); @@ -473,8 +479,8 @@ void Writer::createLoadCommands() { switch (config->outputType) { case MH_EXECUTE: - in.header->addLoadCommand(make()); in.header->addLoadCommand(make()); + in.header->addLoadCommand(make()); break; case MH_DYLIB: in.header->addLoadCommand(make(LC_ID_DYLIB, config->installName, @@ -487,16 +493,10 @@ void Writer::createLoadCommands() { llvm_unreachable("unhandled output file type"); } - in.header->addLoadCommand(make(config->platform)); - uuidCommand = make(); in.header->addLoadCommand(uuidCommand); - uint8_t segIndex = 0; - for (OutputSegment *seg : outputSegments) { - in.header->addLoadCommand(make(seg->name, seg)); - seg->index = segIndex++; - } + in.header->addLoadCommand(make(config->platform)); uint64_t dylibOrdinal = 1; for (InputFile *file : inputFiles) { @@ -566,8 +566,10 @@ static DenseMap buildInputSectionPriorities() { static int segmentOrder(OutputSegment *seg) { return StringSwitch(seg->name) - .Case(segment_names::pageZero, -2) - .Case(segment_names::text, -1) + .Case(segment_names::pageZero, -4) + .Case(segment_names::text, -3) + .Case(segment_names::dataConst, -2) + .Case(segment_names::data, -1) // Make sure __LINKEDIT is the last segment (i.e. all its hidden // sections must be ordered after other sections). .Case(segment_names::linkEdit, std::numeric_limits::max()) @@ -579,12 +581,14 @@ static int sectionOrder(OutputSection *osec) { // Sections are uniquely identified by their segment + section name. if (segname == segment_names::text) { return StringSwitch(osec->name) - .Case(section_names::header, -1) + .Case(section_names::header, -4) + .Case(section_names::text, -3) + .Case(section_names::stubs, -2) + .Case(section_names::stubHelper, -1) .Case(section_names::unwindInfo, std::numeric_limits::max() - 1) .Case(section_names::ehFrame, std::numeric_limits::max()) .Default(0); - } - if (segname == segment_names::data) { + } else if (segname == segment_names::data) { // For each thread spawned, dyld will initialize its TLVs by copying the // address range from the start of the first thread-local data section to // the end of the last one. We therefore arrange these sections contiguously @@ -600,10 +604,12 @@ static int sectionOrder(OutputSection *osec) { case S_ZEROFILL: return std::numeric_limits::max(); default: - return 0; + return StringSwitch(osec->name) + .Case(section_names::laSymbolPtr, -2) + .Case(section_names::data, -1) + .Default(0); } - } - if (segname == segment_names::linkEdit) { + } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) .Case(section_names::rebase, -8) .Case(section_names::binding, -7) diff --git a/lld/test/MachO/dylib-stub.yaml b/lld/test/MachO/dylib-stub.yaml index 68f5333..9f0328a 100644 --- a/lld/test/MachO/dylib-stub.yaml +++ b/lld/test/MachO/dylib-stub.yaml @@ -14,7 +14,7 @@ # RUN: yaml2obj %t/fat.dylib.yaml > %t/fat.dylib # RUN: %lld -lSystem %t/test.o %t/fat.dylib -o %t/test # RUN: llvm-objdump --macho --lazy-bind %t/test | FileCheck %s -# CHECK: __DATA __la_symbol_ptr 0x100001008 foo _foo +# CHECK: __DATA __la_symbol_ptr 0x100002000 foo _foo #--- test.s .globl _main diff --git a/lld/test/MachO/entry-symbol.s b/lld/test/MachO/entry-symbol.s index 142eae1..ddee4e9 100644 --- a/lld/test/MachO/entry-symbol.s +++ b/lld/test/MachO/entry-symbol.s @@ -8,16 +8,16 @@ # RUN: llvm-objdump --macho --all-headers --syms %t/not-main | FileCheck %s # CHECK-LABEL: SYMBOL TABLE # CHECK-NEXT: {{0*}}[[#%x, ENTRY_ADDR:]] {{.*}} __TEXT,__text _not_main -# CHECK: cmd LC_MAIN -# CHECK-NEXT: cmdsize 24 -# CHECK-NEXT: entryoff [[#ENTRYOFF:]] # CHECK: sectname __text # CHECK-NEXT: segname __TEXT ## Note: the following checks assume that the entry symbol is right at the ## beginning of __text. # CHECK-NEXT: addr 0x{{0*}}[[#ENTRY_ADDR]] # CHECK-NEXT: size -# CHECK-NEXT: offset [[#ENTRYOFF]] +# CHECK-NEXT: offset [[#ENTRYOFF:]] +# CHECK: cmd LC_MAIN +# CHECK-NEXT: cmdsize 24 +# CHECK-NEXT: entryoff [[#ENTRYOFF]] # RUN: %lld -lSystem -o %t/dysym-main %t/not-main.o %t/libfoo.dylib -e _dysym_main # RUN: llvm-objdump --macho --all-headers --indirect-symbols --lazy-bind %t/dysym-main | FileCheck %s --check-prefix=DYSYM diff --git a/lld/test/MachO/indirect-symtab.s b/lld/test/MachO/indirect-symtab.s index 53c3d5d..a3790fc 100644 --- a/lld/test/MachO/indirect-symtab.s +++ b/lld/test/MachO/indirect-symtab.s @@ -20,19 +20,19 @@ # CHECK-NEXT: address index name # CHECK-NEXT: _bar_fn # CHECK-NEXT: _foo_fn -# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries +# CHECK-NEXT: Indirect symbols for (__DATA_CONST,__got) 3 entries # CHECK-NEXT: address index name -# CHECK-NEXT: _bar_tlv -# CHECK-NEXT: _foo_tlv +# CHECK-NEXT: _bar +# CHECK-NEXT: _foo +# CHECK-NEXT: _stub_binder # CHECK-NEXT: Indirect symbols for (__DATA,__la_symbol_ptr) 2 entries # CHECK-NEXT: address index name # CHECK-NEXT: _bar_fn # CHECK-NEXT: _foo_fn -# CHECK-NEXT: Indirect symbols for (__DATA_CONST,__got) 3 entries +# CHECK-NEXT: Indirect symbols for (__DATA,__thread_ptrs) 2 entries # CHECK-NEXT: address index name -# CHECK-NEXT: _bar -# CHECK-NEXT: _foo -# CHECK-NEXT: _stub_binder +# CHECK-NEXT: _bar_tlv +# CHECK-NEXT: _foo_tlv #--- libfoo.s diff --git a/lld/test/MachO/lc-build-version.s b/lld/test/MachO/lc-build-version.s index efffe57..eb4f422 100644 --- a/lld/test/MachO/lc-build-version.s +++ b/lld/test/MachO/lc-build-version.s @@ -11,7 +11,6 @@ # CHECK-NEXT: ntools 1 # CHECK-NEXT: tool ld # CHECK-NEXT: version {{[0-9\.]+}} -# CHECK-NEXT: Load command [[#]] .text .global _main diff --git a/lld/test/MachO/load-command-sequence.s b/lld/test/MachO/load-command-sequence.s new file mode 100644 index 0000000..265316e --- /dev/null +++ b/lld/test/MachO/load-command-sequence.s @@ -0,0 +1,71 @@ +# REQUIRES: x86 + +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: %lld -execute -o %t/exec %t/test.o -lSystem +# RUN: %lld -dylib -o %t/dylib %t/test.o -lSystem +# RUN: %lld -bundle -o %t/bundle %t/test.o -lSystem + +# RUN: llvm-objdump --macho --all-headers %t/exec | \ +# RUN: FileCheck %s --check-prefixes=EXEC,COMMON +# RUN: llvm-objdump --macho --all-headers %t/dylib | \ +# RUN: FileCheck %s --check-prefixes=DYLIB,COMMON +# RUN: llvm-objdump --macho --all-headers %t/bundle | \ +# RUN: FileCheck %s --check-prefixes=BUNDLE,COMMON + +## Check that load commands and sections within segments occur in the proper +## sequence. On ARM64 kernel is especially picky about layout, and will +## barf with errno=EBADMACHO when the sequence is wrong. + +# EXEC: cmd LC_SEGMENT_64 +# EXEC: segname __PAGEZERO + +# COMMON: cmd LC_SEGMENT_64 +# COMMON: segname __TEXT +# COMMON: sectname __text +# COMMON: segname __TEXT +# COMMON: sectname __cstring +# COMMON: segname __TEXT +# COMMON: cmd LC_SEGMENT_64 +# COMMON: segname __DATA_CONST +# COMMON: sectname __got +# COMMON: segname __DATA_CONST +# COMMON: cmd LC_SEGMENT_64 +# COMMON: segname __DATA +# COMMON: sectname __data +# COMMON: segname __DATA +# COMMON: sectname __const +# COMMON: segname __DATA +# COMMON: cmd LC_SEGMENT_64 +# COMMON: segname __LINKEDIT +# COMMON: cmd LC_DYLD_INFO_ONLY +# COMMON: cmd LC_SYMTAB +# COMMON: cmd LC_DYSYMTAB + +# EXEC: cmd LC_LOAD_DYLINKER +# EXEC: cmd LC_MAIN +# DYLIB: cmd LC_ID_DYLIB + +# COMMON: cmd LC_UUID +# COMMON: cmd LC_BUILD_VERSION +# COMMON: cmd LC_LOAD_DYLIB + +.section __TEXT,__cstring +_str: + .asciz "Help me! I'm trapped in a test!\n" + +.globl _mutable +.section __DATA,__data +mutable: + .long 0x1234 + +.globl _constant +.section __DATA,__const +constant: + .long 0x4567 + +.text +.global _main +_main: + mov ___nan@GOTPCREL(%rip), %rax + ret diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index fa784a3..44d4206 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -10,7 +10,7 @@ # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: _local # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: Section: __data # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] @@ -19,7 +19,7 @@ # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: __dyld_private # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: Section: __data # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] @@ -39,7 +39,7 @@ # CHECK-NEXT: Name: _external # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: Section: __data # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] diff --git a/lld/test/MachO/weak-binding.s b/lld/test/MachO/weak-binding.s index f627daf..b237d10 100644 --- a/lld/test/MachO/weak-binding.s +++ b/lld/test/MachO/weak-binding.s @@ -7,15 +7,15 @@ # RUN: llvm-objdump -d --no-show-raw-insn --bind --lazy-bind --weak-bind --full-contents %t/test | \ # RUN: FileCheck %s -# CHECK: Contents of section __DATA,__la_symbol_ptr: +# CHECK: Contents of section __DATA_CONST,__got: ## Check that this section contains a nonzero pointer. It should point to -## _weak_external_fn, but we don't have a good way of testing the exact value as -## the bytes here are in little-endian order. +## _weak_external_for_gotpcrel. # CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} -# CHECK: Contents of section __DATA_CONST,__got: +# CHECK: Contents of section __DATA,__la_symbol_ptr: ## Check that this section contains a nonzero pointer. It should point to -## _weak_external_for_gotpcrel. +## _weak_external_fn, but we don't have a good way of testing the exact value as +## the bytes here are in little-endian order. # CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} # CHECK: <_main>: @@ -30,11 +30,11 @@ # CHECK-NEXT: callq 0x{{[0-9a-f]*}} # CHECK-LABEL: Bind table: +# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 libfoo _weak_dysym_for_gotpcrel +# CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,WEAK_DY_FN:]] pointer 0 libfoo _weak_dysym_fn # CHECK-DAG: __DATA __data 0x[[#%x,WEAK_DY:]] pointer 0 libfoo _weak_dysym # CHECK-DAG: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap # CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_DY_TLV_ADDR]] pointer 0 libfoo _weak_dysym_tlv -# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 libfoo _weak_dysym_for_gotpcrel -# CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,WEAK_DY_FN:]] pointer 0 libfoo _weak_dysym_fn ## Check that we don't have any other bindings # CHECK-NOT: pointer diff --git a/lld/test/MachO/weak-reference.s b/lld/test/MachO/weak-reference.s index 6b6c297..4e4f488 100644 --- a/lld/test/MachO/weak-reference.s +++ b/lld/test/MachO/weak-reference.s @@ -34,10 +34,10 @@ # YAML-LABEL: WeakBindOpcodes: # YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM # YAML-NEXT: Imm: 0 -# YAML-NEXT: Symbol: _weak_foo +# YAML-NEXT: Symbol: _weak_foo_fn # YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM # YAML-NEXT: Imm: 0 -# YAML-NEXT: Symbol: _weak_foo_fn +# YAML-NEXT: Symbol: _weak_foo # YAML-LABEL: LazyBindOpcodes: # YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM # YAML-NEXT: Imm: 1 @@ -79,13 +79,10 @@ # STRONG-YAML-LABEL: WeakBindOpcodes: # STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM # STRONG-YAML-NEXT: Imm: 0 -# STRONG-YAML-NEXT: Symbol: _weak_foo +# STRONG-YAML-NEXT: Symbol: _weak_foo_fn # STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM # STRONG-YAML-NEXT: Imm: 0 # STRONG-YAML-NEXT: Symbol: _weak_foo -# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM -# STRONG-YAML-NEXT: Imm: 0 -# STRONG-YAML-NEXT: Symbol: _weak_foo_fn # STRONG-YAML-LABEL: LazyBindOpcodes: # STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM # STRONG-YAML-NEXT: Imm: 0 -- 2.7.4