From 80caa1eb4a0eab61debdcda515d00461a20520a6 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 23 Jul 2021 10:12:55 -0400 Subject: [PATCH] [lld/mac] Add support for segment$start$ and segment$end$ symbols These symbols are somewhat interesting in that they create non-existing segments, which as far as I know is the only way to create segments that don't contain any sections. Final part of part of PR50760. Like D106629, but for segments instead of sections. I'm not aware of anything that needs this in practice. Differential Revision: https://reviews.llvm.org/D106767 --- lld/MachO/OutputSegment.cpp | 8 +++ lld/MachO/OutputSegment.h | 6 ++ lld/MachO/SymbolTable.cpp | 20 +++--- lld/MachO/Writer.cpp | 1 + lld/test/MachO/start-end.s | 148 +++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 174 insertions(+), 9 deletions(-) diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp index 7d9544d..3bbaf7f 100644 --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -9,6 +9,7 @@ #include "OutputSegment.h" #include "ConcatOutputSection.h" #include "InputSection.h" +#include "Symbols.h" #include "SyntheticSections.h" #include "lld/Common/ErrorHandler.h" @@ -145,6 +146,13 @@ void OutputSegment::sortOutputSections() { llvm::stable_sort(sections, compareByOrder(sectionOrder)); } +void OutputSegment::assignAddressesToStartEndSymbols() { + for (Defined *d : segmentStartSymbols) + d->value = addr; + for (Defined *d : segmentEndSymbols) + d->value = addr + vmSize; +} + void macho::sortOutputSegments() { llvm::stable_sort(outputSegments, compareByOrder(segmentOrder)); diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h index 10d60b9..b3863f4 100644 --- a/lld/MachO/OutputSegment.h +++ b/lld/MachO/OutputSegment.h @@ -10,7 +10,9 @@ #define LLD_MACHO_OUTPUT_SEGMENT_H #include "OutputSection.h" +#include "Symbols.h" #include "lld/Common/LLVM.h" +#include "llvm/ADT/TinyPtrVector.h" #include #include @@ -41,6 +43,7 @@ class OutputSegment { public: void addOutputSection(OutputSection *os); void sortOutputSections(); + void assignAddressesToStartEndSymbols(); const std::vector &getSections() const { return sections; } size_t numNonHiddenSections() const; @@ -55,6 +58,9 @@ public: uint32_t initProt = 0; uint8_t index; + llvm::TinyPtrVector segmentStartSymbols; + llvm::TinyPtrVector segmentEndSymbols; + private: std::vector sections; }; diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index a09ff94..c5808a8 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -203,6 +203,12 @@ enum class Boundary { End, }; +static Defined *createBoundarySymbol(const Undefined &sym) { + return symtab->addSynthetic( + sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true, + /*includeInSymtab=*/false, /*referencedDynamically=*/false); +} + static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, Boundary which) { StringRef segName, sectName; @@ -238,19 +244,19 @@ static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, inputSections.push_back(isec); } - Defined *boundarySym = symtab->addSynthetic( - sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true, - /*includeInSymtab=*/false, /*referencedDynamically=*/false); if (which == Boundary::Start) - osec->sectionStartSymbols.push_back(boundarySym); + osec->sectionStartSymbols.push_back(createBoundarySymbol(sym)); else - osec->sectionEndSymbols.push_back(boundarySym); + osec->sectionEndSymbols.push_back(createBoundarySymbol(sym)); } static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName, Boundary which) { - // FIXME - error("segment$start$ and segment$end$ symbols are not yet implemented"); + OutputSegment *seg = getOrCreateOutputSegment(segName); + if (which == Boundary::Start) + seg->segmentStartSymbols.push_back(createBoundarySymbol(sym)); + else + seg->segmentEndSymbols.push_back(createBoundarySymbol(sym)); } void lld::macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) { diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 1f66cb1..d9c9cf5 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -992,6 +992,7 @@ void Writer::finalizeAddresses() { addr = alignTo(addr, pageSize); seg->vmSize = addr - seg->addr; seg->fileSize = fileOff - seg->fileOff; + seg->assignAddressesToStartEndSymbols(); } } diff --git a/lld/test/MachO/start-end.s b/lld/test/MachO/start-end.s index a55ec6e..dbe4f88 100644 --- a/lld/test/MachO/start-end.s +++ b/lld/test/MachO/start-end.s @@ -1,11 +1,10 @@ # REQUIRES: x86 -## FIXME: Add tests for segment$start$foo, segment$end$foo once implemented. - # RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/seg.s -o %t/seg.o # RUN: %lld -lSystem %t/main.o %t/foo.o -o %t.out \ # RUN: -rename_section __FOO __bar __BAZ __quux \ @@ -171,6 +170,101 @@ # CHECK: [[#%.16x, TEXTSTART]] # CHECK-NOT: [[#%.16x, TEXTEND]] +############################################################################### +## Test segment$start and segment$end. + +# RUN: %lld -lSystem %t/seg.o -o %t.seg.out \ +# RUN: -rename_segment __FOO __BAZ \ +# RUN: -rename_segment __WHAT __FOO \ +# RUN: -u 'segment$start$__UFLAG_SEG' \ +# RUN: -e 'segment$start$__TEXT' +# RUN: llvm-objdump --macho --syms %t.seg.out > %t-seg-dump.txt +## llvm-objdump can't dump segment names; use lld-otool for this. +# RUN: llvm-otool -l %t.seg.out | grep -A6 LC_SEGMENT >> %t-seg-dump.txt +# RUN: llvm-objdump --macho -d --no-symbolic-operands --no-show-raw-insn %t.seg.out >> %t-seg-dump.txt +# RUN: llvm-objdump --macho --function-starts %t.out >> %t-seg-dump.txt +# RUN: FileCheck %s --check-prefix=SEG < %t-seg-dump.txt + +# SEG-LABEL: SYMBOL TABLE: +# SEG-NOT: segment$start$__TEXT +# SEG-NOT: segment$end$__TEXT +# SEG-NOT: segment$start$__FOO +# SEG-NOT: segment$end$__FOO +# SEG-NOT: segment$start$__BAZ +# SEG-NOT: segment$end$__BAZ +# SEG-NOT: segment$start$__WHAT +# SEG-NOT: segment$end$__WHAT +# SEG-NOT: segment$start$__UFLAG_SEG +# SEG-NOT: segment$start$__UFLAG_SEG +# SEG: segment$start$REGULAR +# SEG: segment$end$REGULAR + +# SEG: cmd LC_SEGMENT_64 +# SEG-NEXT: cmdsize +# SEG-NEXT: segname __PAGEZERO + +# SEG: cmd LC_SEGMENT_64 +# SEG-NEXT: cmdsize +# SEG-NEXT: segname __TEXT +# SEG-NEXT: vmaddr 0x[[#%x, TEXTSTART:]] +# SEG-NEXT: vmsize 0x[[#%x, TEXTSIZE:]] + +# SEG: cmd LC_SEGMENT_64 +# SEG-NEXT: cmdsize +# SEG-NEXT: segname __BAZ +# SEG-NEXT: vmaddr 0x[[#%x, BAZSTART:]] +# SEG-NEXT: vmsize 0x[[#%x, BAZSIZE:]] + +# SEG: cmd LC_SEGMENT_64 +# SEG-NEXT: cmdsize +# SEG-NEXT: segname __FOO +# SEG-NEXT: vmaddr 0x[[#%x, FOOSTART:]] +# SEG-NEXT: vmsize 0x[[#%x, FOOSIZE:]] + +# SEG: cmd LC_SEGMENT_64 +# SEG-NEXT: cmdsize +# SEG-NEXT: segname __UFLAG_SEG +# SEG-NEXT: vmaddr 0x[[#%x, UFLAGSTART:]] +# SEG-NEXT: vmsize 0x0000000000000000 + +# SEG: cmd LC_SEGMENT_64 +# SEG-NEXT: cmdsize +# SEG-NEXT: segname ASDF +# SEG-NEXT: vmaddr 0x[[#%x, ASDFSTART:]] +# SEG-NEXT: vmsize 0x0000000000000000 + +# SEG: _main + +## segment$start$__TEXT / segment$end$__TEXT +# SEG: [[#%x, PC1:]]: +# SEG-SAME: leaq [[#%d, TEXTSTART - PC1 - 7]](%rip), %rax +# SEG-NEXT: [[#%x, PC2:]]: +# SEG-SAME: leaq [[#%d, TEXTSTART + TEXTSIZE - PC2 - 7]](%rip), %rbx + +## segment$start$__FOO / segment$end$__FOO, which is renamed to __BAZ +# SEG: [[#%x, PC3:]]: +# SEG-SAME: leaq [[#%d, BAZSTART - PC3 - 7]](%rip), %rax +# SEG-NEXT: [[#%x, PC4:]]: +# SEG-SAME: leaq [[#%d, BAZSTART + BAZSIZE - PC4 - 7]](%rip), %rbx + +## segment$start$__BAZ / segment$end$__BAZ +# SEG: [[#%x, PC5:]]: +# SEG-SAME: leaq [[#%d, BAZSTART - PC5 - 7]](%rip), %rax +# SEG-NEXT: [[#%x, PC6:]]: +# SEG-SAME: leaq [[#%d, BAZSTART + BAZSIZE - PC6 - 7]](%rip), %rbx + +## segment$start$__WHAT / segment$end$__WHAT, which is renamed to __FOO +# SEG: [[#%x, PC7:]]: +# SEG-SAME: leaq [[#%d, FOOSTART - PC7 - 7]](%rip), %rax +# SEG-NEXT: [[#%x, PC8:]]: +# SEG-SAME: leaq [[#%d, FOOSTART + FOOSIZE - PC8 - 7]](%rip), %rbx + +## segment$start$ASDF / segment$end$ASDF +# SEG: [[#%x, PC9:]]: +# SEG-SAME: leaq [[#%d, ASDFSTART - PC9 - 7]](%rip), %rax +# SEG-NEXT: [[#%x, PC10:]]: +# SEG-SAME: leaq [[#%d, ASDFSTART - PC10 - 7]](%rip), %rbx + #--- order.txt _otherfun _main @@ -272,3 +366,53 @@ section$end$ACTUAL$symbol: .fill 1 .subsections_via_symbols + +#--- seg.s +## Renamed to __BAZ,__bar by -rename_segment +.section __FOO,__bar +.space 1 + +## Renamed to __FOO,__ever by -rename_segment +.section __WHAT,__ever +.space 1 + +.text +.globl segment$start$REGULAR +segment$start$REGULAR: + retq + +.globl segment$end$REGULAR +segment$end$REGULAR: + retq + +.globl _main +_main: + movq segment$start$__TEXT@GOTPCREL(%rip), %rax + movq segment$end$__TEXT@GOTPCREL(%rip), %rbx + + movq segment$start$__FOO@GOTPCREL(%rip), %rax + movq segment$end$__FOO@GOTPCREL(%rip), %rbx + + movq segment$start$__BAZ@GOTPCREL(%rip), %rax + movq segment$end$__BAZ@GOTPCREL(%rip), %rbx + + # (These two lines make ld64 crash linking the .o file. + # The rest of the test works with ld64 too.) + movq segment$start$__WHAT@GOTPCREL(%rip), %rax + movq segment$end$__WHAT@GOTPCREL(%rip), %rbx + + # References to non-existing segments create that segment, + # without any sections in it. + movq segment$start$ASDF@GOTPCREL(%rip), %rax + movq segment$end$ASDF@GOTPCREL(%rip), %rbx + + ## Non-undefined symbols don't create segments. + callq segment$start$REGULAR + callq segment$end$REGULAR + + ret + +.section __TEXT,__fill +.space 578 + +.subsections_via_symbols -- 2.7.4