From ec29538af2e0886a65f479d6a533956a1c478132 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 13 Aug 2020 09:00:26 -0700 Subject: [PATCH] [ELF] Assign file offsets of non-SHF_ALLOC after SHF_ALLOC and set sh_addr=0 to non-SHF_ALLOC * GNU ld places non-SHF_ALLOC sections after SHF_ALLOC sections. This has the advantage that the file offsets of a non-SHF_ALLOC section cannot be contained in a PT_LOAD. This patch matches the behavior. * For non-SHF_ALLOC non-orphan sections, GNU ld may assign non-zero sh_addr and treat them similarly to SHT_NOBITS sections (i.e. not advance the location counter). This is an alternative approach to what we have done in D85100. By placing non-SHF_ALLOC sections at the end, we can drop special cases in createSection and findOrphanPos added by D85100. Unlike GNU ld, we set sh_addr to 0 for non-SHF_ALLOC sections. 0 is arguably better because non-SHF_ALLOC sections don't appear in the memory image. The ELF spec says: > sh_addr - If the section will appear in the memory image of a process, this > member gives the address at which the section's first byte should > reside. Otherwise, the member contains 0. D85100 appeared to take a detour. If we take a combined view of D85100 and this patch, the overall complexity slightly increases (one more 3-line loop) and compatibility with GNU ld improves. The behavior we don't want to match is the special treatment of .symtab, .shstrtab and .strtab: they can be matched in LLD but not in GNU ld.
Reviewed By: jhenderson, psmith Differential Revision: https://reviews.llvm.org/D85867 --- lld/ELF/LinkerScript.cpp | 37 +++++---- lld/ELF/Writer.cpp | 13 ++-- .../ELF/linkerscript/memory-region-alignment.test | 21 ++--- lld/test/ELF/linkerscript/sections-nonalloc.s | 90 ++++++++++++++++++++++ lld/test/ELF/linkerscript/sections.s | 39 +--------- lld/test/ELF/linkerscript/symbols-non-alloc.test | 7 +- 6 files changed, 138 insertions(+), 69 deletions(-) create mode 100644 lld/test/ELF/linkerscript/sections-nonalloc.s diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index a187aa1..7e97576 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -586,8 +586,6 @@ static OutputSection *findByName(ArrayRef vec, static OutputSection *createSection(InputSectionBase *isec, StringRef outsecName) { OutputSection *sec = script->createOutputSection(outsecName, ""); - if (!(isec->flags & SHF_ALLOC)) - sec->addrExpr = [] { return 0; }; sec->recordSection(isec); return sec; } @@ -852,21 +850,27 @@ static OutputSection *findFirstSection(PhdrEntry *load) { void LinkerScript::assignOffsets(OutputSection *sec) { const bool sameMemRegion = ctx->memRegion == sec->memRegion; const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr; + const uint64_t savedDot = dot; ctx->memRegion = sec->memRegion; ctx->lmaRegion = sec->lmaRegion; - if (ctx->memRegion) - dot = ctx->memRegion->curPos; - - if (sec->addrExpr) - setDot(sec->addrExpr, sec->location, false); - // If the address of the section has been moved forward by an explicit - // expression so that it now starts past the current curPos of the enclosing - // region, we need to expand the current region to account for the space - // between the previous section, if any, and the start of this section. 
- if (ctx->memRegion && ctx->memRegion->curPos < dot) - expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, - ctx->memRegion->name, sec->name); + if (sec->flags & SHF_ALLOC) { + if (ctx->memRegion) + dot = ctx->memRegion->curPos; + if (sec->addrExpr) + setDot(sec->addrExpr, sec->location, false); + + // If the address of the section has been moved forward by an explicit + // expression so that it now starts past the current curPos of the enclosing + // region, we need to expand the current region to account for the space + // between the previous section, if any, and the start of this section. + if (ctx->memRegion && ctx->memRegion->curPos < dot) + expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, + ctx->memRegion->name, sec->name); + } else { + // Non-SHF_ALLOC sections have zero addresses. + dot = 0; + } switchTo(sec); @@ -918,6 +922,11 @@ void LinkerScript::assignOffsets(OutputSection *sec) { for (InputSection *sec : cast(base)->sections) output(sec); } + + // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections + // as they are not part of the process image. + if (!(sec->flags & SHF_ALLOC)) + dot = savedDot; } static bool isDiscardable(OutputSection &sec) { diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index cffdce0..b26817b 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1234,13 +1234,7 @@ static bool shouldSkip(BaseCommand *cmd) { static std::vector::iterator findOrphanPos(std::vector::iterator b, std::vector::iterator e) { - // OutputSections without the SHF_ALLOC flag are not part of the memory image - // and their addresses usually don't matter. Place any orphan sections without - // the SHF_ALLOC flag at the end so that these do not affect the address - // assignment of OutputSections with the SHF_ALLOC flag. OutputSection *sec = cast(*e); - if (!(sec->flags & SHF_ALLOC)) - return e; // Find the first element that has as close a rank as possible. 
auto i = std::max_element(b, e, [=](BaseCommand *a, BaseCommand *b) { @@ -2589,7 +2583,11 @@ template void Writer::assignFileOffsets() { if (p->p_type == PT_LOAD && (p->p_flags & PF_X)) lastRX = p; + // Layout SHF_ALLOC sections before non-SHF_ALLOC sections. A non-SHF_ALLOC + // will not occupy file offsets contained by a PT_LOAD. for (OutputSection *sec : outputSections) { + if (!(sec->flags & SHF_ALLOC)) + continue; off = setFileOffset(sec, off); // If this is a last section of the last executable segment and that @@ -2599,6 +2597,9 @@ template void Writer::assignFileOffsets() { lastRX->lastSec == sec) off = alignTo(off, config->commonPageSize); } + for (OutputSection *sec : outputSections) + if (!(sec->flags & SHF_ALLOC)) + off = setFileOffset(sec, off); sectionHeaderOff = alignTo(off, config->wordsize); fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr); diff --git a/lld/test/ELF/linkerscript/memory-region-alignment.test b/lld/test/ELF/linkerscript/memory-region-alignment.test index f0540a7..ea85829 100644 --- a/lld/test/ELF/linkerscript/memory-region-alignment.test +++ b/lld/test/ELF/linkerscript/memory-region-alignment.test @@ -1,5 +1,5 @@ # REQUIRES: x86 -# RUN: echo '.section .foo,"a"; .quad 0; .section .zed,"M",@progbits,1; .byte 0' > %t.s +# RUN: echo '.section .foo,"a"; .quad 0; .section .zed,"aM",@progbits,1; .byte 0' > %t.s # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t.s -o %t.o MEMORY { @@ -28,24 +28,25 @@ SECTIONS { # CHECK-NEXT: Offset: 0x1008 # CHECK-NEXT: Size: 8 -# CHECK: Name: .text +# CHECK: Name: .zed # CHECK-NEXT: Type: SHT_PROGBITS # CHECK-NEXT: Flags [ # CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_EXECINSTR +# CHECK-NEXT: SHF_MERGE # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x10 # CHECK-NEXT: Offset: 0x1010 -# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Size: 1 -# CHECK: Name: .zed +# CHECK: Name: .text # CHECK-NEXT: Type: SHT_PROGBITS # CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_MERGE +# CHECK-NEXT: SHF_ALLOC +# 
CHECK-NEXT: SHF_EXECINSTR # CHECK-NEXT: ] -# CHECK-NEXT: Address: 0x10 -# CHECK-NEXT: Offset: 0x1010 -# CHECK-NEXT: Size: 1 +# CHECK-NEXT: Address: 0x14 +# CHECK-NEXT: Offset: 0x1014 +# CHECK-NEXT: Size: 0 # CHECK: Name: .comment # CHECK-NEXT: Type: SHT_PROGBITS @@ -54,5 +55,5 @@ SECTIONS { # CHECK-NEXT: SHF_STRINGS # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x1011 +# CHECK-NEXT: Offset: 0x1014 # CHECK-NEXT: Size: 8 diff --git a/lld/test/ELF/linkerscript/sections-nonalloc.s b/lld/test/ELF/linkerscript/sections-nonalloc.s new file mode 100644 index 0000000..a0669f7 --- /dev/null +++ b/lld/test/ELF/linkerscript/sections-nonalloc.s @@ -0,0 +1,90 @@ +# REQUIRES: x86 +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/main.s -o %t.o + +## Non-SHF_ALLOC sections are placed after all SHF_ALLOC sections. They will +## thus not be contained in a PT_LOAD segment. data2 has a PT_LOAD segment, +## even if it is preceded by a non-SHF_ALLOC section. Non-SHF_ALLOC orphan +## sections have zero addresses. +## NOTE: GNU ld assigns non-zero addresses to non-SHF_ALLOC non-orphan sections. +# RUN: ld.lld -T %t/a.lds %t.o -o %ta +# RUN: llvm-readelf -S -l %ta | FileCheck %s + +# CHECK: [Nr] Name Type Address Off Size ES Flg Lk +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 +# CHECK-NEXT: [ 1] .bss NOBITS 0000000000000000 001000 000001 00 WA 0 +# CHECK-NEXT: [ 2] data1 PROGBITS 0000000000000001 001001 000001 00 WA 0 +# CHECK-NEXT: [ 3] data3 PROGBITS 0000000000000002 001002 000001 00 WA 0 +# CHECK-NEXT: [ 4] other1 PROGBITS 0000000000000000 001008 000001 00 0 +# CHECK-NEXT: [ 5] other2 PROGBITS 0000000000000000 001010 000001 00 0 +## Orphan placement places other3, .symtab, .shstrtab and .strtab after other2. 
+# CHECK-NEXT: [ 6] other3 PROGBITS 0000000000000000 001020 000001 00 0 +# CHECK-NEXT: [ 7] .symtab SYMTAB 0000000000000000 001028 000030 18 9 +# CHECK-NEXT: [ 8] .shstrtab STRTAB 0000000000000000 001058 00004d 00 0 +# CHECK-NEXT: [ 9] .strtab STRTAB 0000000000000000 0010a5 000008 00 0 +# CHECK-NEXT: [10] data2 PROGBITS 0000000000000003 001003 000001 00 WA 0 +# CHECK-NEXT: [11] .text PROGBITS 0000000000000004 001004 000001 00 AX 0 + +# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x000004 0x000004 RW 0x1000 +# CHECK-NEXT: LOAD 0x001004 0x0000000000000004 0x0000000000000004 0x000001 0x000001 R E 0x1000 +# CHECK-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0 + +# RUN: ld.lld -T %t/b.lds %t.o -o %tb +# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1 + +# CHECK1: [Nr] Name Type Address Off Size ES Flg Lk +# CHECK1-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 +# CHECK1-NEXT: [ 1] .text PROGBITS 00000000000000b0 0000b0 000001 00 AX 0 +# CHECK1-NEXT: [ 2] .bss NOBITS 00000000000000b1 0000b1 000001 00 WA 0 +# CHECK1-NEXT: [ 3] data1 PROGBITS 00000000000000b2 0000b2 000001 00 WA 0 +# CHECK1-NEXT: [ 4] data3 PROGBITS 00000000000000b3 0000b3 000001 00 WA 0 +# CHECK1-NEXT: [ 5] other1 PROGBITS 0000000000000000 0000b8 000001 00 0 +# CHECK1-NEXT: [ 6] other2 PROGBITS 0000000000000000 0000c0 000001 00 0 +# CHECK1-NEXT: [ 7] other3 PROGBITS 0000000000000000 0000d0 000001 00 0 +# CHECK1-NEXT: [ 8] .symtab SYMTAB 0000000000000000 0000d8 000030 18 10 +# CHECK1-NEXT: [ 9] .shstrtab STRTAB 0000000000000000 000108 00004d 00 0 +# CHECK1-NEXT: [10] .strtab STRTAB 0000000000000000 000155 000008 00 0 +# CHECK1-NEXT: [11] data2 PROGBITS 00000000000000b4 0000b4 000001 00 WA 0 +# CHECK1: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK1-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0000b5 0x0000b5 RWE 0x1000 +# 
CHECK1-NEXT: 0x60000000 0x0000b8 0x0000000000000000 0x0000000000000000 0x000009 0x000001 0x8 + +#--- a.lds +SECTIONS { + .bss : { *(.bss) } + data1 : { *(data1) } + other1 : { *(other1) } + other2 : { *(other2) } + data2 : { *(data2) } + .text : { *(.text) } + /DISCARD/ : { *(.comment) } +} + +#--- b.lds +PHDRS { + text PT_LOAD FILEHDR PHDRS; + foo 0x60000000 FLAGS (0); +} +SECTIONS { + . = SIZEOF_HEADERS; + .text : { *(.text) } : text + .bss : { *(.bss) } : text + data1 : { *(data1) } : text + other1 : { *(other1) } : foo + other2 : { *(other2) } : foo + data2 : { *(data1) } : text + /DISCARD/ : { *(.comment) } +} + +#--- main.s +.globl _start +_start: nop +.section data1,"aw"; .byte 0 +.section data2,"aw"; .byte 0 +.section data3,"aw"; .byte 0 +.bss; .byte 0 + +.section other1; .p2align 2; .byte 0 +.section other2; .p2align 3; .byte 0 +.section other3; .p2align 4; .byte 0 diff --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s index fa34640..539aa9c 100644 --- a/lld/test/ELF/linkerscript/sections.s +++ b/lld/test/ELF/linkerscript/sections.s @@ -25,39 +25,6 @@ # SEC-DEFAULT: 7 .shstrtab 0000003b {{[0-9a-f]*}} # SEC-DEFAULT: 8 .strtab 00000008 {{[0-9a-f]*}} -## Sections are placed in the order specified by the linker script. .data has -## a PT_LOAD segment, even if it is preceded by a non-alloc section. To -## allow this, place non-alloc orphan sections at the end and advance -## location counters for non-alloc non-orphan sections. 
-# RUN: echo "SECTIONS { \ -# RUN: .bss : { *(.bss) } \ -# RUN: other : { *(other) } \ -# RUN: .shstrtab : { *(.shstrtab) } \ -# RUN: .symtab : { *(.symtab) } \ -# RUN: .strtab : { *(.strtab) } \ -# RUN: .data : { *(.data) } \ -# RUN: .text : { *(.text) } }" > %t3.lds -# RUN: ld.lld -o %t3a -T %t3.lds %t -# RUN: llvm-readelf -S -l %t3a | FileCheck --check-prefix=SEC-ORDER %s -# RUN: ld.lld -o %t3b -T %t3.lds --unique %t -# RUN: llvm-readelf -S -l %t3b | FileCheck --check-prefix=SEC-ORDER %s - -# SEC-ORDER: [Nr] Name Type Address Off Size ES Flg -# SEC-ORDER: [ 0] NULL 0000000000000000 000000 000000 00 -# SEC-ORDER-NEXT: [ 1] .bss NOBITS 0000000000000000 001000 000002 00 WA -# SEC-ORDER-NEXT: [ 2] other PROGBITS 0000000000000002 001002 000003 00 WA -# SEC-ORDER-NEXT: [ 3] .shstrtab STRTAB 0000000000000005 001005 00003b 00 -# SEC-ORDER-NEXT: [ 4] .symtab SYMTAB 0000000000000040 001040 000030 18 -# SEC-ORDER-NEXT: [ 5] .strtab STRTAB 0000000000000070 001070 000008 00 -# SEC-ORDER-NEXT: [ 6] .data PROGBITS 0000000000000078 001078 000020 00 WA -# SEC-ORDER-NEXT: [ 7] .text PROGBITS 0000000000000098 001098 00000e 00 AX -# SEC-ORDER-NEXT: [ 8] .comment PROGBITS 0000000000000000 0010a6 000008 01 MS - -# SEC-ORDER: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# SEC-ORDER-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x000098 0x000098 RW 0x1000 -# SEC-ORDER-NEXT: LOAD 0x001098 0x0000000000000098 0x0000000000000098 0x00000e 0x00000e R E 0x1000 -# SEC-ORDER-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0 - # .text and .data have swapped names but proper sizes and types. 
# RUN: echo "SECTIONS { \ # RUN: .data : { *(.text) } \ @@ -112,12 +79,12 @@ # SEP-BY-NONALLOC: [ 1] .text PROGBITS 0000000000000000 001000 00000e 00 AX # SEP-BY-NONALLOC-NEXT: [ 2] .data PROGBITS 000000000000000e 00100e 000020 00 WA # SEP-BY-NONALLOC-NEXT: [ 3] .bss NOBITS 000000000000002e 00102e 000002 00 WA -# SEP-BY-NONALLOC-NEXT: [ 4] .comment PROGBITS 0000000000000030 00102e 000008 01 MS -# SEP-BY-NONALLOC-NEXT: [ 5] other PROGBITS 0000000000000038 001038 000003 00 WA +# SEP-BY-NONALLOC-NEXT: [ 4] .comment PROGBITS 0000000000000000 001033 000008 01 MS +# SEP-BY-NONALLOC: [ 8] other PROGBITS 0000000000000030 001030 000003 00 WA # SEP-BY-NONALLOC: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align # SEP-BY-NONALLOC-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x00000e 0x00000e R E 0x1000 -# SEP-BY-NONALLOC-NEXT: LOAD 0x00100e 0x000000000000000e 0x000000000000000e 0x00002d 0x00002d RW 0x1000 +# SEP-BY-NONALLOC-NEXT: LOAD 0x00100e 0x000000000000000e 0x000000000000000e 0x000025 0x000025 RW 0x1000 # SEP-BY-NONALLOC-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0 # Input section pattern contains additional semicolon. diff --git a/lld/test/ELF/linkerscript/symbols-non-alloc.test b/lld/test/ELF/linkerscript/symbols-non-alloc.test index 2bd6fc8..ca47b2b 100644 --- a/lld/test/ELF/linkerscript/symbols-non-alloc.test +++ b/lld/test/ELF/linkerscript/symbols-non-alloc.test @@ -1,6 +1,6 @@ # REQUIRES: x86 ## The address of a symbol assignment after a non-SHF_ALLOC section equals the -## end address of the section. +## end address of the last SHF_ALLOC section. 
# RUN: echo '.section .nonalloc,""; .quad 0' \ # RUN: | llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t @@ -8,10 +8,11 @@ # RUN: llvm-objdump --section-headers -t %t2 | FileCheck %s # CHECK: Sections: -# CHECK: .nonalloc 00000008 0000000000000120 +# CHECK: .text 00000000 0000000000000120 +# CHECK: .nonalloc 00000008 0000000000000000 # CHECK: SYMBOL TABLE: -# CHECK: 0000000000000128 g .nonalloc 0000000000000000 Sym +# CHECK: 0000000000000120 g .nonalloc 0000000000000000 Sym SECTIONS { . = SIZEOF_HEADERS; -- 2.7.4