From 8f91f38148e84a1b3fd8b5a090e53ff5dd0258f5 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Mon, 7 Dec 2020 09:28:17 -0500 Subject: [PATCH] [LLD] Search archives for symbol defs to override COMMON symbols. This patch changes the archive handling to enable the semantics needed for legacy FORTRAN common blocks and block data. When we have a COMMON definition of a symbol and are including an archive, LLD will now search the members for global/weak defintions to override the COMMON symbol. The previous LLD behavior (where a member would only be included if it satisifed some other needed symbol definition) can be re-enabled with the option '-no-fortran-common'. Differential Revision: https://reviews.llvm.org/D86142 --- lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 2 + lld/ELF/InputFiles.cpp | 89 +++++++++++++++++ lld/ELF/InputFiles.h | 8 ++ lld/ELF/Options.td | 4 + lld/ELF/Symbols.cpp | 26 +++++ lld/docs/ld.lld.1 | 2 + lld/test/ELF/common-archive-lookup.s | 182 +++++++++++++++++++++++++++++++++++ lld/test/ELF/warn-backrefs.s | 12 +++ 9 files changed, 326 insertions(+) create mode 100644 lld/test/ELF/common-archive-lookup.s diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 547e290..0ec4cb9 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -163,6 +163,7 @@ struct Configuration { bool fixCortexA53Errata843419; bool fixCortexA8; bool formatBinary = false; + bool fortranCommon; bool gcSections; bool gdbIndex; bool gnuHash = false; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 395b620..87ca200 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -974,6 +974,8 @@ static void readConfigs(opt::InputArgList &args) { !args.hasArg(OPT_relocatable); config->fixCortexA8 = args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable); + config->fortranCommon = + args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, true); config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true); config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f442609..3ff93c8 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1237,6 +1237,88 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) { parseFile(file); } +// The handling of tentative definitions (COMMON symbols) in archives is murky. +// A tentative defintion will be promoted to a global definition if there are no +// non-tentative definitions to dominate it. When we hold a tentative definition +// to a symbol and are inspecting archive memebers for inclusion there are 2 +// ways we can proceed: +// +// 1) Consider the tentative definition a 'real' definition (ie promotion from +// tentative to real definition has already happened) and not inspect +// archive members for Global/Weak definitions to replace the tentative +// definition. An archive member would only be included if it satisfies some +// other undefined symbol. This is the behavior Gold uses. +// +// 2) Consider the tentative definition as still undefined (ie the promotion to +// a real definiton happens only after all symbol resolution is done). +// The linker searches archive memebers for global or weak definitions to +// replace the tentative definition with. This is the behavior used by +// GNU ld. +// +// The second behavior is inherited from SysVR4, which based it on the FORTRAN +// COMMON BLOCK model. This behavior is needed for proper initalizations in old +// (pre F90) FORTRAN code that is packaged into an archive. +// +// The following functions search archive members for defintions to replace +// tentative defintions (implementing behavior 2). +static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, + StringRef archiveName) { + IRSymtabFile symtabFile = check(readIRSymtab(mb)); + for (const irsymtab::Reader::SymbolRef &sym : + symtabFile.TheReader.symbols()) { + if (sym.isGlobal() && sym.getName() == symName) + return !sym.isUndefined() && !sym.isCommon(); + } + return false; +} + +template +static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, + StringRef archiveName) { + ObjFile *obj = make>(mb, archiveName); + StringRef stringtable = obj->getStringTable(); + + for (auto sym : obj->template getGlobalELFSyms()) { + Expected name = sym.getName(stringtable); + if (name && name.get() == symName) + return sym.isDefined() && !sym.isCommon(); + } + return false; +} + +static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, + StringRef archiveName) { + switch (getELFKind(mb, archiveName)) { + case ELF32LEKind: + return isNonCommonDef(mb, symName, archiveName); + case ELF32BEKind: + return isNonCommonDef(mb, symName, archiveName); + case ELF64LEKind: + return isNonCommonDef(mb, symName, archiveName); + case ELF64BEKind: + return isNonCommonDef(mb, symName, archiveName); + default: + llvm_unreachable("getELFKind"); + } +} + +bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) { + Archive::Child c = + CHECK(sym.getMember(), toString(this) + + ": could not get the member for symbol " + + toELFString(sym)); + MemoryBufferRef mb = + CHECK(c.getMemoryBufferRef(), + toString(this) + + ": could not get the buffer for the member defining symbol " + + toELFString(sym)); + + if (isBitcode(mb)) + return isBitcodeNonCommonDef(mb, sym.getName(), getName()); + + return isNonCommonDef(mb, sym.getName(), getName()); +} + size_t ArchiveFile::getMemberCount() const { size_t count = 0; Error err = Error::success(); @@ -1771,6 +1853,13 @@ template void LazyObjFile::parse() { } } +bool LazyObjFile::shouldFetchForCommon(const StringRef &name) { + if (isBitcode(mb)) + return isBitcodeNonCommonDef(mb, name, archiveName); + + return isNonCommonDef(mb, name, archiveName); +} + std::string elf::replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index e8fc689..8c89828 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -312,6 +312,10 @@ public: template void parse(); void fetch(); + // Check if a non-common symbol should be fetched to override a common + // definition. + bool shouldFetchForCommon(const StringRef &name); + bool fetched = false; private: @@ -331,6 +335,10 @@ public: // more than once.) void fetch(const Archive::Symbol &sym); + // Check if a non-common symbol should be fetched to override a common + // definition. + bool shouldFetchForCommon(const Archive::Symbol &sym); + size_t getMemberCount() const; size_t getFetchedMemberCount() const { return seen.size(); } diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c3490b6..0254b54 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -64,6 +64,10 @@ defm optimize_bb_jumps: BB<"optimize-bb-jumps", "Remove direct jumps at the end to the next basic block", "Do not remove any direct jumps at the end to the next basic block (default)">; +defm fortran_common : BB<"fortran-common", + "Search archive members for definitions to override COMMON symbols (default)", + "Do not search archive members for definitions to override COMMON symbols">; + defm split_stack_adjust_size : Eq<"split-stack-adjust-size", "Specify adjustment to stack size when a split-stack function calls a " diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index a2153da..a54f68e 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -689,7 +689,33 @@ void Symbol::resolveDefined(const Defined &other) { other.value); } +template +static void replaceCommon(Symbol &oldSym, const LazyT &newSym) { + backwardReferences.erase(&oldSym); + oldSym.replace(newSym); + newSym.fetch(); +} + template void Symbol::resolveLazy(const LazyT &other) { + // For common objects, we want to look for global or weak definitions that + // should be fetched as the cannonical definition instead. + if (isCommon() && elf::config->fortranCommon) { + if (auto *laSym = dyn_cast(&other)) { + ArchiveFile *archive = cast(laSym->file); + const Archive::Symbol &archiveSym = laSym->sym; + if (archive->shouldFetchForCommon(archiveSym)) { + replaceCommon(*this, other); + return; + } + } else if (auto *loSym = dyn_cast(&other)) { + LazyObjFile *obj = cast(loSym->file); + if (obj->shouldFetchForCommon(loSym->getName())) { + replaceCommon(*this, other); + return; + } + } + } + if (!isUndefined()) { // See the comment in resolveUndefined(). if (isDefined()) diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 538d09d6..1449b02 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -311,6 +311,8 @@ Do not demangle symbol names. Inhibit output of an .Li .interp section. +.It Fl -no-fortran-common +Do not search archive members for definitions to override COMMON symbols. .It Fl -no-gc-sections Disable garbage collection of unused sections. .It Fl -no-gnu-unique diff --git a/lld/test/ELF/common-archive-lookup.s b/lld/test/ELF/common-archive-lookup.s new file mode 100644 index 0000000..c33ff86 --- /dev/null +++ b/lld/test/ELF/common-archive-lookup.s @@ -0,0 +1,182 @@ +# REQUIRES: ppc + +# RUN: rm -rf %t.dir +# RUN: split-file %s %t.dir +# RUN: cd %t.dir + +## Object files. +# RUN: llvm-mc -triple=powerpc64le -filetype=obj ref.s -o %t1.o +# RUN: llvm-mc -triple=powerpc64le -filetype=obj refanddef.s -o %t2.o +# RUN: llvm-mc -triple=powerpc64le -filetype=obj def.s -o %tstrong_data_only.o +# RUN: llvm-mc -triple=powerpc64le -filetype=obj weak.s -o %tweak_data_only.o + +# RUN: llvm-mc -triple=powerpc64le -filetype=obj main.s -o %tmain.o + +## Object file archives. +# RUN: llvm-ar crs %t1.a %t1.o %tstrong_data_only.o +# RUN: llvm-ar crs %t2.a %t1.o %tweak_data_only.o +# RUN: llvm-ar crs %t3.a %t2.o %tstrong_data_only.o + +## Bitcode files. +# RUN: llvm-as -o %t1.bc commonblock.ll +# RUN: llvm-as -o %t2.bc blockdata.ll + +## Bitcode archive. +# RUN: llvm-ar crs %t4.a %t1.bc %t2.bc + +# RUN: ld.lld -o %t1 %tmain.o %t1.a +# RUN: llvm-objdump -D -j .data %t1 | FileCheck --check-prefix=TEST1 %s + +# RUN: ld.lld -o %t2 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib +# RUN: llvm-objdump -D -j .data %t2 | FileCheck --check-prefix=TEST1 %s + +# RUN: ld.lld -o %t3 %tmain.o %t2.a +# RUN: llvm-objdump -D -j .data %t3 | FileCheck --check-prefix=TEST1 %s + +# RUN: ld.lld -o %t4 %tmain.o --start-lib %t1.o %tweak_data_only.o --end-lib +# RUN: llvm-objdump -D -j .data %t4 | FileCheck --check-prefix=TEST1 %s + +# RUN: ld.lld -o %t5 %tmain.o %t3.a --print-map | FileCheck --check-prefix=MAP %s + +# RUN: ld.lld -o %t6 %tmain.o %t2.o %t1.a +# RUN: llvm-objdump -D -j .data %t6 | FileCheck --check-prefix=TEST2 %s + +# RUN: ld.lld -o %t7 %tmain.o %t2.o --start-lib %t1.o %tstrong_data_only.o --end-lib +# RUN: llvm-objdump -D -j .data %t7 | FileCheck --check-prefix=TEST2 %s + +# RUN: not ld.lld -o %t8 %tmain.o %t1.a %tstrong_data_only.o 2>&1 | \ +# RUN: FileCheck --check-prefix=ERR %s + +# RUN: not ld.lld -o %t9 %tmain.o --start-lib %t1.o %t2.o --end-lib %tstrong_data_only.o 2>&1 | \ +# RUN: FileCheck --check-prefix=ERR %s + +# ERR: ld.lld: error: duplicate symbol: block + +# RUN: ld.lld --no-fortran-common -o %t10 %tmain.o %t1.a +# RUN: llvm-readobj --syms %t10 | FileCheck --check-prefix=NFC %s + +# RUN: ld.lld --no-fortran-common -o %t11 %tmain.o --start-lib %t1.o %tstrong_data_only.o --end-lib +# RUN: llvm-readobj --syms %t11 | FileCheck --check-prefix=NFC %s + +# RUN: ld.lld -o - %tmain.o %t4.a --lto-emit-asm | FileCheck --check-prefix=ASM %s + +# RUN: ld.lld -o - %tmain.o --start-lib %t1.bc %t2.bc --end-lib --lto-emit-asm | \ +# RUN: FileCheck --check-prefix=ASM %s + +## Old FORTRAN that mixes use of COMMON blocks and BLOCK DATA requires that we +## search through archives for non-tentative definitions (from the BLOCK DATA) +## to replace the tentative definitions (from the COMMON block(s)). + +## Ensure we have used the initialized definition of 'block' instead of a +## common definition. +# TEST1-LABEL: Disassembly of section .data: +# TEST1: : +# TEST1-NEXT: ea 2e 44 54 +# TEST1-NEXT: fb 21 09 40 +# TEST1-NEXT: ... + + +# NFC: Name: block +# NFC-NEXT: Value: +# NFC-NEXT: Size: 40 +# NFC-NEXT: Binding: Global (0x1) +# NFC-NEXT: Type: Object (0x1) +# NFC-NEXT: Other: 0 +# NFC-NEXT: Section: .bss + +## Expecting the strong definition from the object file, and the defintions from +## the archive do not interfere. +# TEST2-LABEL: Disassembly of section .data: +# TEST2: : +# TEST2-NEXT: 03 57 14 8b +# TEST2-NEXT: 0a bf 05 40 +# TEST2-NEXT: ... + +# MAP: 28 8 {{.*}}tmp3.a(common-archive-lookup.s.tmp2.o):(.data) +# MAP-NEXT: 28 1 block + +# ASM: .type block,@object +# ASM: block: +# ASM-NEXT: .long 5 +# ASM: .size block, 20 + +#--- ref.s + .text + .abiversion 2 + .global bar + .type bar,@function +bar: + addis 4, 2, block@toc@ha + addi 4, 4, block@toc@l + +## Tentative definition of 'block'. + .comm block,40,8 + +#--- refanddef.s +## An alternate strong definition of block, in the same file as +## a different referenced symbol. + .text + .abiversion 2 + .global bar + .type bar,@function +bar: + addis 4, 2, block@toc@ha + addi 4, 4, block@toc@l + + .data + .type block,@object + .global block + .p2align 3 +block: + .quad 0x4005bf0a8b145703 # double 2.7182818284589998 + .space 32 + .size block, 40 + +#--- def.s +## Strong definition of 'block'. + .data + .type block,@object + .global block + .p2align 3 +block: + .quad 0x400921fb54442eea # double 3.1415926535900001 + .space 32 + .size block, 40 + +#--- weak.s +## Weak definition of `block`. + .data + .type block,@object + .weak block + .p2align 3 +block: + .quad 0x400921fb54442eea # double 3.1415926535900001 + .space 32 + .size block, 40 + +#--- main.s + .global _start +_start: + bl bar + blr + + +#--- blockdata.ll +target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +target triple = "powerpc64le-unknown-linux-gnu" + +@block = dso_local local_unnamed_addr global [5 x i32] [i32 5, i32 0, i32 0, i32 0, i32 0], align 4 + +#--- commonblock.ll +target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +target triple = "powerpc64le-unknown-linux-gnu" + +@block = common dso_local local_unnamed_addr global [5 x i32] zeroinitializer, align 4 + +define dso_local i32 @bar(i32 signext %i) local_unnamed_addr { +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @block, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 8 + ret i32 %0 +} diff --git a/lld/test/ELF/warn-backrefs.s b/lld/test/ELF/warn-backrefs.s index 6912955..9a43e97 100644 --- a/lld/test/ELF/warn-backrefs.s +++ b/lld/test/ELF/warn-backrefs.s @@ -66,6 +66,18 @@ # RUN: echo '.weak foo; foo:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tweak.o # RUN: ld.lld --fatal-warnings --warn-backrefs --start-lib %tweak.o --end-lib %t1.o %t2.o -o /dev/null +## Check common symbols. A common sym might later be replaced by a non-common definition. +# RUN: echo '.comm obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tcomm.o +# RUN: echo '.type obj,@object; .data; .globl obj; .p2align 2; obj: .long 55; .size obj, 4' | llvm-mc -filetype=obj -triple=x86_64 -o %tstrong.o +# RUN: echo '.globl foo; foo: movl obj(%rip), %eax' | llvm-mc -triple=x86_64 -filetype=obj -o %t5.o +# RUN: llvm-ar rcs %tcomm.a %tcomm.o +# RUN: llvm-ar rcs %tstrong.a %tstrong.o +# RUN: ld.lld --warn-backrefs %tcomm.a %t1.o %t5.o 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s +# RUN: ld.lld --fatal-warnings --warn-backrefs %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null +# RUN: ld.lld --warn-backrefs --no-fortran-common %tcomm.a %t1.o %t5.o %tstrong.a 2>&1 -o /dev/null | FileCheck --check-prefix=COMM %s + +# COMM: ld.lld: warning: backward reference detected: obj in {{.*}}5.o refers to {{.*}}comm.a + ## If a lazy definition appears after the backward reference, don't warn. ## A traditional Unix linker will resolve the reference to the later definition. # RUN: ld.lld --fatal-warnings --warn-backrefs %t2.a %t1.o %t2.a -o /dev/null -- 2.7.4