From c30e6447c0225f675773d07f2b6d4e3c2b962155 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 14 Mar 2022 14:13:41 -0700 Subject: [PATCH] [ELF] Move section assignment from initializeSymbols to postParse https://discourse.llvm.org/t/parallel-input-file-parsing/60164 initializeSymbols currently sets Defined::section and handles non-prevailing COMDAT groups. Move the code to the parallel postParse to reduce work from the single-threading code path and make parallel section initialization infeasible. Postpone reporting duplicate symbol errors so that the messages have the section information. (`Defined::section` is assigned in postParse and another thread may not have the information). * duplicated-synthetic-sym.s: BinaryFile duplicate definition (very rare) now has no section information * comdat-binding: `%t/w.o %t/g.o` leads to an undesired undefined symbol. This is not ideal but we report a diagnostic to inform that this is unsupported. (See release note) * comdat-discarded-lazy.s: %tdef.o is unextracted. The new behavior (discarded section error) makes more sense Depends on D120640 Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D120626 --- lld/ELF/Config.h | 19 ++++++++ lld/ELF/Driver.cpp | 14 ++++++ lld/ELF/InputFiles.cpp | 77 ++++++++++++++---------------- lld/ELF/Relocations.cpp | 9 +++- lld/ELF/Symbols.cpp | 13 ++++- lld/ELF/Symbols.h | 3 +- lld/docs/ReleaseNotes.rst | 2 + lld/test/ELF/comdat-binding.s | 15 +++++- lld/test/ELF/comdat-discarded-error.s | 1 + lld/test/ELF/comdat-discarded-lazy.s | 10 ++-- lld/test/ELF/duplicated-synthetic-sym.s | 4 +- lld/test/ELF/exclude-discarded-error2.s | 12 ++--- lld/test/ELF/lto/comdat-mixed-archive.test | 2 +- 13 files changed, 120 insertions(+), 61 deletions(-) diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 7b7265c..82aa217 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -30,6 +30,7 @@ namespace elf { class InputFile; class InputSectionBase; +class Symbol; enum ELFKind : uint8_t { ELFNoneKind, @@ -349,6 +350,24 @@ struct Configuration { // The only instance of Configuration struct. extern std::unique_ptr config; +struct DuplicateSymbol { + const Symbol *sym; + const InputFile *file; + InputSectionBase *section; + uint64_t value; +}; + +struct Ctx { + // Duplicate symbol candidates. + SmallVector duplicates; + // Symbols in a non-prevailing COMDAT group which should be changed to an + // Undefined. + SmallVector, 0> nonPrevailingSyms; +}; + +// The only instance of Ctx struct. +extern std::unique_ptr ctx; + // The first two elements of versionDefinitions represent VER_NDX_LOCAL and // VER_NDX_GLOBAL. This helper returns other elements. static inline ArrayRef namedVersionDefs() { diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 9f5d550..15434cc 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -74,6 +74,7 @@ using namespace lld; using namespace lld::elf; std::unique_ptr elf::config; +std::unique_ptr elf::ctx; std::unique_ptr elf::driver; static void setConfigs(opt::InputArgList &args); @@ -117,6 +118,7 @@ bool elf::link(ArrayRef args, llvm::raw_ostream &stdoutOS, "--error-limit=0 to see all errors)"; config = std::make_unique(); + elf::ctx = std::make_unique(); driver = std::make_unique(); script = std::make_unique(); symtab = std::make_unique(); @@ -2486,6 +2488,16 @@ void LinkerDriver::link(opt::InputArgList &args) { parallelForEach(objectFiles, initializeLocalSymbols); parallelForEach(objectFiles, postParseObjectFile); parallelForEach(bitcodeFiles, [](BitcodeFile *file) { file->postParse(); }); + for (auto &it : ctx->nonPrevailingSyms) { + Symbol &sym = *it.first; + sym.replace(Undefined{sym.file, sym.getName(), sym.binding, sym.stOther, + sym.type, it.second}); + cast(sym).nonPrevailing = true; + } + ctx->nonPrevailingSyms.clear(); + for (const DuplicateSymbol &d : ctx->duplicates) + reportDuplicate(*d.sym, d.file, d.section, d.value); + ctx->duplicates.clear(); // Return if there were name resolution errors. if (errorCount()) @@ -2558,6 +2570,8 @@ void LinkerDriver::link(opt::InputArgList &args) { auto newObjectFiles = makeArrayRef(objectFiles).slice(numObjsBeforeLTO); parallelForEach(newObjectFiles, initializeLocalSymbols); parallelForEach(newObjectFiles, postParseObjectFile); + for (const DuplicateSymbol &d : ctx->duplicates) + reportDuplicate(*d.sym, d.file, d.section, d.value); // Handle --exclude-libs again because lto.tmp may reference additional // libcalls symbols defined in an excluded archive. This may override diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 0df7552..778258f 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1016,7 +1016,6 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // its corresponding ELF symbol table. template void ObjFile::initializeSymbols(const object::ELFFile &obj) { - ArrayRef sections(this->sections); SymbolTable &symtab = *elf::symtab; ArrayRef eSyms = this->getELFSyms(); @@ -1044,13 +1043,6 @@ void ObjFile::initializeSymbols(const object::ELFFile &obj) { continue; } - if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) - secIdx = check(getExtendedSymbolTableIndex(eSym, i, shndxTable)); - else if (secIdx >= SHN_LORESERVE) - secIdx = 0; - if (LLVM_UNLIKELY(secIdx >= sections.size())) - fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); - InputSectionBase *sec = sections[secIdx]; uint8_t stOther = eSym.st_other; uint8_t type = eSym.getType(); uint64_t value = eSym.st_value; @@ -1068,30 +1060,11 @@ void ObjFile::initializeSymbols(const object::ELFFile &obj) { continue; } - // If a defined symbol is in a discarded section, handle it as if it - // were an undefined symbol. Such symbol doesn't comply with the - // standard, but in practice, a .eh_frame often directly refer - // COMDAT member sections, and if a comdat group is discarded, some - // defined symbol in a .eh_frame becomes dangling symbols. - if (sec == &InputSection::discarded) { - Undefined und{this, StringRef(), binding, stOther, type, secIdx}; - // !LazyObjFile::lazy indicates that the file containing this object has - // not finished processing, i.e. this symbol is a result of a lazy symbol - // extract. We should demote the lazy symbol to an Undefined so that any - // relocations outside of the group to it will trigger a discarded section - // error. - if (sym->symbolKind == Symbol::LazyObjectKind && !sym->file->lazy) - sym->replace(und); - else - sym->resolve(und); - continue; - } - - // Handle global defined symbols. + // Handle global defined symbols. Defined::section will be set in postParse. if (binding == STB_GLOBAL || binding == STB_WEAK || binding == STB_GNU_UNIQUE) { - sym->resolve( - Defined{this, StringRef(), binding, stOther, type, value, size, sec}); + sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, + size, nullptr}); continue; } @@ -1156,10 +1129,12 @@ template void ObjFile::initializeLocalSymbols() { // Called after all ObjFile::parse is called for all ObjFiles. This checks // duplicate symbols and may do symbol property merge in the future. template void ObjFile::postParse() { + static std::mutex mu; ArrayRef eSyms = this->getELFSyms(); for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; - const Symbol &sym = *symbols[i]; + Symbol &sym = *symbols[i]; + uint32_t secIdx = eSym.st_shndx; // st_value of STT_TLS represents the assigned offset, not the actual // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can @@ -1170,23 +1145,41 @@ template void ObjFile::postParse() { errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " + toString(sym.file) + "\n>>> in " + toString(this)); - // !sym.file allows a symbol assignment redefines a symbol without an error. - if (sym.file == this || !sym.file || !sym.isDefined() || - eSym.st_shndx == SHN_UNDEF || eSym.st_shndx == SHN_COMMON || - eSym.getBinding() == STB_WEAK) + // Handle non-COMMON defined symbol below. !sym.file allows a symbol + // assignment to redefine a symbol without an error. + if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF || + secIdx == SHN_COMMON) continue; - uint32_t secIdx = eSym.st_shndx; + if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) - secIdx = cantFail(getExtendedSymbolTableIndex(eSym, i, shndxTable)); + secIdx = check(getExtendedSymbolTableIndex(eSym, i, shndxTable)); else if (secIdx >= SHN_LORESERVE) secIdx = 0; - if (sections[secIdx] == &InputSection::discarded) + if (LLVM_UNLIKELY(secIdx >= sections.size())) + fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); + InputSectionBase *sec = sections[secIdx]; + if (sec == &InputSection::discarded) { + if (sym.traced) { + printTraceSymbol(Undefined{this, sym.getName(), sym.binding, + sym.stOther, sym.type, secIdx}, + sym.getName()); + } + if (sym.file == this) { + std::lock_guard lock(mu); + ctx->nonPrevailingSyms.emplace_back(&sym, secIdx); + } + continue; + } + + if (sym.file == this) { + cast(sym).section = sec; continue; - // Allow absolute symbols with the same value for GNU ld compatibility. - if (!cast(sym).section && !sections[secIdx] && - cast(sym).value == eSym.st_value) + } + + if (eSym.getBinding() == STB_WEAK) continue; - reportDuplicate(sym, this, sections[secIdx], eSym.st_value); + std::lock_guard lock(mu); + ctx->duplicates.push_back({&sym, this, sec, eSym.st_value}); } } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index fa45475..ed1f216 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -545,9 +545,16 @@ static std::string maybeReportDiscarded(Undefined &sym) { // If the discarded section is a COMDAT. StringRef signature = file->getShtGroupSignature(objSections, elfSec); if (const InputFile *prevailing = - symtab->comdatGroups.lookup(CachedHashStringRef(signature))) + symtab->comdatGroups.lookup(CachedHashStringRef(signature))) { msg += "\n>>> section group signature: " + signature.str() + "\n>>> prevailing definition is in " + toString(prevailing); + if (sym.nonPrevailing) { + msg += "\n>>> or the symbol in the prevailing group had STB_WEAK " + "binding and the symbol in a non-prevailing group had STB_GLOBAL " + "binding. Mixing groups with STB_WEAK and STB_GLOBAL binding " + "signature is not supported"; + } + } return msg; } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index e8ead0d..bd78de7 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -537,11 +537,20 @@ bool Symbol::shouldReplace(const Defined &other) const { return !isGlobal() && other.isGlobal(); } -void elf::reportDuplicate(const Symbol &sym, InputFile *newFile, +void elf::reportDuplicate(const Symbol &sym, const InputFile *newFile, InputSectionBase *errSec, uint64_t errOffset) { if (config->allowMultipleDefinition) return; - const Defined *d = cast(&sym); + // A definition in a COMDAT may be reported as duplicate with a definition + // relative to .gnu.linkonce.t.__x86.get_pc_thunk.bx. + // .gnu.linkonce.t.__x86.get_pc_thunk.bx will be discarded, so there is + // actually no duplicate. + const Defined *d = dyn_cast(&sym); + if (!d) + return; + // Allow absolute symbols with the same value for GNU ld compatibility. + if (!d->section && !errSec && errOffset && d->value == errOffset) + return; if (!d->section || !errSec) { error("duplicate symbol: " + toString(sym) + "\n>>> defined in " + toString(sym.file) + "\n>>> defined in " + toString(newFile)); diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index cd0e54d..766c89b 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -377,6 +377,7 @@ public: // The section index if in a discarded section, 0 otherwise. uint32_t discardedSecIdx; + bool nonPrevailing = false; }; class SharedSymbol : public Symbol { @@ -558,7 +559,7 @@ template Defined *makeDefined(T &&...args) { Defined(std::forward(args)...); } -void reportDuplicate(const Symbol &sym, InputFile *newFile, +void reportDuplicate(const Symbol &sym, const InputFile *newFile, InputSectionBase *errSec, uint64_t errOffset); void maybeWarnUnorderableSymbol(const Symbol *sym); bool computeIsPreemptible(const Symbol &sym); diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 5878cdd..dc74b8a 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -35,6 +35,8 @@ Breaking changes * The GNU ld incompatible ``--no-define-common`` has been removed. * The obscure ``-dc``/``-dp`` options have been removed. * ``-d`` is now ignored. +* If a prevailing COMDAT group defines STB_WEAK symbol, having a STB_GLOBAL symbol in a non-prevailing group is now rejected with a diagnostic. + (`D120626 `_) COFF Improvements ----------------- diff --git a/lld/test/ELF/comdat-binding.s b/lld/test/ELF/comdat-binding.s index 6c6fd89..139a1db 100644 --- a/lld/test/ELF/comdat-binding.s +++ b/lld/test/ELF/comdat-binding.s @@ -8,11 +8,22 @@ # RUN: ld.lld -e 0 %t/u.o %t/w.o -o %t/u # RUN: llvm-readelf -s %t/u | FileCheck %s --check-prefix=UNIQUE -# RUN: ld.lld -e 0 %t/w.o %t/g.o -o %t/w -# RUN: llvm-readelf -s %t/w | FileCheck %s --check-prefix=WEAK +## We prefer STB_GLOBAL definition, then changing it to undefined since it is in +## in a non-prevailing COMDAT. Ideally this should be defined, but our behavior +## is fine because valid input cannot form this case. +# RUN: ld.lld -e 0 %t/w.o %t/g.o -o %t/und --noinhibit-exec 2>&1 | FileCheck %s --check-prefix=WARN +# RUN: llvm-readelf -s %t/und | FileCheck %s --check-prefix=UND # WEAK: NOTYPE WEAK DEFAULT [[#]] _ZZ1fvE1x # UNIQUE: OBJECT UNIQUE DEFAULT [[#]] _ZZ1fvE1x +# UND: NOTYPE GLOBAL DEFAULT UND _ZZ1fvE1x + +# WARN: warning: relocation refers to a symbol in a discarded section: f()::x +# WARN-NEXT: >>> defined in {{.*}}g.o +# WARN-NEXT: >>> section group signature: _ZZ1fvE1x +# WARN-NEXT: >>> prevailing definition is in {{.*}}w.o +# WARN-NEXT: >>> or the symbol in the prevailing group had STB_WEAK binding and the symbol in a non-prevailing group had STB_GLOBAL binding. Mixing groups with STB_WEAK and STB_GLOBAL binding signature is not supported +# WARN-NEXT: >>> referenced by {{.*}}g.o:(.text+0x3) #--- g.s movq _ZZ1fvE1x@gotpcrel(%rip), %rax diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s index b818b15..dec927d 100644 --- a/lld/test/ELF/comdat-discarded-error.s +++ b/lld/test/ELF/comdat-discarded-error.s @@ -11,6 +11,7 @@ # CHECK-NEXT: >>> defined in {{.*}}3.o # CHECK-NEXT: >>> section group signature: foo # CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o +# CHECK-NEXT: >>> or the symbol in the prevailing group {{.*}} # CHECK-NEXT: >>> referenced by {{.*}}1.o:(.text+0x1) # CHECK: error: relocation refers to a discarded section: .text.foo diff --git a/lld/test/ELF/comdat-discarded-lazy.s b/lld/test/ELF/comdat-discarded-lazy.s index e19e4b2..23b922a 100644 --- a/lld/test/ELF/comdat-discarded-lazy.s +++ b/lld/test/ELF/comdat-discarded-lazy.s @@ -26,6 +26,7 @@ # AA-NEXT: >>> defined in {{.*}}aa.o # AA-NEXT: >>> section group signature: foo # AA-NEXT: >>> prevailing definition is in {{.*}}1.o +# AA-NEXT: >>> or the symbol in the prevailing group {{.*}} # AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1) ## Test the case when the symbol causing a "discarded section" is ordered @@ -45,14 +46,15 @@ # ZZ-NEXT: >>> defined in {{.*}}zz.o # ZZ-NEXT: >>> section group signature: foo # ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o +# ZZ-NEXT: >>> or the symbol in the prevailing group {{.*}} # ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1) -## Don't error if the symbol which would cause "discarded section" -## was inserted before %tzz.o +## The definition in %tdef.o is outside a group. Currently we give an error +## because %tdef.o is not extracted. # RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o -# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null +# RUN: not ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s # RUN: rm -f %tdef.a && llvm-ar rc %tdef.a %tdef.o -# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null +# RUN: not ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s .globl _start _start: diff --git a/lld/test/ELF/duplicated-synthetic-sym.s b/lld/test/ELF/duplicated-synthetic-sym.s index 64e7161..0bf7800 100644 --- a/lld/test/ELF/duplicated-synthetic-sym.s +++ b/lld/test/ELF/duplicated-synthetic-sym.s @@ -9,8 +9,8 @@ // RUN: not ld.lld %t.o --format binary file.bin -o /dev/null 2>&1 | FileCheck %s // CHECK: duplicate symbol: _binary_file_bin_start -// CHECK-NEXT: defined at {{.*}}.o:(.text+0x0) -// CHECK-NEXT: defined at file.bin:(.data+0x0) +// CHECK-NEXT: defined in {{.*}}.o +// CHECK-NEXT: defined in .globl _binary_file_bin_start _binary_file_bin_start: diff --git a/lld/test/ELF/exclude-discarded-error2.s b/lld/test/ELF/exclude-discarded-error2.s index 5474cfe..cda7bc7 100644 --- a/lld/test/ELF/exclude-discarded-error2.s +++ b/lld/test/ELF/exclude-discarded-error2.s @@ -1,14 +1,14 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: echo '.section .foo,"ae"; .weak foo; foo:' | \ +# RUN: echo '.section .foo,"aG",@progbits,zz,comdat; .weak foo; foo:' | \ # RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o -# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck %s +# RUN: ld.lld %t.o %t1.o -o %t +# RUN: llvm-readelf -s %t | FileCheck %s -# Because foo defined in %t1.o is weak, it does not override global undefined -# in %t.o -# CHECK-NOT: discarded section -# CHECK: undefined symbol: foo +# CHECK: NOTYPE WEAK DEFAULT UND foo .globl _start _start: jmp foo + +.section .foo,"aG",@progbits,zz,comdat diff --git a/lld/test/ELF/lto/comdat-mixed-archive.test b/lld/test/ELF/lto/comdat-mixed-archive.test index ee644a8..d7ea4eb 100644 --- a/lld/test/ELF/lto/comdat-mixed-archive.test +++ b/lld/test/ELF/lto/comdat-mixed-archive.test @@ -31,8 +31,8 @@ TRACE: lib.a(obj.o): lazy definition of foo TRACE-NEXT: lib.a(obj.o): lazy definition of bar TRACE-NEXT: lib.a(bc.bc): definition of foo TRACE-NEXT: lib.a(bc.bc): reference to bar -TRACE-NEXT: lib.a(obj.o): reference to foo TRACE-NEXT: lib.a(obj.o): definition of bar +TRACE-NEXT: lib.a(obj.o): reference to foo TRACE-NEXT: : reference to foo ;; The definition of "foo" is visible outside the LTO result. TRACE-NEXT: lto.tmp: definition of foo -- 2.7.4