From ba890da2878299dc82b104c06f067e45162d880f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Jun 2020 11:25:55 -0700 Subject: [PATCH] [ELF] Demote lazy symbols relative to a discarded section to Undefined Fixes PR45594. In `ObjFile::initializeSymbols()`, for a defined symbol relative to a discarded section (due to section group rules), it may have been inserted as a lazy symbol. We need to demote it to an Undefined to enable the `discarded section` error happened in a later pass. Add `LazyObjFile::fetched` (if true) and `ArchiveFile::parsed` (if false) to represent that there is an ongoing lazy symbol fetch and we should replace the current lazy symbol with an Undefined, instead of calling `Symbol::resolve` (`Symbol::resolve` should be called if the lazy symbol was added by an unrelated archive/lazy object). As a side result, one small issue in start-lib-comdat.s is now fixed. The hack motivating D51892 will be unsupported: if `.gnu.linkonce.t.__i686.get_pc_thunk.bx` in an archive is referenced by another section, this will likely be errored unless the function is also defined in a regular object file. (Bringing back rL330869 would error `undefined symbol` instead of the more relevant `discarded section`.) Note, glibc i386's crti.o still works (PR31215), because `.gnu.linkonce.t.__x86.get_pc_thunk.bx` is in crti.o (one of the first regular object files in a linker command line). Reviewed By: psmith Differential Revision: https://reviews.llvm.org/D79300 --- lld/ELF/InputFiles.cpp | 25 ++++++++++++--- lld/ELF/InputFiles.h | 4 +++ lld/test/ELF/comdat-discarded-lazy.s | 60 ++++++++++++++++++++++++++++++++++++ lld/test/ELF/i386-linkonce.s | 4 ++- lld/test/ELF/start-lib-comdat.s | 2 +- 5 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 lld/test/ELF/comdat-discarded-lazy.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index c451aee..5bbd6f0 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1117,8 +1117,20 @@ template void ObjFile::initializeSymbols() { // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { - this->symbols[i]->resolve( - Undefined{this, name, binding, stOther, type, secIdx}); + Undefined und{this, name, binding, stOther, type, secIdx}; + Symbol *sym = this->symbols[i]; + // !ArchiveFile::parsed or LazyObjFile::fetched means that the file + // containing this object has not finished processing, i.e. this symbol is + // a result of a lazy symbol fetch. We should demote the lazy symbol to an + // Undefined so that any relocations outside of the group to it will + // trigger a discarded section error. + if ((sym->symbolKind == Symbol::LazyArchiveKind && + !cast(sym->file)->parsed) || + (sym->symbolKind == Symbol::LazyObjectKind && + cast(sym->file)->fetched)) + sym->replace(und); + else + sym->resolve(und); continue; } @@ -1141,6 +1153,10 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&file) void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); + + // Inform a future invocation of ObjFile::initializeSymbols() that this + // archive has been processed. + parsed = true; } // Returns a buffer pointing to a member file containing a given symbol. @@ -1615,14 +1631,13 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } void LazyObjFile::fetch() { - if (mb.getBuffer().empty()) + if (fetched) return; + fetched = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; - mb = {}; - // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 51882e0..7af85e4 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -307,6 +307,8 @@ public: template void parse(); void fetch(); + bool fetched = false; + private: uint64_t offsetInArchive; }; @@ -327,6 +329,8 @@ public: size_t getMemberCount() const; size_t getFetchedMemberCount() const { return seen.size(); } + bool parsed = false; + private: std::unique_ptr file; llvm::DenseSet seen; diff --git a/lld/test/ELF/comdat-discarded-lazy.s b/lld/test/ELF/comdat-discarded-lazy.s new file mode 100644 index 0000000..8ee1515 --- /dev/null +++ b/lld/test/ELF/comdat-discarded-lazy.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +## Test that lazy symbols in a section group can be demoted to Undefined, +## so that we can report a "discarded section" error. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo '.globl f1, foo; f1: call foo; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; foo:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o + +## Test the case when the symbol causing a "discarded section" is ordered +## *before* the symbol fetching the lazy object. +## The test relies on the symbol table order of llvm-mc (lexical), which will +## need adjustment if llvm-mc changes its behavior. +# RUN: echo '.globl f2, aa; f2: call aa; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; aa:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %taa.o +# RUN: llvm-nm -p %taa.o | FileCheck --check-prefix=AA-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %taa.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s +# RUN: rm -f %taa.a && llvm-ar rc %taa.a %taa.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %taa.a -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s + +# AA-NM: aa +# AA-NM: f2 + +# AA: error: relocation refers to a symbol in a discarded section: aa +# AA-NEXT: >>> defined in {{.*}}aa.o +# AA-NEXT: >>> section group signature: foo +# AA-NEXT: >>> prevailing definition is in {{.*}}1.o +# AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1) + +## Test the case when the symbol causing a "discarded section" is ordered +## *after* the symbol fetching the lazy object. +# RUN: echo '.globl f2, zz; f2: call zz; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; zz:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %tzz.o +# RUN: llvm-nm -p %tzz.o | FileCheck --check-prefix=ZZ-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s +# RUN: rm -f %tzz.a && llvm-ar rc %tzz.a %tzz.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %tzz.a -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s + +# ZZ-NM: f2 +# ZZ-NM: zz + +# ZZ: error: relocation refers to a symbol in a discarded section: zz +# ZZ-NEXT: >>> defined in {{.*}}zz.o +# ZZ-NEXT: >>> section group signature: foo +# ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o +# ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1) + +## Don't error if the symbol which would cause "discarded section" +## was inserted before %tzz.o +# RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null +# RUN: rm -f %tdef.a && llvm-ar rc %tdef.a %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null + +.globl _start +_start: + call f1 + call f2 diff --git a/lld/test/ELF/i386-linkonce.s b/lld/test/ELF/i386-linkonce.s index c06b042..f7da0ae 100644 --- a/lld/test/ELF/i386-linkonce.s +++ b/lld/test/ELF/i386-linkonce.s @@ -2,7 +2,9 @@ // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %p/Inputs/i386-linkonce.s -o %t2.o // RUN: llvm-ar rcs %t2.a %t2.o -// RUN: ld.lld %t.o %t2.a -o %t +// RUN: not ld.lld %t.o %t2.a -o /dev/null 2>&1 | FileCheck %s + +// CHECK: error: relocation refers to a symbol in a discarded section: __i686.get_pc_thunk.bx .globl _start _start: diff --git a/lld/test/ELF/start-lib-comdat.s b/lld/test/ELF/start-lib-comdat.s index 34c9934..996ddb4 100644 --- a/lld/test/ELF/start-lib-comdat.s +++ b/lld/test/ELF/start-lib-comdat.s @@ -6,7 +6,7 @@ // RUN: ld.lld -shared -o %t3 %t1.o --start-lib %t2.o --end-lib // RUN: llvm-readobj --symbols %t3 | FileCheck %s // RUN: ld.lld -shared -o %t3 --start-lib %t2.o --end-lib %t1.o -// RUN: llvm-readobj --symbols %t3 | FileCheck %s +// RUN: llvm-readobj --symbols %t3 | FileCheck /dev/null --implicit-check-not='Name: zed' // CHECK: Name: zed // CHECK-NEXT: Value: -- 2.7.4