From: Fangrui Song Date: Tue, 16 May 2023 16:22:21 +0000 (-0700) Subject: [llvm-objdump][X86] Add @plt symbols for .plt.got X-Git-Tag: upstream/17.0.6~8318 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9e37a7bd1f38fed4e00704d561b3897fe8915c4c;p=platform%2Fupstream%2Fllvm.git [llvm-objdump][X86] Add @plt symbols for .plt.got If a symbol needs both JUMP_SLOT and GLOB_DAT relocations, there is a minor linker optimization to keep just GLOB_DAT. This optimization is only implemented by GNU ld's x86 port and mold. https://maskray.me/blog/2021-08-29-all-about-global-offset-table#combining-.got-and-.got.plt With this optimization, the PLT entry is placed in .plt.got and the associated GOTPLT entry is placed in .got (ld.bfd -z now) or .got.plt (ld.bfd -z lazy). The relocation is in .rel[a].dyn. This patch synthesizes `symbol@plt` labels for these .plt.got entries. Example: ``` cat > a.s < b.s <: 1000: ff 35 ea 1f 00 00 pushq 0x1fea(%rip) # 0x2ff0 <_GLOBAL_OFFSET_TABLE_+0x8> 1006: ff 25 ec 1f 00 00 jmpq *0x1fec(%rip) # 0x2ff8 <_GLOBAL_OFFSET_TABLE_+0x10> 100c: 0f 1f 40 00 nopl (%rax) 0000000000001010 : 1010: ff 25 ea 1f 00 00 jmpq *0x1fea(%rip) # 0x3000 <_GLOBAL_OFFSET_TABLE_+0x18> 1016: 68 00 00 00 00 pushq $0x0 101b: e9 e0 ff ff ff jmp 0x1000 <.plt> 0000000000001020 : 1020: ff 25 e2 1f 00 00 jmpq *0x1fe2(%rip) # 0x3008 <_GLOBAL_OFFSET_TABLE_+0x20> 1026: 68 01 00 00 00 pushq $0x1 102b: e9 d0 ff ff ff jmp 0x1000 <.plt> Disassembly of section .plt.got: 0000000000001030 : 1030: ff 25 a2 1f 00 00 jmpq *0x1fa2(%rip) # 0x2fd8 1036: 66 90 nop 0000000000001038 : 1038: ff 25 a2 1f 00 00 jmpq *0x1fa2(%rip) # 0x2fe0 103e: 66 90 nop ``` For x86-32, with -z now, if we remove `foo0` and `foo1`, the absence of a regular PLT will cause GNU ld to omit .got.plt, and our code cannot synthesize @plt labels. This is an extreme corner case that almost never happens in practice (to trigger the case, ensure every PLT symbol has had its address taken). 
To fix it, we can get the `_GLOBAL_OFFSET_TABLE_` symbol value, but the complexity is not worth it. Close https://github.com/llvm/llvm-project/issues/62537 Reviewed By: bd1976llvm Differential Revision: https://reviews.llvm.org/D149817 --- diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index d06aed0..1c22028e 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -48,6 +48,12 @@ extern const llvm::EnumEntry ElfSymbolTypes[NumElfSymbolTypes]; class elf_symbol_iterator; +struct ELFPltEntry { + StringRef Section; + std::optional Symbol; + uint64_t Address; +}; + class ELFObjectFileBase : public ObjectFile { friend class ELFRelocationRef; friend class ELFSectionRef; @@ -97,8 +103,7 @@ public: virtual uint16_t getEMachine() const = 0; - std::vector, uint64_t>> - getPltAddresses() const; + std::vector getPltEntries() const; /// Returns a vector containing a symbol version for each dynamic symbol. /// Returns an empty vector if version sections do not exist. 
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 204ba6b..da36da3 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -601,20 +601,21 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { TheTriple.setArchName(Triple); } -std::vector, uint64_t>> -ELFObjectFileBase::getPltAddresses() const { +std::vector ELFObjectFileBase::getPltEntries() const { std::string Err; const auto Triple = makeTriple(); const auto *T = TargetRegistry::lookupTarget(Triple.str(), Err); if (!T) return {}; - uint64_t JumpSlotReloc = 0; + uint32_t JumpSlotReloc = 0, GlobDatReloc = 0; switch (Triple.getArch()) { case Triple::x86: JumpSlotReloc = ELF::R_386_JUMP_SLOT; + GlobDatReloc = ELF::R_386_GLOB_DAT; break; case Triple::x86_64: JumpSlotReloc = ELF::R_X86_64_JUMP_SLOT; + GlobDatReloc = ELF::R_X86_64_GLOB_DAT; break; case Triple::aarch64: case Triple::aarch64_be: @@ -628,7 +629,8 @@ ELFObjectFileBase::getPltAddresses() const { T->createMCInstrAnalysis(MII.get())); if (!MIA) return {}; - std::optional Plt, RelaPlt; + std::vector> PltEntries; + std::optional RelaPlt, RelaDyn; uint64_t GotBaseVA = 0; for (const SectionRef &Section : sections()) { Expected NameOrErr = Section.getName(); @@ -638,47 +640,66 @@ ELFObjectFileBase::getPltAddresses() const { } StringRef Name = *NameOrErr; - if (Name == ".plt") - Plt = Section; - else if (Name == ".rela.plt" || Name == ".rel.plt") + if (Name == ".rela.plt" || Name == ".rel.plt") { RelaPlt = Section; - else if (Name == ".got.plt") + } else if (Name == ".rela.dyn" || Name == ".rel.dyn") { + RelaDyn = Section; + } else if (Name == ".got.plt") { GotBaseVA = Section.getAddress(); + } else if (Name == ".plt" || Name == ".plt.got") { + Expected PltContents = Section.getContents(); + if (!PltContents) { + consumeError(PltContents.takeError()); + return {}; + } + llvm::append_range( + PltEntries, + MIA->findPltEntries(Section.getAddress(), + 
arrayRefFromStringRef(*PltContents), Triple)); + } } - if (!Plt || !RelaPlt) - return {}; - Expected PltContents = Plt->getContents(); - if (!PltContents) { - consumeError(PltContents.takeError()); - return {}; - } - auto PltEntries = MIA->findPltEntries( - Plt->getAddress(), arrayRefFromStringRef(*PltContents), Triple); // Build a map from GOT entry virtual address to PLT entry virtual address. DenseMap GotToPlt; - for (auto [Plt, GotPltEntry] : PltEntries) { + for (auto [Plt, GotPlt] : PltEntries) { + uint64_t GotPltEntry = GotPlt; // An x86-32 PIC PLT uses jmp DWORD PTR [ebx-offset]. Add // _GLOBAL_OFFSET_TABLE_ (EBX) to get the .got.plt (or .got) entry address. - if (static_cast(GotPltEntry) < 0 && getEMachine() == ELF::EM_386) - GotPltEntry = ~GotPltEntry + GotBaseVA; + // See X86MCTargetDesc.cpp:findPltEntries for the 1 << 32 bit. + if (GotPltEntry & (uint64_t(1) << 32) && getEMachine() == ELF::EM_386) + GotPltEntry = static_cast(GotPltEntry) + GotBaseVA; GotToPlt.insert(std::make_pair(GotPltEntry, Plt)); } + // Find the relocations in the dynamic relocation table that point to // locations in the GOT for which we know the corresponding PLT entry. 
- std::vector, uint64_t>> Result; - for (const auto &Relocation : RelaPlt->relocations()) { - if (Relocation.getType() != JumpSlotReloc) - continue; - auto PltEntryIter = GotToPlt.find(Relocation.getOffset()); - if (PltEntryIter != GotToPlt.end()) { - symbol_iterator Sym = Relocation.getSymbol(); - if (Sym == symbol_end()) - Result.emplace_back(std::nullopt, PltEntryIter->second); - else - Result.emplace_back(Sym->getRawDataRefImpl(), PltEntryIter->second); + std::vector Result; + auto handleRels = [&](iterator_range Rels, + uint32_t RelType, StringRef PltSec) { + for (const auto &R : Rels) { + if (R.getType() != RelType) + continue; + auto PltEntryIter = GotToPlt.find(R.getOffset()); + if (PltEntryIter != GotToPlt.end()) { + symbol_iterator Sym = R.getSymbol(); + if (Sym == symbol_end()) + Result.push_back( + ELFPltEntry{PltSec, std::nullopt, PltEntryIter->second}); + else + Result.push_back(ELFPltEntry{PltSec, Sym->getRawDataRefImpl(), + PltEntryIter->second}); + } } - } + }; + + if (RelaPlt) + handleRels(RelaPlt->relocations(), JumpSlotReloc, ".plt"); + + // If a symbol needing a PLT entry also needs a GLOB_DAT relocation, GNU ld's + // x86 port places the PLT entry in the .plt.got section. + if (RelaDyn) + handleRels(RelaDyn->relocations(), GlobDatReloc, ".plt.got"); + return Result; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 160384b..f98be66 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -577,8 +577,11 @@ findX86PltEntries(uint64_t PltSectionVA, ArrayRef PltContents) { if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0xa3) { // The jmp instruction at the beginning of each PLT entry jumps to the // address of the base of the .got.plt section plus the immediate. + // Set the 1 << 32 bit to let ELFObjectFileBase::getPltEntries convert the + // offset to an address. 
Imm may be a negative int32_t if the GOT entry is + // in .got. uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2); - Result.emplace_back(PltSectionVA + Byte, ~static_cast(Imm)); + Result.emplace_back(PltSectionVA + Byte, Imm | (uint64_t(1) << 32)); Byte += 6; } else if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) { // The jmp instruction at the beginning of each PLT entry jumps to the diff --git a/llvm/test/tools/llvm-objdump/X86/plt-got.test b/llvm/test/tools/llvm-objdump/X86/plt-got.test new file mode 100644 index 0000000..a4f15b2 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/plt-got.test @@ -0,0 +1,233 @@ +## If a symbol needing a PLT entry also needs a GLOB_DAT relocation, GNU ld's +## x86 port places the PLT entry in .plt.got, relocated by a GLOB_DAT. The +## JUMP_SLOT relocation is unused in this case. +## Test that we synthesize @plt symbols for such PLT entries. +# RUN: yaml2obj --docnum=1 %s -o %t.x86-64 +# RUN: llvm-objdump -d %t.x86-64 | FileCheck %s --check-prefix=64 +# RUN: yaml2obj --docnum=2 %s -o %t.x86-32 +# RUN: llvm-objdump -d %t.x86-32 | FileCheck %s --check-prefix=32 + +# 64: Disassembly of section .plt: +# 64-EMPTY: +# 64-NEXT: <.plt>: +# 64: : +# 64: : + +# 64: Disassembly of section .plt.got: +# 64-EMPTY: +# 64-NEXT: : +# 64: : + +# 64: <_start>: +# 64-NEXT: movq {{.*}}(%rip), %rax +# 64-NEXT: movq {{.*}}(%rip), %rax +# 64-NEXT: callq {{.*}} +# 64-NEXT: callq {{.*}} +# 64-NEXT: callq {{.*}} +# 64-NEXT: callq {{.*}} + +# 32: Disassembly of section .plt: +# 32-EMPTY: +# 32-NEXT: <.plt>: +# 32: : +# 32: : + +# 32: Disassembly of section .plt.got: +# 32-EMPTY: +# 32-NEXT: : +# 32: : + +# 32: <_start>: +# 32-NEXT: movl -0x8(%eax), %eax +# 32-NEXT: movl -0x4(%eax), %eax +# 32-NEXT: calll {{.*}} +# 32-NEXT: calll {{.*}} +# 32-NEXT: calll {{.*}} +# 32-NEXT: calll {{.*}} + +## %t.x86-64 is linked with ld.bfd -pie -z now a.o b.so and +## doesn't have .got.plt. 
+## %t.x86-32 is linked with ld.bfd -pie -z lazy a.o b.so and +## has .got.plt. + +## a.s +## .globl _start; _start: +## mov combined0@gotpcrel(%rip), %rax # movl combined0@GOT(%eax), %eax for x86-32 +## mov combined1@gotpcrel(%rip), %rax # movl combined1@GOT(%eax), %eax for x86-32 +## call combined0@plt +## call combined1@plt +## call foo0@plt +## call foo1@plt + +## b.s +## .globl foo0, foo1, combined0, combined1 +## foo0: foo1: combined0: combined1: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 + Entry: 0x1040 +Sections: + - Name: .rela.dyn + Type: SHT_RELA + Flags: [ SHF_ALLOC ] + Address: 0x340 + Link: .dynsym + AddressAlign: 0x8 + Relocations: + - Offset: 0x2FF0 + Symbol: combined0 + Type: R_X86_64_GLOB_DAT + - Offset: 0x2FF8 + Symbol: combined1 + Type: R_X86_64_GLOB_DAT + - Name: .rela.plt + Type: SHT_RELA + Flags: [ SHF_ALLOC, SHF_INFO_LINK ] + Address: 0x370 + Link: .dynsym + AddressAlign: 0x8 + Info: .got + Relocations: + - Offset: 0x2FE0 + Symbol: foo1 + Type: R_X86_64_JUMP_SLOT + - Offset: 0x2FE8 + Symbol: foo0 + Type: R_X86_64_JUMP_SLOT + - Name: .plt + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + AddressAlign: 0x10 + EntSize: 0x10 + Offset: 0x1000 + Content: FF35CA1F0000FF25CC1F00000F1F4000FF25CA1F00006800000000E9E0FFFFFFFF25C21F00006801000000E9D0FFFFFF + - Name: .plt.got + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1030 + AddressAlign: 0x8 + EntSize: 0x8 + Content: FF25BA1F00006690FF25BA1F00006690 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1040 + AddressAlign: 0x1 + Content: 488B05A91F0000488B05AA1F0000E8DDFFFFFFE8E0FFFFFFE8C3FFFFFFE8AEFFFFFF + - Name: .got + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2FC8 + AddressAlign: 0x8 + EntSize: 0x8 + Content: '682E000000000000000000000000000000000000000000001610000000000000261000000000000000000000000000000000000000000000' 
+Symbols: + - Name: _start + Section: .text + Binding: STB_GLOBAL + Value: 0x1040 +DynamicSymbols: + - Name: foo1 + Binding: STB_GLOBAL + - Name: foo0 + Binding: STB_GLOBAL + - Name: combined0 + Binding: STB_GLOBAL + - Name: combined1 + Binding: STB_GLOBAL +... + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_386 + Entry: 0x1040 +Sections: + - Name: .rel.dyn + Type: SHT_REL + Flags: [ SHF_ALLOC ] + Address: 0x218 + Link: .dynsym + AddressAlign: 0x4 + Relocations: + - Offset: 0x2FEC + Symbol: combined0 + Type: R_386_GLOB_DAT + - Offset: 0x2FF0 + Symbol: combined1 + Type: R_386_GLOB_DAT + - Name: .rel.plt + Type: SHT_REL + Flags: [ SHF_ALLOC, SHF_INFO_LINK ] + Address: 0x228 + Link: .dynsym + AddressAlign: 0x4 + Info: .got.plt + Relocations: + - Offset: 0x3000 + Symbol: foo1 + Type: R_386_JUMP_SLOT + - Offset: 0x3004 + Symbol: foo0 + Type: R_386_JUMP_SLOT + - Name: .plt + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + AddressAlign: 0x10 + EntSize: 0x4 + Offset: 0x1000 + Content: FFB304000000FFA30800000000000000FFA30C0000006800000000E9E0FFFFFFFFA3100000006808000000E9D0FFFFFF + - Name: .plt.got + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1030 + AddressAlign: 0x8 + EntSize: 0x8 + Content: FFA3F8FFFFFF6690FFA3FCFFFFFF6690 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1040 + AddressAlign: 0x1 + Content: 8B80F8FFFFFF8B80FCFFFFFFE8DFFFFFFFE8E2FFFFFFE8C5FFFFFFE8B0FFFFFF + - Name: .got + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2FEC + AddressAlign: 0x4 + EntSize: 0x4 + Content: '0000000000000000' + - Name: .got.plt + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2FF4 + AddressAlign: 0x4 + EntSize: 0x4 + Content: 442F000000000000000000001610000026100000 +Symbols: + - Name: _GLOBAL_OFFSET_TABLE_ + Type: STT_OBJECT + Section: .got.plt + Value: 0x2FF4 + - Name: _start + Section: .text + 
Binding: STB_GLOBAL + Value: 0x1040 +DynamicSymbols: + - Name: combined0 + Binding: STB_GLOBAL + - Name: foo1 + Binding: STB_GLOBAL + - Name: foo0 + Binding: STB_GLOBAL + - Name: combined1 + Binding: STB_GLOBAL +... diff --git a/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp index 3e03c82..64ebe48 100644 --- a/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp +++ b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp @@ -574,15 +574,15 @@ Error FileAnalysis::parseSymbolTable() { } } if (auto *ElfObject = dyn_cast(Object)) { - for (const auto &Addr : ElfObject->getPltAddresses()) { - if (!Addr.first) + for (const auto &Plt : ElfObject->getPltEntries()) { + if (!Plt.Symbol) continue; - object::SymbolRef Sym(*Addr.first, Object); + object::SymbolRef Sym(*Plt.Symbol, Object); auto SymNameOrErr = Sym.getName(); if (!SymNameOrErr) consumeError(SymNameOrErr.takeError()); else if (TrapOnFailFunctions.contains(*SymNameOrErr)) - TrapOnFailFunctionAddresses.insert(Addr.second); + TrapOnFailFunctionAddresses.insert(Plt.Address); } } return Error::success(); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 8e372cb..4267ce6 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -917,34 +917,30 @@ static void addPltEntries(const ObjectFile &Obj, auto *ElfObj = dyn_cast(&Obj); if (!ElfObj) return; - std::optional Plt; - for (const SectionRef &Section : Obj.sections()) { + DenseMap Sections; + for (SectionRef Section : Obj.sections()) { Expected SecNameOrErr = Section.getName(); if (!SecNameOrErr) { consumeError(SecNameOrErr.takeError()); continue; } - if (*SecNameOrErr == ".plt") - Plt = Section; + Sections[*SecNameOrErr] = Section; } - if (!Plt) - return; - for (auto PltEntry : ElfObj->getPltAddresses()) { - if (PltEntry.first) { - SymbolRef Symbol(*PltEntry.first, ElfObj); + for (auto Plt : ElfObj->getPltEntries()) { + if (Plt.Symbol) 
{ + SymbolRef Symbol(*Plt.Symbol, ElfObj); uint8_t SymbolType = getElfSymbolType(Obj, Symbol); if (Expected NameOrErr = Symbol.getName()) { if (!NameOrErr->empty()) - AllSymbols[*Plt].emplace_back( - PltEntry.second, Saver.save((*NameOrErr + "@plt").str()), - SymbolType); + AllSymbols[Sections[Plt.Section]].emplace_back( + Plt.Address, Saver.save((*NameOrErr + "@plt").str()), SymbolType); continue; } else { // The warning has been reported in disassembleObject(). consumeError(NameOrErr.takeError()); } } - reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) + + reportWarning("PLT entry at 0x" + Twine::utohexstr(Plt.Address) + " references an invalid symbol", Obj.getFileName()); }