From 614dc11ca8bc8cb3296dbcc4712d98408d438f45 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 14 Nov 2018 17:56:43 +0000 Subject: [PATCH] [PPC64] Long branch thunks. On PowerPC64, when a function call offset is too large to encode in a call instruction the address is stored in a table in the data segment. A thunk is used to load the branch target address from the table relative to the TOC-pointer and indirectly branch to the callee. When linking position-dependent code the addresses are stored directly in the table; for position-independent code the table is allocated and filled in at load time by the dynamic linker. For position-independent code the branch targets could have gone in the .got.plt but using the .branch_lt section for both position-dependent and position-independent binaries keeps it consistent and helps keep this PPC64-specific logic separated from the target-independent code handling the .got.plt. Differential Revision: https://reviews.llvm.org/D53408 llvm-svn: 346877 --- lld/ELF/Arch/PPC64.cpp | 26 ++++++- lld/ELF/Relocations.cpp | 1 + lld/ELF/Symbols.cpp | 11 +++ lld/ELF/Symbols.h | 7 ++ lld/ELF/SyntheticSections.cpp | 47 +++++++++++++ lld/ELF/SyntheticSections.h | 20 ++++++ lld/ELF/Thunks.cpp | 83 ++++++++++++++++++---- lld/ELF/Writer.cpp | 6 ++ lld/test/ELF/basic-ppc64.s | 51 +++++++++----- lld/test/ELF/basic64be.s | 65 +++++++++++------ lld/test/ELF/ppc64-call-reach.s | 42 +++++++++-- lld/test/ELF/ppc64-long-branch.s | 121 ++++++++++++++++++++++++++++++++ lld/test/ELF/ppc64-shared-long_branch.s | 114 ++++++++++++++++++++++++++++++ 13 files changed, 532 insertions(+), 62 deletions(-) create mode 100644 lld/test/ELF/ppc64-long-branch.s create mode 100644 lld/test/ELF/ppc64-shared-long_branch.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 4613179..884f176 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -113,6 +113,7 @@ public: void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr 
Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; @@ -709,9 +710,28 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const { - // If a function is in the plt it needs to be called through - // a call stub. - return Type == R_PPC64_REL24 && S.isInPlt(); + // The only call relocation we currently support is the REL24 type. + if (Type != R_PPC64_REL24) + return false; + + // If a function is in the Plt it needs to be called with a call-stub. + if (S.isInPlt()) + return true; + + // If a symbol is a weak undefined and we are compiling an executable + // it doesn't need a range-extending thunk since it can't be called. + if (S.isUndefWeak() && !Config->Shared) + return false; + + // If the offset exceeds the range of the branch type then it will need + // a range-extending thunk. 
+ return !inBranchRange(Type, BranchAddr, S.getVA()); +} + +bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { + assert(Type == R_PPC64_REL24 && "Unexpected relocation type used in branch"); + int64_t Offset = Dst - Src; + return isInt<26>(Offset); } RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data, diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index d2c4bbb..b4ed71f 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -489,6 +489,7 @@ static void replaceWithDefined(Symbol &Sym, SectionBase *Sec, uint64_t Value, Sym.PltIndex = Old.PltIndex; Sym.GotIndex = Old.GotIndex; Sym.VerdefIndex = Old.VerdefIndex; + Sym.PPC64BranchltIndex = Old.PPC64BranchltIndex; Sym.IsPreemptible = true; Sym.ExportDynamic = true; Sym.IsUsedInRegularObj = true; diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 5f7d953..2ff2edf 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -138,6 +138,11 @@ uint64_t Symbol::getGotPltOffset() const { return (PltIndex + Target->GotPltHeaderEntriesNum) * Target->GotPltEntrySize; } +uint64_t Symbol::getPPC64LongBranchOffset() const { + assert(PPC64BranchltIndex != 0xffff); + return PPC64BranchltIndex * Target->GotPltEntrySize; +} + uint64_t Symbol::getPltVA() const { if (this->IsInIplt) return In.Iplt->getVA() + PltIndex * Target->PltEntrySize; @@ -149,6 +154,12 @@ uint64_t Symbol::getPltOffset() const { return Target->getPltEntryOffset(PltIndex); } +uint64_t Symbol::getPPC64LongBranchTableVA() const { + assert(PPC64BranchltIndex != 0xffff); + return In.PPC64LongBranchTarget->getVA() + + PPC64BranchltIndex * Target->GotPltEntrySize; +} + uint64_t Symbol::getSize() const { if (const auto *DR = dyn_cast(this)) return DR->Size; diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index a6c5697..26a80ac 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -79,6 +79,7 @@ public: uint32_t DynsymIndex = 0; uint32_t GotIndex = -1; uint32_t PltIndex = -1; + uint32_t 
GlobalDynIndex = -1; // This field is a index to the symbol's version definition. @@ -87,6 +88,9 @@ public: // Version definition index. uint16_t VersionId; + // An index into the .branch_lt section on PPC64. + uint16_t PPC64BranchltIndex = -1; + // Symbol binding. This is not overwritten by replaceSymbol to track // changes during resolution. In particular: // - An undefined weak is still weak when it resolves to a shared library. @@ -159,6 +163,7 @@ public: bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } + bool isInPPC64Branchlt() const { return PPC64BranchltIndex != 0xffff; } uint64_t getVA(int64_t Addend = 0) const; @@ -168,6 +173,8 @@ public: uint64_t getGotPltVA() const; uint64_t getPltVA() const; uint64_t getPltOffset() const; + uint64_t getPPC64LongBranchTableVA() const; + uint64_t getPPC64LongBranchOffset() const; uint64_t getSize() const; OutputSection *getOutputSection() const; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 2ceec55..97fd839 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3070,6 +3070,53 @@ bool ThunkSection::assignOffsets() { return Changed; } +// If linking position-dependent code then the table will store the addresses +// directly in the binary so the section has type SHT_PROGBITS. If linking +// position-independent code the section has type SHT_NOBITS since it will be +// allocated and filled in by the dynamic linker. +PPC64LongBranchTargetSection::PPC64LongBranchTargetSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE, + Config->Pic ? 
SHT_NOBITS : SHT_PROGBITS, 8, + ".branch_lt") {} + +void PPC64LongBranchTargetSection::addEntry(Symbol &Sym) { + assert(Sym.PPC64BranchltIndex == 0xffff); + Sym.PPC64BranchltIndex = Entries.size(); + Entries.push_back(&Sym); +} + +size_t PPC64LongBranchTargetSection::getSize() const { + return Entries.size() * 8; +} + +void PPC64LongBranchTargetSection::writeTo(uint8_t *Buf) { + assert(Target->GotPltEntrySize == 8); + // If linking non-pic we have the final addresses of the targets and they get + // written to the table directly. For pic the dynamic linker will allocate + // the section and fill it in. + if (Config->Pic) + return; + + for (const Symbol *Sym : Entries) { + assert(Sym->getVA()); + // Need calls to branch to the local entry-point since a long-branch + // must be a local-call. + write64(Buf, + Sym->getVA() + getPPC64GlobalEntryToLocalEntryOffset(Sym->StOther)); + Buf += Target->GotPltEntrySize; + } +} + +bool PPC64LongBranchTargetSection::empty() const { + // `removeUnusedSyntheticSections()` is called before thunk allocation which + // is too early to determine if this section will be empty or not. We need + // Finalized to keep the section alive until after thunk creation. Finalized + // only gets set to true once `finalizeSections()` is called after thunk + // creation. Because of this, if we don't create any long-branch thunks we end + // up with an empty .branch_lt section in the binary. + return Finalized && Entries.empty(); +} + InStruct elf::In; template GdbIndexSection *GdbIndexSection::create(); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 3ddb408..f35a8bb 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -964,6 +964,25 @@ private: size_t Size = 0; }; +// This section is used to store the addresses of functions that are called +// in range-extending thunks on PowerPC64. 
When producing position-dependent +// code the addresses are link-time constants and the table is written out to +// the binary. When producing position-independent code the table is allocated +// and filled in by the dynamic linker. +class PPC64LongBranchTargetSection final : public SyntheticSection { +public: + PPC64LongBranchTargetSection(); + void addEntry(Symbol &Sym); + size_t getSize() const override; + void writeTo(uint8_t *Buf) override; + bool empty() const override; + void finalizeContents() override { Finalized = true; } + +private: + std::vector Entries; + bool Finalized = false; +}; + InputSection *createInterpSection(); MergeInputSection *createCommentSection(); template void splitSections(); @@ -990,6 +1009,7 @@ struct InStruct { GotSection *Got; GotPltSection *GotPlt; IgotPltSection *IgotPlt; + PPC64LongBranchTargetSection *PPC64LongBranchTarget; MipsGotSection *MipsGot; MipsRldMapSection *MipsRldMap; PltSection *Plt; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 901de37..c8e7dca 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -234,6 +234,46 @@ public: void addSymbols(ThunkSection &IS) override; }; +// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte +// alignment. This gives a possible 26 bits of 'reach'. If the call offset is +// larger than that we need to emit a long-branch thunk. The target address +// of the callee is stored in a table to be accessed TOC-relative. Since the +// call must be local (a non-local call will have a PltCallStub instead) the +// table stores the address of the callee's local entry point. For +// position-independent code a corresponding relative dynamic relocation is +// used. 
+class PPC64LongBranchThunk : public Thunk { +public: + uint32_t size() override { return 16; } + void writeTo(uint8_t *Buf) override; + void addSymbols(ThunkSection &IS) override; + +protected: + PPC64LongBranchThunk(Symbol &Dest) : Thunk(Dest) {} +}; + +class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { +public: + PPC64PILongBranchThunk(Symbol &Dest) : PPC64LongBranchThunk(Dest) { + assert(!Dest.IsPreemptible); + if (Dest.isInPPC64Branchlt()) + return; + + In.PPC64LongBranchTarget->addEntry(Dest); + In.RelaDyn->addReloc({Target->RelativeRel, In.PPC64LongBranchTarget, + Dest.getPPC64LongBranchOffset(), true, &Dest, + getPPC64GlobalEntryToLocalEntryOffset(Dest.StOther)}); + } +}; + +class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk { +public: + PPC64PDLongBranchThunk(Symbol &Dest) : PPC64LongBranchThunk(Dest) { + if (!Dest.isInPPC64Branchlt()) + In.PPC64LongBranchTarget->addEntry(Dest); + } +}; + } // end anonymous namespace Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value, @@ -573,17 +613,21 @@ InputSection *MicroMipsR6Thunk::getTargetInputSection() const { return dyn_cast(DR.Section); } -void PPC64PltCallStub::writeTo(uint8_t *Buf) { - int64_t Off = Destination.getGotPltVA() - getPPC64TocBase(); - // Need to add 0x8000 to offset to account for the low bits being signed. 
- uint16_t OffHa = (Off + 0x8000) >> 16; - uint16_t OffLo = Off; +static void writePPCLoadAndBranch(uint8_t *Buf, int64_t Offset) { + uint16_t OffHa = (Offset + 0x8000) >> 16; + uint16_t OffLo = Offset & 0xffff; - write32(Buf + 0, 0xf8410018); // std r2,24(r1) - write32(Buf + 4, 0x3d820000 | OffHa); // addis r12,r2, X@plt@to@ha - write32(Buf + 8, 0xe98c0000 | OffLo); // ld r12,X@plt@toc@l(r12) - write32(Buf + 12, 0x7d8903a6); // mtctr r12 - write32(Buf + 16, 0x4e800420); // bctr + write32(Buf + 0, 0x3d820000 | OffHa); // addis r12, r2, OffHa + write32(Buf + 4, 0xe98c0000 | OffLo); // ld r12, OffLo(r12) + write32(Buf + 8, 0x7d8903a6); // mtctr r12 + write32(Buf + 12, 0x4e800420); // bctr +} + +void PPC64PltCallStub::writeTo(uint8_t *Buf) { + int64_t Offset = Destination.getGotPltVA() - getPPC64TocBase(); + // Save the TOC pointer to the save-slot reserved in the call frame. + write32(Buf + 0, 0xf8410018); // std r2,24(r1) + writePPCLoadAndBranch(Buf + 4, Offset); } void PPC64PltCallStub::addSymbols(ThunkSection &IS) { @@ -592,6 +636,16 @@ void PPC64PltCallStub::addSymbols(ThunkSection &IS) { S->NeedsTocRestore = true; } +void PPC64LongBranchThunk::writeTo(uint8_t *Buf) { + int64_t Offset = Destination.getPPC64LongBranchTableVA() - getPPC64TocBase(); + writePPCLoadAndBranch(Buf, Offset); +} + +void PPC64LongBranchThunk::addSymbols(ThunkSection &IS) { + addSymbol(Saver.save("__long_branch_" + Destination.getName()), STT_FUNC, 0, + IS); +} + Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {} Thunk::~Thunk() = default; @@ -675,9 +729,14 @@ static Thunk *addThunkMips(RelType Type, Symbol &S) { } static Thunk *addThunkPPC64(RelType Type, Symbol &S) { - if (Type == R_PPC64_REL24) + assert(Type == R_PPC64_REL24 && "unexpected relocation type for thunk"); + if (S.isInPlt()) return make(S); - fatal("unexpected relocation type"); + + if (Config->Pic) + return make(S); + + return make(S); } Thunk *addThunk(RelType Type, Symbol &S) { diff --git a/lld/ELF/Writer.cpp 
b/lld/ELF/Writer.cpp index e486a77..0eb117a 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -365,6 +365,11 @@ template static void createSyntheticSections() { Add(In.Got); } + if (Config->EMachine == EM_PPC64) { + In.PPC64LongBranchTarget = make(); + Add(In.PPC64LongBranchTarget); + } + In.GotPlt = make(); Add(In.GotPlt); In.IgotPlt = make(); @@ -1756,6 +1761,7 @@ template void Writer::finalizeSections() { // maybeAddThunks may have added local symbols to the static symbol table. finalizeSynthetic(In.SymTab); + finalizeSynthetic(In.PPC64LongBranchTarget); // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result diff --git a/lld/test/ELF/basic-ppc64.s b/lld/test/ELF/basic-ppc64.s index f586d63..624d830 100644 --- a/lld/test/ELF/basic-ppc64.s +++ b/lld/test/ELF/basic-ppc64.s @@ -28,7 +28,7 @@ // CHECK-NEXT: Version: 1 // CHECK-NEXT: Entry: 0x10000 // CHECK-NEXT: ProgramHeaderOffset: 0x40 -// CHECK-NEXT: SectionHeaderOffset: +// CHECK-NEXT: SectionHeaderOffset: 0x200F8 // CHECK-NEXT: Flags [ (0x2) // CHECK-NEXT: 0x2 // CHECK-NEXT: ] @@ -36,8 +36,8 @@ // CHECK-NEXT: ProgramHeaderEntrySize: 56 // CHECK-NEXT: ProgramHeaderCount: 7 // CHECK-NEXT: SectionHeaderEntrySize: 64 -// CHECK-NEXT: SectionHeaderCount: 10 -// CHECK-NEXT: StringTableSectionIndex: 8 +// CHECK-NEXT: SectionHeaderCount: 11 +// CHECK-NEXT: StringTableSectionIndex: 9 // CHECK-NEXT:} // CHECK-NEXT:Sections [ // CHECK-NEXT: Section { @@ -156,7 +156,23 @@ // CHECK-NEXT: } // CHECK-NEXT: Section { // CHECK-NEXT: Index: 6 -// CHECK-NEXT: Name: .comment (38) +// CHECK-NEXT: Name: .branch_lt (38) +// CHECK-NEXT: Type: SHT_NOBITS (0x8) +// CHECK-NEXT: Flags [ (0x3) +// CHECK-NEXT: SHF_ALLOC (0x2) +// CHECK-NEXT: SHF_WRITE (0x1) +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x30000 +// CHECK-NEXT: Offset: 0x20060 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: 
EntrySize: 0 +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: 7 +// CHECK-NEXT: Name: .comment (49) // CHECK-NEXT: Type: SHT_PROGBITS (0x1) // CHECK-NEXT: Flags [ (0x30) // CHECK-NEXT: SHF_MERGE (0x10) @@ -174,15 +190,15 @@ // CHECK-NEXT: ) // CHECK-NEXT: } // CHECK-NEXT: Section { -// CHECK-NEXT: Index: 7 -// CHECK-NEXT: Name: .symtab (47) +// CHECK-NEXT: Index: 8 +// CHECK-NEXT: Name: .symtab (58) // CHECK-NEXT: Type: SHT_SYMTAB (0x2) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 // CHECK-NEXT: Offset: 0x20068 // CHECK-NEXT: Size: 48 -// CHECK-NEXT: Link: 9 +// CHECK-NEXT: Link: 10 // CHECK-NEXT: Info: 2 // CHECK-NEXT: AddressAlignment: 8 // CHECK-NEXT: EntrySize: 24 @@ -193,14 +209,14 @@ // CHECK-NEXT: ) // CHECK-NEXT: } // CHECK-NEXT: Section { -// CHECK-NEXT: Index: 8 -// CHECK-NEXT: Name: .shstrtab (55) +// CHECK-NEXT: Index: 9 +// CHECK-NEXT: Name: .shstrtab (66) // CHECK-NEXT: Type: SHT_STRTAB (0x3) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 // CHECK-NEXT: Offset: 0x20098 -// CHECK-NEXT: Size: 73 +// CHECK-NEXT: Size: 84 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 1 @@ -208,19 +224,20 @@ // CHECK-NEXT: SectionData ( // CHECK-NEXT: 0000: 002E6479 6E73796D 002E6861 7368002E |..dynsym..hash..| // CHECK-NEXT: 0010: 64796E73 7472002E 74657874 002E6479 |dynstr..text..dy| -// CHECK-NEXT: 0020: 6E616D69 63002E63 6F6D6D65 6E74002E |namic..comment..| -// CHECK-NEXT: 0030: 73796D74 6162002E 73687374 72746162 |symtab..shstrtab| -// CHECK-NEXT: 0040: 002E7374 72746162 00 |..strtab.| +// CHECK-NEXT: 0020: 6E616D69 63002E62 72616E63 685F6C74 |namic..branch_lt| +// CHECK-NEXT: 0030: 002E636F 6D6D656E 74002E73 796D7461 |..comment..symta| +// CHECK-NEXT: 0040: 62002E73 68737472 74616200 2E737472 |b..shstrtab..str| +// CHECK-NEXT: 0050: 74616200 |tab.| // CHECK-NEXT: ) // CHECK-NEXT: } // CHECK-NEXT: Section { -// CHECK-NEXT: Index: 9 -// CHECK-NEXT: Name: .strtab 
(65) +// CHECK-NEXT: Index: 10 +// CHECK-NEXT: Name: .strtab (76) // CHECK-NEXT: Type: SHT_STRTAB (0x3) // CHECK-NEXT: Flags [ (0x0) // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 -// CHECK-NEXT: Offset: 0x200E1 +// CHECK-NEXT: Offset: 0x200EC // CHECK-NEXT: Size: 10 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 @@ -275,7 +292,7 @@ // CHECK-NEXT: VirtualAddress: 0x20000 // CHECK-NEXT: PhysicalAddress: 0x20000 // CHECK-NEXT: FileSize: 96 -// CHECK-NEXT: MemSize: 96 +// CHECK-NEXT: MemSize: 65536 // CHECK-NEXT: Flags [ (0x6) // CHECK-NEXT: PF_R (0x4) // CHECK-NEXT: PF_W (0x2) diff --git a/lld/test/ELF/basic64be.s b/lld/test/ELF/basic64be.s index 2bef154..0add5b9 100644 --- a/lld/test/ELF/basic64be.s +++ b/lld/test/ELF/basic64be.s @@ -23,7 +23,7 @@ # CHECK-NEXT: Version: 1 # CHECK-NEXT: Entry: 0x10010000 # CHECK-NEXT: ProgramHeaderOffset: 0x40 -# CHECK-NEXT: SectionHeaderOffset: 0x11050 +# CHECK-NEXT: SectionHeaderOffset: 0x20058 # CHECK-NEXT: Flags [ (0x2) # CHECK-NEXT: 0x2 # CHECK-NEXT: ] @@ -31,8 +31,8 @@ # CHECK-NEXT: ProgramHeaderEntrySize: 56 # CHECK-NEXT: ProgramHeaderCount: 4 # CHECK-NEXT: SectionHeaderEntrySize: 64 -# CHECK-NEXT: SectionHeaderCount: 6 -# CHECK-NEXT: StringTableSectionIndex: 4 +# CHECK-NEXT: SectionHeaderCount: 7 +# CHECK-NEXT: StringTableSectionIndex: 5 # CHECK-NEXT: } # CHECK-NEXT: Sections [ # CHECK-NEXT: Section { @@ -72,14 +72,32 @@ # CHECK-NEXT: } # CHECK-NEXT: Section { # CHECK-NEXT: Index: 2 -# CHECK-NEXT: Name: .comment (7) +# CHECK-NEXT: Name: .branch_lt (7) +# CHECK-NEXT: Type: SHT_PROGBITS (0x1) +# CHECK-NEXT: Flags [ (0x3) +# CHECK-NEXT: SHF_ALLOC (0x2) +# CHECK-NEXT: SHF_WRITE (0x1) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x10020000 +# CHECK-NEXT: Offset: 0x20000 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 8 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: SectionData ( +# CHECK-NEXT: ) +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 3 +# CHECK-NEXT: Name: 
.comment (18) # CHECK-NEXT: Type: SHT_PROGBITS (0x1) # CHECK-NEXT: Flags [ (0x30) # CHECK-NEXT: SHF_MERGE (0x10) # CHECK-NEXT: SHF_STRINGS (0x20) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x11000 +# CHECK-NEXT: Offset: 0x20000 # CHECK-NEXT: Size: 8 # CHECK-NEXT: Link: 0 # CHECK-NEXT: Info: 0 @@ -90,15 +108,15 @@ # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK-NEXT: Section { -# CHECK-NEXT: Index: 3 -# CHECK-NEXT: Name: .symtab (16) +# CHECK-NEXT: Index: 4 +# CHECK-NEXT: Name: .symtab (27) # CHECK-NEXT: Type: SHT_SYMTAB (0x2) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x11008 +# CHECK-NEXT: Offset: 0x20008 # CHECK-NEXT: Size: 24 -# CHECK-NEXT: Link: 5 +# CHECK-NEXT: Link: 6 # CHECK-NEXT: Info: 1 # CHECK-NEXT: AddressAlignment: 8 # CHECK-NEXT: EntrySize: 24 @@ -108,32 +126,33 @@ # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK-NEXT: Section { -# CHECK-NEXT: Index: 4 -# CHECK-NEXT: Name: .shstrtab (24) +# CHECK-NEXT: Index: 5 +# CHECK-NEXT: Name: .shstrtab (35) # CHECK-NEXT: Type: SHT_STRTAB (0x3) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x11020 -# CHECK-NEXT: Size: 42 +# CHECK-NEXT: Offset: 0x20020 +# CHECK-NEXT: Size: 53 # CHECK-NEXT: Link: 0 # CHECK-NEXT: Info: 0 # CHECK-NEXT: AddressAlignment: 1 # CHECK-NEXT: EntrySize: 0 # CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 002E7465 7874002E 636F6D6D 656E7400 |..text..comment.| -# CHECK-NEXT: 0010: 2E73796D 74616200 2E736873 74727461 |.symtab..shstrta| -# CHECK-NEXT: 0020: 62002E73 74727461 6200 |b..strtab.| +# CHECK-NEXT: 0000: 002E7465 7874002E 6272616E 63685F6C |..text..branch_l| +# CHECK-NEXT: 0010: 74002E63 6F6D6D65 6E74002E 73796D74 |t..comment..symt| +# CHECK-NEXT: 0020: 6162002E 73687374 72746162 002E7374 |ab..shstrtab..st| +# CHECK-NEXT: 0030: 72746162 00 |rtab.| # CHECK-NEXT: ) # CHECK-NEXT: } # CHECK-NEXT: Section { -# CHECK-NEXT: Index: 5 -# CHECK-NEXT: Name: .strtab (34) +# CHECK-NEXT: Index: 6 
+# CHECK-NEXT: Name: .strtab (45) # CHECK-NEXT: Type: SHT_STRTAB (0x3) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x1104A +# CHECK-NEXT: Offset: 0x20055 # CHECK-NEXT: Size: 1 # CHECK-NEXT: Link: 0 # CHECK-NEXT: Info: 0 @@ -150,8 +169,8 @@ # CHECK-NEXT: Offset: 0x40 # CHECK-NEXT: VirtualAddress: 0x10000040 # CHECK-NEXT: PhysicalAddress: 0x10000040 -# CHECK-NEXT: FileSize: 224 -# CHECK-NEXT: MemSize: 224 +# CHECK-NEXT: FileSize: 280 +# CHECK-NEXT: MemSize: 280 # CHECK-NEXT: Flags [ (0x4) # CHECK-NEXT: PF_R (0x4) # CHECK-NEXT: ] @@ -162,8 +181,8 @@ # CHECK-NEXT: Offset: 0x0 # CHECK-NEXT: VirtualAddress: 0x10000000 # CHECK-NEXT: PhysicalAddress: 0x10000000 -# CHECK-NEXT: FileSize: 288 -# CHECK-NEXT: MemSize: 288 +# CHECK-NEXT: FileSize: 344 +# CHECK-NEXT: MemSize: 344 # CHECK-NEXT: Flags [ (0x4) # CHECK-NEXT: PF_R (0x4) # CHECK-NEXT: ] diff --git a/lld/test/ELF/ppc64-call-reach.s b/lld/test/ELF/ppc64-call-reach.s index 3e655a1..a02bfa8 100644 --- a/lld/test/ELF/ppc64-call-reach.s +++ b/lld/test/ELF/ppc64-call-reach.s @@ -10,8 +10,10 @@ # RUN: ld.lld --defsym callee=0xE010014 --defsym tail_callee=0xE010024 \ # RUN: %t.o -o %t # RUN: llvm-objdump -d %t | FileCheck --check-prefix=NEGOFFSET %s -# RUN: not ld.lld --defsym callee=0x12010018 --defsym tail_callee=0x12010028 \ -# RUN: %t.o -o %t 2>&1 | FileCheck --check-prefix=OVERFLOW %s +# RUN: ld.lld --defsym callee=0x12010018 --defsym tail_callee=0x12010028 \ +# RUN: %t.o -o %t +# RUN: llvm-objdump -d %t | FileCheck --check-prefix=THUNK %s +# RUN: llvm-readelf --sections %t | FileCheck --check-prefix=BRANCHLT %s # RUN: not ld.lld --defsym callee=0x1001002D --defsym tail_callee=0x1001002F \ # RUN: %t.o -o %t 2>&1 | FileCheck --check-prefix=MISSALIGNED %s @@ -25,14 +27,13 @@ # RUN: ld.lld --defsym callee=0xE010014 --defsym tail_callee=0xE010024 \ # RUN: %t.o -o %t # RUN: llvm-objdump -d %t | FileCheck --check-prefix=NEGOFFSET %s -# RUN: not ld.lld --defsym callee=0x12010018 
--defsym tail_callee=0x12010028 \ -# RUN: %t.o -o %t 2>&1 | FileCheck --check-prefix=OVERFLOW %s +# RUN: ld.lld --defsym callee=0x12010018 --defsym tail_callee=0x12010028 \ +# RUN: %t.o -o %t +# RUN: llvm-objdump -d %t | FileCheck --check-prefix=THUNK %s +# RUN: llvm-readelf --sections %t | FileCheck --check-prefix=BRANCHLT %s # RUN: not ld.lld --defsym callee=0x1001002D --defsym tail_callee=0x1001002F \ # RUN: %t.o -o %t 2>&1 | FileCheck --check-prefix=MISSALIGNED %s -# OVERFLOW: ld.lld: error: {{.*}}.o:(.text+0x14): relocation R_PPC64_REL24 out of range: 33554436 is not in [-33554432, 33554431] -# OVERFLOW: ld.lld: error: {{.*}}.o:(.text+0x24): relocation R_PPC64_REL24 out of range: 33554436 is not in [-33554432, 33554431] - # MISSALIGNED: ld.lld: error: {{.*}}.o:(.text+0x14): improper alignment for relocation R_PPC64_REL24: 0x19 is not aligned to 4 bytes # MISSALIGNED: ld.lld: error: {{.*}}.o:(.text+0x24): improper alignment for relocation R_PPC64_REL24: 0xB is not aligned to 4 bytes @@ -64,3 +65,30 @@ test: # NEGOFFSET: 10010014: {{.*}} bl .+33554432 # NEGOFFSET: 10010024: {{.*}} b .+33554432 +# .branch_lt[0] +# THUNK-LABEL: __long_branch_callee: +# THUNK-NEXT: 10010000: {{.*}} addis 12, 2, -1 +# THUNK-NEXT: ld 12, -32768(12) +# THUNK-NEXT: mtctr 12 +# THUNK-NEXT: bctr + +# .branch_lt[1] +# THUNK-LABEL: __long_branch_tail_callee: +# THUNK-NEXT: 10010010: {{.*}} addis 12, 2, -1 +# THUNK-NEXT: ld 12, -32760(12) +# THUNK-NEXT: mtctr 12 +# THUNK-NEXT: bctr + +# Each call now branches to a thunk, and although it is printed as positive +# the offset is interpreted as a signed 26 bit value so 67108812 is actually +# -52. +# THUNK-LABEL: test: +# THUNK: 10010034: {{.*}} bl .+67108812 +# THUNK: 10010044: {{.*}} b .+67108812 + +# The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768. 
+# Name Type Address Off Size +# BRANCHLT: .branch_lt PROGBITS 0000000010020000 020000 000010 +# BRANCHLT: .got PROGBITS 0000000010030000 030000 000008 +# BRANCHLT-NOT: .plt + diff --git a/lld/test/ELF/ppc64-long-branch.s b/lld/test/ELF/ppc64-long-branch.s new file mode 100644 index 0000000..db662d92 --- /dev/null +++ b/lld/test/ELF/ppc64-long-branch.s @@ -0,0 +1,121 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-func-global-entry.s -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t3.so +# RUN: ld.lld --no-toc-optimize %t.o %t3.so -o %t +# RUN: llvm-objdump -d -start-address=0x10010000 -stop-address=0x10010018 %t | FileCheck %s -check-prefix=CALLEE_DUMP +# RUN: llvm-objdump -d -start-address=0x12010020 -stop-address=0x12010084 %t | FileCheck %s -check-prefix=CALLER_DUMP +# RUN: llvm-objdump -D -start-address=0x12020008 -stop-address=0x12020010 %t | FileCheck %s -check-prefix=BRANCH_LT_LE +# RUN: llvm-readelf --sections %t | FileCheck %s -check-prefix=SECTIONS + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-func-global-entry.s -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t3.so +# RUN: ld.lld --no-toc-optimize %t.o %t3.so -o %t +# RUN: llvm-objdump -d -start-address=0x10010000 -stop-address=0x10010018 %t | FileCheck %s -check-prefix=CALLEE_DUMP +# RUN: llvm-objdump -d -start-address=0x12010020 -stop-address=0x12010084 %t | FileCheck %s -check-prefix=CALLER_DUMP +# RUN: llvm-objdump -D -start-address=0x12020008 -stop-address=0x12020010 %t | FileCheck %s -check-prefix=BRANCH_LT_BE +# RUN: llvm-readelf --sections %t | FileCheck %s -check-prefix=SECTIONS + + .text + .abiversion 2 + .protected callee + .globl callee + .p2align 4 + .type callee,@function +callee: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l 
+.Lfunc_lep0: + .localentry callee, .Lfunc_lep0-.Lfunc_gep0 + addis 4, 2, .LC0@toc@ha + ld 4, .LC0@toc@l(4) + lwz 3, 0(4) + blr + + .space 0x2000000 + + .protected _start + .global _start + .p2align 4 + .type _start,@function +_start: +.Lfunc_begin1: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry _start, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl callee + bl foo_external_diff + nop + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + + addis 4, 2, .LC1@toc@ha + ld 4, .LC1@toc@l(4) + lwz 4, 0(4) + add 3, 3, 4 + blr + + + .section .toc,"aw",@progbits +.LC0: + .tc a[TC],a +.LC1: + .tc b[TC],b + + + .data + .type a,@object + .globl a + .p2align 2 +a: + .long 11 + .size a, 4 + + .type b,@object + .global b + .p2align 2 +b: + .long 33 + .size b, 4 + +# Verify address of the callee +# CALLEE_DUMP: callee: +# CALLEE_DUMP: 10010000: {{.*}} addis 2, 12, 515 +# CALLEE_DUMP: 10010004: {{.*}} addi 2, 2, -32544 +# CALLEE_DUMP: 10010008: {{.*}} addis 4, 2, 0 + +# Verify the address of _start, and the call to the long-branch thunk. 
+# CALLER_DUMP: _start: +# CALLER_DUMP: 12010020: {{.*}} addis 2, 12, 3 +# CALLER_DUMP: 12010038: {{.*}} bl .+56 + +# Verify the thunk's contents: TOC-pointer + offset = .branch_lt[0] +# 0x120380e0 + (-2 << 16 + 32552) = 0x12020008 +# CALLER_DUMP: __long_branch_callee: +# CALLER_DUMP: 12010060: {{.*}} addis 12, 2, -2 +# CALLER_DUMP: 12010064: {{.*}} ld 12, 32552(12) +# CALLER_DUMP: 12010068: {{.*}} mtctr 12 +# CALLER_DUMP: 1201006c: {{.*}} bctr + +# BRANCH_LT_LE: Disassembly of section .branch_lt: +# BRANCH_LT_LE-NEXT: .branch_lt: +# BRANCH_LT_LE-NEXT: 12020008: 08 00 01 10 +# BRANCH_LT_LE-NEXT: 1202000c: 00 00 00 00 + +# BRANCH_LT_BE: Disassembly of section .branch_lt: +# BRANCH_LT_BE-NEXT: .branch_lt: +# BRANCH_LT_BE-NEXT: 12020008: 00 00 00 00 +# BRANCH_LT_BE-NEXT: 1202000c: 10 01 00 08 + +# [Nr] Name Type Address Off Size +# SECTIONS: [ 9] .branch_lt PROGBITS 0000000012020008 2020008 000008 +# SECTIONS: [11] .got PROGBITS 00000000120300e0 20300e0 000008 diff --git a/lld/test/ELF/ppc64-shared-long_branch.s b/lld/test/ELF/ppc64-shared-long_branch.s new file mode 100644 index 0000000..34d5314 --- /dev/null +++ b/lld/test/ELF/ppc64-shared-long_branch.s @@ -0,0 +1,114 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld --no-toc-optimize -shared %t.o -o %t +# RUN: llvm-objdump -d -start-address=0x10000 -stop-address=0x10018 %t | FileCheck %s -check-prefix=CALLEE_DUMP +# RUN: llvm-objdump -d -start-address=0x2010020 -stop-address=0x2010070 %t | FileCheck %s -check-prefix=CALLER_DUMP +# RUN: llvm-readelf --sections %t | FileCheck %s -check-prefix=SECTIONS +# RUN: llvm-readelf --relocations %t | FileCheck %s -check-prefix=DYNRELOC + + +# _start calls protected function callee. Since callee is protected no plt stub +# is needed. The binary however has been padded out with space so that the call +# distance is further than a bl instruction can reach. 
+ + .text + .abiversion 2 + .protected callee + .global callee + .p2align 4 + .type callee,@function +callee: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry callee, .Lfunc_lep0-.Lfunc_gep0 + addis 4, 2, .LC0@toc@ha + ld 4, .LC0@toc@l(4) + lwz 3, 0(4) + blr + + .space 0x2000000 + + .protected _start + .globl _start + .p2align 4 + .type _start,@function +_start: +.Lfunc_begin1: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry _start, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + bl callee + bl ext_callee + nop + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + + addis 4, 2, .LC1@toc@ha + ld 4, .LC1@toc@l(4) + lwz 4, 0(4) + add 3, 3, 4 + blr + + + .section .toc,"aw",@progbits +.LC0: + .tc a[TC],a +.LC1: + .tc b[TC],b + + + .data + .type a,@object + .globl a + .p2align 2 +a: + .long 11 + .size a, 4 + + .type b,@object + .globl b + .p2align 2 +b: + .long 33 + .size b, 4 + +# Verify address of the callee +# CALLEE_DUMP: callee: +# CALLEE_DUMP: 10000: {{.*}} addis 2, 12, 515 +# CALLEE_DUMP: 10004: {{.*}} addi 2, 2, -32528 +# CALLEE_DUMP: 10008: {{.*}} addis 4, 2, 0 + +# Verify the address of _start, and the call to the long-branch thunk. +# CALLER_DUMP: _start: +# CALLER_DUMP: 2010020: {{.*}} addis 2, 12, 3 +# CALLER_DUMP: 2010038: {{.*}} bl .+56 + +# Verify the thunk's contents: TOC-pointer + offset = .branch_lt[0] +# 0x20380F0 + 32552 = 0x2040018 +# CALLER_DUMP: __long_branch_callee: +# CALLER_DUMP: 2010060: {{.*}} addis 12, 2, 0 +# CALLER_DUMP: 2010064: {{.*}} ld 12, 32552(12) +# CALLER_DUMP: 2010068: {{.*}} mtctr 12 +# CALLER_DUMP: 201006c: {{.*}} bctr + +# .got section is at address 0x20300f0 so TOC pointer points to 0x20380F0. +# .plt section has a 2 entry header and a single entry for ext_callee. 
+# [Nr] Name Type Address Off Size +# SECTIONS: [11] .got PROGBITS 00000000020300f0 20300f0 000008 +# SECTIONS: [13] .plt NOBITS 0000000002040000 2030108 000018 +# SECTIONS: [14] .branch_lt NOBITS 0000000002040018 2030108 000008 + +# There is a relative dynamic relocation for .branch_lt[0] (0x2040018), with an +# addend equal to callee's local entry point (0x10000 + 8). +# DYNRELOC: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 3 entries: +# DYNRELOC: Offset Info Type Symbol's Value +# DYNRELOC: 0000000002040018 0000000000000016 R_PPC64_RELATIVE 10008 -- 2.7.4