From f60cb34c91de692eda4d4907bfd14f88913d9cfc Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Tue, 26 Jun 2018 19:38:18 +0000 Subject: [PATCH] [PPC64] Thread-local storage general-dynamic to initial-exec relaxation. Patch adds support for relaxing the general-dynamic TLS sequence to initial-exec. The relaxation performs the following transformation: addis r3, r2, x@got@tlsgd@ha --> addis r3, r2, x@got@tprel@ha addi r3, r3, x@got@tlsgd@l --> ld r3, x@got@tprel@l(r3) bl __tls_get_addr(x@tlsgd) --> nop nop --> add r3, r3, r13 and instead of emitting a DTPMOD64/DTPREL64 pair for x, we emit a single R_PPC64_TPREL64. Differential Revision: https://reviews.llvm.org/D48090 llvm-svn: 335651 --- lld/ELF/Arch/PPC64.cpp | 53 +++++++++++++++++++++ lld/ELF/InputSection.cpp | 8 ++++ lld/ELF/Relocations.h | 1 + lld/test/ELF/ppc64-gd-to-ie.s | 104 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+) create mode 100644 lld/test/ELF/ppc64-gd-to-ie.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 4e30218..7a2cc60 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -51,6 +51,9 @@ public: void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, + RelExpr Expr) const override; + void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; }; } // namespace @@ -214,6 +217,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, case R_PPC64_DTPREL64: return R_ABS; case R_PPC64_TLSGD: + return R_TLSDESC_CALL; case R_PPC64_TLSLD: case R_PPC64_TLS: return R_HINT; @@ -405,6 +409,55 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, return Type == R_PPC64_REL24 && S.isInPlt(); } +RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data, + RelExpr Expr) const { + if (Expr == R_RELAX_TLS_GD_TO_IE) + return
R_RELAX_TLS_GD_TO_IE_GOT_OFF; + return Expr; +} + +// Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement. +// The general dynamic code sequence for a global `x` uses 4 instructions. +// Instruction Relocation Symbol +// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x +// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x +// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x +// R_PPC64_REL24 __tls_get_addr +// nop None None +// +// Relaxing to initial-exec entails: +// 1) Convert the addis/addi pair that builds the address of the tls_index +// struct for 'x' to an addis/ld pair that loads an offset from a got-entry. +// 2) Convert the call to __tls_get_addr to a nop. +// 3) Convert the nop following the call to an add of the loaded offset to the +// thread pointer. +// Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is +// used as the relaxation hint for both steps 2 and 3. +void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { + switch (Type) { + case R_PPC64_GOT_TLSGD16_HA: + // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to + // addis rT, r2, sym@got@tprel@ha. + relocateOne(Loc, R_PPC64_GOT_TPREL16_HA, Val); + return; + case R_PPC64_GOT_TLSGD16_LO: { + // Relax from addi r3, rA, sym@got@tlsgd@l to + // ld r3, sym@got@tprel@l(rA) + uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 
2U : 0U; + uint32_t InputRegister = (read32(Loc - EndianOffset) & (0x1f << 16)); + write32(Loc - EndianOffset, 0xE8600000 | InputRegister); + relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val); + return; + } + case R_PPC64_TLSGD: + write32(Loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop + write32(Loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 + return; + default: + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); + } +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 33f4f25..aec9cd1 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -504,6 +504,7 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A, case R_RELAX_TLS_GD_TO_IE_END: return Sym.getGotOffset() + A - InX::Got->getSize(); case R_GOT_OFF: + case R_RELAX_TLS_GD_TO_IE_GOT_OFF: return Sym.getGotOffset() + A; case R_GOT_PAGE_PC: case R_RELAX_TLS_GD_TO_IE_PAGE_PC: @@ -773,11 +774,18 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) { break; case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: + case R_RELAX_TLS_GD_TO_IE_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE_END: Target->relaxTlsGdToIe(BufLoc, Type, TargetVA); break; case R_PPC_CALL: + // If this is a call to __tls_get_addr, it may be part of a TLS + // sequence that has been relaxed and turned into a nop. In this + // case, we don't want to handle it as a call. + if (read32(BufLoc) == 0x60000000) // nop + break; + // Patch a nop (0x60000000) to a ld. 
if (Rel.Sym->NeedsTocRestore) { if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) { diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 2a82a67..efedd72 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -67,6 +67,7 @@ enum RelExpr { R_RELAX_TLS_GD_TO_IE, R_RELAX_TLS_GD_TO_IE_ABS, R_RELAX_TLS_GD_TO_IE_END, + R_RELAX_TLS_GD_TO_IE_GOT_OFF, R_RELAX_TLS_GD_TO_IE_PAGE_PC, R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_LE_NEG, diff --git a/lld/test/ELF/ppc64-gd-to-ie.s b/lld/test/ELF/ppc64-gd-to-ie.s new file mode 100644 index 0000000..1a6cc5b --- /dev/null +++ b/lld/test/ELF/ppc64-gd-to-ie.s @@ -0,0 +1,104 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t3.so +# RUN: ld.lld %t.o %t3.so -o %t +# RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s +# RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s +# RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o +# RUN: ld.lld -shared %t2.o -o %t3.so +# RUN: ld.lld %t.o %t3.so -o %t +# RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s +# RUN: llvm-objdump -D %t | FileCheck --check-prefix=Dis %s +# RUN: llvm-readelf -relocations --wide %t | FileCheck --check-prefix=OutputRelocs %s + + .text + .abiversion 2 + .globl _start + .p2align 4 + .type _start,@function +_start: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry _start, .Lfunc_lep0-.Lfunc_gep0 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 3, 2, a@got@tlsgd@ha + addi 3, 3, a@got@tlsgd@l + bl __tls_get_addr(a@tlsgd) + nop + lwa 3, 0(3) + addi 1, 1, 32 + 
ld 0, 16(1) + mtlr 0 + blr + + + .globl other_reg + .p2align 4 + .type other_reg,@function +other_reg: +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry other_reg, .Lfunc_lep1-.Lfunc_gep1 + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + addis 5, 2, a@got@tlsgd@ha + addi 3, 5, a@got@tlsgd@l + bl __tls_get_addr(a@tlsgd) + nop + lwa 4, 0(3) + addis 30, 2, b@got@tlsgd@ha + addi 3, 30, b@got@tlsgd@l + bl __tls_get_addr(b@tlsgd) + nop + lwa 3, 0(3) + add 3, 4, 3 + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + + .globl __tls_get_addr + .type __tls_get_addr,@function +__tls_get_addr: + + +# CheckGot: .got 00000018 00000000100200c0 DATA +# .got is at 0x100200c0 so the toc-base is 100280c0. +# `a` is at .got[1], we expect the offsets to be: +# Ha(a) = ((0x100200c8 - 0x100280c0) + 0x8000) >> 16 = 0 +# Lo(a) = (0x100200c8 - 0x100280c0) = -32760 + +# Dis-LABEL: _start +# Dis: addis 3, 2, 0 +# Dis-NEXT: ld 3, -32760(3) +# Dis-NEXT: nop +# Dis-NEXT: add 3, 3, 13 + +# Dis-LABEL: other_reg +# Dis: addis 5, 2, 0 +# Dis-NEXT: ld 3, -32760(5) +# Dis-NEXT: nop +# Dis-NEXT: add 3, 3, 13 +# Dis: addis 30, 2, 0 +# Dis: ld 3, -32752(30) +# Dis-NEXT: nop +# Dis-NEXT: add 3, 3, 13 + +# Verify that the only dynamic relocations we emit are TPREL ones rather than +# the DTPMOD64/DTPREL64 pair for general-dynamic. +# OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 2 entries: +# OutputRelocs-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# OutputRelocs-NEXT: {{[0-9a-f]+}} {{[0-9a-f]+}} R_PPC64_TPREL64 {{[0-9a-f]+}} a + 0 +# OutputRelocs-NEXT: {{[0-9a-f]+}} {{[0-9a-f]+}} R_PPC64_TPREL64 {{[0-9a-f]+}} b + 0 -- 2.7.4