From cddb0dbcefd83109eee99a3bb1d8aa72629cf21d Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 17 Aug 2020 09:30:14 -0500 Subject: [PATCH] [LLD][PowerPC] Implement GOT to PC-Rel relaxation This patch implements the handling for the R_PPC64_PCREL_OPT relocation as well as the GOT relaxation for the associated R_PPC64_GOT_PCREL34 relocation. On Power10 targets with PC-Relative addressing, the linker can relax GOT-relative accesses to PC-Relative under some conditions. Since the sequence consists of a prefixed load followed by a non-prefixed access (load or store), the linker needs to replace the first instruction (as the replacement instruction will be prefixed). The compiler communicates to the linker that this optimization is safe by placing the two aforementioned relocations on the GOT load (of the address). The linker then does two things: - Convert the load from the GOT into a PC-Relative add to compute the address relative to the PC - Find the access instruction referred to by the second relocation (R_PPC64_PCREL_OPT), replace the first instruction with the PC-Relative version of that access, and turn the original access into a nop It is important to synchronize the mapping from legacy memory instructions to their PC-Relative form. Hence, this patch adds a file to be included by both the compiler and the linker so they're always in agreement. 
Differential revision: https://reviews.llvm.org/D84360 --- lld/ELF/Arch/PPC64.cpp | 231 ++++++++++++ lld/ELF/Arch/PPCInsns.def | 27 ++ lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 5 + lld/ELF/InputSection.cpp | 16 + lld/ELF/Options.td | 4 + lld/ELF/Relocations.cpp | 2 +- lld/ELF/Relocations.h | 1 + .../ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s | 66 ++++ lld/test/ELF/ppc64-got-to-pcrel-relaxation.s | 392 +++++++++++++++++++++ 10 files changed, 744 insertions(+), 1 deletion(-) create mode 100644 lld/ELF/Arch/PPCInsns.def create mode 100644 lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s create mode 100644 lld/test/ELF/ppc64-got-to-pcrel-relaxation.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index c1ad72e..cfb3ca9 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -62,6 +62,90 @@ enum DFormOpcd { ADDI = 14 }; +enum class PPCLegacyInsn : uint32_t { + NOINSN = 0, + // Loads. + LBZ = 0x88000000, + LHZ = 0xa0000000, + LWZ = 0x80000000, + LHA = 0xa8000000, + LWA = 0xe8000002, + LD = 0xe8000000, + LFS = 0xC0000000, + LXSSP = 0xe4000003, + LFD = 0xc8000000, + LXSD = 0xe4000002, + LXV = 0xf4000001, + LXVP = 0x18000000, + + // Stores. + STB = 0x98000000, + STH = 0xb0000000, + STW = 0x90000000, + STD = 0xf8000000, + STFS = 0xd0000000, + STXSSP = 0xf4000003, + STFD = 0xd8000000, + STXSD = 0xf4000002, + STXV = 0xf4000005, + STXVP = 0x18000001 +}; +enum class PPCPrefixedInsn : uint64_t { + NOINSN = 0, + PREFIX_MLS = 0x0610000000000000, + PREFIX_8LS = 0x0410000000000000, + + // Loads. + PLBZ = PREFIX_MLS, + PLHZ = PREFIX_MLS, + PLWZ = PREFIX_MLS, + PLHA = PREFIX_MLS, + PLWA = PREFIX_8LS | 0xa4000000, + PLD = PREFIX_8LS | 0xe4000000, + PLFS = PREFIX_MLS, + PLXSSP = PREFIX_8LS | 0xac000000, + PLFD = PREFIX_MLS, + PLXSD = PREFIX_8LS | 0xa8000000, + PLXV = PREFIX_8LS | 0xc8000000, + PLXVP = PREFIX_8LS | 0xe8000000, + + // Stores. 
+ PSTB = PREFIX_MLS, + PSTH = PREFIX_MLS, + PSTW = PREFIX_MLS, + PSTD = PREFIX_8LS | 0xf4000000, + PSTFS = PREFIX_MLS, + PSTXSSP = PREFIX_8LS | 0xbc000000, + PSTFD = PREFIX_MLS, + PSTXSD = PREFIX_8LS | 0xb8000000, + PSTXV = PREFIX_8LS | 0xd8000000, + PSTXVP = PREFIX_8LS | 0xf8000000 +}; +static bool checkPPCLegacyInsn(uint32_t encoding) { + PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding); + if (insn == PPCLegacyInsn::NOINSN) + return false; +#define PCREL_OPT(Legacy, PCRel, InsnMask) \ + if (insn == PPCLegacyInsn::Legacy) \ + return true; +#include "PPCInsns.def" +#undef PCREL_OPT + return false; +} + +// Masks to apply to legacy instructions when converting them to prefixed, +// pc-relative versions. For the most part, the primary opcode is shared +// between the legacy instruction and the suffix of its prefixed version. +// However, there are some instances where that isn't the case (DS-Form and +// DQ-form instructions). +enum class LegacyToPrefixMask : uint64_t { + NOMASK = 0x0, + OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10). + ONLY_RST = 0x3e00000, // [RS]T (6-10). + ST_STX28_TO5 = + 0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5. +}; + uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a @@ -333,6 +417,7 @@ static bool isDQFormInstruction(uint32_t encoding) { switch (getPrimaryOpCode(encoding)) { default: return false; + case 6: // Power10 paired loads/stores (lxvp, stxvp). case 56: // The only instruction with a primary opcode of 56 is `lq`. 
return true; @@ -344,6 +429,78 @@ static bool isDQFormInstruction(uint32_t encoding) { } } +static bool isDSFormInstruction(PPCLegacyInsn insn) { + switch (insn) { + default: + return false; + case PPCLegacyInsn::LWA: + case PPCLegacyInsn::LD: + case PPCLegacyInsn::LXSD: + case PPCLegacyInsn::LXSSP: + case PPCLegacyInsn::STD: + case PPCLegacyInsn::STXSD: + case PPCLegacyInsn::STXSSP: + return true; + } +} + +static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) { + uint32_t opc = encoding & 0xfc000000; + + // If the primary opcode is shared between multiple instructions, we need to + // fix it up to match the actual instruction we are after. + if ((opc == 0xe4000000 || opc == 0xe8000000 || opc == 0xf4000000 || + opc == 0xf8000000) && + !isDQFormInstruction(encoding)) + opc = encoding & 0xfc000003; + else if (opc == 0xf4000000) + opc = encoding & 0xfc000007; + else if (opc == 0x18000000) + opc = encoding & 0xfc00000f; + + // If the value is not one of the enumerators in PPCLegacyInsn, we want to + // return PPCLegacyInsn::NOINSN. 
+ if (!checkPPCLegacyInsn(opc)) + return PPCLegacyInsn::NOINSN; + return static_cast<PPCLegacyInsn>(opc); +} + +static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) { + switch (insn) { +#define PCREL_OPT(Legacy, PCRel, InsnMask) \ + case PPCLegacyInsn::Legacy: \ + return PPCPrefixedInsn::PCRel +#include "PPCInsns.def" +#undef PCREL_OPT + } + return PPCPrefixedInsn::NOINSN; +} + +static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) { + switch (insn) { +#define PCREL_OPT(Legacy, PCRel, InsnMask) \ + case PPCLegacyInsn::Legacy: \ + return LegacyToPrefixMask::InsnMask +#include "PPCInsns.def" +#undef PCREL_OPT + } + return LegacyToPrefixMask::NOMASK; +} +static uint64_t getPCRelativeForm(uint32_t encoding) { + PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding); + PPCPrefixedInsn pcrelInsn = getPCRelativeForm(origInsn); + if (pcrelInsn == PPCPrefixedInsn::NOINSN) + return UINT64_C(-1); + LegacyToPrefixMask origInsnMask = getInsnMask(origInsn); + uint64_t pcrelEncoding = + (uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask); + + // If the mask requires moving bit 28 to bit 5, do that now. + if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5) + pcrelEncoding |= (encoding & 0x8) << 23; + return pcrelEncoding; +} + static bool isInstructionUpdateForm(uint32_t encoding) { switch (getPrimaryOpCode(encoding)) { default: @@ -368,6 +525,25 @@ static bool isInstructionUpdateForm(uint32_t encoding) { } } +// Compute the total displacement between the prefixed instruction that gets +// to the start of the data and the load/store instruction that has the offset +// into the data structure. +// For example: +// paddi 3, 0, 1000, 1 +// lwz 3, 20(3) +// Should add up to 1020 for total displacement. 
+static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) { + int64_t disp34 = llvm::SignExtend64( + ((prefixedInsn & 0x3ffff00000000) >> 16) | (prefixedInsn & 0xffff), 34); + int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16); + // For DS and DQ form instructions, we need to mask out the XO bits. + if (isDQFormInstruction(accessInsn)) + disp16 &= ~0xf; + else if (isDSFormInstruction(getPPCLegacyInsn(accessInsn))) + disp16 &= ~0x3; + return disp34 + disp16; +} + // There are a number of places when we either want to read or write an // instruction when handling a half16 relocation type. On big-endian the buffer // pointer is pointing into the middle of the word we want to extract, and on @@ -475,6 +651,49 @@ void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const { relocateNoSym(loc, R_PPC64_TOC16_LO, val); break; } + case R_PPC64_GOT_PCREL34: { + // Clear the first 8 bits of the prefix and the first 6 bits of the + // instruction (the primary opcode). + uint64_t insn = readPrefixedInstruction(loc); + if ((insn & 0xfc000000) != 0xe4000000) + error("expected a 'pld' for got-indirect to pc-relative relaxing"); + insn &= ~0xff000000fc000000; + + // Replace the cleared bits with the values for PADDI (0x600000038000000); + insn |= 0x600000038000000; + writePrefixedInstruction(loc, insn); + relocate(loc, rel, val); + break; + } + case R_PPC64_PCREL_OPT: { + // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can + // be relaxed. The eligibility for the relaxation needs to be determined + // on that relocation since this one does not relocate a symbol. + uint64_t insn = readPrefixedInstruction(loc); + uint32_t accessInsn = read32(loc + rel.addend); + uint64_t pcRelInsn = getPCRelativeForm(accessInsn); + + // This error is not necessary for correctness but is emitted for now + // to ensure we don't miss these opportunities in real code. It can be + // removed at a later date. 
+ if (pcRelInsn == UINT64_C(-1)) { + errorOrWarn( + "unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" + + Twine::utohexstr(accessInsn)); + break; + } + + int64_t totalDisp = getTotalDisp(insn, accessInsn); + if (!isInt<34>(totalDisp)) + break; // Displacement doesn't fit. + // Convert the PADDI to the prefixed version of accessInsn and convert + // accessInsn to a nop. + writePrefixedInstruction(loc, pcRelInsn | + ((totalDisp & 0x3ffff0000) << 16) | + (totalDisp & 0xffff)); + write32(loc + rel.addend, 0x60000000); // nop accessInsn. + break; + } default: llvm_unreachable("unexpected relocation type"); } @@ -668,6 +887,7 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_TOC16_LO: return R_GOTREL; case R_PPC64_GOT_PCREL34: + case R_PPC64_PCREL_OPT: return R_GOT_PC; case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: @@ -1024,6 +1244,9 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { (val & si1Mask)); break; } + // If we encounter a PCREL_OPT relocation that we won't optimize. + case R_PPC64_PCREL_OPT: + break; default: llvm_unreachable("unknown relocation"); } @@ -1080,6 +1303,14 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const { + if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) && + config->pcRelOptimize) { + // It only makes sense to optimize pld since paddi means that the address + // of the object in the GOT is required rather than the object itself. 
+ assert(data && "Expecting an instruction encoding here"); + if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000) + return R_PPC64_RELAX_GOT_PC; + } if (expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) diff --git a/lld/ELF/Arch/PPCInsns.def b/lld/ELF/Arch/PPCInsns.def new file mode 100644 index 0000000..1baa8fd --- /dev/null +++ b/lld/ELF/Arch/PPCInsns.def @@ -0,0 +1,27 @@ +#ifndef PCREL_OPT +#error "Need to define function-style macro PCREL_OPT" +#endif +PCREL_OPT(NOINSN, NOINSN, NOMASK); +PCREL_OPT(LBZ, PLBZ, OPC_AND_RST); +PCREL_OPT(LHZ, PLHZ, OPC_AND_RST); +PCREL_OPT(LWZ, PLWZ, OPC_AND_RST); +PCREL_OPT(LHA, PLHA, OPC_AND_RST); +PCREL_OPT(LWA, PLWA, ONLY_RST); +PCREL_OPT(LD, PLD , ONLY_RST); +PCREL_OPT(LFS, PLFS, OPC_AND_RST); +PCREL_OPT(LXSSP, PLXSSP, ONLY_RST); +PCREL_OPT(LFD, PLFD, OPC_AND_RST); +PCREL_OPT(LXSD, PLXSD, ONLY_RST); +PCREL_OPT(LXV, PLXV, ST_STX28_TO5); +PCREL_OPT(LXVP, PLXVP, OPC_AND_RST); + +PCREL_OPT(STB, PSTB, OPC_AND_RST); +PCREL_OPT(STH, PSTH, OPC_AND_RST); +PCREL_OPT(STW, PSTW, OPC_AND_RST); +PCREL_OPT(STD, PSTD, ONLY_RST); +PCREL_OPT(STFS, PSTFS, OPC_AND_RST); +PCREL_OPT(STXSSP, PSTXSSP, ONLY_RST); +PCREL_OPT(STFD, PSTFD, OPC_AND_RST); +PCREL_OPT(STXSD, PSTXSD, ONLY_RST); +PCREL_OPT(STXV, PSTXV, ST_STX28_TO5); +PCREL_OPT(STXVP, PSTXVP, OPC_AND_RST); diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 1afeee0..f043d1d 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -206,6 +206,7 @@ struct Configuration { bool thinLTOIndexOnly; bool timeTraceEnabled; bool tocOptimize; + bool pcRelOptimize; bool undefinedVersion; bool unique; bool useAndroidRelrTags = false; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 12c8e5d..34f2cd6 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -309,6 +309,9 @@ static void checkOptions() { if (config->tocOptimize && config->emachine != EM_PPC64) error("--toc-optimize is only supported on the PowerPC64 target"); + if 
+ (config->pcRelOptimize && config->emachine != EM_PPC64) + error("--pcrel-optimize is only supported on the PowerPC64 target"); + if (config->pie && config->shared) error("-shared and -pie may not be used together"); @@ -1288,6 +1291,8 @@ static void setConfigs(opt::InputArgList &args) { config->tocOptimize = args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64); + config->pcRelOptimize = + args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64); } // Returns a value of "-format" option. diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 6440e87..914c4e0 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -807,6 +807,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_PPC64_TOCBASE: return getPPC64TocBase() + a; case R_RELAX_GOT_PC: + case R_PPC64_RELAX_GOT_PC: return sym.getVA(a) - p; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_IE_TO_LE: @@ -1004,6 +1005,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { assert(flags & SHF_ALLOC); const unsigned bits = config->wordsize * 8; + uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1); for (const Relocation &rel : relocations) { if (rel.expr == R_NONE) @@ -1025,6 +1027,20 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { case R_RELAX_GOT_PC_NOPIC: target->relaxGot(bufLoc, rel, targetVA); break; + case R_PPC64_RELAX_GOT_PC: { + // The R_PPC64_PCREL_OPT relocation must appear immediately after + // R_PPC64_GOT_PCREL34 in the relocations table at the same offset. + // We can only relax R_PPC64_PCREL_OPT if we have also relaxed + // the associated R_PPC64_GOT_PCREL34 since only the latter has an + // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34 + // and only relax the other if the saved offset matches. 
+ if (type == R_PPC64_GOT_PCREL34) + lastPPCRelaxedRelocOff = offset; + if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff) + break; + target->relaxGot(bufLoc, rel, targetVA); + break; + } case R_PPC64_RELAX_TOC: // rel.sym refers to the STT_SECTION symbol associated to the .toc input // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c3cadaf..5563a95 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -404,6 +404,10 @@ defm toc_optimize : B<"toc-optimize", "(PowerPC64) Enable TOC related optimizations (default)", "(PowerPC64) Disable TOC related optimizations">; +defm pcrel_optimize : B<"pcrel-optimize", + "(PowerPC64) Enable PC-relative optimizations (default)", + "(PowerPC64) Disable PC-relative optimizations">; + def trace: F<"trace">, HelpText<"Print the names of the input files">; defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 751ded3..7fc9b49 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -376,7 +376,7 @@ static bool needsGot(RelExpr expr) { static bool isRelExpr(RelExpr expr) { return oneof(expr); + R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr); } // Returns true if a given relocation can be computed at link-time. 
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index ec59c63..4f48082 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -96,6 +96,7 @@ enum RelExpr { R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_PPC64_TOCBASE, + R_PPC64_RELAX_GOT_PC, R_RISCV_ADD, R_RISCV_PC_INDIRECT, }; diff --git a/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s new file mode 100644 index 0000000..c7936bf --- /dev/null +++ b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s @@ -0,0 +1,66 @@ + .section ".text" + .comm storeVal_vector,8,8 + .comm useVal_vector,8,8 + .globl storeVal_longlong, useAddr_longlong, useVal_longlong, storeVal_sshort + .globl useAddr_sshort, useVal_sshort, storeVal_sint, useAddr_sint, useVal_sint + .globl storeVal_double, useAddr_double, useVal_double, storeVal_float + .globl useAddr_float, useVal_float, storeVal_uint, useAddr_uint + .globl useVal_uint, storeVal_ushort, useAddr_ushort, useVal_ushort + .globl storeVal, useAddr, useVal + .section ".data" + .align 3 + .type storeVal_longlong, @object + .size storeVal_longlong, 8 +storeVal_longlong: + .quad 18 +useAddr_longlong: + .quad 17 +useVal_longlong: + .quad 16 +storeVal_sshort: + .short -15 +useAddr_sshort: + .short -14 +useVal_sshort: + .short -13 + .zero 2 +storeVal_sint: + .long -12 +useAddr_sint: + .long -11 +useVal_sint: + .long -10 + .zero 4 +storeVal_double: + .long 858993459 + .long 1076966195 +useAddr_double: + .long -1717986918 + .long -1070589543 +useVal_double: + .long 0 + .long 1076756480 +storeVal_float: + .long 1045220557 +useAddr_float: + .long -1050568294 +useVal_float: + .long 1095761920 +storeVal_uint: + .long 12 +useAddr_uint: + .long 11 +useVal_uint: + .long 10 +storeVal_ushort: + .short 1 +useAddr_ushort: + .short 10 +useVal_ushort: + .short 5 +storeVal: + .byte -1 +useAddr: + .byte 10 +useVal: + .byte 5 diff --git a/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s 
b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s new file mode 100644 index 0000000..37fe65c --- /dev/null +++ b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s @@ -0,0 +1,392 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o +# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2 +# RUN: ld.lld %t1.o %t2.o -o %t +# RUN: ld.lld %t1.o %t2.so -o %ts +# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o +# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2 +# RUN: ld.lld %t1.o %t2.o -o %t +# RUN: ld.lld %t1.o %t2.so -o %ts +# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D + +# CHECK-S-LABEL: <check_LBZ_STB>: +# CHECK-S-NEXT: plbz 10 +# CHECK-S-NEXT: paddi 9 +# CHECK-S-NEXT: li 3, 0 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: rldicl 9, 9, 9, 60 +# CHECK-S-NEXT: add 9, 9, 10 +# CHECK-S-NEXT: pstb 9 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LBZ_STB>: +# CHECK-D-NEXT: pld 8 +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: li 3, 0 +# CHECK-D-NEXT: lbz 10, 0(8) +# CHECK-D-NEXT: rldicl 9, 9, 9, 60 +# CHECK-D-NEXT: add 9, 9, 10 +# CHECK-D-NEXT: pld 10 +# CHECK-D-NEXT: stb 9, 0(10) +# CHECK-D-NEXT: blr +check_LBZ_STB: + pld 
8,useVal@got@pcrel(0),1 +.Lpcrel1: + pld 9,useAddr@got@pcrel(0),1 + li 3,0 + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + lbz 10,0(8) + rldicl 9,9,9,60 + add 9,9,10 + pld 10,storeVal@got@pcrel(0),1 +.Lpcrel2: + .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8) + stb 9,0(10) + blr + +# CHECK-S-LABEL: <check_LHZ_STH>: +# CHECK-S-NEXT: plhz 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: psth 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LHZ_STH>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lhz 3, 0(9) +# CHECK-D-NEXT: nop +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: sth 3, 0(9) +# CHECK-D-NEXT: blr +check_LHZ_STH: + pld 9,useVal_ushort@got@pcrel(0),1 +.Lpcrel3: + .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8) + lhz 3,0(9) + pld 9,storeVal_ushort@got@pcrel(0),1 +.Lpcrel4: + .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8) + sth 3,0(9) + blr + +# CHECK-S-LABEL: <check_LWZ_STW>: +# CHECK-S-NEXT: plwz 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstw 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LWZ_STW>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lwz 3, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stw 3, 0(9) +# CHECK-D-NEXT: blr +check_LWZ_STW: + pld 9,useVal_uint@got@pcrel(0),1 +.Lpcrel5: + .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8) + lwz 3,0(9) + pld 9,storeVal_uint@got@pcrel(0),1 +.Lpcrel6: + .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8) + stw 3,0(9) + blr + +# CHECK-S-LABEL: <check_LFS_STFS>: +# CHECK-S-NEXT: plfs 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstfs 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LFS_STFS>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lfs 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stfs 1, 0(9) +# CHECK-D-NEXT: blr +check_LFS_STFS: + pld 9,useVal_float@got@pcrel(0),1 +.Lpcrel7: + .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8) + lfs 1,0(9) + pld 9,storeVal_float@got@pcrel(0),1 +.Lpcrel8: + .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8) + stfs 1,0(9) + blr + +# CHECK-S-LABEL: <check_LFD_STFD>: +# CHECK-S-NEXT: plfd 1 +# CHECK-S-NEXT: nop +# 
CHECK-S-NEXT: pstfd 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LFD_STFD>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lfd 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stfd 1, 0(9) +# CHECK-D-NEXT: blr +check_LFD_STFD: + pld 9,useVal_double@got@pcrel(0),1 +.Lpcrel9: + .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8) + lfd 1,0(9) + pld 9,storeVal_double@got@pcrel(0),1 +.Lpcrel10: + .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8) + stfd 1,0(9) + blr + +# CHECK-S-LABEL: <check_LWA_STW>: +# CHECK-S-NEXT: mr 9, 3 +# CHECK-S-NEXT: plwa 3 +# CHECK-S-NEXT: pstw 9 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LWA_STW>: +# CHECK-D-NEXT: mr 9, 3 +# CHECK-D-NEXT: pld 8 +# CHECK-D-NEXT: pld 10 +# CHECK-D-NEXT: lwa 3, 0(8) +# CHECK-D-NEXT: stw 9, 0(10) +# CHECK-D-NEXT: blr +check_LWA_STW: + mr 9,3 + pld 8,useVal_sint@got@pcrel(0),1 +.Lpcrel11: + pld 10,storeVal_sint@got@pcrel(0),1 +.Lpcrel12: + .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8) + lwa 3,0(8) + .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8) + stw 9,0(10) + blr + +# CHECK-S-LABEL: <check_LHA_STH>: +# CHECK-S-NEXT: mr 9, 3 +# CHECK-S-NEXT: plha 3 +# CHECK-S-NEXT: psth 9 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LHA_STH>: +# CHECK-D-NEXT: mr 9, 3 +# CHECK-D-NEXT: pld 8 +# CHECK-D-NEXT: pld 10 +# CHECK-D-NEXT: lha 3, 0(8) +# CHECK-D-NEXT: sth 9, 0(10) +# CHECK-D-NEXT: blr +check_LHA_STH: + mr 9,3 + pld 8,useVal_sshort@got@pcrel(0),1 +.Lpcrel13: + pld 10,storeVal_sshort@got@pcrel(0),1 +.Lpcrel14: + .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8) + lha 3,0(8) + .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8) + sth 9,0(10) + blr + +# CHECK-S-LABEL: <check_LD_STD>: +# CHECK-S-NEXT: pld 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstd 3 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LD_STD>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: ld 3, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: std 3, 0(9) +# CHECK-D-NEXT: blr +check_LD_STD: + pld 9,useVal_longlong@got@pcrel(0),1 
+.Lpcrel15: + .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8) + ld 3,0(9) + pld 9,storeVal_longlong@got@pcrel(0),1 +.Lpcrel16: + .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8) + std 3,0(9) + blr + +# CHECK-S-LABEL: <check_LXV_STXV>: +# CHECK-S-NEXT: plxv 34 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxv 34 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXV_STXV>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxv 34, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxv 34, 0(9) +# CHECK-D-NEXT: blr +check_LXV_STXV: + pld 9,useVal_vector@got@pcrel(0),1 +.Lpcrel17: + .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8) + lxv 34,0(9) + pld 9,storeVal_vector@got@pcrel(0),1 +.Lpcrel18: + .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8) + stxv 34,0(9) + blr + +# CHECK-S-LABEL: <check_LXSSP_STXSSP>: +# CHECK-S-NEXT: plxssp 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxssp 1 +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXSSP_STXSSP>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxssp 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxssp 1, 0(9) +# CHECK-D-NEXT: blr +check_LXSSP_STXSSP: + pld 9,useVal_float@got@pcrel(0),1 +.Lpcrel19: + .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8) + lxssp 1,0(9) + pld 9,storeVal_float@got@pcrel(0),1 +.Lpcrel20: + .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8) + stxssp 1,0(9) + blr + +# CHECK-S-LABEL: <check_LXSD_STXSD>: +# CHECK-S-NEXT: plxsd 1, [[#ADDR1:]] +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxsd 1, [[#ADDR2:]] +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXSD_STXSD>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxsd 1, 0(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxsd 1, 0(9) +# CHECK-D-NEXT: blr +check_LXSD_STXSD: + pld 9,useVal_double@got@pcrel(0),1 +.Lpcrel21: + .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8) + lxsd 1,0(9) + pld 9,storeVal_double@got@pcrel(0),1 +.Lpcrel22: + .reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8) + stxsd 1,0(9) + blr + +# The respective displacements are computed relative to the PC which advanced +# by 28 bytes in this function. 
Since the displacements in the two access +# instructions are 8 and 32, the displacements are those computed above minus +# 20 and plus 4 (+8 - 28 and +32 - 28) respectively. +# CHECK-S-LABEL: <check_LXSD_STXSD_aggr>: +# CHECK-S-NEXT: plxsd 1, [[#ADDR1-20]] +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: pstxsd 1, [[#ADDR2+4]] +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXSD_STXSD_aggr>: +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: lxsd 1, 8(9) +# CHECK-D-NEXT: pld 9 +# CHECK-D-NEXT: stxsd 1, 32(9) +# CHECK-D-NEXT: blr +check_LXSD_STXSD_aggr: + pld 9,useVal_double@got@pcrel(0),1 +.Lpcrel23: + .reloc .Lpcrel23-8,R_PPC64_PCREL_OPT,.-(.Lpcrel23-8) + lxsd 1,8(9) + pld 9,storeVal_double@got@pcrel(0),1 +.Lpcrel24: + .reloc .Lpcrel24-8,R_PPC64_PCREL_OPT,.-(.Lpcrel24-8) + stxsd 1,32(9) + blr + +# This includes a nop but that is not emitted by the linker. +# It is an alignment nop to prevent the prefixed instruction from +# crossing a 64-byte boundary. +# CHECK-S-LABEL: <check_LD_STD_W_PADDI>: +# CHECK-S-NEXT: paddi 9 +# CHECK-S-NEXT: ld 3, 0(9) +# CHECK-S-NEXT: nop +# CHECK-S-NEXT: paddi 9 +# CHECK-S-NEXT: std 3, 0(9) +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LD_STD_W_PADDI>: +# CHECK-D-NEXT: paddi 9 +# CHECK-D-NEXT: ld 3, 0(9) +# CHECK-D-NEXT: nop +# CHECK-D-NEXT: paddi 9 +# CHECK-D-NEXT: std 3, 0(9) +# CHECK-D-NEXT: blr +check_LD_STD_W_PADDI: + paddi 9,0,useVal_longlong@got@pcrel,1 +.Lpcrel25: + .reloc .Lpcrel25-8,R_PPC64_PCREL_OPT,.-(.Lpcrel25-8) + ld 3,0(9) + paddi 9,0,storeVal_longlong@got@pcrel,1 +.Lpcrel26: + .reloc .Lpcrel26-8,R_PPC64_PCREL_OPT,.-(.Lpcrel26-8) + std 3,0(9) + blr +# CHECK-S-LABEL: <check_LXSD_STXSD_aggr_notoc>: +# CHECK-S-NEXT: paddi 3, 0, -12, 1 +# CHECK-S-NEXT: lwz 4, 8(3) +# CHECK-S-NEXT: paddi 3, 0, -24, 1 +# CHECK-S-NEXT: stw 4, 32(3) +# CHECK-S-NEXT: blr + +# CHECK-D-LABEL: <check_LXSD_STXSD_aggr_notoc>: +# CHECK-D-NEXT: paddi 3, 0, -12, 1 +# CHECK-D-NEXT: lwz 4, 8(3) +# CHECK-D-NEXT: paddi 3, 0, -24, 1 +# CHECK-D-NEXT: stw 4, 32(3) +# CHECK-D-NEXT: blr +.type Arr,@object # @Arr +.globl Arr +.p2align 2 +Arr: +.long 11 # 0xb +.long 22 # 0x16 +.long 33 # 0x21 
+check_LXSD_STXSD_aggr_notoc: + paddi 3, 0, Arr@PCREL, 1 +.Lpcrel27: + .reloc .Lpcrel27-8,R_PPC64_PCREL_OPT,.-(.Lpcrel27-8) + lwz 4,8(3) + paddi 3, 0, Arr@PCREL, 1 +.Lpcrel28: + .reloc .Lpcrel28-8,R_PPC64_PCREL_OPT,.-(.Lpcrel28-8) + stw 4,32(3) + blr + -- 2.7.4