From 00786d3a5f229b11a41dcbc4c6081edeaa7ee5b7 Mon Sep 17 00:00:00 2001
From: WANG Xuerui
Date: Wed, 21 Jun 2023 16:04:57 +0800
Subject: [PATCH] [LoongArch] Support CodeModel::Large codegen

This is intended to behave like GCC's `-mcmodel=extreme`. Technically
the true GCC equivalent would be `-mcmodel=large`, which is not yet
implemented there, and we probably do not want to take the "Large" name
until things settle on the GCC side, but:

* LLVM does not have a `CodeModel::Extreme`, and it seems too early to
  have such a variant added just for enabling LoongArch; and
* `CodeModel::Small` is already being used for GCC `-mcmodel=normal`,
  which is already a case of divergent naming.

Regarding the codegen, loads/stores immediately after a PC-relative
large address load (that ends with something like `add.d $addr, $addr,
$tmp`) should get merged with the addition into the corresponding
`ldx/stx` ops, but this is currently not done. This is because
pseudo-instructions are expanded after instruction selection, and it is
best fixed with a separate change.

Reviewed By: SixWeining

Differential Revision: https://reviews.llvm.org/D150522
---
 .../LoongArch/LoongArchExpandPseudoInsts.cpp       | 230 +++++++++++++++++++--
 .../lib/Target/LoongArch/LoongArchISelLowering.cpp |  89 ++++++--
 llvm/lib/Target/LoongArch/LoongArchISelLowering.h  |   4 +-
 llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp   |   8 +
 llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp |  24 +++
 .../Target/LoongArch/LoongArchTargetMachine.cpp    |  23 ++-
 .../LoongArch/MCTargetDesc/LoongArchBaseInfo.h     |   8 +
 .../{codemodel-medium.ll => code-models.ll}        |  43 ++++
 llvm/test/CodeGen/LoongArch/global-address.ll      |  34 +++
 llvm/test/CodeGen/LoongArch/tls-models.ll          |  98 +++++++++
 10 files changed, 528 insertions(+), 33 deletions(-)
 rename llvm/test/CodeGen/LoongArch/{codemodel-medium.ll => code-models.ll} (61%)
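For illustration only (not part of the patch; the symbol `sym` and the
register choices are hypothetical): the large code model materializes a
PC-relative address in five instructions, and a dependent load currently
stays separate, as described above:

    pcalau12i $a1, %pc_hi20(sym)          # PC-relative page (bits 12..31)
    addi.d    $a0, $zero, %pc_lo12(sym)   # offset bits 0..11
    lu32i.d   $a0, %pc64_lo20(sym)        # offset bits 32..51
    lu52i.d   $a0, $a0, %pc64_hi12(sym)   # offset bits 52..63
    add.d     $a0, $a0, $a1               # full 64-bit address of sym
    ld.d      $a0, $a0, 0                 # dependent load; kept separate for now

A later change could delete the final `add.d`/`ld.d` pair and instead emit
`ldx.d $a0, $a0, $a1` on the two addends, which is the merge the message
above says is deferred.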
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index bad39dc..dd0b2cf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -19,8 +19,11 @@
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
@@ -57,24 +60,39 @@ private:
                                   MachineBasicBlock::iterator &NextMBBI,
                                   unsigned FlagsHi, unsigned SecondOpcode,
                                   unsigned FlagsLo);
+  bool expandLargeAddressLoad(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI,
+                              unsigned LastOpcode, unsigned IdentifyingMO);
+  bool expandLargeAddressLoad(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI,
+                              unsigned LastOpcode, unsigned IdentifyingMO,
+                              const MachineOperand &Symbol, Register DestReg,
+                              bool EraseFromParent);
   bool expandLoadAddressPcrel(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressGot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
-                            MachineBasicBlock::iterator &NextMBBI);
+                            MachineBasicBlock::iterator &NextMBBI,
+                            bool Large = false);
   bool expandLoadAddressTLSLE(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineBasicBlock::iterator &NextMBBI);
   bool expandLoadAddressTLSIE(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressTLSLD(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandLoadAddressTLSGD(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
-                              MachineBasicBlock::iterator &NextMBBI);
+                              MachineBasicBlock::iterator &NextMBBI,
+                              bool Large = false);
   bool expandFunctionCALL(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI,
@@ -111,16 +129,26 @@ bool LoongArchPreRAExpandPseudo::expandMI(
   switch (MBBI->getOpcode()) {
   case LoongArch::PseudoLA_PCREL:
     return expandLoadAddressPcrel(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_PCREL_LARGE:
+    return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_GOT:
     return expandLoadAddressGot(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_GOT_LARGE:
+    return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_LE:
     return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI);
   case LoongArch::PseudoLA_TLS_IE:
     return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_IE_LARGE:
+    return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_LD:
     return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_LD_LARGE:
+    return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoLA_TLS_GD:
     return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_GD_LARGE:
+    return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true);
   case LoongArch::PseudoCALL:
     return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false);
   case LoongArch::PseudoTAIL:
@@ -157,9 +185,118 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair(
   return true;
 }
 
+bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
+    unsigned IdentifyingMO) {
+  MachineInstr &MI = *MBBI;
+  return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO,
+                                MI.getOperand(2), MI.getOperand(0).getReg(),
+                                true);
+}
+
+bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
+    unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg,
+    bool EraseFromParent) {
+  // Code Sequence:
+  //
+  // Part1: pcalau12i  $scratch, %MO1(sym)
+  // Part0: addi.d     $dest, $zero, %MO0(sym)
+  // Part2: lu32i.d    $dest, %MO2(sym)
+  // Part3: lu52i.d    $dest, $dest, %MO3(sym)
+  // Fin:   LastOpcode $dest, $dest, $scratch
+
+  unsigned MO0, MO1, MO2, MO3;
+  switch (IdentifyingMO) {
+  default:
+    llvm_unreachable("unsupported identifying MO");
+  case LoongArchII::MO_PCREL_LO:
+    MO0 = IdentifyingMO;
+    MO1 = LoongArchII::MO_PCREL_HI;
+    MO2 = LoongArchII::MO_PCREL64_LO;
+    MO3 = LoongArchII::MO_PCREL64_HI;
+    break;
+  case LoongArchII::MO_GOT_PC_HI:
+  case LoongArchII::MO_LD_PC_HI:
+  case LoongArchII::MO_GD_PC_HI:
+    // These cases relocate just like the GOT case, except for Part1.
+    MO0 = LoongArchII::MO_GOT_PC_LO;
+    MO1 = IdentifyingMO;
+    MO2 = LoongArchII::MO_GOT_PC64_LO;
+    MO3 = LoongArchII::MO_GOT_PC64_HI;
+    break;
+  case LoongArchII::MO_IE_PC_LO:
+    MO0 = IdentifyingMO;
+    MO1 = LoongArchII::MO_IE_PC_HI;
+    MO2 = LoongArchII::MO_IE_PC64_LO;
+    MO3 = LoongArchII::MO_IE_PC64_HI;
+    break;
+  }
+
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  assert(MF->getSubtarget<LoongArchSubtarget>().is64Bit() &&
+         "Large code model requires LA64");
+
+  Register TmpPart1 =
+      MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+  Register TmpPart0 =
+      DestReg.isVirtual()
+          ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+          : DestReg;
+  Register TmpParts02 =
+      DestReg.isVirtual()
+          ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+          : DestReg;
+  Register TmpParts023 =
+      DestReg.isVirtual()
+          ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+          : DestReg;
+
+  auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1);
+  auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0)
+                   .addReg(LoongArch::R0);
+  auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02)
+                   // "rj" is needed due to InstrInfo pattern requirement.
+                   .addReg(TmpPart0, RegState::Kill);
+  auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023)
+                   .addReg(TmpParts02, RegState::Kill);
+  BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg)
+      .addReg(TmpParts023)
+      .addReg(TmpPart1, RegState::Kill);
+
+  if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) {
+    const char *SymName = Symbol.getSymbolName();
+    Part0.addExternalSymbol(SymName, MO0);
+    Part1.addExternalSymbol(SymName, MO1);
+    Part2.addExternalSymbol(SymName, MO2);
+    Part3.addExternalSymbol(SymName, MO3);
+  } else {
+    Part0.addDisp(Symbol, 0, MO0);
+    Part1.addDisp(Symbol, 0, MO1);
+    Part2.addDisp(Symbol, 0, MO2);
+    Part3.addDisp(Symbol, 0, MO3);
+  }
+
+  if (EraseFromParent)
+    MI.eraseFromParent();
+
+  return true;
+}
+
 bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%pc` family of
+    // relocs.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+                                  LoongArchII::MO_PCREL_LO);
+
   // Code Sequence:
   // pcalau12i $rd, %pc_hi20(sym)
   // addi.w/d $rd, $rd, %pc_lo12(sym)
@@ -172,7 +309,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressGot(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%got_pc` family
+    // of relocs, loading the result from GOT with `ldx.d` in the end.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
+                                  LoongArchII::MO_GOT_PC_HI);
+
   // Code Sequence:
   // pcalau12i $rd, %got_pc_hi20(sym)
   // ld.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -189,29 +332,57 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE(
   // Code Sequence:
   // lu12i.w $rd, %le_hi20(sym)
   // ori $rd, $rd, %le_lo12(sym)
+  //
+  // And additionally if generating code using the large code model:
+  //
+  // lu32i.d $rd, %le64_lo20(sym)
+  // lu52i.d $rd, $rd, %le64_hi12(sym)
   MachineFunction *MF = MBB.getParent();
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
+  bool Large = MF->getTarget().getCodeModel() == CodeModel::Large;
   Register DestReg = MI.getOperand(0).getReg();
-  Register ScratchReg =
+  Register Parts01 =
+      Large ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+            : DestReg;
+  Register Part1 =
       MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
   MachineOperand &Symbol = MI.getOperand(1);
 
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), ScratchReg)
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1)
       .addDisp(Symbol, 0, LoongArchII::MO_LE_HI);
 
-  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), DestReg)
-      .addReg(ScratchReg)
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), Parts01)
+      .addReg(Part1, RegState::Kill)
       .addDisp(Symbol, 0, LoongArchII::MO_LE_LO);
 
+  if (Large) {
+    Register Parts012 =
+        MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+
+    BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), Parts012)
+        // "rj" is needed due to InstrInfo pattern requirement.
+        .addReg(Parts01, RegState::Kill)
+        .addDisp(Symbol, 0, LoongArchII::MO_LE64_LO);
+    BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), DestReg)
+        .addReg(Parts012, RegState::Kill)
+        .addDisp(Symbol, 0, LoongArchII::MO_LE64_HI);
+  }
+
   MI.eraseFromParent();
   return true;
 }
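As a reference sketch of the TLS local-exec expansion just above (symbol
name `sym` is hypothetical; the final thread-pointer add is emitted by the
generic TLS lowering, not by this expansion): the tp offset is materialized
absolutely, so no `pcalau12i` scratch register is needed:

    lu12i.w $a0, %le_hi20(sym)          # offset bits 12..31
    ori     $a0, $a0, %le_lo12(sym)     # offset bits 0..11
    lu32i.d $a0, %le64_lo20(sym)        # offset bits 32..51 (large model only)
    lu52i.d $a0, $a0, %le64_hi12(sym)   # offset bits 52..63 (large model only)
    add.d   $a0, $a0, $tp               # final address = $tp + offset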
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%ie_pc` family
+    // of relocs, loading the result with `ldx.d` in the end.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
+                                  LoongArchII::MO_IE_PC_LO);
+
   // Code Sequence:
   // pcalau12i $rd, %ie_pc_hi20(sym)
   // ld.w/d $rd, $rd, %ie_pc_lo12(sym)
@@ -224,7 +395,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%got_pc` family
+    // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+                                  LoongArchII::MO_LD_PC_HI);
+
   // Code Sequence:
   // pcalau12i $rd, %ld_pc_hi20(sym)
   // addi.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -237,7 +414,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
 
 bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    MachineBasicBlock::iterator &NextMBBI) {
+    MachineBasicBlock::iterator &NextMBBI, bool Large) {
+  if (Large)
+    // Emit the 5-insn large address load sequence with the `%got_pc` family
+    // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`.
+    return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+                                  LoongArchII::MO_GD_PC_HI);
+
   // Code Sequence:
   // pcalau12i $rd, %gd_pc_hi20(sym)
   // addi.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -299,6 +482,25 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
     CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO);
     break;
   }
+  case CodeModel::Large: {
+    // Emit the 5-insn large address load sequence, either directly or
+    // indirectly in case of going through the GOT, then JIRL_TAIL or
+    // JIRL_CALL to $addr.
+    Opcode =
+        IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
+    Register AddrReg =
+        IsTailCall
+            ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+            : LoongArch::R1;
+
+    bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal();
+    unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO;
+    unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D;
+    expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg,
+                           false);
+    CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0);
+    break;
+  }
   }
 
   // Transfer implicit operands.
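To summarize the call lowering added above: a sketch of the two large-model
call shapes `expandFunctionCALL` now produces (callee names `f` and `g` and
the register allocation are illustrative; the code-models.ll test below has
the authoritative output). A dso_local callee is reached purely
PC-relatively, a preemptible one indirectly through the GOT:

    # dso_local callee f: %pc relocs, address completed with add.d
    pcalau12i $a0, %pc_hi20(f)
    addi.d    $ra, $zero, %pc_lo12(f)
    lu32i.d   $ra, %pc64_lo20(f)
    lu52i.d   $ra, $ra, %pc64_hi12(f)
    add.d     $ra, $ra, $a0
    jirl      $ra, $ra, 0

    # preemptible callee g: %got_pc relocs, entry loaded with ldx.d
    pcalau12i $a0, %got_pc_hi20(g)
    addi.d    $ra, $zero, %got_pc_lo12(g)
    lu32i.d   $ra, %got64_pc_lo20(g)
    lu52i.d   $ra, $ra, %got64_pc_hi12(g)
    ldx.d     $ra, $ra, $a0
    jirl      $ra, $ra, 0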
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 74a16bf..57d870e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -22,9 +22,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicsLoongArch.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -467,16 +470,44 @@ SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
   SDLoc DL(N);
   EVT Ty = getPointerTy(DAG.getDataLayout());
   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
 
-  // TODO: Check CodeModel.
-  if (IsLocal)
-    // This generates the pattern (PseudoLA_PCREL sym), which expands to
-    // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
-    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
-                   0);
-
-  // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
-  // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
-  return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
+  switch (DAG.getTarget().getCodeModel()) {
+  default:
+    report_fatal_error("Unsupported code model");
+
+  case CodeModel::Large: {
+    assert(Subtarget.is64Bit() && "Large code model requires LA64");
+
+    // This is not actually used, but is necessary for successfully matching
+    // the PseudoLA_*_LARGE nodes.
+    SDValue Tmp = DAG.getConstant(0, DL, Ty);
+    if (IsLocal)
+      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
+      // eventually becomes the desired 5-insn code sequence.
+      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL,
+                                        Ty, Tmp, Addr),
+                     0);
+
+    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
+    // becomes the desired 5-insn code sequence.
+    return SDValue(
+        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
+        0);
+  }
+
+  case CodeModel::Small:
+  case CodeModel::Medium:
+    if (IsLocal)
+      // This generates the pattern (PseudoLA_PCREL sym), which expands to
+      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
+      return SDValue(
+          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
+
+    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
+    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
+    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
+                   0);
+  }
 }
 
 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
@@ -503,13 +534,19 @@ SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
 
 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
-                                                  unsigned Opc) const {
+                                                  unsigned Opc,
+                                                  bool Large) const {
   SDLoc DL(N);
   EVT Ty = getPointerTy(DAG.getDataLayout());
   MVT GRLenVT = Subtarget.getGRLenVT();
 
+  // This is not actually used, but is necessary for successfully matching the
+  // PseudoLA_*_LARGE nodes.
+  SDValue Tmp = DAG.getConstant(0, DL, Ty);
   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
-  SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+  SDValue Offset = Large
+                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
+                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
 
   // Add the thread pointer.
   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
@@ -518,14 +555,20 @@ SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
 
 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                    SelectionDAG &DAG,
-                                                   unsigned Opc) const {
+                                                   unsigned Opc,
+                                                   bool Large) const {
   SDLoc DL(N);
   EVT Ty = getPointerTy(DAG.getDataLayout());
   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
 
+  // This is not actually used, but is necessary for successfully matching the
+  // PseudoLA_*_LARGE nodes.
+  SDValue Tmp = DAG.getConstant(0, DL, Ty);
+
   // Use a PC-relative addressing mode to access the dynamic GOT address.
   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
-  SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
+                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
 
   // Prepare argument list to generate call.
   ArgListTy Args;
@@ -552,6 +595,9 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
       CallingConv::GHC)
     report_fatal_error("In GHC calling convention TLS is not supported");
 
+  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
+  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
+
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
   assert(N->getOffset() == 0 && "unexpected offset in global node");
 
@@ -561,20 +607,31 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
     // In this model, application code calls the dynamic linker function
     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
     // runtime.
-    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
+    Addr = getDynamicTLSAddr(N, DAG,
+                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
+                                   : LoongArch::PseudoLA_TLS_GD,
+                             Large);
     break;
   case TLSModel::LocalDynamic:
     // Same as GeneralDynamic, except for assembly modifiers and relocation
     // records.
-    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
+    Addr = getDynamicTLSAddr(N, DAG,
+                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
+                                   : LoongArch::PseudoLA_TLS_LD,
+                             Large);
    break;
   case TLSModel::InitialExec:
     // This model uses the GOT to resolve TLS offsets.
-    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
+    Addr = getStaticTLSAddr(N, DAG,
+                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
+                                  : LoongArch::PseudoLA_TLS_IE,
+                            Large);
     break;
   case TLSModel::LocalExec:
     // This model is used when static linking as the TLS offsets are resolved
     // during program linking.
+    //
+    // This node doesn't need an extra argument for the large code model.
     Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
     break;
   }
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index dab8944..3fae3d5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -230,9 +230,9 @@ private:
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
   SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
-                           unsigned Opc) const;
+                           unsigned Opc, bool Large = false) const;
   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
-                            unsigned Opc) const;
+                            unsigned Opc, bool Large = false) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index a7c5e4d..f5e32c4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -478,12 +478,20 @@ LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
       {MO_CALL_PLT, "loongarch-call-plt"},
       {MO_PCREL_HI, "loongarch-pcrel-hi"},
       {MO_PCREL_LO, "loongarch-pcrel-lo"},
+      {MO_PCREL64_LO, "loongarch-pcrel64-lo"},
+      {MO_PCREL64_HI, "loongarch-pcrel64-hi"},
      {MO_GOT_PC_HI, "loongarch-got-pc-hi"},
      {MO_GOT_PC_LO, "loongarch-got-pc-lo"},
+      {MO_GOT_PC64_LO, "loongarch-got-pc64-lo"},
+      {MO_GOT_PC64_HI, "loongarch-got-pc64-hi"},
      {MO_LE_HI, "loongarch-le-hi"},
      {MO_LE_LO, "loongarch-le-lo"},
+      {MO_LE64_LO, "loongarch-le64-lo"},
+      {MO_LE64_HI, "loongarch-le64-hi"},
      {MO_IE_PC_HI, "loongarch-ie-pc-hi"},
      {MO_IE_PC_LO, "loongarch-ie-pc-lo"},
+      {MO_IE_PC64_LO, "loongarch-ie-pc64-lo"},
+      {MO_IE_PC64_HI, "loongarch-ie-pc64-hi"},
      {MO_LD_PC_HI, "loongarch-ld-pc-hi"},
      {MO_GD_PC_HI, "loongarch-gd-pc-hi"}};
   return ArrayRef(TargetFlags);
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
index 64f08e2..5daa948 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -47,24 +47,48 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
   case LoongArchII::MO_PCREL_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_PCALA_LO12;
     break;
+  case LoongArchII::MO_PCREL64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_LO20;
+    break;
+  case LoongArchII::MO_PCREL64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_HI12;
+    break;
   case LoongArchII::MO_GOT_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20;
     break;
   case LoongArchII::MO_GOT_PC_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12;
     break;
+  case LoongArchII::MO_GOT_PC64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20;
+    break;
+  case LoongArchII::MO_GOT_PC64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12;
+    break;
   case LoongArchII::MO_LE_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20;
     break;
   case LoongArchII::MO_LE_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12;
     break;
+  case LoongArchII::MO_LE64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20;
+    break;
+  case LoongArchII::MO_LE64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12;
+    break;
   case LoongArchII::MO_IE_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20;
     break;
   case LoongArchII::MO_IE_PC_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12;
     break;
+  case LoongArchII::MO_IE_PC64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20;
+    break;
+  case LoongArchII::MO_IE_PC64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12;
+    break;
   case LoongArchII::MO_LD_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20;
     break;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index da294fc..46e4a06 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Transforms/Scalar.h"
 #include <optional>
 
@@ -54,13 +55,33 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
   return RM.value_or(Reloc::Static);
 }
 
+static CodeModel::Model
+getEffectiveLoongArchCodeModel(const Triple &TT,
+                               std::optional<CodeModel::Model> CM) {
+  if (!CM)
+    return CodeModel::Small;
+
+  switch (*CM) {
+  case CodeModel::Small:
+  case CodeModel::Medium:
+    return *CM;
+  case CodeModel::Large:
+    if (!TT.isArch64Bit())
+      report_fatal_error("Large code model requires LA64");
+    return *CM;
+  default:
+    report_fatal_error(
+        "Only small, medium and large code models are allowed on LoongArch");
+  }
+}
+
 LoongArchTargetMachine::LoongArchTargetMachine(
     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
     const TargetOptions &Options, std::optional<Reloc::Model> RM,
     std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
     : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
                         getEffectiveRelocModel(TT, RM),
-                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
+                        getEffectiveLoongArchCodeModel(TT, CM), OL),
       TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
   initAsmInfo();
 }
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index cdbd1f5..4ba7858 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -31,12 +31,20 @@ enum {
   MO_CALL_PLT,
   MO_PCREL_HI,
   MO_PCREL_LO,
+  MO_PCREL64_LO,
+  MO_PCREL64_HI,
   MO_GOT_PC_HI,
   MO_GOT_PC_LO,
+  MO_GOT_PC64_LO,
+  MO_GOT_PC64_HI,
   MO_LE_HI,
   MO_LE_LO,
+  MO_LE64_LO,
+  MO_LE64_HI,
   MO_IE_PC_HI,
   MO_IE_PC_LO,
+  MO_IE_PC64_LO,
+  MO_IE_PC64_HI,
   MO_LD_PC_HI,
   MO_GD_PC_HI,
   // TODO: Add more flags.
diff --git a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll b/llvm/test/CodeGen/LoongArch/code-models.ll
similarity index 61%
rename from llvm/test/CodeGen/LoongArch/codemodel-medium.ll
rename to llvm/test/CodeGen/LoongArch/code-models.ll
index d4d97e7..c610f64 100644
--- a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll
+++ b/llvm/test/CodeGen/LoongArch/code-models.ll
@@ -3,6 +3,8 @@
 ; RUN:   FileCheck --check-prefix=SMALL %s
 ; RUN: llc --mtriple=loongarch64 --code-model=medium < %s | \
 ; RUN:   FileCheck --check-prefix=MEDIUM %s
+; RUN: llc --mtriple=loongarch64 --code-model=large < %s | \
+; RUN:   FileCheck --check-prefix=LARGE %s
 
 declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
 declare i32 @callee(i32)
@@ -26,6 +28,20 @@ define i32 @call_globaladdress(i32 %a) nounwind {
 ; MEDIUM-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; MEDIUM-NEXT:    addi.d $sp, $sp, 16
 ; MEDIUM-NEXT:    ret
+;
+; LARGE-LABEL: call_globaladdress:
+; LARGE:       # %bb.0:
+; LARGE-NEXT:    addi.d $sp, $sp, -16
+; LARGE-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LARGE-NEXT:    pcalau12i $a1, %got_pc_hi20(callee)
+; LARGE-NEXT:    addi.d $ra, $zero, %got_pc_lo12(callee)
+; LARGE-NEXT:    lu32i.d $ra, %got64_pc_lo20(callee)
+; LARGE-NEXT:    lu52i.d $ra, $ra, %got64_pc_hi12(callee)
+; LARGE-NEXT:    ldx.d $ra, $ra, $a1
+; LARGE-NEXT:    jirl $ra, $ra, 0
+; LARGE-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LARGE-NEXT:    addi.d $sp, $sp, 16
+; LARGE-NEXT:    ret
   %1 = call i32 @callee(i32 %a)
   ret i32 %1
 }
@@ -57,6 +73,24 @@ define void @call_external_sym(ptr %dst) {
 ; MEDIUM-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; MEDIUM-NEXT:    addi.d $sp, $sp, 16
 ; MEDIUM-NEXT:    ret
+;
+; LARGE-LABEL: call_external_sym:
+; LARGE:       # %bb.0: # %entry
+; LARGE-NEXT:    addi.d $sp, $sp, -16
+; LARGE-NEXT:    .cfi_def_cfa_offset 16
+; LARGE-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LARGE-NEXT:    .cfi_offset 1, -8
+; LARGE-NEXT:    ori $a2, $zero, 1000
+; LARGE-NEXT:    move $a1, $zero
+; LARGE-NEXT:    pcalau12i $a3, %pc_hi20(memset)
+; LARGE-NEXT:    addi.d $ra, $zero, %pc_lo12(memset)
+; LARGE-NEXT:    lu32i.d $ra, %pc64_lo20(memset)
+; LARGE-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(memset)
+; LARGE-NEXT:    add.d $ra, $ra, $a3
+; LARGE-NEXT:    jirl $ra, $ra, 0
+; LARGE-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LARGE-NEXT:    addi.d $sp, $sp, 16
+; LARGE-NEXT:    ret
 entry:
   call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 1000, i1 false)
   ret void
@@ -73,6 +107,15 @@ define i32 @caller_tail(i32 %i) nounwind {
 ; MEDIUM:       # %bb.0: # %entry
 ; MEDIUM-NEXT:    pcalau12i $a1, %pc_hi20(callee_tail)
 ; MEDIUM-NEXT:    jirl $zero, $a1, %pc_lo12(callee_tail)
+;
+; LARGE-LABEL: caller_tail:
+; LARGE:       # %bb.0: # %entry
+; LARGE-NEXT:    pcalau12i $a1, %got_pc_hi20(callee_tail)
+; LARGE-NEXT:    addi.d $a2, $zero, %got_pc_lo12(callee_tail)
+; LARGE-NEXT:    lu32i.d $a2, %got64_pc_lo20(callee_tail)
+; LARGE-NEXT:    lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail)
+; LARGE-NEXT:    ldx.d $a1, $a2, $a1
+; LARGE-NEXT:    jr $a1
 entry:
   %r = tail call i32 @callee_tail(i32 %i)
   ret i32 %r
diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll
index 258c4e8..a8f0ef6 100644
--- a/llvm/test/CodeGen/LoongArch/global-address.ll
+++ b/llvm/test/CodeGen/LoongArch/global-address.ll
@@ -3,6 +3,8 @@
 ; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC
 ; RUN: llc --mtriple=loongarch64 --relocation-model=static < %s | FileCheck %s --check-prefix=LA64NOPIC
 ; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC
+; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=static < %s | FileCheck %s --check-prefix=LA64LARGENOPIC
+; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64LARGEPIC
 
 @g = dso_local global i32 zeroinitializer, align 4
 @G = global i32 zeroinitializer, align 4
@@ -47,6 +49,38 @@ define void @foo() nounwind {
 ; LA64PIC-NEXT:    addi.d $a0, $a0, %pc_lo12(.Lg$local)
 ; LA64PIC-NEXT:    ld.w $a0, $a0, 0
 ; LA64PIC-NEXT:    ret
+;
+; LA64LARGENOPIC-LABEL: foo:
+; LA64LARGENOPIC:       # %bb.0:
+; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %got_pc_hi20(G)
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(G)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(G)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(G)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
+; LA64LARGENOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %pc_hi20(g)
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %pc_lo12(g)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %pc64_lo20(g)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g)
+; LA64LARGENOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64LARGENOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA64LARGENOPIC-NEXT:    ret
+;
+; LA64LARGEPIC-LABEL: foo:
+; LA64LARGEPIC:       # %bb.0:
+; LA64LARGEPIC-NEXT:    pcalau12i $a0, %got_pc_hi20(G)
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(G)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(G)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(G)
+; LA64LARGEPIC-NEXT:    ldx.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    ld.w $a0, $a0, 0
+; LA64LARGEPIC-NEXT:    pcalau12i $a0, %pc_hi20(.Lg$local)
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %pc_lo12(.Lg$local)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %pc64_lo20(.Lg$local)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(.Lg$local)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    ld.w $a0, $a0, 0
+; LA64LARGEPIC-NEXT:    ret
   %V = load volatile i32, ptr @G
   %v = load volatile i32, ptr @g
   ret void
diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll
index d973cd4..a2a3792 100644
--- a/llvm/test/CodeGen/LoongArch/tls-models.ll
+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC
 ; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC
+; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64LARGEPIC
 ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32NOPIC
 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64NOPIC
+; RUN: llc --mtriple=loongarch64 --code-model=large < %s | FileCheck %s --check-prefix=LA64LARGENOPIC
 
 ;; Check that TLS symbols are lowered correctly based on the specified
 ;; model. Make sure they're external to avoid them all being optimised to Local
 
@@ -38,6 +40,25 @@ define ptr @f1() nounwind {
 ; LA64PIC-NEXT:    addi.d $sp, $sp, 16
 ; LA64PIC-NEXT:    ret
 ;
+; LA64LARGEPIC-LABEL: f1:
+; LA64LARGEPIC:       # %bb.0: # %entry
+; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, -16
+; LA64LARGEPIC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64LARGEPIC-NEXT:    pcalau12i $a0, %gd_pc_hi20(unspecified)
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(unspecified)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(unspecified)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(unspecified)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    add.d $ra, $ra, $a1
+; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
+; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16
+; LA64LARGEPIC-NEXT:    ret
+;
 ; LA32NOPIC-LABEL: f1:
 ; LA32NOPIC:       # %bb.0: # %entry
 ; LA32NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(unspecified)
@@ -51,6 +72,16 @@ define ptr @f1() nounwind {
 ; LA64NOPIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(unspecified)
 ; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64NOPIC-NEXT:    ret
+;
+; LA64LARGENOPIC-LABEL: f1:
+; LA64LARGENOPIC:       # %bb.0: # %entry
+; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(unspecified)
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(unspecified)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(unspecified)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(unspecified)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
+; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64LARGENOPIC-NEXT:    ret
 entry:
   ret ptr @unspecified
 }
@@ -80,6 +111,25 @@ define ptr @f2() nounwind {
 ; LA64PIC-NEXT:    addi.d $sp, $sp, 16
 ; LA64PIC-NEXT:    ret
 ;
+; LA64LARGEPIC-LABEL: f2:
+; LA64LARGEPIC:       # %bb.0: # %entry
+; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, -16
+; LA64LARGEPIC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64LARGEPIC-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %got_pc_lo12(ld)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %got64_pc_lo20(ld)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %got64_pc_hi12(ld)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    pcalau12i $a1, %pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $ra, $zero, %pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $ra, %pc64_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    add.d $ra, $ra, $a1
+; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
+; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16
+; LA64LARGEPIC-NEXT:    ret
+;
 ; LA32NOPIC-LABEL: f2:
 ; LA32NOPIC:       # %bb.0: # %entry
 ; LA32NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ld)
@@ -93,6 +143,16 @@ define ptr @f2() nounwind {
 ; LA64NOPIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(ld)
 ; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64NOPIC-NEXT:    ret
+;
+; LA64LARGENOPIC-LABEL: f2:
+; LA64LARGENOPIC:       # %bb.0: # %entry
+; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ld)
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ld)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ld)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ld)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
+; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64LARGENOPIC-NEXT:    ret
 entry:
   ret ptr @ld
 }
 
@@ -114,6 +174,16 @@ define ptr @f3() nounwind {
 ; LA64PIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64PIC-NEXT:    ret
 ;
+; LA64LARGEPIC-LABEL: f3:
+; LA64LARGEPIC:       # %bb.0: # %entry
+; LA64LARGEPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA64LARGEPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; LA64LARGEPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; LA64LARGEPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; LA64LARGEPIC-NEXT:    ldx.d $a0, $a1, $a0
+; LA64LARGEPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64LARGEPIC-NEXT:    ret
+;
 ; LA32NOPIC-LABEL: f3:
 ; LA32NOPIC:       # %bb.0: # %entry
 ; LA32NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
@@ -127,6 +197,16 @@ define ptr @f3() nounwind {
 ; LA64NOPIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(ie)
 ; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64NOPIC-NEXT:    ret
+;
+; LA64LARGENOPIC-LABEL: f3:
+; LA64LARGENOPIC:       # %bb.0: # %entry
+; LA64LARGENOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA64LARGENOPIC-NEXT:    addi.d $a1, $zero, %ie_pc_lo12(ie)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a1, %ie64_pc_lo20(ie)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a1, $a1, %ie64_pc_hi12(ie)
+; LA64LARGENOPIC-NEXT:    ldx.d $a0, $a1, $a0
+; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64LARGENOPIC-NEXT:    ret
 entry:
   ret ptr @ie
 }
@@ -148,6 +228,15 @@ define ptr @f4() nounwind {
 ; LA64PIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64PIC-NEXT:    ret
 ;
+; LA64LARGEPIC-LABEL: f4:
+; LA64LARGEPIC:       # %bb.0: # %entry
+; LA64LARGEPIC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA64LARGEPIC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA64LARGEPIC-NEXT:    lu32i.d $a0, %le64_lo20(le)
+; LA64LARGEPIC-NEXT:    lu52i.d $a0, $a0, %le64_hi12(le)
+; LA64LARGEPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64LARGEPIC-NEXT:    ret
+;
 ; LA32NOPIC-LABEL: f4:
 ; LA32NOPIC:       # %bb.0: # %entry
 ; LA32NOPIC-NEXT:    lu12i.w $a0, %le_hi20(le)
@@ -161,6 +250,15 @@ define ptr @f4() nounwind {
 ; LA64NOPIC-NEXT:    ori $a0, $a0, %le_lo12(le)
 ; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64NOPIC-NEXT:    ret
+;
+; LA64LARGENOPIC-LABEL: f4:
+; LA64LARGENOPIC:       # %bb.0: # %entry
+; LA64LARGENOPIC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA64LARGENOPIC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA64LARGENOPIC-NEXT:    lu32i.d $a0, %le64_lo20(le)
+; LA64LARGENOPIC-NEXT:    lu52i.d $a0, $a0, %le64_hi12(le)
+; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64LARGENOPIC-NEXT:    ret
 entry:
   ret ptr @le
 }
-- 
2.7.4