From f5ae07504846ec967ee8ac51198f17e76632b01b Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Mon, 19 Jun 2023 23:22:28 -0500 Subject: [PATCH] [AIX][TLS] Generate 32-bit local-exec access code sequence This patch adds support for the TLS local-exec access model on AIX so that the 32-bit (specifically, non-optimized) code sequence can be generated. This work is a follow-up to D149722. The particular code sequence that is generated for this access model is as follows: ``` .tc var[TC],var[TL]@le // variable offset, with the le relocation specifier bla .__get_tpointer() // get the thread pointer, modifies r3 lwz reg1, var[TC](2) // load the variable offset add reg2, r3, reg1 // add the variable offset to the retrieved thread pointer ``` Differential Revision: https://reviews.llvm.org/D152669 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 37 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 27 +- llvm/lib/Target/PowerPC/PPCISelLowering.h | 5 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 19 + llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 29 +- .../test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll | 406 ++++++++++++++++++ llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll | 406 ++++++++++++++++++ llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll | 414 +++++++++++++++++++ .../CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll | 454 +++++++++++++++++++++ .../PowerPC/aix-tls-le-xcoff-reloc-large32.ll | 272 ++++++++++++ .../CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll | 245 +++++++++++ 11 files changed, 2288 insertions(+), 26 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 2a192e6..c3c52a3 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -196,6 +196,7 @@ public: void LowerSTACKMAP(StackMaps &SM, const 
MachineInstr &MI); void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI); void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); + void EmitAIXTlsCallHelper(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); bool Changed = AsmPrinter::runOnMachineFunction(MF); @@ -611,13 +612,26 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { /// This helper function creates the TlsGetAddr MCSymbol for AIX. We will /// create the csect and use the qual-name symbol instead of creating just the /// external symbol. -static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx) { +static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) { + StringRef SymName = + MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr"; return Ctx - .getXCOFFSection(".__tls_get_addr", SectionKind::getText(), + .getXCOFFSection(SymName, SectionKind::getText(), XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER)) ->getQualNameSymbol(); } +void PPCAsmPrinter::EmitAIXTlsCallHelper(const MachineInstr *MI) { + assert(Subtarget->isAIXABI() && + "Only expecting to emit calls to get the thread pointer on AIX!"); + + MCSymbol *TlsCall = createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode()); + const MCExpr *TlsRef = + MCSymbolRefExpr::create(TlsCall, MCSymbolRefExpr::VK_None, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef)); + return; +} + /// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a /// call to __tls_get_addr to the current output stream. 
void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, @@ -652,10 +666,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, assert(MI->getOperand(2).isReg() && MI->getOperand(2).getReg() == VarOffsetReg && "GETtls[ld]ADDR[32] must read GPR4"); - MCSymbol *TlsGetAddr = createMCSymbolForTlsGetAddr(OutContext); - const MCExpr *TlsRef = MCSymbolRefExpr::create( - TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef)); + EmitAIXTlsCallHelper(MI); return; } @@ -1357,6 +1368,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD); return; } + case PPC::GETtlsTpointer32AIX: { + // Transform: %r3 = GETtlsTpointer32AIX + // Into: BLA .__get_tpointer() + EmitAIXTlsCallHelper(MI); + return; + } case PPC::ADDIStlsldHA: { // Transform: %xd = ADDIStlsldHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha @@ -2776,11 +2793,13 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { MMI->hasDebugInfo()); break; } + case PPC::GETtlsTpointer32AIX: case PPC::GETtlsADDR64AIX: case PPC::GETtlsADDR32AIX: { - // The reference to .__tls_get_addr is unknown to the assembler - // so we need to emit an external symbol reference. - MCSymbol *TlsGetAddr = createMCSymbolForTlsGetAddr(OutContext); + // A reference to .__tls_get_addr/.__get_tpointer is unknown to the + // assembler so we need to emit an external symbol reference. 
+ MCSymbol *TlsGetAddr = + createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode()); ExtSymSDNodeSymbols.insert(TlsGetAddr); break; } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f4eb89d..599e576 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1738,6 +1738,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; + case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER"; case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; @@ -3330,22 +3331,28 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op, TLSModel::Model Model = getTargetMachine().getTLSModel(GV); if (Model == TLSModel::LocalExec) { - if (Is64Bit) { + SDValue VariableOffsetTGA = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG); + SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA); + SDValue TLSReg; + if (Is64Bit) // For local-exec on AIX (64-bit), the sequence that is generated involves // a load of the variable offset (from the TOC), followed by an add of the // loaded variable offset to R13 (the thread pointer). 
// This code sequence looks like: // ld reg1,var[TC](2) // add reg2, reg1, r13 // r13 contains the thread pointer - SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); - SDValue VariableOffsetTGA = - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG); - SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset); - } else { - report_fatal_error("On AIX, the local-exec TLS model is only supported " - "on PPC64 for now."); - } + TLSReg = DAG.getRegister(PPC::X13, MVT::i64); + else + // For local-exec on AIX (32-bit), the sequence that is generated involves + // loading the variable offset from the TOC, generating a call to + // .__get_tpointer to get the thread pointer (which will be in R3), and + // adding the two together: + // lwz reg1,var[TC](2) + // bla .__get_tpointer + // add reg2, reg1, r3 + TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset); } // The Local-Exec and General-Dynamic TLS models are currently the only diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 2e2514a5..e6ebc68 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -356,6 +356,11 @@ namespace llvm { /// ADDIS_TLSGD_L_ADDR until after register assignment. GET_TLS_ADDR, + /// %x3 = GET_TPOINTER - Used for the local-exec TLS model on 32-bit AIX, + /// produces a call to .__get_tpointer to retrieve the thread pointer + /// At the end of the call, the thread pointer is found in R3. + GET_TPOINTER, + /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following /// register assignment. 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a8c27d0..25b2f9b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -213,6 +213,7 @@ def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>; def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -3142,6 +3143,24 @@ def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc "GETtlsADDR32AIX", [(set i32:$rD, (PPCgetTlsAddr i32:$offset, i32:$handle))]>; + +// For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is +// generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both +// R3 and the LR (link register). +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [R3,LR] in +def GETtlsTpointer32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins), + "GETtlsTpointer32AIX", + [(set i32:$rD, (PPCgetTpointer))]>; + +// The following pattern matches local-exec TLS accesses on 32-bit AIX. +// PPCaddTls is used in local-exec accesses in order to: +// - Get the address of a variable (add the variable offset to the thread +// pointer, retrieved by calling .__get_tpointer). +// - Create an opportunity to optimize the user of the loaded address. +def : Pat<(PPCaddTls i32:$in, i32:$addr), + (ADD4TLS $in, $addr)>; + // Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR // are true defines while the rest of the Defs are clobbers. 
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 59e8f3f..9518d53 100644 --- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -56,13 +56,16 @@ protected: I != IE;) { MachineInstr &MI = *I; IsPCREL = isPCREL(MI); + // There are a number of slight differences in code generation + // when we call .__get_tpointer (32-bit AIX TLS). + bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX; if (MI.getOpcode() != PPC::ADDItlsgdLADDR && MI.getOpcode() != PPC::ADDItlsldLADDR && MI.getOpcode() != PPC::ADDItlsgdLADDR32 && MI.getOpcode() != PPC::ADDItlsldLADDR32 && MI.getOpcode() != PPC::TLSGDAIX && - MI.getOpcode() != PPC::TLSGDAIX8 && !IsPCREL) { + MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) { // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP // as scheduling fences, we skip creating fences if we already // have existing ADJCALLSTACKDOWN/UP to avoid nesting, @@ -82,7 +85,7 @@ protected: Register InReg = PPC::NoRegister; Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; - if (!IsPCREL) + if (!IsPCREL && !IsTLSTPRelMI) InReg = MI.getOperand(1).getReg(); DebugLoc DL = MI.getDebugLoc(); @@ -116,6 +119,12 @@ protected: // set Opc2 here. Opc2 = PPC::GETtlsADDR32AIX; break; + case PPC::GETtlsTpointer32AIX: + // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX + // 32-bit mode within PPCAsmPrinter. This instruction does not need + // to change, so Opc2 is set to the same instruction opcode. + Opc2 = PPC::GETtlsTpointer32AIX; + break; case PPC::PADDI8pc: assert(IsPCREL && "Expecting General/Local Dynamic PCRel"); Opc1 = PPC::PADDI8pc; @@ -138,11 +147,17 @@ protected: if (IsAIX) { // The variable offset and region handle are copied in r4 and r3. The // copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX. 
- BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4) - .addReg(MI.getOperand(1).getReg()); - BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3) - .addReg(MI.getOperand(2).getReg()); - BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4); + if (!IsTLSTPRelMI) { + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4) + .addReg(MI.getOperand(1).getReg()); + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3) + .addReg(MI.getOperand(2).getReg()); + BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4); + } else + // The opcode of GETtlsTpointer32AIX does not change, because later + // this instruction will be expanded into a call to .__get_tpointer, + // which will return the thread pointer into r3. + BuildMI(MBB, I, DL, TII->get(Opc2), GPR3); } else { MachineInstr *Addi; if (IsPCREL) { diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll index 2e0697d..4490541 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32 @ThreadLocalVarInit = thread_local(localexec) global double 0x4021947AE147AE14, align 8 @VarInit = global double 8.787000e+01, align 8 @@ -28,6 +34,35 @@ define void @storeITLUninit(double noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLUninit: +; SMALL32: # %bb.0: # %entry 
+; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -49,6 +84,35 @@ define void @storeITLInit(double noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 
+; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -70,6 +134,35 @@ define void @storeTLUninit(double noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -91,6 +184,35 @@ define void @storeTLInit(double noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLInit: +; LARGE32: # %bb.0: # %entry 
+; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -112,6 +234,35 @@ define double @loadITLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -140,6 +291,42 @@ define double @loadITLUninit2() { ; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # 
target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r5) +; SMALL32-NEXT: lfd f0, 0(r3) +; SMALL32-NEXT: xsadddp f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: xsadddp f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -163,6 +350,35 @@ define double @loadITLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; 
LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -191,6 +407,42 @@ define double @loadITLInit2() { ; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r5) +; SMALL32-NEXT: lfd f0, 0(r3) +; SMALL32-NEXT: xsadddp f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: xsadddp f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -214,6 +466,35 @@ define double @loadTLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: 
lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -242,6 +523,42 @@ define double @loadTLUninit2() { ; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r5) +; SMALL32-NEXT: lfd f0, 0(r3) +; SMALL32-NEXT: xsadddp f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, 
L..C4@l(r3) +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: xsadddp f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -265,6 +582,35 @@ define double @loadTLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -293,6 +639,42 @@ define double @loadTLInit2() { ; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r5) 
+; SMALL32-NEXT: lfd f0, 0(r3) +; SMALL32-NEXT: xsadddp f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: xsadddp f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -326,3 +708,27 @@ entry: ; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le ; LARGE64-LABEL: L..C4: ; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW] + +; SMALL32-LABEL: .toc +; SMALL32-LABEL: L..C0: +; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le +; SMALL32-LABEL: L..C1: +; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C2: +; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le +; SMALL32-LABEL: L..C3: +; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C4: +; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW] + +; LARGE32-LABEL: .toc +; LARGE32-LABEL: L..C0: +; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le +; LARGE32-LABEL: L..C1: +; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C2: +; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le +; LARGE32-LABEL: L..C3: +; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C4: +; 
LARGE32-NEXT: .tc VarInit[TE],VarInit[RW] diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll index 2b93616..427262c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32 @ThreadLocalVarInit = thread_local(localexec) global float 0x401D333340000000, align 4 @VarInit = global float 0x4021666660000000, align 4 @@ -28,6 +34,35 @@ define void @storeITLUninit(float noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; 
LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store float %x, ptr %0, align 4 @@ -49,6 +84,35 @@ define void @storeITLInit(float noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store float %x, ptr %0, align 4 @@ -70,6 +134,35 @@ define void @storeTLUninit(float noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLUninit: +; 
LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store float %x, ptr %0, align 4 @@ -91,6 +184,35 @@ define void @storeTLInit(float noundef %x) { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: stfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store float %x, ptr %0, align 4 @@ -112,6 +234,35 @@ define float @loadITLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, 
L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -140,6 +291,42 @@ define float @loadITLUninit2() { ; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r5) +; SMALL32-NEXT: lfs f0, 0(r3) +; SMALL32-NEXT: fadds f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, 
L..C4@l(r3) +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: fadds f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -163,6 +350,35 @@ define float @loadITLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -191,6 +407,42 @@ define float @loadITLInit2() { ; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 
0(r5) +; SMALL32-NEXT: lfs f0, 0(r3) +; SMALL32-NEXT: fadds f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: fadds f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -214,6 +466,35 @@ define float @loadTLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr 
align 4 @ThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -242,6 +523,42 @@ define float @loadTLUninit2() { ; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r5) +; SMALL32-NEXT: lfs f0, 0(r3) +; SMALL32-NEXT: fadds f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: fadds f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load float, ptr %0, align 4 @@ -265,6 +582,35 @@ define float @loadTLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; 
SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -293,6 +639,42 @@ define float @loadTLInit2() { ; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lfs f1, 0(r5) +; SMALL32-NEXT: lfs f0, 0(r3) +; SMALL32-NEXT: fadds f1, f0, f1 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: addis r3, L..C4@u(r2) +; LARGE32-NEXT: lwz r3, L..C4@l(r3) +; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: fadds f1, f0, f1 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr 
align 4 @ThreadLocalVarInit) %1 = load float, ptr %0, align 4 @@ -326,3 +708,27 @@ entry: ; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le ; LARGE64-LABEL: L..C4: ; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW] + +; SMALL32-LABEL: .toc +; SMALL32-LABEL: L..C0: +; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le +; SMALL32-LABEL: L..C1: +; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C2: +; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le +; SMALL32-LABEL: L..C3: +; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C4: +; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW] + +; LARGE32-LABEL: .toc +; LARGE32-LABEL: L..C0: +; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le +; LARGE32-LABEL: L..C1: +; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C2: +; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le +; LARGE32-LABEL: L..C3: +; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C4: +; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW] diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll index 1df8484..01aa56a 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32 
@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 @VarInit = global i32 87, align 4 @@ -28,6 +34,37 @@ define void @storeITLUninit(i32 noundef signext %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: stw r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r5, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: mr r4, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r5 +; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r4, r3 +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r5, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r5 +; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -49,6 +86,37 @@ define void @storeITLInit(i32 noundef signext %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: stw r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r5, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: mr r4, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r5 +; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: 
storeITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r4, r3 +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r5, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r5 +; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -70,6 +138,37 @@ define void @storeTLUninit(i32 noundef signext %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: stw r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r5, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: mr r4, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r5 +; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r4, r3 +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r5, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r5 +; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -91,6 +190,37 @@ define void @storeTLInit(i32 noundef signext %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: stw r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: 
storeTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r5, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: mr r4, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r5 +; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r4, r3 +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r5, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r5 +; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -112,6 +242,35 @@ define signext i32 @loadITLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lwa r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; 
LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -142,6 +301,42 @@ define signext i32 @loadITLUninit2() { ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r4, 0(r5) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: add r3, r4, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r4, 0(r4) +; LARGE32-NEXT: add r3, r4, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -165,6 +360,35 @@ define signext i32 @loadITLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lwa r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) 
@IThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -195,6 +419,42 @@ define signext i32 @loadITLInit2() { ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r4, 0(r5) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: add r3, r4, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r4, 0(r4) +; LARGE32-NEXT: 
add r3, r4, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -218,6 +478,35 @@ define signext i32 @loadTLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lwa r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -248,6 +537,42 @@ define signext i32 @loadTLUninit2() { ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r4, 0(r5) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: 
add r3, r4, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r4, 0(r4) +; LARGE32-NEXT: add r3, r4, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -271,6 +596,35 @@ define signext i32 @loadTLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: lwa r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 
@@ -301,6 +655,42 @@ define signext i32 @loadTLInit2() { ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r4, 0(r5) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: add r3, r4, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r4, 0(r4) +; LARGE32-NEXT: add r3, r4, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -334,3 +724,27 @@ entry: ; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le ; LARGE64-LABEL: L..C4: ; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW] + +; SMALL32-LABEL: .toc +; SMALL32-LABEL: L..C0: +; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le +; SMALL32-LABEL: L..C1: +; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C2: +; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le +; SMALL32-LABEL: L..C3: +; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le +; 
SMALL32-LABEL: L..C4: +; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW] + +; LARGE32-LABEL: .toc +; LARGE32-LABEL: L..C0: +; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le +; LARGE32-LABEL: L..C1: +; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C2: +; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le +; LARGE32-LABEL: L..C3: +; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C4: +; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW] diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll index 8ef6809..4661b57 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32 @ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 @VarInit = global i64 87, align 8 @@ -28,6 +34,39 @@ define void @storeITLUninit(i64 noundef %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: std r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r6, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: mr r5, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r6 +; SMALL32-NEXT: stw r4, 4(r3) +; 
SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r5, r3 +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r6, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r6 +; LARGE32-NEXT: stw r4, 4(r3) +; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -49,6 +88,39 @@ define void @storeITLInit(i64 noundef %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: std r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r6, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: mr r5, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r6 +; SMALL32-NEXT: stw r4, 4(r3) +; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r5, r3 +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r6, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r6 +; LARGE32-NEXT: stw r4, 4(r3) +; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr 
align 8 @IThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -70,6 +142,39 @@ define void @storeTLUninit(i64 noundef %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: std r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r6, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: mr r5, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r6 +; SMALL32-NEXT: stw r4, 4(r3) +; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r5, r3 +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r6, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r6 +; LARGE32-NEXT: stw r4, 4(r3) +; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -91,6 +196,39 @@ define void @storeTLInit(i64 noundef %x) { ; LARGE64-NEXT: add r4, r13, r4 ; LARGE64-NEXT: std r3, 0(r4) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: storeTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r6, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: mr r5, r3 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r6 +; SMALL32-NEXT: stw r4, 4(r3) +; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; 
SMALL32-NEXT: blr +; +; LARGE32-LABEL: storeTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: mr r5, r3 +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r6, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r6 +; LARGE32-NEXT: stw r4, 4(r3) +; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -112,6 +250,37 @@ define i64 @loadITLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r4, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r4, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -140,6 +309,48 @@ define i64 @loadITLUninit2() { ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; 
LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: lwz r5, 0(r5) +; SMALL32-NEXT: lwz r4, 4(r3) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r3) +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r5, L..C4@u(r2) +; LARGE32-NEXT: lwz r5, L..C4@l(r5) +; LARGE32-NEXT: lwz r6, 4(r5) +; LARGE32-NEXT: lwz r5, 0(r5) +; LARGE32-NEXT: addc r4, r6, r4 +; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -163,6 +374,37 @@ define i64 @loadITLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r4, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; 
SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r4, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -191,6 +433,48 @@ define i64 @loadITLInit2() { ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadITLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: lwz r5, 0(r5) +; SMALL32-NEXT: lwz r4, 4(r3) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadITLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r3) +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r5, L..C4@u(r2) +; LARGE32-NEXT: lwz r5, L..C4@l(r5) +; LARGE32-NEXT: lwz r6, 4(r5) +; LARGE32-NEXT: lwz r5, 
0(r5) +; LARGE32-NEXT: addc r4, r6, r4 +; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -214,6 +498,37 @@ define i64 @loadTLUninit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r4, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r4, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -242,6 +557,48 @@ define i64 @loadTLUninit2() { ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLUninit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; 
SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: lwz r5, 0(r5) +; SMALL32-NEXT: lwz r4, 4(r3) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLUninit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r3) +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r5, L..C4@u(r2) +; LARGE32-NEXT: lwz r5, L..C4@l(r5) +; LARGE32-NEXT: lwz r6, 4(r5) +; LARGE32-NEXT: lwz r5, 0(r5) +; LARGE32-NEXT: addc r4, r6, r4 +; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -265,6 +622,37 @@ define i64 @loadTLInit() { ; LARGE64-NEXT: add r3, r13, r3 ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r4, r3, r4 +; SMALL32-NEXT: lwz r3, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla 
.__get_tpointer[PR] +; LARGE32-NEXT: add r4, r3, r4 +; LARGE32-NEXT: lwz r3, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -293,6 +681,48 @@ define i64 @loadTLInit2() { ; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL32-LABEL: loadTLInit2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -32(r1) +; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: stw r0, 40(r1) +; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: lwz r5, 0(r5) +; SMALL32-NEXT: lwz r4, 4(r3) +; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: addi r1, r1, 32 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: loadTLInit2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -32(r1) +; LARGE32-NEXT: stw r0, 40(r1) +; LARGE32-NEXT: addis r3, L..C3@u(r2) +; LARGE32-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-NEXT: bla .__get_tpointer[PR] +; LARGE32-NEXT: add r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r3) +; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: addis r5, L..C4@u(r2) +; LARGE32-NEXT: lwz r5, L..C4@l(r5) +; LARGE32-NEXT: lwz r6, 4(r5) +; LARGE32-NEXT: lwz r5, 0(r5) +; LARGE32-NEXT: addc r4, r6, r4 +; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: addi r1, r1, 32 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -326,3 +756,27 @@ entry: ; LARGE64-NEXT: .tc 
ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le ; LARGE64-LABEL: L..C4: ; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW] + +; SMALL32-LABEL: .toc +; SMALL32-LABEL: L..C0: +; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le +; SMALL32-LABEL: L..C1: +; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C2: +; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le +; SMALL32-LABEL: L..C3: +; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le +; SMALL32-LABEL: L..C4: +; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW] + +; LARGE32-LABEL: .toc +; LARGE32-LABEL: L..C0: +; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le +; LARGE32-LABEL: L..C1: +; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C2: +; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le +; LARGE32-LABEL: L..C3: +; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le +; LARGE32-LABEL: L..C4: +; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW] diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll new file mode 100644 index 0000000..261ee7e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll @@ -0,0 +1,272 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s +; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s + +@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 +@VarInit = global i64 87, align 8 +@IThreadLocalVarUninit = internal thread_local(localexec) global i64 0, align 8 +declare nonnull ptr 
@llvm.threadlocal.address.p0(ptr nonnull) + +define void @storeITLUninit(i64 noundef %x) { +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) + store i64 %x, ptr %0, align 8 + ret void +} + +define i64 @loadTLInit() { +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %2 = load i64, ptr @VarInit, align 8 + %add = add nsw i64 %2, %1 + ret i64 %add +} + +; RELOC: File: {{.*}}aix-tls-le-xcoff-reloc-large32.ll.tmp.o +; RELOC-NEXT: Format: aixcoff-rs6000 +; RELOC-NEXT: Arch: powerpc +; RELOC-NEXT: AddressSize: 32bit +; RELOC-NEXT: Relocations [ +; RELOC: Virtual Address: 0x12 +; RELOC-NEXT: Symbol: IThreadLocalVarUninit (17) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 16 +; RELOC-NEXT: Type: R_TOCU (0x30) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x16 +; RELOC-NEXT: Symbol: IThreadLocalVarUninit (17) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 16 +; RELOC-NEXT: Type: R_TOCL (0x31) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x18 +; RELOC-NEXT: Symbol: .__get_tpointer (1) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 26 +; RELOC-NEXT: Type: R_RBA (0x18) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x4E +; RELOC-NEXT: Symbol: ThreadLocalVarInit (19) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 16 +; RELOC-NEXT: Type: R_TOCU (0x30) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x52 +; RELOC-NEXT: Symbol: ThreadLocalVarInit (19) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 16 +; RELOC-NEXT: Type: R_TOCL (0x31) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x54 +; RELOC-NEXT: Symbol: .__get_tpointer (1) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 26 +; RELOC-NEXT: Type: R_RBA (0x18) +; RELOC-NEXT: } +; RELOC: Virtual 
Address: 0xB0 +; RELOC-NEXT: Symbol: IThreadLocalVarUninit (25) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 32 +; RELOC-NEXT: Type: R_TLS_LE (0x23) +; RELOC-NEXT: } +; RELOC: Relocation { +; RELOC-NEXT: Virtual Address: 0xB4 +; RELOC-NEXT: Symbol: ThreadLocalVarInit (23) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 32 +; RELOC-NEXT: Type: R_TLS_LE (0x23) +; RELOC-NEXT: } + +; SYM: File: {{.*}}aix-tls-le-xcoff-reloc-large32.ll.tmp.o +; SYM-NEXT: Format: aixcoff-rs6000 +; SYM-NEXT: Arch: powerpc +; SYM-NEXT: AddressSize: 32bit +; SYM-NEXT: Symbols [ +; SYM: Index: 1 +; SYM-NEXT: Name: .__get_tpointer +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: N_UNDEF +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 2 +; SYM-NEXT: SectionLen: 0 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 0 +; SYM-NEXT: SymbolType: XTY_ER (0x0) +; SYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 17 +; SYM-NEXT: Name: IThreadLocalVarUninit +; SYM-NEXT: Value (RelocatableAddress): 0xB0 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 18 +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TE (0x16) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 19 +; SYM-NEXT: Name: ThreadLocalVarInit +; SYM-NEXT: Value (RelocatableAddress): 0xB4 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: 
StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 20 +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TE (0x16) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 23 +; SYM-NEXT: Name: ThreadLocalVarInit +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: .tdata +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 24 +; SYM-NEXT: SectionLen: 8 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 3 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TL (0x14) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 25 +; SYM-NEXT: Name: IThreadLocalVarUninit +; SYM-NEXT: Value (RelocatableAddress): 0x8 +; SYM-NEXT: Section: .tbss +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 26 +; SYM-NEXT: SectionLen: 8 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 3 +; SYM-NEXT: SymbolType: XTY_CM (0x3) +; SYM-NEXT: StorageMappingClass: XMC_UL (0x15) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } + +; DIS: {{.*}}aix-tls-le-xcoff-reloc-large32.ll.tmp.o: file format aixcoff-rs6000 +; DIS: Disassembly of section .text: +; DIS: 00000000 (idx: 5) .storeITLUninit: +; DIS-NEXT: mflr 0 +; DIS-NEXT: stwu 1, -32(1) +; DIS-NEXT: stw 0, 40(1) +; DIS-NEXT: mr 5, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) 
IThreadLocalVarUninit[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 6, 0(3) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) IThreadLocalVarUninit[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 +; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 6 +; DIS-NEXT: stw 4, 4(3) +; DIS-NEXT: stw 5, 0(3) +; DIS-NEXT: addi 1, 1, 32 +; DIS-NEXT: lwz 0, 8(1) +; DIS-NEXT: mtlr 0 +; DIS-NEXT: blr +; DIS: 00000040 (idx: 7) .loadTLInit: +; DIS-NEXT: mflr 0 +; DIS-NEXT: stwu 1, -32(1) +; DIS-NEXT: stw 0, 40(1) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) ThreadLocalVarInit[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) ThreadLocalVarInit[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 +; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 5, 2, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) VarInit[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(5) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) VarInit[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 6, 4(5) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(5) +; DIS-NEXT: addc 4, 6, 4 +; DIS-NEXT: adde 3, 5, 3 +; DIS-NEXT: addi 1, 1, 32 +; DIS-NEXT: lwz 0, 8(1) +; DIS-NEXT: mtlr 0 +; DIS-NEXT: blr + +; DIS: Disassembly of section .data: +; DIS: 00000090 (idx: 9) VarInit[RW]: +; DIS-NEXT: 90: 00 00 00 00 +; DIS-NEXT: 94: 00 00 00 57 +; DIS: 00000098 (idx: 11) storeITLUninit[DS]: +; DIS-NEXT: 98: 00 00 00 00 +; DIS-NEXT: 00000098: R_POS (idx: 5) .storeITLUninit +; DIS-NEXT: 9c: 00 00 00 b0 +; DIS-NEXT: 0000009c: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: a0: 00 00 00 00 +; DIS: 000000a4 (idx: 13) loadTLInit[DS]: +; DIS-NEXT: a4: 00 00 00 40 +; DIS-NEXT: 000000a4: R_POS (idx: 7) 
.loadTLInit +; DIS-NEXT: a8: 00 00 00 b0 +; DIS-NEXT: 000000a8: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: ac: 00 00 00 00 +; DIS: 000000b0 (idx: 17) IThreadLocalVarUninit[TE]: +; DIS-NEXT: b0: 00 00 00 00 +; DIS-NEXT: 000000b0: R_TLS_LE (idx: 25) IThreadLocalVarUninit[UL] +; DIS: 000000b4 (idx: 19) ThreadLocalVarInit[TE]: +; DIS-NEXT: b4: 00 00 00 00 +; DIS-NEXT: 000000b4: R_TLS_LE (idx: 23) ThreadLocalVarInit[TL] +; DIS: 000000b8 (idx: 21) VarInit[TE]: +; DIS-NEXT: b8: 00 00 00 90 +; DIS-NEXT: 000000b8: R_POS (idx: 9) VarInit[RW] + +; DIS: Disassembly of section .tdata: +; DIS: 00000000 (idx: 23) ThreadLocalVarInit[TL]: +; DIS-NEXT: 0: 00 00 00 00 +; DIS-NEXT: 4: 00 00 00 01 + +; DIS: Disassembly of section .tbss: +; DIS: 00000008 (idx: 25) IThreadLocalVarUninit[UL]: +; DIS-NEXT: ... + diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll new file mode 100644 index 0000000..d3dbbf4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll @@ -0,0 +1,245 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false -data-sections=false -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s +; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s + +@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 +@VarInit = global i32 87, align 4 +@IThreadLocalVarUninit = internal thread_local(localexec) global i32 0, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) + +define void @storeITLUninit(i32 noundef signext %x) { +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) + store i32 %x, ptr %0, align 4 + ret void +} + +define signext i32 @loadTLInit() { +entry: + %0 = tail call align 4 
ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %2 = load i32, ptr @VarInit, align 4 + %add = add nsw i32 %2, %1 + ret i32 %add +} + +; RELOC: File: {{.*}}aix-tls-le-xcoff-reloc32.ll.tmp.o +; RELOC-NEXT: Format: aixcoff-rs6000 +; RELOC-NEXT: Arch: powerpc +; RELOC-NEXT: AddressSize: 32bit +; RELOC-NEXT: Relocations [ +; RELOC: Virtual Address: 0xA +; RELOC-NEXT: Symbol: IThreadLocalVarUninit (19) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 16 +; RELOC-NEXT: Type: R_TOC (0x3) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x10 +; RELOC-NEXT: Symbol: .__get_tpointer (1) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 26 +; RELOC-NEXT: Type: R_RBA (0x18) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x3A +; RELOC-NEXT: Symbol: ThreadLocalVarInit (21) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 16 +; RELOC-NEXT: Type: R_TOC (0x3) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x40 +; RELOC-NEXT: Symbol: .__get_tpointer (1) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 26 +; RELOC-NEXT: Type: R_RBA (0x18) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x84 +; RELOC-NEXT: Symbol: IThreadLocalVarUninit (29) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 32 +; RELOC-NEXT: Type: R_TLS_LE (0x23) +; RELOC-NEXT: } +; RELOC: Virtual Address: 0x88 +; RELOC-NEXT: Symbol: ThreadLocalVarInit (27) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 32 +; RELOC-NEXT: Type: R_TLS_LE (0x23) +; RELOC-NEXT: } + +; SYM: File: {{.*}}aix-tls-le-xcoff-reloc32.ll.tmp.o +; SYM-NEXT: Format: aixcoff-rs6000 +; SYM-NEXT: Arch: powerpc +; SYM-NEXT: AddressSize: 32bit +; SYM-NEXT: Symbols [ +; SYM: Index: 1 +; SYM-NEXT: Name: .__get_tpointer +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: N_UNDEF +; SYM-NEXT: Type: 0x0 +; 
SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 2 +; SYM-NEXT: SectionLen: 0 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 0 +; SYM-NEXT: SymbolType: XTY_ER (0x0) +; SYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 19 +; SYM-NEXT: Name: IThreadLocalVarUninit +; SYM-NEXT: Value (RelocatableAddress): 0x84 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 20 +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 21 +; SYM-NEXT: Name: ThreadLocalVarInit +; SYM-NEXT: Value (RelocatableAddress): 0x88 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 22 +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 27 +; SYM-NEXT: Name: ThreadLocalVarInit +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: .tdata +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 28 +; SYM-NEXT: ContainingCsectSymbolIndex: 25 +; SYM-NEXT: 
ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 0 +; SYM-NEXT: SymbolType: XTY_LD (0x2) +; SYM-NEXT: StorageMappingClass: XMC_TL (0x14) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM: Index: 29 +; SYM-NEXT: Name: IThreadLocalVarUninit +; SYM-NEXT: Value (RelocatableAddress): 0x4 +; SYM-NEXT: Section: .tbss +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 30 +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_CM (0x3) +; SYM-NEXT: StorageMappingClass: XMC_UL (0x15) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } + +; DIS: {{.*}}aix-tls-le-xcoff-reloc32.ll.tmp.o: file format aixcoff-rs6000 +; DIS: Disassembly of section .text: +; DIS: 00000000 (idx: 5) .storeITLUninit: +; DIS-NEXT: mflr 0 +; DIS-NEXT: stwu 1, -32(1) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(2) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 19) IThreadLocalVarUninit[TC] +; DIS-NEXT: mr 4, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 +; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] +; DIS-NEXT: stw 0, 40(1) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 5 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3) +; DIS-NEXT: addi 1, 1, 32 +; DIS-NEXT: lwz 0, 8(1) +; DIS-NEXT: mtlr 0 +; DIS-NEXT: blr +; DIS: 00000030 (idx: 7) .loadTLInit: +; DIS-NEXT: mflr 0 +; DIS-NEXT: stwu 1, -32(1) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(2) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) ThreadLocalVarInit[TC] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(2) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) VarInit[TC] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 +; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] +; DIS-NEXT: stw 0, 40(1) +; 
DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 +; DIS-NEXT: lwz 4, 0(5) +; DIS-NEXT: lwz 3, 0(3) +; DIS-NEXT: add 3, 4, 3 +; DIS-NEXT: addi 1, 1, 32 +; DIS-NEXT: lwz 0, 8(1) +; DIS-NEXT: mtlr 0 +; DIS-NEXT: blr + +; DIS: Disassembly of section .data: +; DIS: 00000068 (idx: 11) VarInit: +; DIS-NEXT: 68: 00 00 00 57 +; DIS: 0000006c (idx: 13) storeITLUninit[DS]: +; DIS-NEXT: 6c: 00 00 00 00 +; DIS-NEXT: 0000006c: R_POS (idx: 5) .storeITLUninit +; DIS-NEXT: 70: 00 00 00 84 +; DIS-NEXT: 00000070: R_POS (idx: 17) TOC[TC0] +; DIS-NEXT: 74: 00 00 00 00 +; DIS: 00000078 (idx: 15) loadTLInit[DS]: +; DIS-NEXT: 78: 00 00 00 30 +; DIS-NEXT: 00000078: R_POS (idx: 7) .loadTLInit +; DIS-NEXT: 7c: 00 00 00 84 +; DIS-NEXT: 0000007c: R_POS (idx: 17) TOC[TC0] +; DIS-NEXT: 80: 00 00 00 00 +; DIS: 00000084 (idx: 19) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 84: 00 00 00 00 +; DIS-NEXT: 00000084: R_TLS_LE (idx: 29) IThreadLocalVarUninit[UL] +; DIS: 00000088 (idx: 21) ThreadLocalVarInit[TC]: +; DIS-NEXT: 88: 00 00 00 00 +; DIS-NEXT: 00000088: R_TLS_LE (idx: 27) ThreadLocalVarInit +; DIS: 0000008c (idx: 23) VarInit[TC]: +; DIS-NEXT: 8c: 00 00 00 68 +; DIS-NEXT: 0000008c: R_POS (idx: 11) VarInit + +; DIS: Disassembly of section .tdata: +; DIS: 00000000 (idx: 27) ThreadLocalVarInit: +; DIS-NEXT: 0: 00 00 00 01 + +; DIS: Disassembly of section .tbss: +; DIS: 00000004 (idx: 29) IThreadLocalVarUninit[UL]: +; DIS-NEXT: ... + -- 2.7.4