on 64-bit PowerPC ELF.
The patch includes code to handle external assembly and MC output with the
integrated assembler. It intentionally does not support the "old" JIT.
For the initial-exec TLS model, the ABI requires the following to calculate
the address of external thread-local variable x:
Code sequence Relocation Symbol
ld 9,x@got@tprel(2) R_PPC64_GOT_TPREL16_DS x
add 9,9,x@tls R_PPC64_TLS x
The register 9 is arbitrary here. The linker will replace x@got@tprel
with the offset relative to the thread pointer to the generated GOT
entry for symbol x. It will replace x@tls with the thread-pointer
register (13).
The two test cases verify correct assembly output and relocation output
as just described.
PowerPC-specific selection node variants are added for the two
instructions above: LD_GOT_TPREL and ADD_TLS. These are inserted
when an initial-exec global variable is encountered by
PPCTargetLowering::LowerGlobalTLSAddress(), and later lowered to
machine instructions LDgotTPREL and ADD8TLS. LDgotTPREL is a pseudo
that uses the same LDrs support added for medium code model's LDtocL,
with a different relocation type.
The rest of the processing is straightforward.
llvm-svn: 169281
VK_PPC_TPREL16_LO, // symbol@tprel@l
VK_PPC_TOC16_HA, // symbol@toc@ha
VK_PPC_TOC16_LO, // symbol@toc@l
+ VK_PPC_GOT_TPREL16_DS, // symbol@got@tprel
+ VK_PPC_TLS, // symbol@tls
VK_Mips_GPREL,
VK_Mips_GOT_CALL,
R_PPC64_TOC16_HA = 50,
R_PPC64_TOC = 51,
R_PPC64_TOC16_DS = 63,
- R_PPC64_TOC16_LO_DS = 64
+ R_PPC64_TOC16_LO_DS = 64,
+ R_PPC64_TLS = 67,
+ R_PPC64_GOT_TPREL16_DS = 87
};
// ARM Specific e_flags
case VK_PPC_TPREL16_LO: return "tprel@l";
case VK_PPC_TOC16_HA: return "toc@ha";
case VK_PPC_TOC16_LO: return "toc@l";
+ case VK_PPC_GOT_TPREL16_DS: return "got@tprel";
+ case VK_PPC_TLS: return "tls";
case VK_Mips_GPREL: return "GPREL";
case VK_Mips_GOT_CALL: return "GOT_CALL";
case VK_Mips_GOT16: return "GOT16";
case FK_Data_4:
case FK_Data_8:
case PPC::fixup_ppc_toc:
+ case PPC::fixup_ppc_tlsreg:
return Value;
case PPC::fixup_ppc_lo14:
case PPC::fixup_ppc_toc16_ds:
{ "fixup_ppc_lo14", 16, 14, 0 },
{ "fixup_ppc_toc", 0, 64, 0 },
{ "fixup_ppc_toc16", 16, 16, 0 },
- { "fixup_ppc_toc16_ds", 16, 14, 0 }
+ { "fixup_ppc_toc16_ds", 16, 14, 0 },
+ { "fixup_ppc_tlsreg", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
case MCSymbolRefExpr::VK_PPC_TOC16_LO:
Type = ELF::R_PPC64_TOC16_LO_DS;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS:
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
}
break;
+ case PPC::fixup_ppc_tlsreg:
+ Type = ELF::R_PPC64_TLS;
+ break;
case FK_Data_8:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
/// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
/// implied 2 zero bits
fixup_ppc_toc16_ds,
+
+ /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+ fixup_ppc_tlsreg,
// Marker
LastTargetFixupKind,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
}
+unsigned PPCMCCodeEmitter::getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // Add a fixup for the GOT displacement to the TLS block offset.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_toc16_ds));
+ return 0;
+}
+
+
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this statement is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_tlsreg));
+ return getPPCRegisterNumbering(PPC::X13);
+}
+
+
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
MO_NLP_HIDDEN_FLAG = 16,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 224,
+ MO_ACCESS_MASK = 0xe0,
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 32, MO_HA16 = 64,
+ MO_LO16 = 1 << 5,
+ MO_HA16 = 2 << 5,
- MO_TPREL16_HA = 96,
- MO_TPREL16_LO = 128
+ MO_TPREL16_HA = 3 << 5,
+ MO_TPREL16_LO = 4 << 5,
+ MO_GOT_TPREL16_DS = 5 << 5,
+ MO_TLS = 6 << 5
};
} // end namespace PPCII
OutStreamer.EmitInstruction(TmpInst);
return;
}
+ case PPC::LDgotTPREL: {
+ // Transform %Xd = LDgotTPREL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LDrs, which is a form of LD with the offset
+ // specified by a SymbolLo.
+ TmpInst.setOpcode(PPC::LDrs);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSOffsetEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
}
+unsigned PPCCodeEmitter::getTLSOffsetEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
+ case PPCISD::LD_GOT_TPREL: {
+ assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+ return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64,
+ N->getOperand(0), N->getOperand(1));
+ }
}
return SelectCode(N);
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::LD_GOT_TPREL: return "PPCISD::LD_GOT_TPREL";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
}
}
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (!is64bit)
+ llvm_unreachable("only local-exec is currently supported for ppc32");
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ if (Model != TLSModel::InitialExec)
+ llvm_unreachable("only local-exec and initial-exec TLS modes supported");
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_GOT_TPREL16_DS);
+ SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
+ SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
- return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT,
+ GOTOffset, GOTReg);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg);
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
CR6SET,
CR6UNSET,
+ /// G8RC = LD_GOT_TPREL Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym@got@tprel. The latter identifies the GOT entry
+ /// containing the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
let EncoderMethod = "getMemRIXEncoding";
let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
}
+def tlsaddr : Operand<i64> {
+ let EncoderMethod = "getTLSOffsetEncoding";
+}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntSimple,
[(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IntSimple,
+ [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
[(set G8RC:$rD,
(PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
+// Support for thread-local storage.
+def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg),
+ "#LDgotTPREL",
+ [(set G8RC:$rD,
+ (PPCldGotTprel G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
+ (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
+
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
+ case PPCII::MO_GOT_TPREL16_DS:
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS;
+ break;
+ case PPCII::MO_TLS:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
--- /dev/null
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage
+; using the initial-exec model and integrated assembly.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
+; accessing external variable a.
+;
+; CHECK: '.rela.text'
+; CHECK: Relocation 0
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT: 'r_type', 0x00000057
+; CHECK: Relocation 1
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x00000043
+
--- /dev/null
+; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage
+; using the initial-exec model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: ld [[REG:[0-9]+]], a@got@tprel(2)
+; CHECK: add {{[0-9]+}}, [[REG]], a@tls
+