From a8c624a1c469752fe80fdd25426de6d14e4f8ca4 Mon Sep 17 00:00:00 2001 From: Xiangling Liao Date: Tue, 13 Aug 2019 20:29:01 +0000 Subject: [PATCH] [AIX]Lowering global address for 32/64bit small/large code models This patch implements global address lowering for 32-bit and 64-bit targets with the small and large code models. 1. For the 32-bit large code model on AIX, the pseudo opcodes LWZtocL and ADDIStocHA are newly added; MC-layer support for them will be provided by future patches. 2. The default code model on AIX is now the small code model. 3. Since AIX does not have a medium code model, "report_fatal_error" is called when users specify it. Differential Revision: https://reviews.llvm.org/D63547 llvm-svn: 368744 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 87 ++++++++++++++++------ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 41 +++++----- llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 9 +++ llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp | 9 ++- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 17 ++++- .../test/CodeGen/PowerPC/lower-globaladdr32-aix.ll | 38 ++++++++++ .../test/CodeGen/PowerPC/lower-globaladdr64-aix.ll | 38 ++++++++++ 8 files changed, 195 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll create mode 100644 llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 645a740..b0fa571 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5065,44 +5065,87 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case PPCISD::TOC_ENTRY: { - assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && - "Only supported for 64-bit ABI and 32-bit SVR4"); - if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { - SDValue GA = N->getOperand(0); - SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - N->getOperand(1)); - 
transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; - } + const bool isPPC64 = PPCSubTarget->isPPC64(); + const bool isELFABI = PPCSubTarget->isSVR4ABI(); + const bool isAIXABI = PPCSubTarget->isAIXABI(); + + assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct"); + + // PowerPC only supports the small, medium and large code models. + const CodeModel::Model CModel = TM.getCodeModel(); + assert((CModel != CodeModel::Tiny && CModel != CodeModel::Kernel) && + "PowerPC doesn't support tiny or kernel code models."); - // For medium and large code model, we generate two instructions as - // described below. Otherwise we allow SelectCodeCommon to handle this, + if (isAIXABI && CModel == CodeModel::Medium) + report_fatal_error("Medium code model is not supported on AIX."); + + // For 64-bit small code model, we allow SelectCodeCommon to handle this, // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. - CodeModel::Model CModel = TM.getCodeModel(); - if (CModel != CodeModel::Medium && CModel != CodeModel::Large) + if (isPPC64 && CModel == CodeModel::Small) break; - // The first source operand is a TargetGlobalAddress or a TargetJumpTable. - // If it must be toc-referenced according to PPCSubTarget, we generate: + // Handle 32-bit small code model. + if (!isPPC64) { + // Transforms the PPCISD::TOC_ENTRY node to a PPC::LWZtoc. + auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { + SDValue GA = TocEntry->getOperand(0); + SDValue TocBase = TocEntry->getOperand(1); + SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + TocBase); + transferMemOperands(TocEntry, MN); + ReplaceNode(TocEntry, MN); + }; + + if (isELFABI) { + assert(TM.isPositionIndependent() && + "32-bit ELF can only have TOC entries in position independent" + " code."); + // 32-bit ELF always uses a small code model toc access. 
+ replaceWithLWZtoc(N); + return; + } + + if (isAIXABI && CModel == CodeModel::Small) { + replaceWithLWZtoc(N); + return; + } + } + + assert(CModel != CodeModel::Small && "All small code models handled."); + + assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" + " ELF/AIX or 32-bit AIX in the following."); + + // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode + // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We + // generate two instructions as described below. The first source operand + // is a symbol reference. If it must be toc-referenced according to + // PPCSubTarget, we generate: + // [32-bit AIX] + // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) + // [64-bit ELF/AIX] // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); - SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA8, dl, MVT::i64, - TOCbase, GA); + + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; + SDNode *Tmp = CurDAG->getMachineNode( + isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); + if (PPCLowering->isAccessedAsGotIndirect(GA)) { - // If it is access as got-indirect, we need an extra LD to load + // If it is accessed as got-indirect, we need an extra LWZ/LD to load // the address. - SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); + SDNode *MN = CurDAG->getMachineNode( + isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); + transferMemOperands(N, MN); ReplaceNode(N, MN); return; } - // Build the address relative to the TOC-pointer.. + // Build the address relative to the TOC-pointer. 
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 54090c4..d963b93 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1521,7 +1521,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = - static_cast<const PPCSubtarget&>(DAG.getSubtarget()); + static_cast<const PPCSubtarget&>(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; @@ -2671,12 +2671,14 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } -static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, - SDValue GA) { +SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, + SDValue GA) const { + const bool Is64Bit = Subtarget.isPPC64(); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; - SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : - DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); - + SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) + : Subtarget.isAIXABI() + ? 
DAG.getRegister(PPC::R2, VT) + : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, @@ -2695,7 +2697,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return getTOCEntry(DAG, SDLoc(CP), true, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2705,7 +2707,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(CP), false, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } SDValue CPIHi = @@ -2771,7 +2773,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return getTOCEntry(DAG, SDLoc(JT), true, GA); + return getTOCEntry(DAG, SDLoc(JT), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2781,7 +2783,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(GA), false, GA); + return getTOCEntry(DAG, SDLoc(GA), GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -2802,7 +2804,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, if (Subtarget.isPPC64()) setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); - return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); + return getTOCEntry(DAG, SDLoc(BASDN), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2917,12 +2919,12 @@ SDValue 
PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); - return getTOCEntry(DAG, DL, true, GA); + return getTOCEntry(DAG, DL, GA); } unsigned MOHiFlag, MOLoFlag; @@ -2933,7 +2935,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, DL, false, GA); + return getTOCEntry(DAG, DL, GA); } SDValue GAHi = @@ -14407,11 +14409,14 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) return true; + // AIX accesses everything indirectly through the TOC, which is similar to + // the GOT. + if (Subtarget.isAIXABI()) + return true; + CodeModel::Model CModel = getTargetMachine().getCodeModel(); // If it is small or large code model, module locals are accessed - // indirectly by loading their address from .toc/.got. The difference - // is that for large code model we have ADDIStocHA8 + LDtocL and for - // small code model we simply have LDtoc. + // indirectly by loading their address from .toc/.got. 
if (CModel == CodeModel::Small || CModel == CodeModel::Large) return true; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 499f8a2..757e2d3 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1020,6 +1020,8 @@ namespace llvm { SDValue &FPOpOut, const SDLoc &dl) const; + SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index c313337..a9b0ce2 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3161,7 +3161,16 @@ def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s1 def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", [(set i32:$rD, + (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor0:$reg), + "#LWZtocL", + [(set i32:$rD, (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp), + "#ADDIStocHA", + [(set i32:$rD, + (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>; + // Get Global (GOT) Base Register offset, from the word immediately preceding // the function label. 
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; diff --git a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 3eb0569..895ae67 100644 --- a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -95,7 +95,8 @@ namespace { protected: bool hasTOCLoReloc(const MachineInstr &MI) { if (MI.getOpcode() == PPC::LDtocL || - MI.getOpcode() == PPC::ADDItocL) + MI.getOpcode() == PPC::ADDItocL || + MI.getOpcode() == PPC::LWZtocL) return true; for (const MachineOperand &MO : MI.operands()) { @@ -109,11 +110,15 @@ protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + const bool isPPC64 = + MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64(); + const unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; + for (auto &MI : MBB) { if (!hasTOCLoReloc(MI)) continue; - MI.addOperand(MachineOperand::CreateReg(PPC::X2, + MI.addOperand(MachineOperand::CreateReg(TOCReg, false /*IsDef*/, true /*IsImp*/)); Changed = true; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 213e2f5..aa7a2c0 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -249,10 +249,19 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, report_fatal_error("Target does not support the kernel CodeModel", false); return *CM; } - if (!TT.isOSDarwin() && !JIT && - (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) - return CodeModel::Medium; - return CodeModel::Small; + + if (JIT) + return CodeModel::Small; + if (TT.isOSAIX()) + return CodeModel::Small; + + assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based."); + + if (TT.isArch32Bit()) + return CodeModel::Small; + + assert(TT.isArch64Bit() && "Unsupported PPC architecture."); + return CodeModel::Medium; } diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll 
b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll new file mode 100644 index 0000000..3f02d4f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=small \ +; RUN: -stop-after=machine-cp -print-before=simple-register-coalescing 2>&1 < \ +; RUN: %s | FileCheck --check-prefix=SMALL %s + +; RUN: not llc -mtriple powerpc-ibm-aix-xcoff -code-model=medium \ +; RUN: -stop-after=machine-cp 2>&1 < %s | FileCheck --check-prefix=MEDIUM %s + +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -code-model=large \ +; RUN: -stop-after=machine-cp -print-before=simple-register-coalescing 2>&1 < \ +; RUN: %s | FileCheck --check-prefix=LARGE %s + +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp \ +; RUN: -print-before=simple-register-coalescing 2>&1 < %s | FileCheck \ +; RUN: --check-prefix=SMALL %s + +@msg = common global i8* null, align 4 +@ptr = common global i8* null, align 4 + +define void @foo() { +entry: +; SMALL: %0:gprc_and_gprc_nor0 = LWZtoc @msg, $r2 :: (load 4 from got) +; SMALL: %1:gprc = LWZ 0, %0:gprc_and_gprc_nor0 :: (dereferenceable load 4 from @msg) +; SMALL: %2:gprc_and_gprc_nor0 = LWZtoc @ptr, $r2 :: (load 4 from got) +; SMALL: STW %1:gprc, 0, %2:gprc_and_gprc_nor0 :: (store 4 into @ptr) + +; MEDIUM: Medium code model is not supported on AIX. 
+ +; LARGE: %0:gprc_and_gprc_nor0 = ADDIStocHA $r2, @msg +; LARGE: %1:gprc_and_gprc_nor0 = LWZtocL @msg, %0:gprc_and_gprc_nor0, implicit $r2 :: (load 4 from got) +; LARGE: %2:gprc = LWZ 0, %1:gprc_and_gprc_nor0 :: (dereferenceable load 4 from @msg) +; LARGE: %3:gprc_and_gprc_nor0 = ADDIStocHA $r2, @ptr +; LARGE: %4:gprc_and_gprc_nor0 = LWZtocL @ptr, %3:gprc_and_gprc_nor0, implicit $r2 :: (load 4 from got) +; LARGE: STW %2:gprc, 0, %4:gprc_and_gprc_nor0 :: (store 4 into @ptr) + + %0 = load i8*, i8** @msg, align 4 + store i8* %0, i8** @ptr, align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll new file mode 100644 index 0000000..7e00a59 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -code-model=small \ +; RUN: -stop-after=machine-cp -print-before=simple-register-coalescing 2>&1 < \ +; RUN: %s | FileCheck --check-prefix=SMALL %s + +; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff -code-model=medium \ +; RUN: -stop-after=machine-cp 2>&1 < %s | FileCheck --check-prefix=MEDIUM %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -code-model=large \ +; RUN: -stop-after=machine-cp -print-before=simple-register-coalescing 2>&1 < \ +; RUN: %s | FileCheck --check-prefix=LARGE %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp \ +; RUN: -print-before=simple-register-coalescing 2>&1 < %s | FileCheck \ +; RUN: --check-prefix=SMALL %s + +@msg = common global i8* null, align 8 +@ptr = common global i8* null, align 8 + +define void @foo() { +entry: +; SMALL: %0:g8rc_and_g8rc_nox0 = LDtoc @msg, $x2 :: (load 8 from got) +; SMALL: %1:g8rc = LD 0, %0:g8rc_and_g8rc_nox0 :: (dereferenceable load 8 from @msg) +; SMALL: %2:g8rc_and_g8rc_nox0 = LDtoc @ptr, $x2 :: (load 8 from got) +; SMALL: STD %1:g8rc, 0, %2:g8rc_and_g8rc_nox0 :: (store 8 into @ptr) + +; MEDIUM: Medium code model is not supported on 
AIX. + +; LARGE: %0:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @msg +; LARGE: %1:g8rc_and_g8rc_nox0 = LDtocL @msg, %0:g8rc_and_g8rc_nox0, implicit $x2 :: (load 8 from got) +; LARGE: %2:g8rc = LD 0, %1:g8rc_and_g8rc_nox0 :: (dereferenceable load 8 from @msg) +; LARGE: %3:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @ptr +; LARGE: %4:g8rc_and_g8rc_nox0 = LDtocL @ptr, %3:g8rc_and_g8rc_nox0, implicit $x2 :: (load 8 from got) +; LARGE: STD %2:g8rc, 0, %4:g8rc_and_g8rc_nox0 :: (store 8 into @ptr) + + %0 = load i8*, i8** @msg, align 8 + store i8* %0, i8** @ptr, align 8 + ret void +} -- 2.7.4