From 794c8c0f783eb95a43f0fca5d1d75b60eb4d237c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 2 Dec 2014 17:05:41 +0000 Subject: [PATCH] R600/SI: Set the ATC bit on all resource descriptors for the HSA runtime llvm-svn: 223125 --- llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 4 +++- llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 3 ++- llvm/lib/Target/R600/AMDGPUSubtarget.h | 4 ++++ llvm/lib/Target/R600/SIISelLowering.cpp | 10 +++++++--- llvm/lib/Target/R600/SIInstrInfo.cpp | 18 ++++++++++++++---- llvm/lib/Target/R600/SIInstrInfo.h | 3 +++ llvm/test/CodeGen/R600/hsa.ll | 12 ++++++++++++ 7 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/R600/hsa.ll diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 90b6672..ad5a541 100644 --- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -1012,6 +1012,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &GLC, SDValue &SLC, SDValue &TFE) const { SDValue Ptr, VAddr, Offen, Idxen, Addr64; + const SIInstrInfo *TII = + static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo()); SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, GLC, SLC, TFE); @@ -1019,7 +1021,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, if (!cast<ConstantSDNode>(Offen)->getSExtValue() && !cast<ConstantSDNode>(Idxen)->getSExtValue() && !cast<ConstantSDNode>(Addr64)->getSExtValue()) { - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | + uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | APInt::getAllOnesValue(32).getZExtValue(); // Size SDLoc DL(Addr); diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 9d09a19..0d693c8 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -84,7 +84,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 
16, // Maximum stack alignment (long16) 0), - InstrItins(getInstrItineraryForCPU(GPU)) { + InstrItins(getInstrItineraryForCPU(GPU)), + TargetTriple(TT) { if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { InstrInfo.reset(new R600InstrInfo(*this)); TLInfo.reset(new R600TargetLowering(TM)); diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index f71d80a..79adf55 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -68,6 +68,7 @@ private: std::unique_ptr<AMDGPUTargetLowering> TLInfo; std::unique_ptr<AMDGPUInstrInfo> InstrInfo; InstrItineraryData InstrItins; + Triple TargetTriple; public: AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM); @@ -217,6 +218,9 @@ public: bool r600ALUEncoding() const { return R600ALUInst; } + bool isAmdHsaOS() const { + return TargetTriple.getOS() == Triple::AMDHSA; + } }; } // End namespace llvm diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index adc0178..2733e09 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -2030,6 +2030,8 @@ static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) { MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const { + const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); #if 1 // XXX - Workaround for moveToVALU not handling different register class // inserts for REG_SEQUENCE. 
@@ -2039,7 +2041,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32), buildSMovImm32(DAG, DL, 0), DAG.getTargetConstant(AMDGPU::sub0, MVT::i32), - buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32), + buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32), DAG.getTargetConstant(AMDGPU::sub1, MVT::i32) }; @@ -2063,7 +2065,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32), buildSMovImm32(DAG, DL, 0), DAG.getTargetConstant(AMDGPU::sub2, MVT::i32), - buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32), + buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32), DAG.getTargetConstant(AMDGPU::sub3, MVT::i32) }; @@ -2110,7 +2112,9 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const { - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | + const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); + uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | 0xffffffff; // Size return buildRSRC(DAG, DL, Ptr, 0, Rsrc); diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index 21aadea..931f351 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -1580,6 +1580,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); // Zero64 = 0 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), @@ -1589,12 +1590,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr 
*MI) const { // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo) - .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); + .addImm(RsrcDataFormat & 0xFFFFFFFF); // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi) - .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); + .addImm(RsrcDataFormat >> 32); // NewSRsrc = {Zero64, SRsrcFormat} BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), @@ -1798,13 +1799,14 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) .addImm(0); BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) - .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); + .addImm(RsrcDataFormat & 0xFFFFFFFF); BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) - .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); + .addImm(RsrcDataFormat >> 32); BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) .addReg(DWord0) .addImm(AMDGPU::sub0) @@ -2413,3 +2415,11 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, return &MI.getOperand(Idx); } + +uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { + uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; + if (ST.isAmdHsaOS()) + RsrcDataFormat |= (1ULL << 56); + + return RsrcDataFormat; +} diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h index 5295606..32881c7 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.h +++ b/llvm/lib/Target/R600/SIInstrInfo.h @@ -308,6 +308,9 @@ public: unsigned OpName) const { return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); } + 
+ uint64_t getDefaultRsrcDataFormat() const; + }; namespace AMDGPU { diff --git a/llvm/test/CodeGen/R600/hsa.ll b/llvm/test/CodeGen/R600/hsa.ll new file mode 100644 index 0000000..2e79866 --- /dev/null +++ b/llvm/test/CodeGen/R600/hsa.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s + +; HSA: {{^}}simple: +; Make sure we are setting the ATC bit: +; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 +; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 + +define void @simple(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} -- 2.7.4