From cb9ae93712464858c8deaf18dea25d41a9d5212a Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 10 Jun 2022 10:58:39 -0700
Subject: [PATCH] [AMDGPU] Define SGPR_NULL64 register. NFCI.

On gfx10+ the null register can be used as both a 32-bit and a 64-bit
operand. Define a 64-bit version of the register to use during codegen.

Differential Revision: https://reviews.llvm.org/D127527
---
 llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp |  1 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                 |  2 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp              |  3 ++-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td               | 18 +++++++++++++-----
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp        |  7 +++++++
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index a17601f..f7f93c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -249,6 +249,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
       case AMDGPU::SRC_PRIVATE_BASE:
       case AMDGPU::SRC_PRIVATE_LIMIT:
       case AMDGPU::SGPR_NULL:
+      case AMDGPU::SGPR_NULL64:
       case AMDGPU::MODE:
         continue;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4f081c5..692e377 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3897,7 +3897,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
       return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
 
     // Null is free
-    if (MO.getReg() == AMDGPU::SGPR_NULL)
+    if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)
       return false;
 
     // SGPRs use the constant bus
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 39f6ce1..ad1455e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -591,7 +591,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
 
   // Reserve null register - it shall never be allocated
-  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
+  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
 
   // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
   // will result in bugs.
@@ -3063,6 +3063,7 @@ SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
 bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
   switch (PhysReg) {
   case AMDGPU::SGPR_NULL:
+  case AMDGPU::SGPR_NULL64:
   case AMDGPU::SRC_SHARED_BASE:
   case AMDGPU::SRC_PRIVATE_BASE:
   case AMDGPU::SRC_SHARED_LIMIT:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 4188b49..9b03a31 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -220,6 +220,14 @@ defm M0 : SIRegLoHi16 <"m0", 0>;
 defm SGPR_NULL_gfxpre11 : SIRegLoHi16 <"null", 125>;
 defm SGPR_NULL_gfx11plus : SIRegLoHi16 <"null", 124>;
 defm SGPR_NULL : SIRegLoHi16 <"null", 0>;
+defm SGPR_NULL_HI : SIRegLoHi16 <"", 0>;
+
+def SGPR_NULL64 :
+    RegisterWithSubRegs<"null", [SGPR_NULL, SGPR_NULL_HI]> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = SGPR_NULL.HWEncoding;
+}
 
 defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>;
 defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>;
@@ -642,16 +650,16 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
 // See comments in SIInstructions.td for more info.
 def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
-   SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
-   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+   SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE,
+   SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
    SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
   let AllocationPriority = 10;
 }
 
 def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
   (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
-   XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
-   TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
+   XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
+   TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
    SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
    SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
   let Size = 16;
@@ -715,7 +723,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
 }
 
 def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
+  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, TTMP_64, TBA, TMA)> {
   let CopyCost = 1;
   let AllocationPriority = 13;
   let HasSGPR = 1;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index fab5376..345d89e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1812,6 +1812,7 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
   CASE_GFXPRE11_GFX11PLUS(M0) \
   CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
+  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
   }
 
 #define CASE_CI_VI(node) \
@@ -1824,6 +1825,9 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
 #define CASE_GFXPRE11_GFX11PLUS(node) \
   case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
 
+#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
+  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
+
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
   if (STI.getTargetTriple().getArch() == Triple::r600)
     return Reg;
@@ -1833,10 +1837,12 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
 #undef CASE_CI_VI
 #undef CASE_VI_GFX9PLUS
 #undef CASE_GFXPRE11_GFX11PLUS
+#undef CASE_GFXPRE11_GFX11PLUS_TO
 
 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
+#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
 
 unsigned mc2PseudoReg(unsigned Reg) {
   MAP_REG2REG
@@ -1845,6 +1851,7 @@ unsigned mc2PseudoReg(unsigned Reg) {
 #undef CASE_CI_VI
 #undef CASE_VI_GFX9PLUS
 #undef CASE_GFXPRE11_GFX11PLUS
+#undef CASE_GFXPRE11_GFX11PLUS_TO
 #undef MAP_REG2REG
 
 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
-- 
2.7.4
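
Reader note (illustrative, not part of the patch): after this change, codegen code
that special-cases the null register has to recognize both pseudos, since SGPR_NULL
stays the 32-bit form and SGPR_NULL64 is its 64-bit counterpart. A minimal sketch of
that pattern, with the helper name isNullReg being hypothetical, could look like:

    #include "MCTargetDesc/AMDGPUMCTargetDesc.h" // generated AMDGPU register enums
    #include "llvm/MC/MCRegister.h"

    // Hypothetical helper mirroring the checks the patch adds to
    // SIInstrInfo::usesConstantBus and SIRegisterInfo::isConstantPhysReg:
    // both null pseudos read as a constant zero and are never allocated.
    static bool isNullReg(llvm::MCRegister Reg) {
      return Reg == llvm::AMDGPU::SGPR_NULL || Reg == llvm::AMDGPU::SGPR_NULL64;
    }

Either pseudo resolves to the same hardware register through getMCReg (the TableGen
definition reuses SGPR_NULL.HWEncoding); the 64-bit variant exists so null can be a
member of 64-bit register classes such as SReg_64_XEXEC.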