From 450afcea39edd897eed541527e5150a9063397dd Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 30 Jul 2019 19:29:33 +0000 Subject: [PATCH] [AMDGPU] Reserve all AGPRs on targets which do not have them Differential Revision: https://reviews.llvm.org/D65471 llvm-svn: 367347 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 ++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 8 ++++++++ llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 20 +++++++++----------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2a69b2e..fa0dc77 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10464,6 +10464,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } break; case 'a': + if (!Subtarget->hasMAIInsts()) + break; switch (VT.getSizeInBits()) { default: return std::make_pair(0U, nullptr); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 652569d..989d706 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -220,6 +220,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, Reg); } + // Reserve all the remaining AGPRs if there are no instructions to use them. 
+ if (!ST.hasMAIInsts()) { + for (unsigned i = 0; i < MaxNumVGPRs; ++i) { + unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); + } + } + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll index 6eef782..711166c 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s +; RUN: not llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GCN,GFX900 %s ; GCN-LABEL: {{^}}max_10_vgprs: ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 @@ -57,23 +57,21 @@ define amdgpu_kernel void @max_10_vgprs(i32 addrspace(1)* %p) #0 { } ; GCN-LABEL: {{^}}max_10_vgprs_used_9a: -; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 -; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1 +; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 +; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1 ; GFX908: v_accvgpr_write_b32 a9, v{{[0-9]}} -; GCN: buffer_store_dword v{{[0-9]}}, -; GFX900: buffer_store_dword v{{[0-9]}}, -; GFX900: buffer_load_dword v{{[0-9]}}, -; GFX900: buffer_load_dword v{{[0-9]}}, +; GFX908: buffer_store_dword v{{[0-9]}}, ; GFX908-NOT: buffer_ ; GFX908: v_accvgpr_read_b32 v{{[0-9]}}, a9 ; GFX908: buffer_load_dword v{{[0-9]}}, ; GFX908-NOT: buffer_ -; GCN: NumVgprs: 10 -; GFX900: ScratchSize: 12 +; GFX900: couldn't allocate input reg for constraint 'a' + +; GFX908: NumVgprs: 10 ; GFX908: ScratchSize: 8 -; GCN: VGPRBlocks: 2 -; GCN: NumVGPRsForWavesPerEU: 10 +; GFX908: VGPRBlocks: 2 +; GFX908: NumVGPRsForWavesPerEU: 10 define 
amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() call void asm sideeffect "", "a,a,a,a,a,a,a,a,a"(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9) -- 2.7.4