From ad4a18251a37a244e3d4375538abfc0894a1e6ac Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 14 Jun 2021 12:53:36 -0400 Subject: [PATCH] AMDGPU: Fix assert on m0_lo16/m0_hi16 These get added (redundantly) to the bundle expanded for indirect register accesses. We hit this path only when there is a call in the function. --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 ++ llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index d3ac254..8f69e20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -833,6 +833,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( case AMDGPU::EXEC_HI: case AMDGPU::SCC: case AMDGPU::M0: + case AMDGPU::M0_LO16: + case AMDGPU::M0_HI16: case AMDGPU::SRC_SHARED_BASE: case AMDGPU::SRC_SHARED_LIMIT: case AMDGPU::SRC_PRIVATE_BASE: diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll index dbe904a..730bcb4 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll @@ -66,6 +66,23 @@ entry: ret void } +declare hidden void @foo() + +; For functions with calls, we were not accounting for m0_lo16/m0_hi16 +; uses on the BUNDLE created when expanding the insert register pseudo. +; GCN-LABEL: {{^}}insertelement_with_call: +; GCN: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST) +; GCN-NEXT: v_mov_b32_e32 {{v[0-9]+}}, 8 +; GCN-NEXT: s_set_gpr_idx_off +; GCN: s_swappc_b64 +define amdgpu_kernel void @insertelement_with_call(<16 x i32> addrspace(1)* %ptr, i32 %idx) #0 { + %vec = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %i6 = insertelement <16 x i32> %vec, i32 8, i32 %idx + call void @foo() + store <16 x i32> %i6, <16 x i32> addrspace(1)* null + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare void @llvm.amdgcn.s.barrier() #2 -- 2.7.4