From 367248956e93982a73c0441868a562aeb85af5a0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Sep 2020 12:11:53 -0400 Subject: [PATCH] AMDGPU: Clear offset register when using local stack area eliminateFrameIndex won't fix up the offset register when the direct frame index reference is moved to a separate move instruction. Switch the offset to a base 0 (which it probably should be to begin with). --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10 ++++++++-- .../CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll | 8 ++++---- .../CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll | 5 +++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8a989998..c3ffd5b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -503,8 +503,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, #endif assert(FIOp && FIOp->isFI() && "frame index must be address operand"); assert(TII->isMUBUF(MI)); - assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == - MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() && + + MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset); + assert(SOffset->getReg() == + MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() && "should only be seeing stack pointer offset relative FrameIndex"); MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); @@ -513,6 +515,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, FIOp->ChangeToRegister(BaseReg, false); OffsetOp->setImm(NewOffset); + + // The move materializing the base address will be an absolute stack address, + // so clear the base offset. 
+ SOffset->ChangeToImmediate(0); } bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index a97b5dab..f390fad 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -41,8 +41,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 ; GCN-NEXT: v_add_u32_e32 v1, 0x20d0, v1 ; GCN-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 -; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], s32 offen -; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], s32 offen offset:4 +; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:4 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v2, v3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -94,8 +94,8 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 addrspac ; GCN-NEXT: v_add_u32_e32 v3, 0x20d0, v3 ; GCN-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen offset:4 -; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], s32 offen -; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], s32 offen offset:4 +; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:4 ; GCN-NEXT: s_sub_u32 s32, s32, 0x180000 ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_waitcnt vmcnt(1) diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index e2d64c1..78e1402 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -41,8 +41,9 @@ define amdgpu_kernel 
void @kernel_background_evaluate(float addrspace(5)* %kg, < ; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GCN-NEXT: s_cbranch_execz BB0_2 ; GCN-NEXT: ; %bb.1: ; %if.then4.i -; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], s32 offen -; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], s32 offen offset:4 +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], 0 offen +; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], 0 offen offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v0 ; GCN-NEXT: v_mul_lo_u32 v0, 0x41c64e6d, v0 -- 2.7.4