From 5a061041ec4c8e16ba8d8321353d3a35ca77858f Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Fri, 9 Oct 2020 16:50:24 +0530 Subject: [PATCH] [AMDGPU] Avoid offset register in MUBUF for direct stack object accesses We use an absolute address for stack objects and it would be necessary to have a constant 0 for soffset field. Fixes: SWDEV-228562 Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D89234 --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 20 ++++++-------------- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 13 ++++++------- llvm/test/CodeGen/AMDGPU/call-argument-types.ll | 8 ++++---- .../CodeGen/AMDGPU/callee-special-input-vgprs.ll | 8 +++----- 4 files changed, 19 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8915ef8..8993d41 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1468,22 +1468,14 @@ static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { SDLoc DL(N); - const MachineFunction &MF = CurDAG->getMachineFunction(); - const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); - if (auto FI = dyn_cast<FrameIndexSDNode>(N)) { - SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), - FI->getValueType(0)); - - // If we can resolve this to a frame index access, this will be relative to - // either the stack or frame pointer SGPR. - return std::make_pair( - TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)); - } + auto *FI = dyn_cast<FrameIndexSDNode>(N); + SDValue TFI = + FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N; - // If we don't know this private access is a local stack object, it needs to - // be relative to the entry point's scratch wave offset. 
- return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32)); + // We rebase the base address into an absolute stack address and hence + // use constant 0 for soffset. + return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32)); } bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index a4e9444..e064197 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -478,9 +478,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, assert(TII->isMUBUF(MI)); MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset); - assert(SOffset->getReg() == - MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() && - "should only be seeing stack pointer offset relative FrameIndex"); + assert((SOffset->isReg() && + SOffset->getReg() == + MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg()) || + (SOffset->isImm() && SOffset->getImm() == 0)); MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); int64_t NewOffset = OffsetOp->getImm() + Offset; @@ -489,10 +490,6 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, FIOp->ChangeToRegister(BaseReg, false); OffsetOp->setImm(NewOffset); - - // The move materializing the base address will be an absolute stack address, - // so clear the base offset. 
- SOffset->ChangeToImmediate(0); } bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, @@ -1455,6 +1452,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } else { SOffset.setReg(FrameReg); } + } else if (SOffset.isImm() && FrameReg != AMDGPU::NoRegister) { + SOffset.ChangeToRegister(FrameReg, false); } int64_t Offset = FrameInfo.getObjectOffset(Index); diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index fb1cd3b..20d398c 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -744,13 +744,13 @@ entry: ; GCN-LABEL: {{^}}tail_call_byval_align16: ; GCN-NOT: s32 -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 +; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8 +; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12 ; GCN: s_getpc_b64 -; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 -; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}} +; GCN: buffer_store_dword [[VREG2]], off, s[0:3], s32 offset:4 +; GCN: buffer_store_dword [[VREG1]], off, s[0:3], s32{{$}} ; GCN-NOT: s32 ; GCN: s_setpc_b64 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index 9bbcc23..18fe484 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -611,12 +611,10 @@ define void @too_many_args_use_workitem_id_x_byval( ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval: ; VARABI: enable_vgpr_workitem_id = 0 ; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} -; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4 -; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4 ; 
VARABI: s_movk_i32 s32, 0x400{{$}} - -; VARABI-NOT: s32 +; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4 ; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4 +; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4 ; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]], @@ -660,8 +658,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval: ; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} ; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}} ; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4 +; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}} ; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]], ; VARABI: s_swappc_b64 -- 2.7.4