From e0a4da8c0a2a0628fdae427c6eb2949b3bcbdfa0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 30 May 2019 19:33:18 +0000
Subject: [PATCH] AMDGPU/GlobalISel: Add wave scratch offset argument

Avoids crashing in PEI in a future change.

llvm-svn: 362136
---
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp      | 42 ++++++++++++++++++++++
 .../irtranslator-amdgpu_kernel-system-sgprs.ll     | 10 ++++++
 2 files changed, 52 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 249498e..ff34759 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -156,6 +156,43 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
 }
 
+static unsigned findFirstFreeSGPR(CCState &CCInfo) {
+  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
+    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
+      return AMDGPU::SGPR0 + Reg;
+    }
+  }
+  llvm_unreachable("Cannot allocate sgpr");
+}
+
+static void allocateSystemSGPRs(CCState &CCInfo,
+                                MachineFunction &MF,
+                                SIMachineFunctionInfo &Info,
+                                CallingConv::ID CallConv,
+                                bool IsShader) {
+  if (Info.hasPrivateSegmentWaveByteOffset()) {
+    // Scratch wave offset passed in system SGPR.
+    unsigned PrivateSegmentWaveByteOffsetReg;
+
+    if (IsShader) {
+      PrivateSegmentWaveByteOffsetReg =
+        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
+
+      // This is true if the scratch wave byte offset doesn't have a fixed
+      // location.
+      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
+        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+      }
+    } else
+      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
+
+    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+  }
+}
+
 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                               const Function &F,
                                               ArrayRef VRegs) const {
@@ -171,6 +208,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
+  bool IsShader = AMDGPU::isShader(F.getCallingConv());
+
   SmallVector ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
 
@@ -242,6 +281,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
       ++i;
     }
 
+    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
     return true;
   }
 
@@ -313,6 +353,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
     MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
   }
+
+  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
new file mode 100644
index 0000000..00b1264
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA %s
+
+; HSA-LABEL: name: default_kernel
+; HSA: liveins:
+; HSA-NEXT: - { reg: '$sgpr0_sgpr1_sgpr2_sgpr3', virtual-reg: '%0' }
+; HSA-NEXT: - { reg: '$sgpr4', virtual-reg: '%1' }
+; HSA-NEXT: frameInfo:
+define amdgpu_kernel void @default_kernel() {
+  ret void
+}
-- 
2.7.4