From b5798b09d302a5f49a05ad9a2df2ae8c646b3802 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 26 Jun 2015 21:15:03 +0000 Subject: [PATCH] AMDGPU/SI: There are no implicit kernel args in the amdhsa ABI Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D10706 llvm-svn: 240830 --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 7 +++++++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++- llvm/test/CodeGen/AMDGPU/hsa.ll | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 0d40d14..27f269a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -275,6 +275,13 @@ public: bool enableSubRegLiveness() const override { return true; } + + /// \brief Returns the offset in bytes from the start of the input buffer + /// of the first explicit kernel argument. + unsigned getExplicitKernelArgOffset() const { + return isAmdHsaOS() ? 0 : 36; + } + }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c73e448..ead1a37 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -583,7 +583,8 @@ SDValue SITargetLowering::LowerFormalArguments( if (VA.isMemLoc()) { VT = Ins[i].VT; EVT MemVT = Splits[i].VT; - const unsigned Offset = 36 + VA.getLocMemOffset(); + const unsigned Offset = Subtarget->getExplicitKernelArgOffset() + + VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll index 3a76a36..3561d4a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -5,6 +5,7 @@ ; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0" ; Test that the amd_kernel_code_t object is emitted ; HSA: .asciz +; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0 ; Make sure we are setting the ATC bit: ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 -- 2.7.4