From b2869eb6e98cb6b1730bca86b48c007970c861d7 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 9 Sep 2016 19:28:00 +0000 Subject: [PATCH] AMDGPU/SI: Make sure llvm.amdgcn.implicitarg.ptr() is 8-byte aligned for HSA Reviewers: arsenm Subscribers: arsenm, wdng, nhaehnle, llvm-commits Differential Revision: https://reviews.llvm.org/D24405 llvm-svn: 281080 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 3 ++- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 ++++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 682157b..ed24292 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2690,7 +2690,8 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const { - uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), 4); + unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr(); + uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment); switch (Param) { case GRID_DIM: return ArgOffset; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index b94548a..b1cadeb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -276,6 +276,10 @@ public: return isAmdHsaOS() ? 0 : 36; } + unsigned getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? 8 : 4; + } + unsigned getStackAlignment() const { // Scratch is allocated in 256 dword per wave blocks. return 4 * 256 / getWavefrontSize(); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll index 41977c2..c79cf8d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -29,7 +29,7 @@ define void @test_implicit(i32 addrspace(1)* %out) #1 { ; ALL-LABEL: {{^}}test_implicit_alignment ; MESA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc -; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 +; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; MESA: buffer_store_dword [[V_VAL]] ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -- 2.7.4