From 175959e35016fe22d6f4f9dbbd92aad47f442dbc Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 6 Dec 2016 21:53:10 +0000 Subject: [PATCH] AMDGPU/SI: Set correct value for amd_kernel_code_t::kernarg_segment_alignment Reviewers: arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye Differential Revision: https://reviews.llvm.org/D27416 llvm-svn: 288852 --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 5 +++++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 4 ++++ llvm/test/CodeGen/AMDGPU/kernel-args.ll | 28 ++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index d836a8e..2390fc9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -763,6 +763,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; + // These alignment values are specified in powers of two, so alignment = + // 2^n. The minimum alignment is 2^4 = 16. + header.kernarg_segment_alignment = std::max((size_t)4, + countTrailingZeros(MFI->getMaxKernArgAlign())); + if (STM.debuggerEmitPrologue()) { header.debug_wavefront_private_segment_offset_sgpr = KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 0c8b79d..5d0640b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -50,6 +50,10 @@ public: return KernArgSize; } + unsigned getMaxKernArgAlign() const { + return MaxKernArgAlign; + } + void setABIArgOffset(unsigned NewOffset) { ABIArgOffset = NewOffset; } diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll index dcb2ddb..b1f20fd 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; FUNC-LABEL: {{^}}i8_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c @@ -24,6 +25,7 @@ entry: } ; FUNC-LABEL: {{^}}i8_zext_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c @@ -42,6 +44,7 @@ entry: } ; FUNC-LABEL: {{^}}i8_sext_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c @@ -60,6 +63,7 @@ entry: } ; FUNC-LABEL: {{^}}i16_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c @@ -79,6 +83,7 @@ entry: } ; FUNC-LABEL: {{^}}i16_zext_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c @@ -97,6 +102,7 @@ entry: } ; FUNC-LABEL: {{^}}i16_sext_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c @@ -115,6 +121,7 @@ entry: } ; FUNC-LABEL: {{^}}i32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c @@ -126,6 +133,7 @@ entry: } ; FUNC-LABEL: {{^}}f32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c @@ -137,6 +145,7 @@ entry: } ; FUNC-LABEL: {{^}}v2i8_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; MESA-GCN: buffer_load_ubyte @@ -150,6 +159,7 @@ entry: } ; FUNC-LABEL: {{^}}v2i16_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; MESA-GCN: buffer_load_ushort @@ -163,6 +173,7 @@ entry: } ; FUNC-LABEL: {{^}}v2i32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb @@ -175,6 +186,7 @@ entry: } ; FUNC-LABEL: {{^}}v2f32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb @@ -187,6 +199,7 @@ entry: } ; FUNC-LABEL: {{^}}v3i8_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42 @@ -203,6 +216,7 @@ entry: } ; FUNC-LABEL: {{^}}v3i16_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48 @@ -218,6 +232,7 @@ entry: ret void } ; FUNC-LABEL: {{^}}v3i32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W @@ -231,6 +246,7 @@ entry: } ; FUNC-LABEL: {{^}}v3f32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W @@ -244,6 +260,7 @@ entry: } ; FUNC-LABEL: {{^}}v4i8_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 @@ -263,6 +280,7 @@ entry: } ; FUNC-LABEL: {{^}}v4i16_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 @@ -282,6 +300,7 @@ entry: } ; FUNC-LABEL: {{^}}v4i32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W @@ -296,6 +315,7 @@ entry: } ; FUNC-LABEL: {{^}}v4f32_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W @@ -310,6 +330,7 @@ entry: } ; FUNC-LABEL: {{^}}v8i8_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 @@ -340,6 +361,7 @@ entry: } ; FUNC-LABEL: {{^}}v8i16_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 @@ -371,6 +393,7 @@ entry: } ; FUNC-LABEL: {{^}}v8i32_arg: +; HSA-VI: kernarg_segment_alignment = 5 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W @@ -389,6 +412,7 @@ entry: } ; FUNC-LABEL: {{^}}v8f32_arg: +; HSA-VI: kernarg_segment_alignment = 5 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W @@ -405,6 +429,7 @@ entry: } ; FUNC-LABEL: {{^}}v16i8_arg: +; HSA-VI: kernarg_segment_alignment = 4 ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 @@ -460,6 +485,7 @@ entry: } ; FUNC-LABEL: {{^}}v16i16_arg: +; HSA-VI: kernarg_segment_alignment = 5 ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 @@ -515,6 +541,7 @@ entry: } ; FUNC-LABEL: {{^}}v16i32_arg: +; HSA-VI: kernarg_segment_alignment = 6 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W @@ -541,6 +568,7 @@ entry: } ; FUNC-LABEL: {{^}}v16f32_arg: +; HSA-VI: kernarg_segment_alignment = 6 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W -- 2.7.4