From b9704c001cdfd55c4e4b17ac5ec849dd20486c15 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 23 Feb 2018 23:53:27 +0000 Subject: [PATCH] [AMDGPU] Fixed madak.ll test on VI, added GFX10. NFC. llvm-svn: 325995 --- llvm/test/CodeGen/AMDGPU/madak.ll | 78 ++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll index cc08333..9fe1af8 100644 --- a/llvm/test/CodeGen/AMDGPU/madak.ll +++ b/llvm/test/CodeGen/AMDGPU/madak.ll @@ -1,15 +1,16 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s - -; FIXME: Enable VI +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madak_f32: -; GCN: buffer_load_dword [[VA:v[0-9]+]] -; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 +; GFX6: buffer_load_dword [[VA:v[0-9]+]] +; GFX6: buffer_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]] +; GCN: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid @@ -30,12 +31,16 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add ; it. ; GCN-LABEL: {{^}}madak_2_use_f32: -; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 -; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 -; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 -; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]] +; GFX8_9: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GFX6-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GFX6-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GFX6-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 +; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} +; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} +; GFX8_9: {{flat|global}}_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} +; GFX6-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 +; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]] ; GCN: s_endpgm define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -62,7 +67,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo } ; GCN-LABEL: {{^}}madak_m_inline_imm_f32: -; GCN: buffer_load_dword [[VA:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]] ; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000 define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -81,9 +86,11 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o ; an inline immediate. ; GCN-LABEL: {{^}}madak_inline_imm_f32: -; GCN: buffer_load_dword [[VA:v[0-9]+]] -; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0 +; GFX6: buffer_load_dword [[VA:v[0-9]+]] +; GFX6: buffer_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]] +; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0 define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid @@ -103,7 +110,7 @@ define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out ; GCN-LABEL: {{^}}s_v_madak_f32: ; GCN-DAG: s_load_dword [[SB:s[0-9]+]] ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 -; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]] ; GCN-NOT: v_madak_f32 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]] define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind { @@ -122,7 +129,7 @@ define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float ; GCN-LABEL: @v_s_madak_f32 ; GCN-DAG: s_load_dword [[SB:s[0-9]+]] ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 -; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]] ; GCN-NOT: v_madak_f32 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]] define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind { @@ -149,10 +156,12 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo } ; GCN-LABEL: {{^}}no_madak_src0_modifier_f32: -; GCN: buffer_load_dword [[VA:v[0-9]+]] -; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} -; GCN: s_endpgm +; GFX6: buffer_load_dword [[VA:v[0-9]+]] +; GFX6: buffer_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]] +; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} +; GCN: s_endpgm define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid @@ -171,10 +180,12 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia } ; GCN-LABEL: {{^}}no_madak_src1_modifier_f32: -; GCN: buffer_load_dword [[VA:v[0-9]+]] -; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} -; GCN: s_endpgm +; GFX6: buffer_load_dword [[VA:v[0-9]+]] +; GFX6: buffer_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]] +; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]] +; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} +; GCN: s_endpgm define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid @@ -195,12 +206,13 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia ; SIFoldOperands should not fold the SGPR copy into the instruction ; because the implicit immediate already uses the constant bus. ; GCN-LABEL: {{^}}madak_constant_bus_violation: -; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}} -; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]] -; GCN: buffer_load_dword [[VGPR:v[0-9]+]] -; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000 -; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]] -; GCN: buffer_store_dword [[MUL]] +; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}} +; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]] +; GCN: {{buffer|flat|global}}_load_dword [[VGPR:v[0-9]+]] +; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000 +; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]] +; GFX6: buffer_store_dword [[MUL]] +; GFX8_9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[MUL]] define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 { bb: %tmp = icmp eq i32 %arg1, 0 -- 2.7.4