From 70b92820158781ae42d89568b15873e74871e59f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Nov 2017 23:53:44 +0000 Subject: [PATCH] AMDGPU: Fix -enable-var-scope violations llvm-svn: 318004 --- llvm/test/CodeGen/AMDGPU/and.ll | 7 +++-- llvm/test/CodeGen/AMDGPU/br_cc.f16.ll | 8 ++--- .../CodeGen/AMDGPU/control-flow-fastregalloc.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/ctpop.ll | 8 ++--- llvm/test/CodeGen/AMDGPU/fabs.f16.ll | 8 ++--- .../CodeGen/AMDGPU/fcanonicalize-elimination.ll | 26 ++++++++--------- llvm/test/CodeGen/AMDGPU/fma-combine.ll | 8 +++-- llvm/test/CodeGen/AMDGPU/fmed3.ll | 14 ++++----- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll | 18 ++++++------ llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 10 +++---- llvm/test/CodeGen/AMDGPU/fpext.f16.ll | 10 +++---- llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 9 ++++-- llvm/test/CodeGen/AMDGPU/imm.ll | 6 ++-- .../test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 13 +++++---- llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll | 10 +++---- llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll | 7 +++-- llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/mad-combine.ll | 15 ++++++---- llvm/test/CodeGen/AMDGPU/pack.v2i16.ll | 8 ++--- llvm/test/CodeGen/AMDGPU/sext-in-reg.ll | 11 ++++--- llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 34 +++++++++++----------- llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/uniform-cfg.ll | 6 ++-- llvm/test/CodeGen/AMDGPU/v_cndmask.ll | 15 +++++----- 26 files changed, 146 insertions(+), 135 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll index 8693089..c320e06 100644 --- a/llvm/test/CodeGen/AMDGPU/and.ll +++ b/llvm/test/CodeGen/AMDGPU/and.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 @@ -83,7 +83,8 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out ; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]] ; SI: s_add_i32 ; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]] -; SI: buffer_store_dword [[VK]] +; SI: v_mov_b32_e32 [[VADD:v[0-9]+]], [[ADD]] +; SI: buffer_store_dword [[VADD]] define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) { %and = and i32 %a, 1234567 %foo = add i32 %and, 1234567 diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll index 566c361..f145869 100644 --- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}br_cc_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] @@ -50,7 +50,7 @@ two: ; VI: s_cbranch_vccnz ; GCN: one{{$}} -; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}} +; GCN: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}} ; SI: buffer_store_short v[[A_F16]] ; SI: s_endpgm @@ -89,7 +89,7 @@ two: ; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]] ; GCN: two{{$}} -; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}} +; GCN: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}} ; GCN: buffer_store_short v[[B_F16]] ; GCN: s_endpgm define amdgpu_kernel void @br_cc_f16_imm_b( diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 08a31ac..3eef069 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=VMEM -check-prefix=GCN %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -check-prefix=VGPR -check-prefix=GCN %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s ; Verify registers used for tracking exec mask changes when all ; registers are spilled at the end of the block. The SGPR spill @@ -171,7 +171,7 @@ end: ; GCN: {{^}}; BB#0: ; GCN: s_mov_b32 m0, -1 -; VMEM: ds_read_b32 [[LOAD0:v[0-9]+]] +; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, diff --git a/llvm/test/CodeGen/AMDGPU/ctpop.ll b/llvm/test/CodeGen/AMDGPU/ctpop.ll index 9bad786..5ea3903 100644 --- a/llvm/test/CodeGen/AMDGPU/ctpop.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone @@ -308,7 +308,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %o ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd ; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34 ; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] -; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]] +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm ; EG: BCNT_INT diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll index 4429cfa..3e2b44f 100644 --- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; DAGCombiner will transform: ; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFFFFFF)) @@ -20,7 +20,7 @@ define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) { ; GCN-LABEL: {{^}}s_fabs_f16: ; CI: flat_load_ushort [[VAL:v[0-9]+]], -; CI: v_and_b32_e32 [[CVT0:v[0-9]+]], 0x7fff, [[VAL]] +; CI: v_and_b32_e32 [[RESULT:v[0-9]+]], 0x7fff, [[VAL]] ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) { %fabs = call half @llvm.fabs.f16(half %in) diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll index f662788..f64c27d 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32: ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} @@ -381,9 +381,9 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspa ; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32: ; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}} -; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]] -; GCN-DENORM: v_max_f32_e32 v{{[0-9]+}}, [[V0]], [[V0]] -; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]] +; GCN-FLUSH: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]] +; GCN-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], [[V0]], [[V0]] +; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]] define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id @@ -395,10 +395,10 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace } ; GCN-LABEL: test_fold_canonicalize_denorm_value_f32: -; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0x7fffff, v{{[0-9]+}} +; GFX9: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, v{{[0-9]+}} ; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}} -; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]] -; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]] +; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]] +; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]] ; GFX9-NOT: 1.0 define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -411,10 +411,10 @@ define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspa } ; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32: -; GFX9: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}} +; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, v{{[0-9]+}} ; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}} -; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]] -; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]] +; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]] +; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]] ; GFX9-NOT: 1.0 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll index 132f053..3b3c87f 100644 --- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s ; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be ; beneficial even without fp32 denormals, but they do require no-infs-fp-math @@ -237,6 +237,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalia ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]] ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} @@ -271,6 +272,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace( ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]] ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll index 19672af..da21cc4 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32: @@ -899,7 +899,7 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* % ; VI: v_max_f16_e32 v{{[0-9]+}}, 2.0 ; VI: v_min_f16_e32 v{{[0-9]+}}, 4.0 -; GFX9: v_add_f16_e32 v{{[0-9]+}}, 1.0 +; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0 ; GFX9: v_med3_f16 v{{[0-9]+}}, [[ADD]], 2.0, 4.0 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll index 5f9e965..0fdb945 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare half @llvm.fmuladd.f16(half, half, half) #1 @@ -441,7 +441,7 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f16(half addrspace(1)* %out, half add ; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]] -; VI-DENORM-CONTRACT: v_fma_f16 [[R2]], [[R1]], 2.0, -[[R2]] +; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]] ; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]] ; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 002bc47..fd3d4f0 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s ; -------------------------------------------------------------------------------- ; fadd tests @@ -59,7 +59,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]] ; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]] -; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[ADD]] +; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]] ; GCN: buffer_store_dword [[NEG_ADD]] ; GCN-NEXT: buffer_store_dword [[MUL]] define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { @@ -513,7 +513,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32(float addrspace(1)* ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]] -; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]] +; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]] ; GCN-NEXT: buffer_store_dword [[MAX0]] ; GCN-NEXT: buffer_store_dword [[MUL1]] define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { @@ -660,7 +660,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32(float addrspace(1)* ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN: v_min_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]] -; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]] +; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]] ; GCN-NEXT: buffer_store_dword [[MAX0]] ; GCN-NEXT: buffer_store_dword [[MUL1]] define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll index ae82973..56da7f1 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s ; GCN-LABEL: {{^}}fpext_f16_to_f32 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] @@ -180,7 +180,7 @@ entry: ; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]] ; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]] -; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]| +; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]| ; GCN: store_dword [[CVT]] ; GCN: store_short [[XOR]] @@ -226,7 +226,7 @@ entry: ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]] ; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]] -; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]| +; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]| ; GCN: buffer_store_dword [[CVT]] ; GCN: buffer_store_short [[OR]] diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index 64df625..eb71967 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fptrunc_f32_to_f16: ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] @@ -65,6 +65,9 @@ entry: ; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}} ; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}} ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]] +; +; SI-DAG: v_cvt_f16_f32_e32 v[[CVTHI:[0-9]+]], v[[A_F32_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[CVTHI]] ; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll index 8cda01a1..4e20d03 100644 --- a/llvm/test/CodeGen/AMDGPU/imm.ll +++ b/llvm/test/CodeGen/AMDGPU/imm.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; Use a 64-bit value with lo bits that can be represented as an inline constant ; GCN-LABEL: {{^}}i64_imm_inline_lo: @@ -129,7 +129,7 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out } ; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32: -; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}} +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}} ; GCN: buffer_store_dword [[REG]] define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) { store float 0xBFC45F3060000000, float addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 7a6919c..adfa43f 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s ; GCN-LABEL: {{^}}s_insertelement_v2i16_0: ; GCN: s_load_dword [[VEC:s[0-9]+]] @@ -81,8 +81,9 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* ; GCN: s_load_dword [[ELT_ARG:s[0-9]+]], s[0:1] ; GCN: s_load_dword [[VEC:s[0-9]+]], -; CIVI-DAG: s_and_b32 [[ELT1:s[0-9]+]], [[VEC]], 0xffff0000{{$}} -; CIVI: s_or_b32 s{{[0-9]+}}, [[ELT0]], [[ELT1]] +; CIVI-DAG: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16 +; CIVI-DAG: s_and_b32 [[ELT0:s[0-9]+]], [[VEC]], 0xffff0000{{$}} +; CIVI: s_or_b32 s{{[0-9]+}}, [[ELT1]], [[ELT0]] ; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16 ; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT1]], [[VEC]] @@ -174,7 +175,7 @@ define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out } ; GCN-LABEL: {{^}}s_insertelement_v2f16_1: -; GFX9: s_load_dword [[VEC:s[0-9]+]] +; GCN: s_load_dword [[VEC:s[0-9]+]] ; GCN-NOT: s_lshr ; CIVI: s_and_b32 [[ELT0:s[0-9]+]], [[VEC]], 0xffff{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll index 738f429..bfcce66 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s declare half @llvm.fma.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) @@ -253,7 +253,7 @@ define amdgpu_kernel void @fma_v2f16_imm_b( ; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16]] ; GCN-NOT: and -; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] +; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_c( diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index ebf8baf..478658c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -1,7 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s declare half @llvm.fmuladd.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) @@ -70,7 +70,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]] -; SI: v_mac_f32_e32 v[[C_F32]], 0x40400000, v[[B_F32]] +; SI: v_mac_f32_e32 v[[C_F32]], 0x40400000, v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]] ; SI: buffer_store_short v[[R_F16]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index 59a4f61..aa085a7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s declare half @llvm.maxnum.f16(half %a, half %b) declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) @@ -137,8 +137,9 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] + ; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 -; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index 5fcbd1e..5ab03aa 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s declare half @llvm.minnum.f16(half %a, half %b) declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) @@ -139,7 +139,7 @@ entry: ; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 -; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll index 8a6bf85..0b360f6 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll @@ -1,12 +1,12 @@ ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma. -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s ; Make sure we don't form mad with denormals -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 @@ -231,6 +231,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] @@ -305,6 +306,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]] @@ -348,6 +350,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] ; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] @@ -401,7 +404,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1 ; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]] ; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] -; SI-DENORM: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP1]], [[C]] +; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]] ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll index c860bcb..22c72e7 100644 --- a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s ; GCN-LABEL: {{^}}s_pack_v2i16: @@ -160,7 +160,7 @@ define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_hi: ; GFX9: global_load_dword [[VAL:v[0-9]+]] -; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll index f645088..d3f33e8 100644 --- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -1,8 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -target datalayout = "A5" +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; FIXME: i16 promotion pass ruins the scalar cases when legal. ; FIXME: r600 fails verifier @@ -621,7 +620,7 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 ; SI: s_sext_i32_i8 [[SSEXT:s[0-9]+]], [[VAL]] ; SI: v_mov_b32_e32 [[VSEXT:v[0-9]+]], [[SSEXT]] -; SI: buffer_store_short [[VBFE]] +; SI: buffer_store_short [[VSEXT]] ; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}} ; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll index dae22ec..26e6a3a 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; FIXME: Fails with -enable-var-scope ; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half. @@ -92,7 +92,7 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 add ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] -; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}} +; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -211,7 +211,7 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30 ; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] -; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}} +; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index aa6c183..47e7d4c 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -1,6 +1,6 @@ -; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s -; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s -; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s +; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s +; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s +; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos ; GCN-POSTLINK: tail call fast float @_Z3sinf( @@ -697,8 +697,8 @@ declare float @_Z6sincosfPf(float, float*) %opencl.reserve_id_t = type opaque ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) -; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[NOUNWIND:[0-9]+]] -; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]] +; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]] define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* @@ -719,8 +719,8 @@ declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t ad declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) -; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]] define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* @@ -743,15 +743,15 @@ declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_ %struct.S = type { [100 x i32] } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size -; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]] define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { entry: %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8* @@ -783,5 +783,5 @@ entry: ret void } -; CGN-PRELINK: attributes #[[NOUNWIND]] = { nounwind } +; CGN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll index cb8d365..e1644e4 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -94,7 +94,7 @@ define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %o ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0: ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] -; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]] ; SI: buffer_store_byte [[RESULT]] define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { %load = load i1, i1 addrspace(1)* %in diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll index a87e738..42ac2a0 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}uniform_if_scc: ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 @@ -502,7 +502,7 @@ done: ; GCN: s_mov_b32 [[S_VAL]], 1 ; GCN: [[IF_LABEL]]: -; GCN: v_mov_b32_e32 [[V_VAL]], [[S_VAL]] +; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]] ; GCN: buffer_store_dword [[V_VAL]] define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll index 4fe30ae..bd7738e 100644 --- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll +++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -60,8 +60,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32: ; GCN: s_load_dword [[X:s[0-9]+]] ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc +; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], vcc define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -73,7 +73,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* } ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32: -; GCN: s_load_dword [[X:s[0-9]+]] +; GCN-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: s_load_dword [[Z:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc @@ -90,8 +91,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32: ; GCN: s_load_dword [[X:s[0-9]+]] ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc +; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], vcc define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 -- 2.7.4