From: Stanislav Mekhanoshin Date: Wed, 13 Nov 2019 23:58:49 +0000 (-0800) Subject: [AMDGPU] Fixed mfma-loop test. NFC. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=af7d4022c77d851e9569ec3ded6038616a6622d0;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Fixed mfma-loop test. NFC. --- diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll index 57dd313..b66c9d4 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll @@ -2,7 +2,7 @@ ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit: -; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; Check that we do not copy agprs to vgprs and back inside the loop. @@ -14,7 +14,7 @@ ; Final result should be read only once after the loop. -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_zeroinit(<32 x float> addrspace(1)* %arg) { entry: @@ -40,7 +40,7 @@ exit: ; Check that we do not use 32 temp sgprs as well. ; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000 -; GCN-COUNT32: v_accvgpr_write_b32 a0, [[TMP]] +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -48,7 +48,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(<32 x float> addrspace(1)* %arg) { entry: @@ -71,7 +71,7 @@ exit: ; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0{{$}} -; GCN-COUNT30: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-COUNT-30: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -79,7 +79,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_non_splat(<32 x float> addrspace(1)* %arg) { entry: @@ -106,10 +106,67 @@ exit: ; GCN: v_mov_b32_e32 [[TMP1:v[0-9]+]], 0x42f60000 ; GCN: v_mov_b32_e32 [[TMP2:v[0-9]+]], 0x42f80000 ; GCN: v_mov_b32_e32 [[TMP3:v[0-9]+]], 0x42fe0000 -; GCN-COUNT29: v_mov_b32_e32 v1, 0x4{{[0-9a-f]+}} -; GCN-COUNT10: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] -; GCN-COUNT11: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] -; GCN-COUNT11: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] +; GCN: v_mov_b32_e32 [[TMP1]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP2]], 0x4{{[0-9a-f]+}} +; GCN: v_mov_b32_e32 [[TMP3]], 0x4{{[0-9a-f]+}} +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] +; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -117,7 +174,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(<32 x float> addrspace(1)* %arg) { entry: @@ -138,7 +195,7 @@ exit: ; GCN-LABEL: {{^}}test_mfma_loop_vgpr_init: -; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, v0{{$}} +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v0{{$}} ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -146,7 +203,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_vgpr_init(<32 x float> addrspace(1)* %arg) { entry: @@ -203,7 +260,7 @@ exit: ; GCN-LABEL: {{^}}test_mfma_loop_sgpr_init: ; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} -; GCN-COUNT32: v_accvgpr_write_b32 a0, [[TMP]] +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -211,7 +268,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_sgpr_init(<32 x float> addrspace(1)* %arg, float %init) { entry: @@ -265,11 +322,39 @@ exit: ; GCN-LABEL: {{^}}test_mfma_loop_mixed_init: -; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, v0 -; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} -; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] - -; GCN-COUNT30: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v0 +; GCN-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -277,7 +362,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_mixed_init(<32 x float> addrspace(1)* %arg, float %x) { entry: @@ -303,7 +388,7 @@ exit: ; GCN-LABEL: {{^}}test_mfma_loop_mfma_forward_init: -; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, 0 +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, 0 ; GCN: v_mfma_f32_32x32x1f32 ; GCN-NOT: v_accvgpr @@ -313,7 +398,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(<32 x float> addrspace(1)* %arg) { entry: %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0) @@ -335,13 +420,13 @@ exit: ; GCN-LABEL: {{^}}test_mfma_loop_agpr_init: -; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, 0 +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, 0 ; GCN: v_mfma_f32_32x32x1f32 ; Check that we are using only one tmp VGPR. ; GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a{{[0-9]+}} -; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]{{$}} +; GCN-COUNT-31: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]{{$}} ; GCN: [[LOOP:BB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -349,7 +434,7 @@ exit: ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] -; GCN-COUNT32: v_accvgpr_read_b32 +; GCN-COUNT-32: v_accvgpr_read_b32 define amdgpu_kernel void @test_mfma_loop_agpr_init(<32 x float> addrspace(1)* %arg) { entry: %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0)