-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,PREGFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
}
; Exports the f32 constants 1.0, 2.0, 0.5 and 4.0 twice with target operand 9
; and enable mask 15. The two calls differ only in the first i1 flag; the
; checks expect "exp null" for both, with a trailing "done" on the second.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN, so
; only invocations that pass PREGFX11 to FileCheck verify the exp operands.
; GCN-LABEL: {{^}}test_export_null_f32:
-; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
-; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_f32() #0 {
call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
}
; Exports the f32 constants 1.0, 2.0, 0.5 and 4.0 twice with target operand 32
; and enable mask 15. The checks expect "exp param0" for both calls; the call
; whose first i1 flag is true is expected to carry a trailing "done".
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; GCN-LABEL: {{^}}test_export_param0_f32:
-; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
-; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_f32() #0 {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
}
; Exports the f32 constants 1.0, 2.0, 0.5 and 4.0 twice with target operand 63
; and enable mask 15. The checks expect "exp param31" for both calls, with a
; trailing "done" on the call whose first i1 flag is true.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; NOTE(review): the end of this function is not visible in this excerpt.
; GCN-LABEL: {{^}}test_export_param31_f32:
-; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
-; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_f32() #0 {
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Checks and body of test_export_vm_f32. Both calls use target operand 0 and
; pass true as the final i1 flag. The PREGFX11 checks expect "exp mrt0" with a
; trailing "vm"; the GFX11 checks expect the same exports without "vm".
; NOTE(review): the label line and the SRC0 definition that these checks rely
; on are not visible in this excerpt.
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
+; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_vm_f32() #0 {
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
}
; i32 counterpart of the null-target export test: exports the constants
; 1, 2, 5 and 4 twice with target operand 9 and enable mask 15. The checks
; expect "exp null" for both calls, with a trailing "done" on the call whose
; first i1 flag is true.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; GCN-LABEL: {{^}}test_export_null_i32:
-; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
-; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
-; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
-; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
+; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_i32() #0 {
call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
}
; Exports the i32 constants 1, 2, 5 and 4 twice with target operand 32 and
; enable mask 15. The checks expect "exp param0" for both calls, with a
; trailing "done" on the call whose first i1 flag is true.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; GCN-LABEL: {{^}}test_export_param0_i32:
-; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
-; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
-; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
-; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
+; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_i32() #0 {
call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
}
; Exports the i32 constants 1, 2, 5 and 4 twice with target operand 63 and
; enable mask 15. The checks expect "exp param31" for both calls, with a
; trailing "done" on the call whose first i1 flag is true.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; NOTE(review): the end of this function is not visible in this excerpt.
; GCN-LABEL: {{^}}test_export_param31_i32:
-; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
-; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
-; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
-; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
+; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_i32() #0 {
call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
; Checks and body of test_export_vm_i32. Both calls use target operand 0 and
; pass true as the final i1 flag. The PREGFX11 checks expect "exp mrt0" with a
; trailing "vm"; the GFX11 checks expect the same exports without "vm".
; NOTE(review): the label line and the SRC0 definition that these checks rely
; on are not visible in this excerpt.
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
+; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_vm_i32() #0 {
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
}
; The checks expect a param0 export to be followed on the very next line by a
; param1 export carrying "done"; both share the X and Y operands and differ in
; the third (add vs. sub result) and fourth (0 vs. 1.0) operands.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; NOTE(review): only the fadd and the first export call are visible in this
; excerpt; the fsub and second export implied by the checks are presumably in
; the elided part of the body.
; GCN-LABEL: {{^}}test_export_clustering:
-; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
-; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
-; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
-; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
-; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
-; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
-; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
+; PREGFX11-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
+; PREGFX11-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
+; PREGFX11-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
+; PREGFX11-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
+; PREGFX11-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
+; PREGFX11-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
+; PREGFX11: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
+; PREGFX11-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
}
; The checks expect a pos0 export to appear before the param0 export, with no
; s_waitcnt between the two.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; NOTE(review): only the export to target 32 is visible in this excerpt; the
; call producing the pos0 export is presumably in the elided part of the body.
; GCN-LABEL: {{^}}test_export_pos_before_param:
-; GCN: exp pos0
-; GCN-NOT: s_waitcnt
-; GCN: exp param0
+; PREGFX11: exp pos0
+; PREGFX11-NOT: s_waitcnt
+; PREGFX11: exp param0
define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
}
; The checks expect the exports in the order pos0, pos1, pos2, then param0,
; param1, param2, with no s_waitcnt between the pos2 and param0 exports.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; NOTE(review): only one export call (target 32) is visible in this excerpt;
; the remaining calls implied by the checks are presumably elided.
; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
-; GCN: exp pos0
-; GCN: exp pos1
-; GCN: exp pos2
-; GCN-NOT: s_waitcnt
-; GCN: exp param0
-; GCN: exp param1
-; GCN: exp param2
+; PREGFX11: exp pos0
+; PREGFX11: exp pos1
+; PREGFX11: exp pos2
+; PREGFX11-NOT: s_waitcnt
+; PREGFX11: exp param0
+; PREGFX11: exp param1
+; PREGFX11: exp param2
define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
}
; The checks expect the pos0, param0 and param1 exports on three consecutive
; output lines, in that order.
; The "+" check lines use the PREGFX11 prefix where the "-" lines used GCN.
; NOTE(review): only the exports to targets 32 and 33 are visible in this
; excerpt; the pos0 export and the load suggested by the function name are
; presumably in the elided part of the body.
; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
-; GCN: exp pos0
-; GCN-NEXT: exp param0
-; GCN-NEXT: exp param1
+; PREGFX11: exp pos0
+; PREGFX11-NEXT: exp param0
+; PREGFX11-NEXT: exp param1
define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
}
; GCN-LABEL: {{^}}test_export_across_store_load:
-; GCN: buffer_store
-; GCN: buffer_load
-; GCN: exp pos0
-; GCN: exp param0
-; GCN: exp param1
+; PREGFX11: buffer_store
+; PREGFX11: buffer_load
+; PREGFX11: exp pos0
+; PREGFX11: exp param0
+; PREGFX11: exp param1
define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
%data0 = alloca <4 x float>, align 8, addrspace(5)
%data1 = alloca <4 x float>, align 8, addrspace(5)