-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI,SICI
-; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,CI,SICI
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
-
-;GCN-LABEL: {{^}}s_buffer_load_imm:
-;GCN-NOT: s_waitcnt;
-;SI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
-;CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
-;VI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678,GFX67,GFX6
+; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678,GFX67,GFX78,GFX7
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678,GFX78,GFX8
+
; Scalar buffer load of a single i32 from %desc at constant byte offset 4.
; The assertions expect the immediate to be printed as 0x1 on tahiti and
; hawaii and as 0x4 on tonga.
define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_load_imm:
+; GFX67: ; %bb.0: ; %main_body
+; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x1
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: v_mov_b32_e32 v0, s0
+; GFX67-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX67-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_load_imm:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
%bitcast = bitcast i32 %load to float
ret void
}
-;GCN-LABEL: {{^}}s_buffer_load_index:
-;GCN-NOT: s_waitcnt;
-;GCN: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; Scalar buffer load of a single i32 whose offset is the inreg argument %index.
; All three targets are expected to emit s_buffer_load_dword with the offset
; taken from s4.
define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
+; GFX678-LABEL: s_buffer_load_index:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast i32 %load to float
ret void
}
-;GCN-LABEL: {{^}}s_buffer_load_index_divergent:
-;GCN-NOT: s_waitcnt;
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; Single-i32 load whose offset %index is NOT marked inreg. The assertions
; expect a buffer_load_dword with "offen" addressing from v0 on all three
; targets instead of an s_buffer_load.
define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
+; GFX678-LABEL: s_buffer_load_index_divergent:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX678-NEXT: s_waitcnt vmcnt(0)
+; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast i32 %load to float
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx2_imm:
-;GCN-NOT: s_waitcnt;
-;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
-;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
-;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
; Scalar buffer load of <2 x i32> at constant byte offset 64. The assertions
; expect s_buffer_load_dwordx2 with the immediate printed as 0x10 on tahiti
; and hawaii and as 0x40 on tonga.
define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_loadx2_imm:
+; GFX67: ; %bb.0: ; %main_body
+; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: v_mov_b32_e32 v0, s0
+; GFX67-NEXT: v_mov_b32_e32 v1, s1
+; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX67-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_loadx2_imm:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
%bitcast = bitcast <2 x i32> %load to <2 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx2_index:
-;GCN-NOT: s_waitcnt;
-;GCN: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; Scalar buffer load of <2 x i32> whose offset is the inreg argument %index.
; All three targets are expected to emit s_buffer_load_dwordx2 with the offset
; taken from s4.
define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
+; GFX678-LABEL: s_buffer_loadx2_index:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: v_mov_b32_e32 v1, s1
+; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <2 x i32> %load to <2 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx2_index_divergent:
-;GCN-NOT: s_waitcnt;
-;GCN: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; <2 x i32> load whose offset %index is NOT marked inreg. The assertions
; expect buffer_load_dwordx2 with "offen" addressing from v0 on all three
; targets instead of an s_buffer_load.
define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
+; GFX678-LABEL: s_buffer_loadx2_index_divergent:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
+; GFX678-NEXT: s_waitcnt vmcnt(0)
+; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <2 x i32> %load to <2 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx3_imm:
-;GCN-NOT: s_waitcnt;
-;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
-;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
-;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
; Scalar buffer load of <3 x i32> at constant byte offset 64. The assertions
; expect a four-dword s_buffer_load_dwordx4 of which only s0..s2 are copied to
; VGPRs; the immediate is printed as 0x10 on tahiti and hawaii and as 0x40 on
; tonga.
define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_loadx3_imm:
+; GFX67: ; %bb.0: ; %main_body
+; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: v_mov_b32_e32 v0, s0
+; GFX67-NEXT: v_mov_b32_e32 v1, s1
+; GFX67-NEXT: v_mov_b32_e32 v2, s2
+; GFX67-NEXT: exp mrt0 v0, v1, v2, v0 done vm
+; GFX67-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_loadx3_imm:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-NEXT: exp mrt0 v0, v1, v2, v0 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
%bitcast = bitcast <3 x i32> %load to <3 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx3_index:
-;GCN-NOT: s_waitcnt;
-;GCN: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; Scalar buffer load of <3 x i32> whose offset is the inreg argument %index.
; All three targets are expected to emit a four-dword s_buffer_load_dwordx4
; with the offset in s4, copying only s0..s2 to VGPRs.
define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
+; GFX678-LABEL: s_buffer_loadx3_index:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: v_mov_b32_e32 v1, s1
+; GFX678-NEXT: v_mov_b32_e32 v2, s2
+; GFX678-NEXT: exp mrt0 v0, v1, v2, v0 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <3 x i32> %load to <3 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx3_index_divergent:
-;GCN-NOT: s_waitcnt;
-;SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-;CI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-;VI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; <3 x i32> load whose offset %index is NOT marked inreg. The expected
; instruction differs per target - tahiti is expected to use
; buffer_load_dwordx4 into v[0:3], while hawaii and tonga are expected to use
; buffer_load_dwordx3 into v[0:2].
define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
+; GFX6-LABEL: s_buffer_loadx3_index_divergent:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: exp mrt0 v0, v1, v2, v0 done vm
+; GFX6-NEXT: s_endpgm
+;
+; GFX78-LABEL: s_buffer_loadx3_index_divergent:
+; GFX78: ; %bb.0: ; %main_body
+; GFX78-NEXT: buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
+; GFX78-NEXT: s_waitcnt vmcnt(0)
+; GFX78-NEXT: exp mrt0 v0, v1, v2, v0 done vm
+; GFX78-NEXT: s_endpgm
main_body:
%load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <3 x i32> %load to <3 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx4_imm:
-;GCN-NOT: s_waitcnt;
-;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
-;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
-;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8
; Scalar buffer load of <4 x i32> at constant byte offset 200. The assertions
; expect s_buffer_load_dwordx4 with the immediate printed as 0x32 on tahiti
; and hawaii and as 0xc8 on tonga.
define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_loadx4_imm:
+; GFX67: ; %bb.0: ; %main_body
+; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: v_mov_b32_e32 v0, s0
+; GFX67-NEXT: v_mov_b32_e32 v1, s1
+; GFX67-NEXT: v_mov_b32_e32 v2, s2
+; GFX67-NEXT: v_mov_b32_e32 v3, s3
+; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX67-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_loadx4_imm:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-NEXT: v_mov_b32_e32 v3, s3
+; GFX8-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
%bitcast = bitcast <4 x i32> %load to <4 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx4_index:
-;GCN-NOT: s_waitcnt;
-;GCN: buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; Scalar buffer load of <4 x i32> whose offset is the inreg argument %index.
; All three targets are expected to emit s_buffer_load_dwordx4 with the offset
; taken from s4.
define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
+; GFX678-LABEL: s_buffer_loadx4_index:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: v_mov_b32_e32 v1, s1
+; GFX678-NEXT: v_mov_b32_e32 v2, s2
+; GFX678-NEXT: v_mov_b32_e32 v3, s3
+; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <4 x i32> %load to <4 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_loadx4_index_divergent:
-;GCN-NOT: s_waitcnt;
-;GCN: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; <4 x i32> load whose offset %index is NOT marked inreg. The assertions
; expect buffer_load_dwordx4 with "offen" addressing from v0 on all three
; targets instead of an s_buffer_load.
define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
+; GFX678-LABEL: s_buffer_loadx4_index_divergent:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
+; GFX678-NEXT: s_waitcnt vmcnt(0)
+; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <4 x i32> %load to <4 x float>
ret void
}
-;GCN-LABEL: {{^}}s_buffer_load_imm_mergex2:
-;GCN-NOT: s_waitcnt;
-;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
-;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
-;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4
; Two single-i32 scalar buffer loads from the same descriptor at byte offsets
; 4 and 8. The assertions expect them to be combined into one
; s_buffer_load_dwordx2 whose immediate is printed as 0x1 on tahiti and hawaii
; and as 0x4 on tonga.
define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_load_imm_mergex2:
+; GFX67: ; %bb.0: ; %main_body
+; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: v_mov_b32_e32 v0, s0
+; GFX67-NEXT: v_mov_b32_e32 v1, s1
+; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX67-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_load_imm_mergex2:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
%load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
ret void
}
-;GCN-LABEL: {{^}}s_buffer_load_imm_mergex4:
-;GCN-NOT: s_waitcnt;
-;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
-;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
-;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8
define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_load_imm_mergex4:
+; GFX67: ; %bb.0: ; %main_body
+; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: v_mov_b32_e32 v0, s0
+; GFX67-NEXT: v_mov_b32_e32 v1, s1
+; GFX67-NEXT: v_mov_b32_e32 v2, s2
+; GFX67-NEXT: v_mov_b32_e32 v3, s3
+; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX67-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_load_imm_mergex4:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-NEXT: v_mov_b32_e32 v3, s3
+; GFX8-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
%load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
@gv = external addrspace(1) global i32
-;GCN-LABEL: {{^}}s_buffer_load_index_across_bb:
-;GCN-NOT: s_waitcnt;
-;GCN: v_or_b32
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; %index (not inreg) is shifted left by 4 and the shifted value is stored to
; the external global @gv. The expected code then ORs 8 into the shifted value
; (v_or_b32) and uses the result as the "offen" address of a
; buffer_load_dword. tahiti and hawaii are expected to store through
; buffer_store_dword (tahiti with an extra s_waitcnt expcnt(0) afterwards),
; tonga through flat_store_dword.
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
+; GFX6-LABEL: s_buffer_load_index_across_bb:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_getpc_b64 s[4:5]
+; GFX6-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; GFX6-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: s_mov_b32 s6, -1
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: s_waitcnt expcnt(0)
+; GFX6-NEXT: v_or_b32_e32 v0, 8, v0
+; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: s_buffer_load_index_across_bb:
+; GFX7: ; %bb.0: ; %main_body
+; GFX7-NEXT: s_getpc_b64 s[4:5]
+; GFX7-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; GFX7-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b32 s6, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX7-NEXT: v_or_b32_e32 v0, 8, v0
+; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: s_buffer_load_index_across_bb:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_getpc_b64 s[4:5]
+; GFX8-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; GFX8-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v1, s4
+; GFX8-NEXT: v_mov_b32_e32 v2, s5
+; GFX8-NEXT: flat_store_dword v[1:2], v0
+; GFX8-NEXT: v_or_b32_e32 v0, 8, v0
+; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm
+; GFX8-NEXT: s_endpgm
main_body:
%tmp = shl i32 %index, 4
store i32 %tmp, ptr addrspace(1) @gv
ret void
}
-;GCN-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
-;GCN-NOT: s_waitcnt;
-;GCN: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
; %index (not inreg) is shifted left by 4 in main_body before branching to
; %bb1. All three targets are expected to emit a single buffer_load_dwordx2
; with "offen" addressing from the shifted value and an immediate offset:8.
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
+; GFX678-LABEL: s_buffer_load_index_across_bb_merged:
+; GFX678: ; %bb.0: ; %main_body
+; GFX678-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX678-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
+; GFX678-NEXT: s_waitcnt vmcnt(0)
+; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX678-NEXT: s_endpgm
main_body:
%tmp = shl i32 %index, 4
br label %bb1
ret void
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_neg1:
-; GCN: s_mov_b32 [[K:s[0-9]+]], -1{{$}}
-; GCN: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset -1, returned as the shader's i32
; result. Every target is expected to materialize -1 in s4 and use it as a
; register offset; tahiti additionally expects an s_nop 3 between the
; s_mov_b32 and the load.
define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_neg1:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX78-LABEL: s_buffer_load_imm_neg1:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_mov_b32 s4, -1
+; GFX78-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX78-NEXT: s_waitcnt lgkmcnt(0)
+; GFX78-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_neg4:
-; SI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x3fffffff{{$}}
-
-; VI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset -4. tahiti and tonga are expected to
; materialize -4 in s4 (tahiti with an s_nop 3 before the load); hawaii is
; expected to encode the offset directly as the immediate 0x3fffffff.
define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_neg4:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, -4
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_neg4:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fffffff
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_neg4:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, -4
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_neg8:
-; SI: s_mov_b32 [[K:s[0-9]+]], -8{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x3ffffffe{{$}}
; Scalar buffer load at constant offset -8. tahiti and tonga are expected to
; materialize -8 in s4 (tahiti with an s_nop 3 before the load); hawaii is
; expected to encode the offset directly as the immediate 0x3ffffffe.
define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_neg8:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, -8
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_neg8:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffffffe
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_neg8:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, -8
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_bit31:
-; SI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x20000000{{$}}
-
-; VI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset -2147483648 (only bit 31 set). tahiti
; and tonga are expected to build the value with s_brev_b32 s4, 1 (tahiti
; with an s_nop 3 before the load); hawaii is expected to use the immediate
; 0x20000000.
define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_bit31:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_brev_b32 s4, 1
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_bit31:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_bit31:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_brev_b32 s4, 1
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_bit30:
-; SI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x10000000{{$}}
-
-; VI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset 1073741824 (only bit 30 set). tahiti
; and tonga are expected to materialize it as s_mov_b32 s4, 2.0 (tahiti with
; an s_nop 3 before the load); hawaii is expected to use the immediate
; 0x10000000.
define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_bit30:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 2.0
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_bit30:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x10000000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_bit30:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, 2.0
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_bit29:
-; SI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x8000000{{$}}
-
-; VI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset 536870912 (only bit 29 set). tahiti
; and tonga are expected to build the value with s_brev_b32 s4, 4 (tahiti
; with an s_nop 3 before the load); hawaii is expected to use the immediate
; 0x8000000.
define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_bit29:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_brev_b32 s4, 4
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_bit29:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x8000000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_bit29:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_brev_b32 s4, 4
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_bit21:
-; SI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
-
-; VI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset 2097152 (0x200000, only bit 21 set).
; tahiti and tonga are expected to materialize 0x200000 in s4 (tahiti with an
; s_nop 3 before the load); hawaii is expected to use the immediate 0x80000.
define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_bit21:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0x200000
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_bit21:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_bit21:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, 0x200000
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_bit20:
-; SI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x40000{{$}}
-
-; VI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset 1048576 (0x100000, only bit 20 set).
; tahiti and tonga are expected to materialize 0x100000 in s4 (tahiti with an
; s_nop 3 before the load); hawaii is expected to use the immediate 0x40000.
define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_bit20:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0x100000
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_bit20:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x40000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_bit20:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, 0x100000
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit20:
-; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x3ffc0000{{$}}
-
-; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset -1048576. tahiti and tonga are
; expected to materialize 0xfff00000 in s4 (tahiti with an s_nop 3 before the
; load); hawaii is expected to use the immediate 0x3ffc0000.
define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_neg_bit20:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0xfff00000
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_neg_bit20:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffc0000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, 0xfff00000
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_bit19:
-; SI: s_mov_b32 [[K:s[0-9]+]], 0x80000{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x20000{{$}}
-
-; VI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
; Scalar buffer load at constant offset 524288 (0x80000, only bit 19 set).
; tahiti is expected to materialize 0x80000 in s4 followed by s_nop 3; hawaii
; is expected to use the immediate 0x20000 and tonga the immediate 0x80000.
define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_bit19:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0x80000
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_bit19:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_bit19:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit19:
-; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x3ffe0000{{$}}
-
-; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset -524288. tahiti and tonga are
; expected to materialize 0xfff80000 in s4 (tahiti with an s_nop 3 before the
; load); hawaii is expected to use the immediate 0x3ffe0000.
define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_neg_bit19:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0xfff80000
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_neg_bit19:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffe0000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_mov_b32 s4, 0xfff80000
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_255:
-; SICI: s_movk_i32 [[K:s[0-9]+]], 0xff{{$}}
-; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; VI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
; Scalar buffer load at constant offset 255 (not a multiple of 4). tahiti and
; hawaii are expected to materialize 0xff with s_movk_i32 (tahiti with an
; s_nop 3 before the load); tonga is expected to use the immediate 0xff.
define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_255:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_movk_i32 s4, 0xff
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_255:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_movk_i32 s4, 0xff
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_255:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_256:
-; SICI: s_buffer_load_dword s0, s[0:3], 0x40{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
; Scalar buffer load at constant offset 256. The assertions expect the
; immediate to be printed as 0x40 on tahiti and hawaii and as 0x100 on tonga.
define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_load_imm_256:
+; GFX67: ; %bb.0:
+; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x40
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_256:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_1016:
-; SICI: s_buffer_load_dword s0, s[0:3], 0xfe{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], 0x3f8{{$}}
; Scalar buffer load at constant offset 1016. The assertions expect the
; immediate to be printed as 0xfe on tahiti and hawaii and as 0x3f8 on tonga.
define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_load_imm_1016:
+; GFX67: ; %bb.0:
+; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xfe
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_1016:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x3f8
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_1020:
-; SICI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], 0x3fc{{$}}
; Scalar buffer load at constant offset 1020. The assertions expect the
; immediate to be printed as 0xff on tahiti and hawaii and as 0x3fc on tonga.
define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
+; GFX67-LABEL: s_buffer_load_imm_1020:
+; GFX67: ; %bb.0:
+; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_1020:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fc
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_1021:
-; SICI: s_movk_i32 [[K:s[0-9]+]], 0x3fd{{$}}
-; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
; Scalar buffer load at constant offset 1021 (not a multiple of 4). tahiti
; and hawaii are expected to materialize 0x3fd with s_movk_i32 (tahiti with an
; s_nop 3 before the load); tonga is expected to use the immediate 0x3fd.
define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_1021:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_movk_i32 s4, 0x3fd
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_1021:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_movk_i32 s4, 0x3fd
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_1021:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fd
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_1024:
-; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
-
-; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}}
; Scalar buffer load at constant offset 1024. tahiti is expected to
; materialize 0x400 with s_movk_i32 followed by s_nop 3; hawaii is expected to
; use the immediate 0x100 and tonga the immediate 0x400.
define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_1024:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_movk_i32 s4, 0x400
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_1024:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_1024:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x400
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_1025:
-; SICI: s_movk_i32 [[K:s[0-9]+]], 0x401{{$}}
-; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; VI: s_buffer_load_dword s0, s[0:3], 0x401{{$}}
; Scalar buffer load at constant offset 1025 (not a multiple of 4). tahiti
; and hawaii are expected to materialize 0x401 with s_movk_i32 (tahiti with an
; s_nop 3 before the load); tonga is expected to use the immediate 0x401.
define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_1025:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_movk_i32 s4, 0x401
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_1025:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_movk_i32 s4, 0x401
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_1025:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x401
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
ret i32 %load
}
-; GCN-LABEL: {{^}}s_buffer_load_imm_1028:
-; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}}
-; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
-
-; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
-; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}}
; Scalar buffer load at constant offset 1028, a multiple of 4 just above 1024.
; The IR previously passed 1024, which made this test an exact duplicate of
; s_buffer_load_imm_1024 and left offset 1028 untested.
;
; Expected lowering: tahiti materializes 0x404 with s_movk_i32 followed by
; s_nop 3; hawaii uses the immediate 0x101 (1028 / 4); tonga uses the
; immediate 0x404.
;
; NOTE(review): the assertions below were adjusted by hand to follow the new
; offset, mirroring the pattern of the neighbouring 1024 test; re-run
; utils/update_llc_test_checks.py to confirm them against real llc output.
define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
+; GFX6-LABEL: s_buffer_load_imm_1028:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_movk_i32 s4, 0x404
+; GFX6-NEXT: s_nop 3
+; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: s_buffer_load_imm_1028:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x101
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_buffer_load_imm_1028:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], 0x404
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
ret i32 %load
}