+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX11 %s
; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.
-; GCN-LABEL: {{^}}mad_u16
-; GCN: {{flat|global}}_load_{{ushort|u16}} v[[A:[0-9]+]]
-; GCN: {{flat|global}}_load_{{ushort|u16}} v[[B:[0-9]+]]
-; GCN: {{flat|global}}_load_{{ushort|u16}} v[[C:[0-9]+]]
-; GFX8: v_mad_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
-; GFX9: v_mad_legacy_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
-; GFX10: v_mad_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
-; GCN: {{flat|global}}_store_{{short|b16}} v{{.+}}, v[[R]]
-; GCN: s_endpgm
; Kernel @mad_u16: codegen test for selection of a 16-bit unsigned
; multiply-add on each -mcpu listed in the RUN lines.
;
; Visible parameters are three addrspace(1) pointers (%r, %a, %b); the rest
; of the signature and the IR body are not visible in this chunk.
;
; What the expected output below pins down, per target:
;   * gfx803  - three flat_load_ushort, one v_mad_u16, one flat_store_short
;   * gfx900  - three global_load_ushort, one v_mad_legacy_u16,
;               one global_store_short
;   * gfx1010 - three global_load_ushort, one v_mad_u16,
;               one global_store_short
;   * gfx1100 - three global_load_u16, one v_mad_u16, one global_store_b16,
;               then a MSG_DEALLOC_VGPRS s_sendmsg before s_endpgm
;
; NOTE(review): every load carries glc and is followed by a full vmcnt(0)
; wait, which presumably corresponds to volatile loads in the IR body;
; confirm against the full file.
;
; The check lines inside the definition are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script rather
; than editing them by hand.
define amdgpu_kernel void @mad_u16(
+; GFX8-LABEL: mad_u16:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
+; GFX8-NEXT: v_lshlrev_b32_e32 v4, 1, v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-NEXT: v_mov_b32_e32 v3, s5
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-NEXT: v_mov_b32_e32 v5, s7
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GFX8-NEXT: flat_load_ushort v6, v[0:1] glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_load_ushort v2, v[2:3] glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_load_ushort v3, v[4:5] glc
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: v_mad_u16 v2, v6, v2, v3
+; GFX8-NEXT: flat_store_short v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX9-LABEL: mad_u16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v1, v0, s[2:3] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_load_ushort v2, v0, s[4:5] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_load_ushort v3, v0, s[6:7] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_mad_legacy_u16 v1, v1, v2, v3
+; GFX9-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: mad_u16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v1, v0, s[2:3] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_ushort v2, v0, s[4:5] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_ushort v3, v0, s[6:7] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: v_mad_u16 v1, v1, v2, v3
+; GFX10-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: mad_u16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
+; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v0, 1, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_mad_u16 v0, v1, v2, v0
+; GFX11-NEXT: global_store_b16 v3, v0, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
}
; AMDGPU intrinsic returning the workitem id in the x dimension as an i32.
; NOTE(review): its call site is not visible in this chunk; presumably the
; kernel uses it to index the i16 buffers (the expected code shifts v0 left
; by 1) - confirm against the full file.
declare i32 @llvm.amdgcn.workitem.id.x()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}