NFC. Pre-commit test changes that give GFX11 its own FileCheck prefix (separate CHECK lines) in tests
where GFX11 behavior will diverge from previous subtargets in a future patch.
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
define float @v_fma_f32(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32:
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
ret float %fma
}
; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v2f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, v0, v2, v4
+; GFX11-NEXT: v_fma_f32 v1, v1, v3, v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
ret <2 x float> %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
ret half %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v2f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z)
ret <2 x half> %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v2f16_fneg_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y, <2 x half> %z)
ret <2 x half> %fma
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v2f16_fneg_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%y.fneg = fneg <2 x half> %y
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y.fneg, <2 x half> %z)
ret <2 x half> %fma
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v2f16_fneg_lhs_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
%y.fneg = fneg <2 x half> %y
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg, <2 x half> %z)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v4f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z)
ret <4 x half> %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call double @llvm.fma.f64(double %x, double %y, double %z)
ret double %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f64_fneg_all:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
%neg.y = fneg double %y
%neg.z = fneg double %z
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_v2f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
+; GFX11-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z)
ret <2 x double> %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32_fabs_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
ret float %fma
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32_fabs_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, v0, |v1|, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%fma = call float @llvm.fma.f32(float %x, float %fabs.y, float %z)
ret float %fma
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32_fabs_lhs_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fabs.y = call float @llvm.fabs.f32(float %y)
%fma = call float @llvm.fma.f32(float %fabs.x, float %fabs.y, float %z)
; GFX10: ; %bb.0:
; GFX10-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_fma_f32 v0, s0, v0, v1
+; GFX11-NEXT: ; return to shader part epilog
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
ret float %fma
}
; GFX10: ; %bb.0:
; GFX10-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_fma_f32 v0, s0, v0, v1
+; GFX11-NEXT: ; return to shader part epilog
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
ret float %fma
}
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_fma_f32 v0, s1, s0, v0
; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_fma_f32 v0, s1, s0, v0
+; GFX11-NEXT: ; return to shader part epilog
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
ret float %fma
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32_fneg_lhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, -v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
%fma = call float @llvm.fma.f32(float %neg.x, float %y, float %z)
ret float %fma
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32_fneg_rhs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, v0, -v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg float %y
%fma = call float @llvm.fma.f32(float %x, float %neg.y, float %z)
ret float %fma
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_fma_f32_fneg_z:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, -v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg float %z
%fma = call float @llvm.fma.f32(float %x, float %y, float %neg.z)
ret float %fma
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
# RUN: FileCheck --check-prefix=ERR %s < %t
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX11 %s
# RUN: FileCheck --check-prefix=ERR %s < %t
# ERR-NOT: remark
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ; GFX11-LABEL: name: ashr_s16_s16_ss
+ ; GFX11: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
+ ; GFX11-LABEL: name: ashr_s16_s16_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ; GFX11-LABEL: name: ashr_s16_s32_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
+ ; GFX11-LABEL: name: ashr_s16_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX11-LABEL: name: ashr_s16_s16_vv_zext_to_s32
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX11-LABEL: name: ashr_s16_vv_zext_to_s64
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ; GFX11-LABEL: name: ashr_s16_s32_ss
+ ; GFX11: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ; GFX11-LABEL: name: ashr_s16_s32_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
+ ; GFX11-LABEL: name: ashr_s16_s16_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
+ ; GFX11-LABEL: name: ashr_s16_s32_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX8 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
---
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-LABEL: name: fcanonicalize_f16_denorm
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FCANONICALIZE %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-LABEL: name: fcanonicalize_f16_flush
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FCANONICALIZE %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX11-LABEL: name: fcanonicalize_f32_denorm
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX11-LABEL: name: fcanonicalize_f32_flush
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX11-LABEL: name: fcanonicalize_v2f16_denorm
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX11-LABEL: name: fcanonicalize_v2f16_flush
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX11-LABEL: name: fcanonicalize_f64_denorm
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX11-LABEL: name: fcanonicalize_f64_flush
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-LABEL: name: fcanonicalize_fabs_f32_denorm
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FCANONICALIZE %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-LABEL: name: fcanonicalize_fabs_f32_flush
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FCANONICALIZE %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-LABEL: name: fcanonicalize_fneg_f32_denorm
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FCANONICALIZE %1
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-LABEL: name: fcanonicalize_fneg_f32_flush
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FCANONICALIZE %1
; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %3
+ ; GFX11-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FABS %1
; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit %3
+ ; GFX11-LABEL: name: fcanonicalize_fneg_fabs_f32_flush
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FABS %1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
---
name: fcmp_false_s16_vv
; WAVE32-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1)
+ ; GFX11-LABEL: name: fcmp_false_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]]
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_oeq_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_ogt_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_oge_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_olt_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_ole_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_one_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; fcmp ord is the ordered predicate and is expected to select V_CMP_O_F16; V_CMP_LG_F16 is what fcmp one selects.
+ ; GFX11-LABEL: name: fcmp_ord_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_uno_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_ueq_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_ugt_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_uge_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_ult_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_ule_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fcmp_une_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; WAVE32-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1)
+ ; GFX11-LABEL: name: fcmp_true_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]]
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
---
name: fmaxnum_ieee_f16_vv
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fmaxnum_ieee_f16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %5
+ ; GFX11-LABEL: name: fmaxnum_ieee_f16_v_fneg_v
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
---
name: fmaxnum_f16_vv
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fmaxnum_f16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %5
+ ; GFX11-LABEL: name: fmaxnum_f16_v_fneg_v
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
---
name: fminnum_ieee_f16_vv
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fminnum_ieee_f16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %5
+ ; GFX11-LABEL: name: fminnum_ieee_f16_v_fneg_v
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
---
name: fminnum_f16_vv
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX11-LABEL: name: fminnum_f16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit %5
+ ; GFX11-LABEL: name: fminnum_f16_v_fneg_v
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
---
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ; GFX11-LABEL: name: icmp_eq_s16_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ; GFX11-LABEL: name: icmp_eq_s16_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ; GFX11-LABEL: name: icmp_eq_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ; GFX11-LABEL: name: icmp_ne_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ; GFX11-LABEL: name: icmp_slt_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ; GFX11-LABEL: name: icmp_sle_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ; GFX11-LABEL: name: icmp_ult_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ; GFX11-LABEL: name: icmp_ule_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
# RUN: FileCheck --check-prefix=ERR %s < %t
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX11 %s
# RUN: FileCheck --check-prefix=ERR %s < %t
# ERR-NOT: remark
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
+ ; GFX11-LABEL: name: lshr_s16_s16_ss
+ ; GFX11: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
+ ; GFX11-LABEL: name: lshr_s16_s16_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
+ ; GFX11-LABEL: name: lshr_s16_s32_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
+ ; GFX11-LABEL: name: lshr_s16_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX11-LABEL: name: lshr_s16_s16_vv_zext_to_s32
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX11-LABEL: name: lshr_s16_vv_zext_to_s64
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
+ ; GFX11-LABEL: name: lshr_s16_s32_ss
+ ; GFX11: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
+ ; GFX11-LABEL: name: lshr_s16_s32_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
+ ; GFX11-LABEL: name: lshr_s16_s16_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
+ ; GFX11-LABEL: name: lshr_s16_s32_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16)
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s
---
name: smed3_s16_vvv
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
+ ; GFX11-LABEL: name: smed3_s16_vvv
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
+ ; GFX11-LABEL: name: smed3_s16_vvv_multiuse0
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]]
+ ; GFX11-LABEL: name: smed3_s16_vvv_multiuse1
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
+ ; GFX11-LABEL: name: smed3_s16_vvv_multiuse2
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
+ ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s
---
name: umed3_s16_vvv
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
+ ; GFX11-LABEL: name: umed3_s16_vvv
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
+ ; GFX11-LABEL: name: umed3_s16_vvv_multiuse0
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]]
+ ; GFX11-LABEL: name: umed3_s16_vvv_multiuse1
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
+ ; GFX11-LABEL: name: umed3_s16_vvv_multiuse2
+ ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX11-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
+ ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
# RUN: FileCheck --check-prefix=ERR %s < %t
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX11 %s
# RUN: FileCheck --check-prefix=ERR %s < %t
# ERR-NOT: remark
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
+ ; GFX11-LABEL: name: shl_s16_s16_ss
+ ; GFX11: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]]
+ ; GFX11-LABEL: name: shl_s16_s16_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
+ ; GFX11-LABEL: name: shl_s16_s32_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]]
+ ; GFX11-LABEL: name: shl_s16_s16_vv
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX11-LABEL: name: shl_s16_s16_vv_zext_to_s32
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16)
; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX11-LABEL: name: shl_s16_vv_zext_to_s64
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
+ ; GFX11-LABEL: name: shl_s16_s32_ss
+ ; GFX11: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
+ ; GFX11-LABEL: name: shl_s16_s32_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]]
+ ; GFX11-LABEL: name: shl_s16_s16_sv
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
; GFX10-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
+ ; GFX11-LABEL: name: shl_s16_s32_vs
+ ; GFX11: liveins: $sgpr0, $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[SHL]](s16)
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s
---
; WAVE32-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; GFX11-LABEL: name: sitofp
+ ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; GFX11-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; GFX11-LABEL: name: sitofp_s32_to_s16_vv
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_SITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; GFX11-LABEL: name: sitofp_s32_to_s16_vs
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY %1
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s16) = G_SITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s
---
name: uitofp_s32_to_s32_vv
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
+ ; GFX11-LABEL: name: uitofp_s32_to_s32_vv
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_UITOFP %0
$vgpr0 = COPY %1
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
+ ; GFX11-LABEL: name: uitofp_s32_to_s32_vs
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_UITOFP %0
$vgpr0 = COPY %1
; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; GFX11-LABEL: name: uitofp_s32_to_s16_vv
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_UITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; GFX11-LABEL: name: uitofp_s32_to_s16_vs
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY %1
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s16) = G_UITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11 %s
define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_v2f16_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x half> %val
}
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_v3f16_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX11-NEXT: v_fma_f16 v1, v1, v3, v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x half> %val
}
; GFX10-NEXT: v_lshl_or_b32 v0, v9, 16, v0
; GFX10-NEXT: v_lshl_or_b32 v1, v6, 16, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_v4f16_fpexcept_strict:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v4
+; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v0
+; GFX11-NEXT: v_fmac_f16_e32 v4, v0, v2
+; GFX11-NEXT: v_fmac_f16_e32 v5, v1, v3
+; GFX11-NEXT: v_fmac_f16_e32 v6, v8, v7
+; GFX11-NEXT: v_fmac_f16_e32 v9, v11, v10
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v4
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v5
+; GFX11-NEXT: v_lshl_or_b32 v0, v9, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v6, 16, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <4 x half> %val
}
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg half %z
%val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, -v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, -v0, -v1, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg half %x
%neg.y = fneg half %y
%val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = call half @llvm.fabs.f16(half %x)
%neg.y = call half @llvm.fabs.f16(half %y)
%val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg <2 x half> %x
%neg.y = fneg <2 x half> %y
%val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %neg.x, <2 x half> %neg.y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")