.narrowScalarFor({{S64, S16}}, changeTo(0, S32))
.scalarize(0);
- getActionDefinitionsBuilder(G_FSUB)
+ auto &FSubActions = getActionDefinitionsBuilder(G_FSUB);
+ if (ST.has16BitInsts()) {
+ FSubActions
+ // Use actual fsub instruction
+ .legalFor({S32, S16})
+ // Must use fadd + fneg
+ .lowerFor({S64, V2S16});
+ } else {
+ FSubActions
// Use actual fsub instruction
.legalFor({S32})
// Must use fadd + fneg
- .lowerFor({S64, S16, V2S16})
- .scalarize(0)
- .clampScalar(0, S32, S64);
+ .lowerFor({S64, S16, V2S16});
+ }
+
+ FSubActions
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
// Whether this is legal depends on the floating point mode for the function.
auto &FMad = getActionDefinitionsBuilder(G_FMAD);
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX9-NEXT: v_add_f16_e64 v0, v0, -v2
+; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_half_sub_mul:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX10-NEXT: v_add_f16_e64 v0, v0, -v2
+; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_half_sub_mul:
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX10-DENORM-NEXT: v_add_f16_e64 v0, v0, -v2
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
%a = fmul half %x, %y
; GFX9-LABEL: test_half_sub_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX9-NEXT: v_add_f16_e32 v0, v2, v0
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, -v1, v2
+; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, -v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_half_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX10-NEXT: v_add_f16_e32 v0, v2, v0
+; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0
+; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
%a = fmul half %x, %y
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX9-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX9-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-DENORM-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX9-DENORM-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-DENORM-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX9-DENORM-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX10-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX10-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX10-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX10-DENORM-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX10-DENORM-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-DENORM-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX10-DENORM-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-DENORM-NEXT: v_and_or_b32 v1, 0xffff, v3, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-NEXT: v_add_f16_e64 v2, v4, -v0
-; GFX9-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_add_f16_e64 v3, v5, -v1
-; GFX9-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v2, v4, v0
+; GFX9-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v3, v5, v1
+; GFX9-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-DENORM-NEXT: v_add_f16_e64 v2, v4, -v0
-; GFX9-DENORM-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-DENORM-NEXT: v_add_f16_e64 v3, v5, -v1
-; GFX9-DENORM-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
+; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
+; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX10-NEXT: v_add_f16_e64 v2, v4, -v0
-; GFX10-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_add_f16_e64 v3, v5, -v1
-; GFX10-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-NEXT: v_sub_f16_e32 v2, v4, v0
+; GFX10-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-NEXT: v_sub_f16_e32 v3, v5, v1
+; GFX10-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX10-DENORM-NEXT: v_add_f16_e64 v2, v4, -v0
-; GFX10-DENORM-NEXT: v_add_f16_sdwa v0, v4, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-DENORM-NEXT: v_add_f16_e64 v3, v5, -v1
-; GFX10-DENORM-NEXT: v_add_f16_sdwa v1, v5, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
+; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
+; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-DENORM-NEXT: v_and_or_b32 v1, 0xffff, v3, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX9-NEXT: v_add_f16_e64 v0, v0, -v2
+; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX10-NEXT: v_add_f16_e64 v0, v0, -v2
+; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
-; GFX10-DENORM-NEXT: v_add_f16_e64 v0, v0, -v2
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
%a = fmul half %x, %y
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX9-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX9-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-DENORM-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX9-DENORM-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-DENORM-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX9-DENORM-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX10-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX10-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX10-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX10-DENORM-NEXT: v_add_f16_e64 v2, v0, -v4
-; GFX10-DENORM-NEXT: v_add_f16_sdwa v0, v0, -v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-DENORM-NEXT: v_add_f16_e64 v3, v1, -v5
-; GFX10-DENORM-NEXT: v_add_f16_sdwa v1, v1, -v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
+; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
+; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-DENORM-NEXT: v_and_or_b32 v1, 0xffff, v3, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]]
- ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC1]]
+ ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_fsub_s16
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC1]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]]
- ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]]
- ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]]
- ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
+ ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]]
+ ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]]
+ ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]]
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
- ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
+ ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB]](s16)
+ ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
+ ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB2]](s16)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]]
- ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]]
+ ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]]
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
- ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16)
+ ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16)
+ ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB2]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]]
- ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]]
- ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC6]]
- ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; VI-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]]
- ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]]
- ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
- ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
+ ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC4]]
+ ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC5]]
+ ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC6]]
+ ; VI-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[TRUNC7]]
+ ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB]](s16)
+ ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
- ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16)
+ ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB2]](s16)
+ ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB3]](s16)
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC6]]
- ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; GFX9-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]]
- ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]]
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
- ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC4]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC5]]
+ ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC6]]
+ ; GFX9-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[TRUNC7]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16)
+ ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16)
- ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
+ ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB2]](s16)
+ ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB3]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]]
; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]]
- ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
- ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_intrinsic_round_s16
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]]
; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]]
; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]]
; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]]
- ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]]
- ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]]
- ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]]
+ ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]]
; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]]
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]]
- ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
- ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
- ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16)
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
+ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
+ ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]]
; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]]
; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]]
- ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]]
+ ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]]
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]]
; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]]
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]]
- ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
- ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]]
; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]]
- ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]]
- ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]]
- ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]]
+ ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]]
; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]]
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]]
- ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]]
- ; GFX8-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]]
- ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]]
+ ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]]
+ ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]]
; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]]
; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]]
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]]
- ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
+ ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
- ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16)
+ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
+ ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16)
+ ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]]
; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]]
; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]]
- ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]]
+ ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]]
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]]
; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]]
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]]
- ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]]
- ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]]
- ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]]
+ ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]]
+ ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]]
; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]]
; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]]
; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]]
- ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
+ ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
- ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16)
+ ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]]
; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]]
- ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]]
- ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]]
- ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]]
+ ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]]
; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]]
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]]
- ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]]
- ; GFX8-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]]
- ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]]
+ ; GFX8-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]]
+ ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]]
; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]]
; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]]
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]]
- ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
+ ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
; GFX8-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]]
- ; GFX8-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]]
- ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]]
- ; GFX8-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]]
+ ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[INTRINSIC_TRUNC3]]
+ ; GFX8-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FSUB3]]
; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]]
; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]]
; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]]
- ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]]
- ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
- ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16)
+ ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]]
+ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
+ ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16)
- ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD7]](s16)
+ ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
+ ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16)
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]]
- ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]]
- ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[INTRINSIC_TRUNC]]
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FSUB]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]]
; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]]
; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]]
; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]]
- ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
- ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[INTRINSIC_TRUNC1]]
+ ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FSUB1]]
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]]
; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]]
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]]
- ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]]
; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]]
- ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]]
- ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]]
+ ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[INTRINSIC_TRUNC2]]
+ ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FSUB2]]
; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]]
; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]]
; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]]
- ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
+ ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
; GFX9-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]]
- ; GFX9-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]]
- ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]]
- ; GFX9-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]]
+ ; GFX9-NEXT: [[FSUB3:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC3]], [[INTRINSIC_TRUNC3]]
+ ; GFX9-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FSUB3]]
; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]]
; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]]
; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]]
- ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]]
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
- ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
+ ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16)
- ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD7]](s16)
+ ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16)
+ ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v0, 0x4000
-; GISEL-NEXT: v_add_f16_e32 v1, -2.0, v1
-; GISEL-NEXT: v_add_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0
+; GISEL-NEXT: s_waitcnt_depctr 0xffe3
+; GISEL-NEXT: s_movk_i32 s0, 0x4000
+; GISEL-NEXT: v_subrev_f16_e32 v0, 2.0, v1
+; GISEL-NEXT: v_add_f16_sdwa v1, v2, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GISEL-NEXT: v_and_or_b32 v0, 0xffff, v0, v1
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; use v0
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: s_mov_b32 s0, 0x8000
; GISEL-NEXT: v_add_f16_e32 v0, 2.0, v1
; GISEL-NEXT: v_add_f16_e32 v1, 2.0, v2
-; GISEL-NEXT: v_add_f16_e64 v0, 0x8000, -v0
-; GISEL-NEXT: v_add_f16_sdwa v1, s0, -v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GISEL-NEXT: v_sub_f16_e32 v0, 0x8000, v0
+; GISEL-NEXT: v_sub_f16_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-NEXT: v_and_or_b32 v0, 0xffff, v0, v1
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; use v0