DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();
- if (ScalarVT != MVT::f32)
+ if (ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
// about in practice.
if (DCI.isAfterLegalizeDAG() && SrcVT == MVT::i32) {
if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
- SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, MVT::f32, Src);
DCI.AddToWorklist(Cvt.getNode());
+
+ // For the f16 case, fold to a cast to f32 and then cast back to f16.
+ if (ScalarVT != MVT::f32) {
+ Cvt = DAG.getNode(ISD::FP_ROUND, DL, VT, Cvt,
+ DAG.getTargetConstant(0, DL, MVT::i32));
+ }
return Cvt;
}
}
; VI-LABEL: v_uitofp_i32_to_f16_mask255:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%masked = and i32 %arg0, 255
; VI-LABEL: v_sitofp_i32_to_f16_mask255:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_i32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%masked = and i32 %arg0, 255
; VI-LABEL: v_uitofp_to_f16_lshr8_mask255:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1
+; VI-NEXT: v_cvt_f32_ubyte1_e32 v0, v0
; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%lshr.8 = lshr i32 %arg0, 8
; VI-LABEL: v_uitofp_to_f16_lshr16_mask255:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
+; VI-NEXT: v_cvt_f32_ubyte2_e32 v0, v0
; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%lshr.16 = lshr i32 %arg0, 16
; VI-LABEL: v_uitofp_to_f16_lshr24_mask255:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
+; VI-NEXT: v_cvt_f32_ubyte3_e32 v0, v0
; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%lshr.16 = lshr i32 %arg0, 24