From: Phoebe Wang Date: Sat, 4 Feb 2023 10:23:39 +0000 (+0800) Subject: [X86][FP16] Lower half->i16 into vcvttph2[u]w directly X-Git-Tag: upstream/17.0.6~18602 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7f12efa88e17548d98f3e7425687f4afe0df34ed;p=platform%2Fupstream%2Fllvm.git [X86][FP16] Lower half->i16 into vcvttph2[u]w directly Reviewed By: LuoYuanke, RKSimon Differential Revision: https://reviews.llvm.org/D143170 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e9228e4..a218871 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1680,16 +1680,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, VT, Custom); } - for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) { + for (MVT VT : { MVT::v16i1, MVT::v16i8 }) { setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32); setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); } - setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom); + + for (MVT VT : { MVT::v16i16, MVT::v16i32 }) { + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); + } + setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom); @@ -22830,19 +22834,24 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { return Res; } - if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) { + // v8f32/v16f32/v8f64->v8i16/v16i16 need to widen first. + if (VT.getVectorElementType() == MVT::i16) { + assert((SrcVT.getVectorElementType() == MVT::f32 || + SrcVT.getVectorElementType() == MVT::f64) && + "Expected f32/f64 vector!"); + MVT NVT = VT.changeVectorElementType(MVT::i32); if (IsStrict) { Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT : ISD::STRICT_FP_TO_UINT, - dl, {MVT::v8i32, MVT::Other}, {Chain, Src}); + dl, {NVT, MVT::Other}, {Chain, Src}); Chain = Res.getValue(1); } else { Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, - MVT::v8i32, Src); + NVT, Src); } // TODO: Need to add exception check code for strict FP. - Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res); + Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); if (IsStrict) return DAG.getMergeValues({Res, Chain}, dl); diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index bfd9b32..93aba2e 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -424,7 +424,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { define <16 x i16> @f32to16us(<16 x float> %f) { ; ALL-LABEL: f32to16us: ; ALL: # %bb.0: -; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 +; ALL-NEXT: vcvttps2udq %zmm0, %zmm0 ; ALL-NEXT: vpmovdw %zmm0, %ymm0 ; ALL-NEXT: retq %res = fptoui <16 x float> %f to <16 x i16> diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll index 4c873eb..b1bedcf 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll @@ -771,8 +771,7 @@ define <2 x half> @test_u33tofp2(<2 x i33> %arg0) { define <16 x i16> @test_s16tof16(<16 x half> %a) { ; CHECK-LABEL: test_s16tof16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2w %ymm0, %ymm0 ; CHECK-NEXT: retq %res = fptosi <16 x half> %a to <16 x i16> ret <16 x i16> %res @@ -781,8 +780,7 @@ define <16 x i16> @test_s16tof16(<16 x half> %a) { define <16 x i16> @test_u16tof16(<16 x half> %a) { ; CHECK-LABEL: test_u16tof16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 ; CHECK-NEXT: retq %res = fptoui <16 x half> %a to <16 x i16> ret <16 x i16> %res diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll index bc0dd02..36d6f86 100644 --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll @@ -59,8 +59,7 @@ define <8 x i32> @strict_vector_fptoui_v8f16_to_v8i32(<8 x half> %a) #0 { define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 { ; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2w %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a, metadata !"fpexcept.strict") #0 @@ -70,8 +69,7 @@ define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 { define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 { ; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a, metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll index af52e5f..cd39206 100644 --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll @@ -798,7 +798,7 @@ define <16 x i16> @strict_vector_fptosi_v16f32_to_v16i16(<16 x float> %a) #0 { define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 { ; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0 +; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,