From: Phoebe Wang <phoebe.wang@intel.com>
Date: Sat, 4 Feb 2023 10:23:39 +0000 (+0800)
Subject: [X86][FP16] Lower half->i16 into vcvttph2[u]w directly
X-Git-Tag: upstream/17.0.6~18602
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7f12efa88e17548d98f3e7425687f4afe0df34ed;p=platform%2Fupstream%2Fllvm.git

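[X86][FP16] Lower half->i16 into vcvttph2[u]w directly

Mark FP_TO_SINT/FP_TO_UINT (and their strict variants) on v16i16 as Custom
instead of promoting them to v16i32, so that v16f16->v16i16 conversions
select vcvttph2w/vcvttph2uw rather than vcvttph2dq followed by vpmovdw.
For f32/f64 sources the custom lowering still converts to the i32-element
vector type and truncates; as a side effect, v16f32->v16i16 fptoui now uses
vcvttps2udq instead of vcvttps2dq.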

Reviewed By: LuoYuanke, RKSimon

Differential Revision: https://reviews.llvm.org/D143170
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e9228e4..a218871 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1680,16 +1680,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FCOPYSIGN, VT, Custom);
     }
 
-    for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
+    for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
       setOperationPromotedToType(ISD::FP_TO_SINT       , VT, MVT::v16i32);
       setOperationPromotedToType(ISD::FP_TO_UINT       , VT, MVT::v16i32);
       setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
       setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
     }
-    setOperationAction(ISD::FP_TO_SINT,        MVT::v16i32, Custom);
-    setOperationAction(ISD::FP_TO_UINT,        MVT::v16i32, Custom);
-    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
-    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
+
+    for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
+      setOperationAction(ISD::FP_TO_SINT,        VT, Custom);
+      setOperationAction(ISD::FP_TO_UINT,        VT, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
+    }
+
     setOperationAction(ISD::SINT_TO_FP,        MVT::v16i32, Custom);
     setOperationAction(ISD::UINT_TO_FP,        MVT::v16i32, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
@@ -22830,19 +22834,24 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
       return Res;
     }
 
-    if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+    // v8f32/v16f32/v8f64->v8i16/v16i16: convert to i32 elements, then truncate.
+    if (VT.getVectorElementType() == MVT::i16) {
+      assert((SrcVT.getVectorElementType() == MVT::f32 ||
+              SrcVT.getVectorElementType() == MVT::f64) &&
+             "Expected f32/f64 vector!");
+      MVT NVT = VT.changeVectorElementType(MVT::i32);
       if (IsStrict) {
         Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
                                    : ISD::STRICT_FP_TO_UINT,
-                          dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+                          dl, {NVT, MVT::Other}, {Chain, Src});
         Chain = Res.getValue(1);
       } else {
         Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
-                          MVT::v8i32, Src);
+                          NVT, Src);
       }
 
       // TODO: Need to add exception check code for strict FP.
-      Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
 
       if (IsStrict)
         return DAG.getMergeValues({Res, Chain}, dl);
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index bfd9b32..93aba2e 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -424,7 +424,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
 define <16 x i16> @f32to16us(<16 x float> %f) {
 ; ALL-LABEL: f32to16us:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
+; ALL-NEXT:    vcvttps2udq %zmm0, %zmm0
 ; ALL-NEXT:    vpmovdw %zmm0, %ymm0
 ; ALL-NEXT:    retq
   %res = fptoui <16 x float> %f to <16 x i16>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
index 4c873eb..b1bedcf 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -771,8 +771,7 @@ define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
 define <16 x i16> @test_s16tof16(<16 x half> %a) {
 ; CHECK-LABEL: test_s16tof16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %res = fptosi <16 x half> %a to <16 x i16>
   ret <16 x i16> %res
@@ -781,8 +780,7 @@ define <16 x i16> @test_s16tof16(<16 x half> %a) {
 define <16 x i16> @test_u16tof16(<16 x half> %a) {
 ; CHECK-LABEL: test_u16tof16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %res = fptoui <16 x half> %a to <16 x i16>
   ret <16 x i16> %res
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
index bc0dd02..36d6f86 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
@@ -59,8 +59,7 @@ define <8 x i32> @strict_vector_fptoui_v8f16_to_v8i32(<8 x half> %a) #0 {
 define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
 ; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a,
                                               metadata !"fpexcept.strict") #0
@@ -70,8 +69,7 @@ define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
 define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 {
 ; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
-; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a,
                                               metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index af52e5f..cd39206 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -798,7 +798,7 @@ define <16 x i16> @strict_vector_fptosi_v16f32_to_v16i16(<16 x float> %a) #0 {
 define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 {
 ; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
+; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
 ; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,