[X86] Custom widen v2i32->v2f32 strict_sint_to_fp to avoid scalarization.

author Craig Topper <craig.topper@gmail.com>

Fri, 27 Dec 2019 07:57:33 +0000 (23:57 -0800)

committer Craig Topper <craig.topper@gmail.com>

Fri, 27 Dec 2019 08:28:44 +0000 (00:28 -0800)
author Craig Topper <craig.topper@gmail.com>
Fri, 27 Dec 2019 07:57:33 +0000 (23:57 -0800)
committer Craig Topper <craig.topper@gmail.com>
Fri, 27 Dec 2019 08:28:44 +0000 (00:28 -0800)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 530a790..cfb5e11 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -987,6 +987,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      setOperationAction(ISD::STRICT_UINT_TO_FP,  MVT::v2i32, Custom);
  
      // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v2f32, Custom);
+    setOperationAction(ISD::STRICT_SINT_TO_FP,  MVT::v2f32, Custom);
      setOperationAction(ISD::UINT_TO_FP,         MVT::v2f32, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP,  MVT::v2f32, Custom);
  
@@ -1848,8 +1850,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      if (Subtarget.hasDQI()) {
        // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
        // v2f32 UINT_TO_FP is already custom under SSE2.
-      setOperationAction(ISD::SINT_TO_FP,        MVT::v2f32, Custom);
-      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
        assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
               isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
               "Unexpected operation action!");
@@ -29009,8 +29009,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
        }
        return;
      }
-    if (SrcVT != MVT::v2i32 || IsSigned)
+    if (SrcVT != MVT::v2i32)
        return;
+
+    if (IsSigned) {
+      if (!IsStrict)
+        return;
+
+      // Custom widen strict v2i32->v2f32 to avoid scalarization.
+      // FIXME: Should the generic type legalizer do this?
+      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+                        DAG.getConstant(0, dl, MVT::v2i32));
+      SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
+                                {N->getOperand(0), Src});
+      Results.push_back(Res);
+      Results.push_back(Res.getValue(1));
+      return;
+    }
+
      assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
      SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
      SDValue VBias =
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll

index daf4bb5..9cc2c2f 100644 (file)
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
@@ -38,23 +38,14 @@ declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>
  define <2 x float> @sitofp_v2i32_v2f32(<2 x i32> %x) #0 {
  ; SSE-LABEL: sitofp_v2i32_v2f32:
  ; SSE:       # %bb.0:
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    cvtsi2ss %eax, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    xorps %xmm0, %xmm0
-; SSE-NEXT:    cvtsi2ss %eax, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
  ; SSE-NEXT:    ret{{[l|q]}}
  ;
  ; AVX-LABEL: sitofp_v2i32_v2f32:
  ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $1, %xmm0, %eax
-; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm1
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm0
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
  ; AVX-NEXT:    ret{{[l|q]}}
   %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
                                                                metadata !"round.dynamic",
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll

index 478594f..73371bb 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -6220,23 +6220,14 @@ entry:
  define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
  ; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
  ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    cvtsi2ss %eax, %xmm1
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    cvtsi2ss %eax, %xmm0
-; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    cvtdq2ps %xmm0, %xmm0
  ; CHECK-NEXT:    retq
  ;
  ; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
  ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vextractps $1, %xmm0, %eax
-; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm1
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm0
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
  ; AVX-NEXT:    retq
  entry:
    %result = call <2 x float>
author	Craig Topper <craig.topper@gmail.com>
	Fri, 27 Dec 2019 07:57:33 +0000 (23:57 -0800)
committer	Craig Topper <craig.topper@gmail.com>
	Fri, 27 Dec 2019 08:28:44 +0000 (00:28 -0800)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll		patch | blob | history
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll		patch | blob | history