setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
// v2f32 UINT_TO_FP is already custom under SSE2.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
}
return;
}
- if (SrcVT != MVT::v2i32 || IsSigned)
+ if (SrcVT != MVT::v2i32)
return;
+
+ if (IsSigned) {
+ if (!IsStrict)
+ return;
+
+ // Custom widen strict v2i32->v2f32 SINT_TO_FP to v4i32->v4f32 to avoid
+ // scalarization. The upper two source lanes are padded with zeros.
+ // FIXME: Should the generic type legalizer do this?
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ DAG.getConstant(0, dl, MVT::v2i32));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
define <2 x float> @sitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f32:
; SSE: # %bb.0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: cvtsi2ss %eax, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: cvtsi2ss %eax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f32:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
metadata !"round.dynamic",
define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2ss %eax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %eax, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <2 x float>