setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
- // FIXME: Does this apply to STRICT_UINT_TO_FP?
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
+ isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
}
return;
}
- // FIXME: Is this safe for strict fp?
- if (SrcVT != MVT::v2i32 || IsSigned || IsStrict)
+ if (SrcVT != MVT::v2i32 || IsSigned)
return;
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
- // TODO: Are there any fast-math-flags to propagate here?
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
- Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ if (IsStrict) {
+ SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
+ {N->getOperand(0), Or, VBias});
+ SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl,
+ {MVT::v4f32, MVT::Other},
+ {Sub.getValue(1), Sub});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ } else {
+ // TODO: Are there any fast-math-flags to propagate here?
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
+ Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ }
return;
}
case ISD::STRICT_FP_ROUND:
}
define <2 x float> @uitofp_v2i32_v2f32(<2 x i32> %x) #0 {
-; SSE-32-LABEL: uitofp_v2i32_v2f32:
-; SSE-32: # %bb.0:
-; SSE-32-NEXT: xorps %xmm2, %xmm2
-; SSE-32-NEXT: xorps %xmm1, %xmm1
-; SSE-32-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE-32-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
-; SSE-32-NEXT: orps %xmm3, %xmm1
-; SSE-32-NEXT: subsd %xmm3, %xmm1
-; SSE-32-NEXT: cvtsd2ss %xmm1, %xmm1
-; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
-; SSE-32-NEXT: orps %xmm3, %xmm2
-; SSE-32-NEXT: subsd %xmm3, %xmm2
-; SSE-32-NEXT: xorps %xmm0, %xmm0
-; SSE-32-NEXT: cvtsd2ss %xmm2, %xmm0
-; SSE-32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-32-NEXT: movaps %xmm1, %xmm0
-; SSE-32-NEXT: retl
-;
-; SSE-64-LABEL: uitofp_v2i32_v2f32:
-; SSE-64: # %bb.0:
-; SSE-64-NEXT: movd %xmm0, %eax
-; SSE-64-NEXT: cvtsi2ss %rax, %xmm1
-; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT: movd %xmm0, %eax
-; SSE-64-NEXT: xorps %xmm0, %xmm0
-; SSE-64-NEXT: cvtsi2ss %rax, %xmm0
-; SSE-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-64-NEXT: movaps %xmm1, %xmm0
-; SSE-64-NEXT: retq
-;
-; AVX1-32-LABEL: uitofp_v2i32_v2f32:
-; AVX1-32: # %bb.0:
-; AVX1-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-32-NEXT: vblendps {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3]
-; AVX1-32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX1-32-NEXT: vorps %xmm3, %xmm2, %xmm2
-; AVX1-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
-; AVX1-32-NEXT: vcvtsd2ss %xmm2, %xmm2, %xmm2
-; AVX1-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-32-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX1-32-NEXT: vorps %xmm3, %xmm0, %xmm0
-; AVX1-32-NEXT: vsubsd %xmm3, %xmm0, %xmm0
-; AVX1-32-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; AVX1-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
-; AVX1-32-NEXT: retl
-;
-; AVX1-64-LABEL: uitofp_v2i32_v2f32:
-; AVX1-64: # %bb.0:
-; AVX1-64-NEXT: vextractps $1, %xmm0, %eax
-; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-64-NEXT: vmovd %xmm0, %eax
-; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX1-64-NEXT: retq
-;
-; AVX512F-LABEL: uitofp_v2i32_v2f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextractps $1, %xmm0, %eax
-; AVX512F-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: ret{{[l|q]}}
-;
-; AVX512VL-LABEL: uitofp_v2i32_v2f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextractps $1, %xmm0, %eax
-; AVX512VL-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovd %xmm0, %eax
-; AVX512VL-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: ret{{[l|q]}}
-;
-; AVX512DQ-LABEL: uitofp_v2i32_v2f32:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vextractps $1, %xmm0, %eax
-; AVX512DQ-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vmovd %xmm0, %eax
-; AVX512DQ-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512DQ-NEXT: ret{{[l|q]}}
+; SSE-LABEL: uitofp_v2i32_v2f32:
+; SSE: # %bb.0:
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: subpd %xmm1, %xmm0
+; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
-; AVX512DQVL-LABEL: uitofp_v2i32_v2f32:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vextractps $1, %xmm0, %eax
-; AVX512DQVL-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
-; AVX512DQVL-NEXT: vmovd %xmm0, %eax
-; AVX512DQVL-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
-; AVX512DQVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512DQVL-NEXT: ret{{[l|q]}}
+; AVX-LABEL: uitofp_v2i32_v2f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
%result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; CHECK-NEXT: orpd %xmm1, %xmm0
+; CHECK-NEXT: subpd %xmm1, %xmm0
+; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retq
;
-; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractps $1, %xmm0, %eax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX1-NEXT: retq
-;
-; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vextractps $1, %xmm0, %eax
-; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512-NEXT: retq
+; AVX-LABEL: constrained_vector_uitofp_v2f32_v2i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%result = call <2 x float>
@llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,