setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::SINT_TO_FP, VT, Legal);
- setOperationAction(ISD::UINT_TO_FP, VT, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
+ setOperationAction(ISD::SINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
DAG.getIntPtrConstant(0, DL));
}
+static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.hasDQI() && !Subtarget.hasVLX() && "Unexpected features");
+ // Lowers a v2i64/v4i64 int-to-fp node by widening its source to v8i64.
+ SDLoc DL(Op);
+ bool IsStrict = Op->isStrictFPOpcode();
+ MVT VT = Op->getSimpleValueType(0);
+ SDValue Src = Op->getOperand(IsStrict ? 1 : 0);
+ MVT SrcVT = Src.getSimpleValueType();
+ assert((SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64) &&
+ "Unsupported custom type");
+ // With AVX512DQ but not VLX, the source is widened to 512 bits (v8i64) and
+ // the result type widens with it: v4f32 -> v8f32, v2f64/v4f64 -> v8f64.
+ assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
+ "Unexpected VT!");
+ MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
+ // Place the source in the low elements (index 0) of a v8i64 vector.
+ // Need to concat with zero vector (instead of undef) for strict fp to avoid
+ // spurious exceptions.
+ SDValue Tmp =
+ IsStrict ? DAG.getConstant(0, DL, MVT::v8i64) : DAG.getUNDEF(MVT::v8i64);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
+ {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
+ }
+ // Only the low VT-typed subvector of the widened conversion is returned.
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+ // Strict nodes return the chain of the widened node alongside the value.
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, DL);
+ return Res;
+}
+
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
+ if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
+ return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);
+
return SDValue();
}
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ SDLoc DL(Op);
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue V = Op->getOperand(IsStrict ? 1 : 0);
+ MVT VecIntVT = V.getSimpleValueType();
+ assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
+ "Unsupported custom type");
+
+ if (Subtarget.hasAVX512()) {
+ // With AVX512, but not VLX we need to widen to get a 512-bit result type.
+ assert(!Subtarget.hasVLX() && "Unexpected features");
+ MVT VT = Op->getSimpleValueType(0);
+
+ // v8i32->v8f64 is legal with AVX512 so just return it.
+ if (VT == MVT::v8f64)
+ return Op;
+
+ assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
+ "Unexpected VT!");
+ MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
+ MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ SDValue Tmp =
+ IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT);
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other},
+ {Op->getOperand(0), V});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, DL);
+ return Res;
+ }
+
// The algorithm is the following:
// #ifdef __SSE4_1__
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
if (DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
- SDLoc DL(Op);
- bool IsStrict = Op->isStrictFPOpcode();
- SDValue V = Op->getOperand(IsStrict ? 1 : 0);
- MVT VecIntVT = V.getSimpleValueType();
bool Is128 = VecIntVT == MVT::v4i32;
MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something else than the supported type, e.g., to v4f64,
if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
- assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
- "Unsupported custom type");
-
// In the #idef/#else code, we have in common:
// - The vector of constants:
// -- 0x4b000000
return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl);
case MVT::v4i32:
case MVT::v8i32:
- assert(!Subtarget.hasAVX512());
return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
+ case MVT::v2i64:
+ case MVT::v4i64:
+ return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);
}
}
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
MVT SrcVT = Src.getSimpleValueType();
- MVT DstVT = Op.getSimpleValueType();
+ MVT DstVT = Op->getSimpleValueType(0);
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
if (DstVT == MVT::f128)
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
-
-def : Pat<(v8f32 (any_uint_to_fp (v8i32 VR256X:$src1))),
- (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4f32 (any_uint_to_fp (v4i32 VR128X:$src1))),
- (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4f64 (any_uint_to_fp (v4i32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
- (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_ymm)>;
}
let Predicates = [HasVLX] in {
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4f32 (any_sint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_xmm)>;
-
-def : Pat<(v2f64 (any_sint_to_fp (v2i64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4f64 (any_sint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4f32 (any_uint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_xmm)>;
-
-def : Pat<(v2f64 (any_uint_to_fp (v2i64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4f64 (any_uint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
; AVX1-64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: uitofp_v4i32_v4f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovaps %xmm0, %xmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i32_v4f32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: sitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512VL-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: uitofp_v8i32_v8f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v8i32_v8f32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: uitofp_v4i32_v4f64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vmovaps %xmm0, %xmm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i32_v4f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: sitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
; AVX512VL-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
;
; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NEXT: vmovaps %xmm0, %xmm0
; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vmovaps %xmm0, %xmm0
; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper