if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
- if (Subtarget.hasFP16())
- return SDValue();
-
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
SrcVT.getVectorElementType() != MVT::f32)
return SDValue();
+ SDLoc dl(N);
+
+ SDValue Cvt, Chain;
unsigned NumElts = VT.getVectorNumElements();
- if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ if (Subtarget.hasFP16()) {
+ // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
+ // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
+ if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+ SDValue Cvt0, Cvt1;
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ bool IsOp0Strict = Op0->isStrictFPOpcode();
+ if (Op0.getOpcode() != Op1.getOpcode() ||
+ Op0.getOperand(IsOp0Strict ? 1 : 0).getValueType() != MVT::v4i64 ||
+ Op1.getOperand(IsOp0Strict ? 1 : 0).getValueType() != MVT::v4i64) {
+ return SDValue();
+ }
+ int Mask[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+ if (IsStrict) {
+ assert(IsOp0Strict && "Op0 must be strict node");
+ unsigned Opc = Op0.getOpcode() == ISD::STRICT_SINT_TO_FP
+ ? X86ISD::STRICT_CVTSI2P
+ : X86ISD::STRICT_CVTUI2P;
+ Cvt0 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {Op0.getOperand(0), Op0.getOperand(1)});
+ Cvt1 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {Op1.getOperand(0), Op1.getOperand(1)});
+ Cvt = DAG.getVectorShuffle(MVT::v8f16, dl, Cvt0, Cvt1, Mask);
+ return DAG.getMergeValues({Cvt, Cvt0.getValue(1)}, dl);
+ }
+ unsigned Opc = Op0.getOpcode() == ISD::SINT_TO_FP ? X86ISD::CVTSI2P
+ : X86ISD::CVTUI2P;
+ Cvt0 = DAG.getNode(Opc, dl, MVT::v8f16, Op0.getOperand(0));
+ Cvt1 = DAG.getNode(Opc, dl, MVT::v8f16, Op1.getOperand(0));
+ return Cvt = DAG.getVectorShuffle(MVT::v8f16, dl, Cvt0, Cvt1, Mask);
+ }
return SDValue();
+ }
- SDLoc dl(N);
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ return SDValue();
// Widen to at least 4 input elements.
if (NumElts < 4)
DAG.getConstantFP(0.0, dl, SrcVT));
// Destination is v8i16 with at least 8 elements.
- EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- std::max(8U, NumElts));
- SDValue Cvt, Chain;
+ EVT CvtVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i16, std::max(8U, NumElts));
SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32);
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other},
define <8 x half> @s64tof16(<8 x i64> %a) #0 {
; CHECK-LABEL: s64tof16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%1 = sitofp <8 x i64> %a to <8 x half>
define <8 x half> @u64tof16(<8 x i64> %a) #0 {
; CHECK-LABEL: u64tof16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtuqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtuqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%1 = uitofp <8 x i64> %a to <8 x half>
define <8 x half> @sitofp_v8i64_v8f16(<8 x i64> %x) #1 {
; CHECK-LABEL: sitofp_v8i64_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64> %x,
define <8 x half> @uitofp_v8i64_v8f16(<8 x i64> %x) #1 {
; CHECK-LABEL: uitofp_v8i64_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtuqq2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vcvtuqq2ph %ymm1, %xmm1
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64> %x,