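Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTUDQ2PD (u32 to f64) conversion instructions are lossless and can be safely represented as generic SINT_TO_FP/UINT_TO_FP operations instead of x86 intrinsics without affecting final codegen. The removed llvm.x86.avx512.mask.cvtdq2pd.* and llvm.x86.avx512.mask.cvtudq2pd.* intrinsics are auto-upgraded to a generic signed/unsigned int-to-fp conversion followed by a select on the mask operand.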
LLVM counterpart to D26686
Differential Revision: https://reviews.llvm.org/D26736
llvm-svn: 287108
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_cvtdq2pd_128 :
- GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtdq2pd_256 :
- GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtdq2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_cvtdq2ps_128 :
GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_cvtudq2pd_128 :
- GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtudq2pd_256 :
- GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtudq2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_cvtudq2ps_128 :
GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">,
Intrinsic<[llvm_v4f32_ty],
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name == "avx512.mask.add.pd.128" || // Added in 4.0
Name == "avx512.mask.add.pd.256" || // Added in 4.0
Name == "avx512.mask.add.ps.128" || // Added in 4.0
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
- Name == "avx.cvt.ps2.pd.256")) {
+ Name == "avx.cvt.ps2.pd.256" ||
+ Name.startswith("avx512.mask.cvtdq2pd.") ||
+ Name.startswith("avx512.mask.cvtudq2pd."))) {
// Lossless i32/u32/float to double conversion.
// Extract the bottom elements if necessary and convert to double vector.
Value *Src = CI->getArgOperand(0);
ShuffleMask);
}
- bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
- if (Int2Double)
+ bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
+ bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
+ if (SInt2Double)
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+ else if (UInt2Double)
+ Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+
+ if (CI->getNumArgOperands() == 3)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
- MVT SVT = N0.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
- if (SVT.getVectorElementType() == MVT::i1) {
- if (SVT == MVT::v2i1)
+ if (SrcVT.getVectorElementType() == MVT::i1) {
+ if (SrcVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
- MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
- switch (SVT.SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");
+ case MVT::v2i32: {
+ if (VT == MVT::v2f64)
+ return DAG.getNode(X86ISD::CVTUDQ2PD, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, N0,
+ DAG.getUNDEF(SrcVT)));
+ return SDValue();
+ }
case MVT::v4i8:
case MVT::v4i16:
case MVT::v8i8:
case MVT::v8i16: {
- MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTDQ2PD, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0), // no rm
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTUDQ2PD, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0), // no rm
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK,
ret <8 x i64> %res
}
+declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm2
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
ret void
}
-declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
-
-define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
-; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
-; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
- %res2 = fadd <8 x double> %res, %res1
- ret <8 x double> %res2
-}
-
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
ret <8 x i32> %res2
}
-declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
-
-define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
-; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
-; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
- %res2 = fadd <8 x double> %res, %res1
- ret <8 x double> %res2
-}
-
-
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
ret <4 x i64> %res4
}
+declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xd0]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
+; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xd0]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
+; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
+; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
+; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
ret void
}
-declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
-
-define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
-; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xc0]
-; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
- %res2 = fadd <2 x double> %res, %res1
- ret <2 x double> %res2
-}
-
-declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
-
-define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
-; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xc0]
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
- %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
- %res2 = fadd <4 x double> %res, %res1
- ret <4 x double> %res2
-}
-
declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
ret <8 x i32> %res2
}
-declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
-
-define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
-; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0]
-; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
- %res2 = fadd <2 x double> %res, %res1
- ret <2 x double> %res2
-}
-
-declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
-
-define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
-; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0]
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
- %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
- %res2 = fadd <4 x double> %res, %res1
- ret <4 x double> %res2
-}
-
declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2
;
; AVX512-LABEL: uitofp_2i32_to_2f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512-NEXT: retq
%shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%cvt = uitofp <2 x i32> %shuf to <2 x double>
; AVX512-LABEL: uitofp_load_2i32_to_2f64:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; AVX512-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512-NEXT: retq
%ld = load <2 x i32>, <2 x i32> *%a
%cvt = uitofp <2 x i32> %ld to <2 x double>