// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNode128, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
+ SDNode OpNodeRnd, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
sched.ZMM>,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- OpNode128, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNode128, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
+ SDNode OpNodeRnd, X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
PS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
+defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
XS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
+defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
- X86cvttp2ui, X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
+ X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
+defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
+defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
+defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
+defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
EVEX_CD8<64, CD8VF>;
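+// The multiclasses above now select on the X86-specific truncating nodes
+// (X86cvttp2si/X86cvttp2ui), so the generic ISD::FP_TO_SINT/FP_TO_UINT nodes
+// need their own patterns to reach the same instructions; e.g. LLVM IR
+// "fptosi <16 x float> %x to <16 x i32>" now selects VCVTTPS2DQZrr through
+// the first pattern below.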
+let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
+ (VCVTTPS2DQZrr VR512:$src)>;
+ def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
+ (VCVTTPS2DQZrm addr:$src)>;
+
+ def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
+ (VCVTTPS2UDQZrr VR512:$src)>;
+ def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
+ (VCVTTPS2UDQZrm addr:$src)>;
+
+ def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
+ (VCVTTPD2DQZrr VR512:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
+ (VCVTTPD2DQZrm addr:$src)>;
+
+ def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
+ (VCVTTPD2UDQZrr VR512:$src)>;
+ def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
+ (VCVTTPD2UDQZrm addr:$src)>;
+}
+
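+// The 128-bit and 256-bit forms additionally require AVX512VL.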
+let Predicates = [HasVLX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
+ (VCVTTPS2DQZ128rr VR128X:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
+ (VCVTTPS2DQZ128rm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
+ (VCVTTPS2UDQZ128rr VR128X:$src)>;
+ def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
+ (VCVTTPS2UDQZ128rm addr:$src)>;
+
+ def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
+ (VCVTTPS2DQZ256rr VR256X:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
+ (VCVTTPS2DQZ256rm addr:$src)>;
+
+ def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
+ (VCVTTPS2UDQZ256rr VR256X:$src)>;
+ def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
+ (VCVTTPS2UDQZ256rm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
+ (VCVTTPD2DQZ256rr VR256X:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
+ (VCVTTPD2DQZ256rm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
+ (VCVTTPD2UDQZ256rr VR256X:$src)>;
+ def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
+ (VCVTTPD2UDQZ256rm addr:$src)>;
+}
+
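+// Quadword conversions require AVX512DQ.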
+let Predicates = [HasDQI] in {
+ def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
+ (VCVTTPS2QQZrr VR256X:$src)>;
+ def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
+ (VCVTTPS2QQZrm addr:$src)>;
+
+ def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
+ (VCVTTPS2UQQZrr VR256X:$src)>;
+ def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
+ (VCVTTPS2UQQZrm addr:$src)>;
+
+ def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
+ (VCVTTPD2QQZrr VR512:$src)>;
+ def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
+ (VCVTTPD2QQZrm addr:$src)>;
+
+ def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
+ (VCVTTPD2UQQZrr VR512:$src)>;
+ def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
+ (VCVTTPD2UQQZrm addr:$src)>;
+}
+
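+// The 128-bit and 256-bit quadword forms require both AVX512DQ and AVX512VL.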
+let Predicates = [HasDQI, HasVLX] in {
+ def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
+ (VCVTTPS2QQZ256rr VR128X:$src)>;
+ def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
+ (VCVTTPS2QQZ256rm addr:$src)>;
+
+ def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
+ (VCVTTPS2UQQZ256rr VR128X:$src)>;
+ def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
+ (VCVTTPS2UQQZ256rm addr:$src)>;
+
+ def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
+ (VCVTTPD2QQZ128rr VR128X:$src)>;
+ def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
+ (VCVTTPD2QQZ128rm addr:$src)>;
+
+ def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
+ (VCVTTPD2UQQZ128rr VR128X:$src)>;
+ def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
+ (VCVTTPD2UQQZ128rm addr:$src)>;
+
+ def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
+ (VCVTTPD2QQZ256rr VR256X:$src)>;
+ def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
+ (VCVTTPD2QQZ256rm addr:$src)>;
+
+ def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
+ (VCVTTPD2UQQZ256rr VR256X:$src)>;
+ def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
+ (VCVTTPD2UQQZ256rm addr:$src)>;
+}
+
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
(MMX_CVTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
+ (MMX_CVTTPS2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (fp_to_sint (v4f32 VR128:$src))))]>,
+ (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (fp_to_sint (loadv4f32 addr:$src))))]>,
+ (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>,
VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v8i32 (fp_to_sint (v8f32 VR256:$src))))]>,
+ (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v8i32 (fp_to_sint (loadv8f32 addr:$src))))]>,
+ (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>,
VEX, VEX_L,
Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}
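+// Map the generic nodes onto the VEX-encoded instructions when the EVEX
+// (VLX) forms are unavailable.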
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
+ (VCVTTPS2DQrm addr:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
+ (VCVTTPS2DQYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
+ (VCVTTPS2DQYrm addr:$src)>;
+}
+
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (fp_to_sint (v4f32 VR128:$src))))]>,
+ (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
Sched<[WriteCvtPS2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (fp_to_sint (memopv4f32 addr:$src))))]>,
+ (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
Sched<[WriteCvtPS2ILd]>;
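+// Map the generic nodes onto the SSE2 instructions.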
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (CVTTPS2DQrm addr:$src)>;
+}
+
let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
+
let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (fp_to_sint (v4f64 VR256:$src))))]>,
+ (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (fp_to_sint (loadv4f64 addr:$src))))]>,
+ (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
}
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
+ (VCVTTPD2DQYrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
let AddedComplexity = 15 in {
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
  X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256, CVTPD2PS, ISD::FP_ROUND, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
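+  // The cvtt* intrinsics have well-defined results for out-of-range inputs,
+  // unlike ISD::FP_TO_SINT/FP_TO_UINT, so model them with the X86-specific
+  // truncating nodes; this keeps generic fp_to_sint/fp_to_uint combines from
+  // firing on them.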
- X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
- X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, 0),
+ X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, 0),
+ X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, 0),
+ X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
- X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
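; Now that the truncating-convert intrinsics lower through
; X86ISD::CVTTP2SI/CVTTP2UI rather than ISD::FP_TO_SINT/FP_TO_UINT, the
; (sitofp (fptosi x)) -> round combine no longer applies to them, so these
; tests expect an explicit truncating convert followed by a convert back
; instead of a single vround/vrndscale.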
define <8 x float> @float_to_int_to_float_mem_v8f32(<8 x float>* %p) {
; AVX-LABEL: float_to_int_to_float_mem_v8f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $11, (%rdi), %ymm0
+; AVX-NEXT: vcvttps2dq (%rdi), %ymm0
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT: retq
%x = load <8 x float>, <8 x float>* %p, align 16
%fptosi = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %x)
define <8 x float> @float_to_int_to_float_reg_v8f32(<8 x float> %x) {
; AVX-LABEL: float_to_int_to_float_reg_v8f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $11, %ymm0, %ymm0
+; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT: retq
%fptosi = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %x)
%sitofp = sitofp <8 x i32> %fptosi to <8 x float>
define <4 x double> @float_to_int_to_float_mem_v4f64(<4 x double>* %p) {
; AVX-LABEL: float_to_int_to_float_mem_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vroundpd $11, (%rdi), %ymm0
+; AVX-NEXT: vcvttpd2dqy (%rdi), %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
%x = load <4 x double>, <4 x double>* %p, align 16
%fptosi = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %x)
define <4 x double> @float_to_int_to_float_reg_v4f64(<4 x double> %x) {
; AVX-LABEL: float_to_int_to_float_reg_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vroundpd $11, %ymm0, %ymm0
+; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
%fptosi = tail call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %x)
%sitofp = sitofp <4 x i32> %fptosi to <4 x double>
define <16 x float> @float_to_sint_to_float_mem_v16f32(<16 x float>* %p) {
; CHECK-LABEL: float_to_sint_to_float_mem_v16f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0
+; CHECK-NEXT: vcvttps2dq (%rdi), %zmm0
+; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: retq
%x = load <16 x float>, <16 x float>* %p
%fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_sint_to_float_reg_v16f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
+; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
+; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: retq
%fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
%sitofp = sitofp <16 x i32> %fptosi to <16 x float>
define <16 x float> @float_to_uint_to_float_mem_v16f32(<16 x float>* %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v16f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0
+; CHECK-NEXT: vcvttps2udq (%rdi), %zmm0
+; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT: retq
%x = load <16 x float>, <16 x float>* %p
%fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v16f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
+; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0
+; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT: retq
%fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
%uitofp = uitofp <16 x i32> %fptoui to <16 x float>
define <4 x float> @float_to_uint_to_float_mem_v4f32(<4 x float>* %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, (%rdi), %xmm0
+; CHECK-NEXT: vcvttps2udq (%rdi), %xmm0
+; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT: retq
%x = load <4 x float>, <4 x float>* %p
%fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0
+; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT: retq
%fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
%uitofp = uitofp <4 x i32> %fptoui to <4 x float>
define <8 x float> @float_to_uint_to_float_mem_v8f32(<8 x float>* %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, (%rdi), %ymm0
+; CHECK-NEXT: vcvttps2udq (%rdi), %ymm0
+; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %p
%fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
+; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: retq
%fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
%uitofp = uitofp <8 x i32> %fptoui to <8 x float>
define <4 x double> @double_to_uint_to_double_mem_v4f64(<4 x double>* %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0
+; CHECK-NEXT: vcvttpd2udqy (%rdi), %xmm0
+; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT: retq
%x = load <4 x double>, <4 x double>* %p
%fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0
+; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT: retq
%fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
%uitofp = uitofp <4 x i32> %fptoui to <4 x double>
define <8 x double> @double_to_sint_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_sint_to_double_mem_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT: vcvttpd2dq (%rdi), %ymm0
+; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: retq
%x = load <8 x double>, <8 x double>* %p
%fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
define <8 x double> @double_to_sint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint_to_double_reg_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0
+; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: retq
%fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
%sitofp = sitofp <8 x i32> %fptosi to <8 x double>
define <8 x double> @double_to_uint_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT: vcvttpd2udq (%rdi), %ymm0
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT: retq
%x = load <8 x double>, <8 x double>* %p
%fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT: retq
%fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
%uitofp = uitofp <8 x i32> %fptoui to <8 x double>
define <4 x float> @float_to_sint64_to_float_mem_v4f32(<4 x float>* %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, (%rdi), %xmm0
+; CHECK-NEXT: vcvttps2qq (%rdi), %ymm0
+; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%x = load <4 x float>, <4 x float>* %p
%fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0
+; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
%sitofp = sitofp <4 x i64> %fptosi to <4 x float>
define <4 x float> @float_to_uint64_to_float_mem_v4f32(<4 x float>* %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, (%rdi), %xmm0
+; CHECK-NEXT: vcvttps2uqq (%rdi), %ymm0
+; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%x = load <4 x float>, <4 x float>* %p
%fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0
+; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
%uitofp = uitofp <4 x i64> %fptoui to <4 x float>
define <8 x float> @float_to_sint64_to_float_mem_v8f32(<8 x float>* %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, (%rdi), %ymm0
+; CHECK-NEXT: vcvttps2qq (%rdi), %zmm0
+; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %p
%fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT: vcvttps2qq %ymm0, %zmm0
+; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT: retq
%fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
%sitofp = sitofp <8 x i64> %fptosi to <8 x float>
define <8 x float> @float_to_uint64_to_float_mem_v8f32(<8 x float>* %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, (%rdi), %ymm0
+; CHECK-NEXT: vcvttps2uqq (%rdi), %zmm0
+; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %p
%fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm0
+; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT: retq
%fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
%uitofp = uitofp <8 x i64> %fptoui to <8 x float>
define <2 x double> @double_to_sint64_to_double_mem_v2f64(<2 x double>* %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, (%rdi), %xmm0
+; CHECK-NEXT: vcvttpd2qq (%rdi), %xmm0
+; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %p
%fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, %xmm0, %xmm0
+; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0
+; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT: retq
%fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
%sitofp = sitofp <2 x i64> %fptosi to <2 x double>
define <2 x double> @double_to_uint64_to_double_mem_v2f64(<2 x double>* %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, (%rdi), %xmm0
+; CHECK-NEXT: vcvttpd2uqq (%rdi), %xmm0
+; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %p
%fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, %xmm0, %xmm0
+; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0
+; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT: retq
%fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
%uitofp = uitofp <2 x i64> %fptoui to <2 x double>
define <4 x double> @double_to_sint64_to_double_mem_v4f64(<4 x double>* %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0
+; CHECK-NEXT: vcvttpd2qq (%rdi), %ymm0
+; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT: retq
%x = load <4 x double>, <4 x double>* %p
%fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0
+; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT: retq
%fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
%sitofp = sitofp <4 x i64> %fptosi to <4 x double>
define <4 x double> @double_to_uint64_to_double_mem_v4f64(<4 x double>* %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0
+; CHECK-NEXT: vcvttpd2uqq (%rdi), %ymm0
+; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT: retq
%x = load <4 x double>, <4 x double>* %p
%fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT: retq
%fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
%uitofp = uitofp <4 x i64> %fptoui to <4 x double>
define <8 x double> @double_to_sint64_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT: vcvttpd2qq (%rdi), %zmm0
+; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT: retq
%x = load <8 x double>, <8 x double>* %p
%fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm0
+; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT: retq
%fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
%sitofp = sitofp <8 x i64> %fptosi to <8 x double>
define <8 x double> @double_to_uint64_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0
+; CHECK-NEXT: vcvttpd2uqq (%rdi), %zmm0
+; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT: retq
%x = load <8 x double>, <8 x double>* %p
%fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm0
+; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT: retq
%fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
%uitofp = uitofp <8 x i64> %fptoui to <8 x double>
define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: roundps $11, (%rdi), %xmm0
+; SSE-NEXT: cvttps2dq (%rdi), %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: float_to_int_to_float_mem_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $11, (%rdi), %xmm0
+; AVX-NEXT: vcvttps2dq (%rdi), %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
%x = load <4 x float>, <4 x float>* %p, align 16
%fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) {
; SSE-LABEL: float_to_int_to_float_reg_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: roundps $11, %xmm0, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: float_to_int_to_float_reg_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $11, %xmm0, %xmm0
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
%fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
%sitofp = sitofp <4 x i32> %fptosi to <4 x float>