From: Craig Topper
Date: Sat, 12 Jan 2019 00:55:27 +0000 (+0000)
Subject: [X86] Add ISD nodes for masked truncate so we can properly represent when the output...
X-Git-Tag: llvmorg-8.0.0-rc1~369
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=abe6ef8d09d169da259e0f629df05a44d716dc7f;p=platform%2Fupstream%2Fllvm.git

[X86] Add ISD nodes for masked truncate so we can properly represent when the
output has more elements than the input due to needing to be 128 bits.

We can't properly represent this with a vselect since the upper elements of the
result are supposed to be zeroed regardless of the mask.

This also reuses the new nodes even when the result type fits in 128 bits if the
input is q/d and the result is w/b, since vselect on w/b using a k-register
condition isn't legal without avx512bw. Currently we're doing this even when
avx512bw is enabled, but I might change that.

This fixes some of PR34877

llvm-svn: 350985
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e074655..bd6a527 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21385,10 +21385,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
   case X86ISD::VPSHUFBITQMB:
   case X86ISD::VFPCLASS:
     return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
-  case ISD::TRUNCATE:
-  case X86ISD::VTRUNC:
-  case X86ISD::VTRUNCS:
-  case X86ISD::VTRUNCUS:
   case X86ISD::CVTPS2PH:
     // We can't use ISD::VSELECT here because it is not always "Legal"
     // for the destination type. For example vpmovqb require only AVX512
@@ -22068,6 +22064,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue Results[] = { SetCC, Res };
       return DAG.getMergeValues(Results, dl);
     }
+    case TRUNCATE_TO_REG: {
+      SDValue Src = Op.getOperand(1);
+      SDValue PassThru = Op.getOperand(2);
+      SDValue Mask = Op.getOperand(3);
+
+      if (isAllOnesConstant(Mask))
+        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl,
+                                                Op.getValueType(), Src),
+                                    Mask, PassThru, Subtarget, DAG);
+
+      MVT SrcVT = Src.getSimpleValueType();
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+      Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+      return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+                         Mask);
+    }
     default:
       break;
     }
@@ -27172,6 +27184,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
   case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
   case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+  case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
+  case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
+  case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
   case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
   case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
   case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8968559..7e01590 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -292,6 +292,12 @@ namespace llvm {
       // Vector integer truncate with unsigned/signed saturation.
       VTRUNCUS, VTRUNCS,
 
+      // Masked version of the above. Used when less than a 128-bit result is
+      // produced since the mask only applies to the lower elements and can't
+      // be represented by a select.
+      // SRC, PASSTHRU, MASK
+      VMTRUNC, VMTRUNCUS, VMTRUNCS,
+
       // Vector FP extend.
VFPEXT, VFPEXT_RND, VFPEXTS_RND, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 97c9eac..46cddee 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9318,14 +9318,47 @@ defm : avx512_masked_scalar_imm; +def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask), + (vselect node:$mask, + (X86vtruncs node:$src), node:$src0)>; +def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask), + (vselect node:$mask, + (X86vtruncus node:$src), node:$src0)>; + multiclass avx512_trunc_common opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, X86MemOperand x86memop> { - let ExeDomain = DestInfo.ExeDomain in - defm rr : AVX512_maskable, - EVEX, T8XS, Sched<[sched]>; + let ExeDomain = DestInfo.ExeDomain in { + def rr : AVX512XS8I, + EVEX, Sched<[sched]>; + let Constraints = "$src0 = $dst" in + def rrk : AVX512XS8I, + EVEX, EVEX_K, Sched<[sched]>; + def rrkz : AVX512XS8I, + EVEX, EVEX_KZ, Sched<[sched]>; + } let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { def mr : AVX512XS8I opc, string OpcodeStr, SDNode OpNode128, - SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched, + SDNode OpNode256, SDNode OpNode512, + SDPatternOperator MaskNode128, + SDPatternOperator MaskNode256, + SDPatternOperator MaskNode512, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, @@ -9365,118 +9402,167 @@ multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, PatFrag mtruncFrag, Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V128; - defm Z256: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V256; } let Predicates = [prd] in - defm Z: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V512; } multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, PatFrag StoreNode, - PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; } multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, PatFrag StoreNode, - PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; } multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, PatFrag StoreNode, - PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; } multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, PatFrag StoreNode, - PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; } multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, PatFrag StoreNode, - PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode, + SDPatternOperator MaskNode, X86FoldableSchedWrite sched, PatFrag StoreNode, - 
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + PatFrag MaskedStoreNode, SDNode InVecNode, + SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } -defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256, - truncstorevi8, masked_truncstorevi8, X86vtrunc>; -defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256, - truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256, - truncstore_us_vi8, masked_truncstore_us_vi8>; - -defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256, - truncstorevi16, masked_truncstorevi16, X86vtrunc>; -defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256, - truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256, - truncstore_us_vi16, masked_truncstore_us_vi16>; - -defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256, - truncstorevi32, masked_truncstorevi32, X86vtrunc>; -defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256, - truncstore_s_vi32, masked_truncstore_s_vi32>; -defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256, - truncstore_us_vi32, masked_truncstore_us_vi32>; - -defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256, - truncstorevi8, masked_truncstorevi8, X86vtrunc>; -defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256, - truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256, - truncstore_us_vi8, masked_truncstore_us_vi8>; - -defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256, - truncstorevi16, masked_truncstorevi16, X86vtrunc>; -defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256, - truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256, - truncstore_us_vi16, masked_truncstore_us_vi16>; - -defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256, - truncstorevi8, masked_truncstorevi8, X86vtrunc>; -defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256, - truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256, - truncstore_us_vi8, masked_truncstore_us_vi8>; +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc, + WriteShuffle256, truncstorevi8, + masked_truncstorevi8, X86vtrunc, X86vmtrunc>; +defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs, + WriteShuffle256, truncstore_s_vi8, + masked_truncstore_s_vi8, X86vtruncs, + X86vmtruncs>; +defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, + select_truncus, WriteShuffle256, + truncstore_us_vi8, masked_truncstore_us_vi8, + X86vtruncus, X86vmtruncus>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, + WriteShuffle256, truncstorevi16, + masked_truncstorevi16, X86vtrunc, X86vmtrunc>; +defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, + WriteShuffle256, truncstore_s_vi16, + masked_truncstore_s_vi16, X86vtruncs, + X86vmtruncs>; +defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, + select_truncus, WriteShuffle256, + truncstore_us_vi16, masked_truncstore_us_vi16, + X86vtruncus, X86vmtruncus>; + +defm VPMOVQD : avx512_trunc_qd<0x35, 
"vpmovqd", trunc, select_trunc, + WriteShuffle256, truncstorevi32, + masked_truncstorevi32, X86vtrunc, X86vmtrunc>; +defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, + WriteShuffle256, truncstore_s_vi32, + masked_truncstore_s_vi32, X86vtruncs, + X86vmtruncs>; +defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, + select_truncus, WriteShuffle256, + truncstore_us_vi32, masked_truncstore_us_vi32, + X86vtruncus, X86vmtruncus>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, + WriteShuffle256, truncstorevi8, + masked_truncstorevi8, X86vtrunc, X86vmtrunc>; +defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, + WriteShuffle256, truncstore_s_vi8, + masked_truncstore_s_vi8, X86vtruncs, + X86vmtruncs>; +defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, + select_truncus, WriteShuffle256, + truncstore_us_vi8, masked_truncstore_us_vi8, + X86vtruncus, X86vmtruncus>; + +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, + WriteShuffle256, truncstorevi16, + masked_truncstorevi16, X86vtrunc, X86vmtrunc>; +defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, + WriteShuffle256, truncstore_s_vi16, + masked_truncstore_s_vi16, X86vtruncs, + X86vmtruncs>; +defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, + select_truncus, WriteShuffle256, + truncstore_us_vi16, masked_truncstore_us_vi16, + X86vtruncus, X86vmtruncus>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, + WriteShuffle256, truncstorevi8, + masked_truncstorevi8, X86vtrunc, + X86vmtrunc>; +defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, + WriteShuffle256, truncstore_s_vi8, + masked_truncstore_s_vi8, X86vtruncs, + X86vmtruncs>; +defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, + select_truncus, WriteShuffle256, + truncstore_us_vi8, masked_truncstore_us_vi8, + X86vtruncus, X86vmtruncus>; let Predicates = [HasAVX512, NoVLX] in { def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), @@ -9495,6 +9581,44 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), VR256X:$src, sub_ymm))), sub_xmm))>; } +// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes. 
+multiclass mtrunc_lowering { + def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), + DestInfo.RC:$src0, + SrcInfo.KRCWM:$mask)), + (!cast(InstrName#"rrk") DestInfo.RC:$src0, + SrcInfo.KRCWM:$mask, + SrcInfo.RC:$src)>; + + def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src), + DestInfo.ImmAllZerosV, + SrcInfo.KRCWM:$mask)), + (!cast(InstrName#"rrkz") SrcInfo.KRCWM:$mask, + SrcInfo.RC:$src)>; +} + +let Predicates = [HasVLX] in { +defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>; +defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>; +defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>; +} + +let Predicates = [HasAVX512] in { +defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>; +defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>; +defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>; + +defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>; +defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>; +defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>; + +defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>; +defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>; +defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>; +} + multiclass WriteShuffle256_common opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 9ba4611..d806472 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -104,12 +104,21 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisOpSmallerThanOp<0, 1>]>; + SDTCisInt<0>, SDTCisInt<1>, + SDTCisOpSmallerThanOp<0, 1>]>; +def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>, + SDTCisOpSmallerThanOp<0, 1>, + SDTCisSameAs<0, 2>, + SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<1, 3>]>; def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>; def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>; def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>; +def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>; +def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>; +def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>; def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>, diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index b1c75f0..1c811a6 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -32,6 +32,7 @@ enum IntrinsicType : uint16_t { IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK, COMPRESS_EXPAND_IN_REG, + TRUNCATE_TO_REG, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, GATHER_AVX2, @@ -667,110 +668,110 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMULS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMULS_RND, 0), - 
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK, - ISD::TRUNCATE, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK, - ISD::TRUNCATE, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK, - ISD::TRUNCATE, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), + X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, TRUNCATE_TO_REG, + ISD::TRUNCATE, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, TRUNCATE_TO_REG, + ISD::TRUNCATE, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, TRUNCATE_TO_REG, + ISD::TRUNCATE, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK, ISD::TRUNCATE, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK, ISD::TRUNCATE, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK, - ISD::TRUNCATE, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), + X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, TRUNCATE_TO_REG, + ISD::TRUNCATE, X86ISD::VMTRUNC), + X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG, + X86ISD::VTRUNC, X86ISD::VMTRUNC), X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK, ISD::TRUNCATE, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK, ISD::TRUNCATE, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK, - 
X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), + X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCS, X86ISD::VMTRUNCS), X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + 
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), + X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, TRUNCATE_TO_REG, + X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS), X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK, diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 7c5b874..9fec17d 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -2603,11 +2603,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> % ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqb %zmm0, %xmm2 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovqb %zmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovqb %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -2639,11 +2639,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -2675,11 +2675,11 @@ define 
<16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -2711,11 +2711,11 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> % ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqw %zmm0, %xmm2 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovqw %zmm0, %xmm0 -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -2747,11 +2747,11 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -2783,11 +2783,11 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -2924,11 +2924,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdb %zmm0, %xmm2 ; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovdb %zmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <16 x i8> 
@llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) @@ -2960,11 +2960,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) @@ -2996,11 +2996,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) @@ -3032,11 +3032,11 @@ define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovdw %zmm0, %ymm2 ; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 -; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpmovdw %zmm0, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: retq %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) @@ -3067,11 +3067,11 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i1 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 -; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: retq %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) @@ -3102,11 +3102,11 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 -; 
CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: retq %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 7c40918..215446f 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1454,22 +1454,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] ; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovwb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] ; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovwb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) @@ -1507,22 +1508,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: 
[0x62,0xf2,0x7e,0x89,0x20,0xc2] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] ; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] +; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] ; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) @@ -1560,22 +1562,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] ; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovuswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, 
%k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] ; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovuswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index e903572..13d08d0 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -1617,23 +1617,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2] +; X86-NEXT: vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2] ; X86-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2] +; X64-NEXT: vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2] ; X64-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> 
@llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) @@ -1671,23 +1671,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2] +; X86-NEXT: vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2] ; X86-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovsqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2] +; X64-NEXT: vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2] ; X64-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovsqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) @@ -1725,23 +1725,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2] +; X86-NEXT: vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2] ; X86-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovusqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb 
%xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2] +; X64-NEXT: vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2] ; X64-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovusqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) @@ -1779,24 +1779,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2] +; X86-NEXT: vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2] ; X86-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2] +; X64-NEXT: vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2] ; X64-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -1837,24 +1837,24 @@ declare <16 x i8> 
@llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2] +; X86-NEXT: vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2] ; X86-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovsqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2] +; X64-NEXT: vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2] ; X64-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovsqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -1895,24 +1895,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2] +; X86-NEXT: vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2] ; X86-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovusqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; 
X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2] +; X64-NEXT: vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2] ; X64-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovusqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -1955,21 +1955,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> % ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2] ; X86-NEXT: vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1] -; X86-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2] +; X64-NEXT: vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2] ; X64-NEXT: vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) @@ -2009,21 +2009,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2] ; X86-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} # encoding: 
[0x62,0xf2,0x7e,0x09,0x24,0xc1] -; X86-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovsqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2] +; X64-NEXT: vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2] ; X64-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovsqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) @@ -2063,21 +2063,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2] ; X86-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1] -; X86-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovusqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2] +; X64-NEXT: vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2] ; X64-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovusqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: 
[0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) @@ -2117,22 +2117,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> % ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2] ; X86-NEXT: vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1] -; X86-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2] +; X64-NEXT: vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2] ; X64-NEXT: vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -2175,22 +2175,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2] ; X86-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1] -; X86-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovsqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: 
vpmovsqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2] +; X64-NEXT: vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2] ; X64-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovsqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -2233,22 +2233,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2] ; X86-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1] -; X86-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovusqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2] +; X64-NEXT: vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2] ; X64-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovusqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -2291,21 +2291,21 @@ define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> % ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2] ; X86-NEXT: vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1] -; X86-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2] -; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # 
EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] -; X86-NEXT: vpmovqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0] -; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; X86-NEXT: vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0] +; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2] +; X64-NEXT: vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2] ; X64-NEXT: vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1] -; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] -; X64-NEXT: vpmovqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0] -; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; X64-NEXT: vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0] +; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2345,20 +2345,20 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1] -; X86-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2] -; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] ; X86-NEXT: vpmovsqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0] +; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8] +; X86-NEXT: vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] ; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2] -; X64-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1] -; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] ; X64-NEXT: vpmovsqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0] +; X64-NEXT: vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0] +; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) @@ -2399,21 +2399,21 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2] ; X86-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1] -; X86-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2] -; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] -; X86-NEXT: vpmovusqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0] -; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; X86-NEXT: vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0] +; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2] +; X64-NEXT: vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2] ; X64-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1] -; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] -; X64-NEXT: vpmovusqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0] -; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; X64-NEXT: vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0] +; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2625,23 +2625,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2] +; X86-NEXT: vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2] ; X86-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0] +; X86-NEXT: vpaddb %xmm0, 
%xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2] +; X64-NEXT: vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2] ; X64-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) @@ -2679,23 +2679,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2] +; X86-NEXT: vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2] ; X86-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovsdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2] +; X64-NEXT: vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2] ; X64-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovsdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) @@ -2733,23 
+2733,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2] +; X86-NEXT: vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2] ; X86-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovusdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2] +; X64-NEXT: vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2] ; X64-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovusdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) @@ -2787,24 +2787,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2] +; X86-NEXT: vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2] ; X86-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: vzeroupper # 
encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2] +; X64-NEXT: vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2] ; X64-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) @@ -2845,24 +2845,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2] +; X86-NEXT: vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2] ; X86-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovsdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2] +; X64-NEXT: vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2] ; X64-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovsdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) @@ -2903,24 +2903,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8) define <16 x 
i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2] +; X86-NEXT: vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2] ; X86-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1] -; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X86-NEXT: vpmovusdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2] +; X64-NEXT: vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2] ; X64-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1] -; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca] -; X64-NEXT: vpmovusdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0] +; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) @@ -2963,21 +2963,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> % ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2] ; X86-NEXT: vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1] -; X86-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2] +; X64-NEXT: vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2] ; X64-NEXT: vpmovdw 
%xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) @@ -3017,21 +3017,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2] ; X86-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1] -; X86-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovsdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2] +; X64-NEXT: vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2] ; X64-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovsdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) @@ -3071,21 +3071,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2] ; X86-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1] -; X86-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovusdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # 
encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2] +; X64-NEXT: vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2] ; X64-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovusdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) @@ -3125,22 +3125,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> % ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2] ; X86-NEXT: vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1] -; X86-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2] +; X64-NEXT: vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2] ; X64-NEXT: vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) @@ -3183,22 +3183,22 @@ define <8 x 
i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2] ; X86-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1] -; X86-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovsdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2] +; X64-NEXT: vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2] ; X64-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1] -; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X64-NEXT: vpmovsdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) @@ -3241,22 +3241,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2] ; X86-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1] -; X86-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2] -; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca] -; X86-NEXT: vpmovusdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2] +; X64-NEXT: vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2] ; X64-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1] -; X64-NEXT: vpaddw %xmm2, 
%xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
-; X64-NEXT: vpmovusdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0]
-; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
+; X64-NEXT: vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
+; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
+; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
diff --git a/llvm/test/CodeGen/X86/pr37499.ll b/llvm/test/CodeGen/X86/pr37499.ll
index 3fd2ca6..2995017 100644
--- a/llvm/test/CodeGen/X86/pr37499.ll
+++ b/llvm/test/CodeGen/X86/pr37499.ll
@@ -4,7 +4,11 @@ define <2 x i64> @undef_tval() {
; CHECK-LABEL: undef_tval:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1}
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> undef, <8 x i16> , i8 1) #3
%2 = bitcast <8 x i16> %1 to <2 x i64>