From 36be852cee88366e79761df4e01da5aeead710e9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 29 Nov 2017 18:52:20 +0000 Subject: [PATCH] [X86][AVX512] Tag 3OP (shuffles, double-shifts and GFNI) instructions scheduler classes llvm-svn: 319337 --- llvm/lib/Target/X86/X86InstrAVX512.td | 167 +++++++------ llvm/test/CodeGen/X86/avx512-schedule.ll | 4 +- llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll | 288 +++++++++++------------ 3 files changed, 237 insertions(+), 222 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2a52240..5d97c21 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8655,21 +8655,24 @@ multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ + OpndItins itins, X86VectorVTInfo DestInfo, + X86VectorVTInfo SrcInfo>{ let ExeDomain = DestInfo.ExeDomain in { defm rri : AVX512_maskable; + (i8 imm:$src3))), itins.rr>, + Sched<[itins.Sched]>; defm rmi : AVX512_maskable; + (i8 imm:$src3))), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -8677,8 +8680,8 @@ multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>: - avx512_3Op_rm_imm8{ + OpndItins itins, X86VectorVTInfo _>: + avx512_3Op_rm_imm8{ let ExeDomain = _.ExeDomain in defm rmbi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B; + (i8 imm:$src3)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) @@ -8756,29 +8760,29 @@ multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, string OpStr, - AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo, - Predicate Pred = HasBWI> { + OpndItins itins, AVX512VLVectorVTInfo DestInfo, + AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { let Predicates = [Pred] in { - defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; - defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; } } multiclass avx512_common_3Op_imm8 opc, SDNode OpNode, + bits<8> opc, SDNode OpNode, OpndItins itins, Predicate Pred = HasAVX512> { let Predicates = [Pred] in { - defm Z : avx512_3Op_imm8, EVEX_V512; + defm Z : avx512_3Op_imm8, EVEX_V512; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_imm8, EVEX_V128; - defm Z256 : avx512_3Op_imm8, EVEX_V256; + defm Z128 : avx512_3Op_imm8, EVEX_V128; + defm Z256 : avx512_3Op_imm8, EVEX_V256; } } @@ -8918,25 +8922,25 @@ def : Pat<(v4f64 (ftrunc VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>; } -multiclass avx512_shuff_packed_128 opc>{ +multiclass avx512_shuff_packed_128 opc>{ let Predicates = [HasAVX512] in { - defm Z : avx512_3Op_imm8, EVEX_V512; + defm Z : avx512_3Op_imm8, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_3Op_imm8, EVEX_V256; + defm Z256 : avx512_3Op_imm8, EVEX_V256; } } -defm VSHUFF32X4 : 
avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP, + avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP, + avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP, + avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP, + avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; let Predicates = [HasAVX512] in { // Provide fallback in case the load node that is used in the broadcast @@ -8971,17 +8975,18 @@ def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 0)>; } -multiclass avx512_valign { - defm NAME: avx512_common_3Op_imm8, +multiclass avx512_valign { + defm NAME: avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V; } -defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>, +defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>, +defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; -defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" , +defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN, avx512vl_i8_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; @@ -9102,8 +9107,9 @@ let Predicates = [HasVLX, HasBWI] in { v16i8x_info, ValigndImm8XForm>; } -defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , - avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; +defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", + SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>, + EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, OpndItins itins, X86VectorVTInfo _> { @@ -9509,18 +9515,21 @@ defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, extloadi16>, PD, VEX_WIG; defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; + //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations //===----------------------------------------------------------------------===// + multiclass avx512_shufp{ - defm NAME: avx512_common_3Op_imm8, - EVEX_CD8, - AVX512AIi8Base, EVEX_4V; + defm NAME: avx512_common_3Op_imm8, EVEX_CD8, + AVX512AIi8Base, EVEX_4V; } defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - Byte shift Left/Right //===----------------------------------------------------------------------===// @@ -10101,26 +10110,27 @@ defm : 
vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; //===----------------------------------------------------------------------===// multiclass VBMI2_shift_var_rm Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI> { + OpndItins itins, X86VectorVTInfo VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in { defm r: AVX512_maskable_3src, - AVX512FMA3Base; + (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)), + itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base; + (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))), + itins.rm>, AVX512FMA3Base, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass VBMI2_shift_var_rmb Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI> - : VBMI2_shift_var_rm { + OpndItins itins, X86VectorVTInfo VTI> + : VBMI2_shift_var_rm { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in defm mb: AVX512_maskable_3src Op, string OpStr, SDNode OpNode, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, - AVX512FMA3Base, EVEX_B; + (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))), + itins.rm>, AVX512FMA3Base, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass VBMI2_shift_var_rm_common Op, string OpStr, SDNode OpNode, - AVX512VLVectorVTInfo VTI> { + OpndItins itins, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rm, EVEX_V512; + defm Z : VBMI2_shift_var_rm, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rm, EVEX_V256; - defm Z128 : VBMI2_shift_var_rm, EVEX_V128; + defm Z256 : VBMI2_shift_var_rm, EVEX_V256; + defm Z128 : VBMI2_shift_var_rm, EVEX_V128; } } multiclass VBMI2_shift_var_rmb_common Op, string OpStr, SDNode OpNode, - AVX512VLVectorVTInfo VTI> { + OpndItins itins, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rmb, EVEX_V512; + defm Z : VBMI2_shift_var_rmb, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rmb, EVEX_V256; - defm Z128 : VBMI2_shift_var_rmb, EVEX_V128; + defm Z256 : VBMI2_shift_var_rmb, EVEX_V256; + defm Z128 : VBMI2_shift_var_rmb, EVEX_V128; } } multiclass VBMI2_shift_var wOp, bits<8> dqOp, string Prefix, - SDNode OpNode> { - defm W : VBMI2_shift_var_rm_common { + defm W : VBMI2_shift_var_rm_common, VEX_W, EVEX_CD8<16, CD8VF>; - defm D : VBMI2_shift_var_rmb_common, EVEX_CD8<32, CD8VF>; - defm Q : VBMI2_shift_var_rmb_common, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass VBMI2_shift_imm wOp, bits<8> dqOp, string Prefix, - SDNode OpNode> { - defm W : avx512_common_3Op_rm_imm8, VEX_W, EVEX_CD8<16, CD8VF>; + SDNode OpNode, OpndItins itins> { + defm W : avx512_common_3Op_rm_imm8, + VEX_W, EVEX_CD8<16, CD8VF>; defm D : avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm Q : avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; } // Concat & Shift -defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv>; -defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv>; -defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld>; -defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd>; +defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>; +defm VPSHRDV : 
VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>; +defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>; +defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>; + // Compress defm VPCOMPRESSB : compress_by_elt_width <0x63, "vpcompressb", avx512vl_i8_info, HasVBMI2>, EVEX; @@ -10290,9 +10303,9 @@ defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>, EVEX_CD8<8, CD8VF>, T8PD; multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI, + OpndItins itins, X86VectorVTInfo VTI, X86VectorVTInfo BcstVTI> - : avx512_3Op_rm_imm8 { + : avx512_3Op_rm_imm8 { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable Op, string OpStr, SDNode OpNode, "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", (OpNode (VTI.VT VTI.RC:$src1), (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), - (i8 imm:$src3))>, EVEX_B; + (i8 imm:$src3)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass GF2P8AFFINE_avx512_common Op, string OpStr, SDNode OpNode> { +multiclass GF2P8AFFINE_avx512_common Op, string OpStr, SDNode OpNode, + OpndItins itins> { let Predicates = [HasGFNI, HasAVX512, HasBWI] in - defm Z : GF2P8AFFINE_avx512_rmb_imm, EVEX_V512; let Predicates = [HasGFNI, HasVLX, HasBWI] in { - defm Z256 : GF2P8AFFINE_avx512_rmb_imm, EVEX_V256; - defm Z128 : GF2P8AFFINE_avx512_rmb_imm, EVEX_V128; } } defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", - X86GF2P8affineinvqb>, + X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>, EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; defm GF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", - X86GF2P8affineqb>, + X86GF2P8affineqb, SSE_INTMUL_ITINS_P>, EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 51a1c33..aa13797 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -7331,7 +7331,7 @@ define <64 x i8> @vmov_test16(i64 %x) { ; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33] ; GENERIC-NEXT: kmovd %eax, %k1 ; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %zmm0, %k0 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7371,7 +7371,7 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { ; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33] ; GENERIC-NEXT: kmovd %eax, %k1 ; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %zmm0, %k0 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index c95f0d4..5836e32 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -9535,7 +9535,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x flo ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = 
ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9557,7 +9557,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask0: @@ -9576,7 +9576,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x flo ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9598,7 +9598,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask1: @@ -9617,7 +9617,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x flo ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9639,7 +9639,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask2: @@ -9671,7 +9671,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x flo ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9693,7 +9693,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = 
ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mask3: @@ -9726,7 +9726,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9749,7 +9749,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: @@ -9770,7 +9770,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9793,7 +9793,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: @@ -9814,7 +9814,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9837,7 +9837,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: @@ -9872,7 +9872,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; 
GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9895,7 +9895,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: @@ -9914,7 +9914,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_shuff_mask0: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mask0: @@ -9929,7 +9929,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9951,7 +9951,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask0: @@ -9970,7 +9970,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9992,7 +9992,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask1: @@ -10011,7 +10011,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = 
zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10033,7 +10033,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask2: @@ -10050,7 +10050,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, < define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_shuff_mask3: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mask3: @@ -10065,7 +10065,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10087,7 +10087,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask3: @@ -10104,7 +10104,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, < define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask0: @@ -10120,7 +10120,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10143,7 +10143,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 
{%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: @@ -10164,7 +10164,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10187,7 +10187,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: @@ -10208,7 +10208,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10231,7 +10231,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: @@ -10250,7 +10250,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask3: @@ -10266,7 +10266,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10289,7 +10289,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec ; GENERIC: # BB#0: ; 
GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: @@ -10323,7 +10323,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10345,7 +10345,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask0: @@ -10364,7 +10364,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10386,7 +10386,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask1: @@ -10405,7 +10405,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10427,7 +10427,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask2: @@ -10459,7 +10459,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1 -; 
GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10481,7 +10481,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask3: @@ -10514,7 +10514,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10537,7 +10537,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: @@ -10558,7 +10558,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10581,7 +10581,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: @@ -10602,7 +10602,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10625,7 +10625,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: 
[1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: @@ -10660,7 +10660,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10683,7 +10683,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: @@ -10702,7 +10702,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_shuff_mask0: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mask0: @@ -10717,7 +10717,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10739,7 +10739,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask0: @@ -10758,7 +10758,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10780,7 +10780,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask1: @@ -10799,7 +10799,7 @@ define <8 x double> 
@test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10821,7 +10821,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask2: @@ -10838,7 +10838,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, < define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_shuff_mask3: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mask3: @@ -10853,7 +10853,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10875,7 +10875,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, < ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask3: @@ -10892,7 +10892,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, < define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask0: @@ -10908,7 +10908,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10931,7 +10931,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: @@ -10952,7 +10952,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10975,7 +10975,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: @@ -10996,7 +10996,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11019,7 +11019,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: @@ -11038,7 +11038,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3: ; GENERIC: # BB#0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask3: @@ -11054,7 +11054,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11077,7 +11077,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1 -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 
{%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: @@ -11111,7 +11111,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11133,7 +11133,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask0: @@ -11152,7 +11152,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11174,7 +11174,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask1: @@ -11193,7 +11193,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11215,7 +11215,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask2: @@ -11247,7 +11247,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2 ; GENERIC: # BB#0: ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 
{%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11269,7 +11269,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask3:
@@ -11302,7 +11302,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11325,7 +11325,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
@@ -11346,7 +11346,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11369,7 +11369,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
@@ -11390,7 +11390,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11413,7 +11413,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
@@ -11448,7 +11448,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11471,7 +11471,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
@@ -11490,7 +11490,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i
 define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) {
 ; GENERIC-LABEL: test_16xi32_shuff_mask0:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_shuff_mask0:
@@ -11505,7 +11505,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11527,7 +11527,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask0:
@@ -11546,7 +11546,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11568,7 +11568,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask1:
@@ -11587,7 +11587,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11609,7 +11609,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask2:
@@ -11626,7 +11626,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i
 define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) {
 ; GENERIC-LABEL: test_16xi32_shuff_mask3:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_shuff_mask3:
@@ -11641,7 +11641,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11663,7 +11663,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask3:
@@ -11680,7 +11680,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i
 define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) {
 ; GENERIC-LABEL: test_16xi32_shuff_mem_mask0:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_shuff_mem_mask0:
@@ -11696,7 +11696,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i3
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11719,7 +11719,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
@@ -11740,7 +11740,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i3
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11763,7 +11763,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
@@ -11784,7 +11784,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i3
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11807,7 +11807,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
@@ -11826,7 +11826,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16
 define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) {
 ; GENERIC-LABEL: test_16xi32_shuff_mem_mask3:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_shuff_mem_mask3:
@@ -11842,7 +11842,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i3
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11865,7 +11865,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
@@ -11899,7 +11899,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11921,7 +11921,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask0:
@@ -11940,7 +11940,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -11962,7 +11962,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask1:
@@ -11981,7 +11981,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12003,7 +12003,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask2:
@@ -12035,7 +12035,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12057,7 +12057,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask3:
@@ -12090,7 +12090,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12113,7 +12113,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
@@ -12134,7 +12134,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12157,7 +12157,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
@@ -12178,7 +12178,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12201,7 +12201,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
@@ -12236,7 +12236,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12259,7 +12259,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
@@ -12278,7 +12278,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i
 define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) {
 ; GENERIC-LABEL: test_8xi64_shuff_mask0:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_shuff_mask0:
@@ -12293,7 +12293,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12315,7 +12315,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask0:
@@ -12334,7 +12334,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12356,7 +12356,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask1:
@@ -12375,7 +12375,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12397,7 +12397,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask2:
@@ -12414,7 +12414,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64>
 define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) {
 ; GENERIC-LABEL: test_8xi64_shuff_mask3:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_shuff_mask3:
@@ -12429,7 +12429,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12451,7 +12451,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64>
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask3:
@@ -12468,7 +12468,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64>
 define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
 ; GENERIC-LABEL: test_8xi64_shuff_mem_mask0:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_shuff_mem_mask0:
@@ -12484,7 +12484,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12507,7 +12507,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
@@ -12528,7 +12528,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12551,7 +12551,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
@@ -12572,7 +12572,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12595,7 +12595,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
@@ -12614,7 +12614,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i
 define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
 ; GENERIC-LABEL: test_8xi64_shuff_mem_mask3:
 ; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_shuff_mem_mask3:
@@ -12630,7 +12630,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64>*
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
@@ -12653,7 +12653,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
-- 
2.7.4