From: Simon Pilgrim Date: Fri, 20 Apr 2018 21:16:05 +0000 (+0000) Subject: [X86] Add WriteFSign/WriteFLogic scheduler classes X-Git-Tag: llvmorg-7.0.0-rc1~7762 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d14d2e7b185ffd7fcc171ea07734ba5b7aa73d8a;p=platform%2Fupstream%2Fllvm.git [X86] Add WriteFSign/WriteFLogic scheduler classes Split the fp and integer vector logical instruction scheduler classes - older CPUs especially often handled these on different pipes. This unearthed a couple of things that are also handled in this patch: (1) We were tagging avx512 fp logic ops as WriteFAdd, probably because of the lack of WriteFLogic (2) SandyBridge had integer logic ops only using Port5, when afaict they can use Ports015. (3) Cleaned up x86 FCHS/FABS scheduling as they are typically treated as fp logic ops. Differential Revision: https://reviews.llvm.org/D45629 llvm-svn: 330480 --- diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index fb6b7b5..f18490f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5058,10 +5058,10 @@ let isCodeGenOnly = 1 in { defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>; defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>; } -defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>; -defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>; -defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, WriteFAdd, 1>; -defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFAdd, 1>; +defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFLogic, 1>; +defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFLogic, 0>; +defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, WriteFLogic, 1>; +defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFLogic, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. multiclass avx512_fp_logical_lowering; let Defs = [FPSW] in { -let SchedRW = [WriteVecLogic] in { +let SchedRW = [WriteFSign] in { defm CHS : FPUnary; defm ABS : FPUnary; } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 6f7b47a..cf8b987 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -95,14 +95,14 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), pat_rr, d>, - Sched<[WriteVecLogic]>; + Sched<[WriteFLogic]>; let hasSideEffects = 0, mayLoad = 1 in def rm : PI, - Sched<[WriteVecLogicLd, ReadAfterLd]>; + Sched<[WriteFLogic.Folded, ReadAfterLd]>; } diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 5139f97..352d418 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -163,6 +163,8 @@ defm : BWWriteResPair; // Floating point square root defm : BWWriteResPair; // Floating point reciprocal estimate. defm : BWWriteResPair; // Floating point reciprocal square root estimate. defm : BWWriteResPair; // Fused Multiply Add. +defm : BWWriteResPair; // Floating point fabs/fchs. +defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point vector shuffles. defm : BWWriteResPair; // Floating point vector variable shuffles. defm : BWWriteResPair; // Floating point vector blends. @@ -177,6 +179,7 @@ def : WriteRes; def : WriteRes; defm : BWWriteResPair; // Vector integer ALU op, no logicals. +defm : BWWriteResPair; // Vector integer and/or/xor. defm : BWWriteResPair; // Vector integer shifts. defm : BWWriteResPair; // Vector integer multiply. defm : BWWriteResPair; // PMULLD @@ -187,10 +190,6 @@ defm : BWWriteResPair; // Vector variable ble defm : BWWriteResPair; // Vector MPSAD. defm : BWWriteResPair; // Vector PSADBW. -// Vector bitwise operations. -// These are often used on both floating point and integer vectors. -defm : BWWriteResPair; // Vector and/or/xor. - // Conversion between integer and float. defm : BWWriteResPair; // Float -> Integer. defm : BWWriteResPair; // Integer -> Float. @@ -380,10 +379,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr", "MMX_PUNPCKLBWirr", "MMX_PUNPCKLDQirr", "MMX_PUNPCKLWDirr", - "(V?)ANDNPD(Y?)rr", - "(V?)ANDNPS(Y?)rr", - "(V?)ANDPD(Y?)rr", - "(V?)ANDPS(Y?)rr", "VBROADCASTSSrr", "(V?)INSERTPSrr", "(V?)MOV64toPQIrr", @@ -437,9 +432,7 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr", "(V?)UNPCKHPD(Y?)rr", "(V?)UNPCKHPS(Y?)rr", "(V?)UNPCKLPD(Y?)rr", - "(V?)UNPCKLPS(Y?)rr", - "(V?)XORPD(Y?)rr", - "(V?)XORPS(Y?)rr")>; + "(V?)UNPCKLPS(Y?)rr")>; def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> { let Latency = 1; @@ -552,21 +545,13 @@ def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> { let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr", - "MMX_PANDNirr", - "MMX_PANDirr", - "MMX_PORirr", - "MMX_PXORirr", "(V?)BLENDPD(Y?)rri", "(V?)BLENDPS(Y?)rri", "(V?)MOVDQA(Y?)rr", "(V?)MOVDQU(Y?)rr", "(V?)MOVPQI2QIrr", "VMOVZPQILo2PQIrr", - "(V?)PANDN(Y?)rr", - "(V?)PAND(Y?)rr", - "VPBLENDD(Y?)rri", - "(V?)POR(Y?)rr", - "(V?)PXOR(Y?)rr")>; + "VPBLENDD(Y?)rri")>; def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> { let Latency = 1; @@ -1238,17 +1223,11 @@ def: InstRW<[BWWriteResGroup61], (instregex "MMX_PALIGNRrmi", "MMX_PUNPCKLBWirm", "MMX_PUNPCKLDQirm", "MMX_PUNPCKLWDirm", - "(V?)ANDNPDrm", - "(V?)ANDNPSrm", - "(V?)ANDPDrm", - "(V?)ANDPSrm", "(V?)INSERTPSrm", "(V?)MOVHPDrm", "(V?)MOVHPSrm", "(V?)MOVLPDrm", "(V?)MOVLPSrm", - "(V?)ORPDrm", - "(V?)ORPSrm", "(V?)PACKSSDWrm", "(V?)PACKSSWBrm", "(V?)PACKUSDWrm", @@ -1292,9 +1271,7 @@ def: InstRW<[BWWriteResGroup61], (instregex "MMX_PALIGNRrmi", "(V?)UNPCKHPDrm", "(V?)UNPCKHPSrm", "(V?)UNPCKLPDrm", - "(V?)UNPCKLPSrm", - "(V?)XORPDrm", - "(V?)XORPSrm")>; + "(V?)UNPCKLPSrm")>; def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> { let Latency = 6; @@ -1387,19 +1364,11 @@ def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup65], (instregex "MMX_PANDNirm", - "MMX_PANDirm", - "MMX_PORirm", - "MMX_PXORirm", - "(V?)BLENDPDrmi", +def: InstRW<[BWWriteResGroup65], (instregex "(V?)BLENDPDrmi", "(V?)BLENDPSrmi", "VINSERTF128rm", "VINSERTI128rm", - "(V?)PANDNrm", - "(V?)PANDrm", - "VPBLENDDrmi", - "(V?)PORrm", - "(V?)PXORrm")>; + "VPBLENDDrmi")>; def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> { let Latency = 6; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 6976d551..9a8a7be 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -160,6 +160,8 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -173,7 +175,7 @@ def : WriteRes { let Latency = 5; } def : WriteRes; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -494,12 +496,6 @@ def : InstRW<[HWWriteFRSTOR], (instregex "FRSTORm")>; //-- Arithmetic instructions --// -// FABS. -def : InstRW<[HWWriteP0], (instregex "ABS_F")>; - -// FCHS. -def : InstRW<[HWWriteP0], (instregex "CHS_F")>; - // FCOMPP FUCOMPP. // r. def : InstRW<[HWWrite2P01], (instregex "FCOMPP", "UCOM_FPPr")>; @@ -737,10 +733,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", "MMX_PUNPCKLBWirr", "MMX_PUNPCKLDQirr", "MMX_PUNPCKLWDirr", - "(V?)ANDNPD(Y?)rr", - "(V?)ANDNPS(Y?)rr", - "(V?)ANDPD(Y?)rr", - "(V?)ANDPS(Y?)rr", "VBROADCASTSSrr", "(V?)INSERTPSrr", "(V?)MOV64toPQIrr", @@ -756,8 +748,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", "(V?)MOVSSrr", "(V?)MOVUPD(Y?)rr", "(V?)MOVUPS(Y?)rr", - "(V?)ORPD(Y?)rr", - "(V?)ORPS(Y?)rr", "(V?)PACKSSDW(Y?)rr", "(V?)PACKSSWB(Y?)rr", "(V?)PACKUSDW(Y?)rr", @@ -798,9 +788,7 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", "(V?)UNPCKHPD(Y?)rr", "(V?)UNPCKHPS(Y?)rr", "(V?)UNPCKLPD(Y?)rr", - "(V?)UNPCKLPS(Y?)rr", - "(V?)XORPD(Y?)rr", - "(V?)XORPS(Y?)rr")>; + "(V?)UNPCKLPS(Y?)rr")>; def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> { let Latency = 1; @@ -925,11 +913,7 @@ def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr", "(V?)MOVDQU(Y?)rr", "(V?)MOVPQI2QIrr", "VMOVZPQILo2PQIrr", - "(V?)PANDN(Y?)rr", - "(V?)PAND(Y?)rr", - "VPBLENDD(Y?)rri", - "(V?)POR(Y?)rr", - "(V?)PXOR(Y?)rr")>; + "VPBLENDD(Y?)rri")>; def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> { let Latency = 1; @@ -1041,13 +1025,7 @@ def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup13], (instregex "PUNPCKLWDrm", - "(V?)ANDNPDrm", - "(V?)ANDNPSrm", - "(V?)ANDPDrm", - "(V?)ANDPSrm", "(V?)INSERTPSrm", - "(V?)ORPDrm", - "(V?)ORPSrm", "(V?)PACKSSDWrm", "(V?)PACKSSWBrm", "(V?)PACKUSDWrm", @@ -1075,9 +1053,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "PUNPCKLWDrm", "(V?)UNPCKHPDrm", "(V?)UNPCKHPSrm", "(V?)UNPCKLPDrm", - "(V?)UNPCKLPSrm", - "(V?)XORPDrm", - "(V?)XORPSrm")>; + "(V?)UNPCKLPSrm")>; def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 8; @@ -1310,11 +1286,7 @@ def: InstRW<[HWWriteResGroup17], (instregex "(V?)BLENDPDrmi", "(V?)BLENDPSrmi", "VINSERTF128rm", "VINSERTI128rm", - "(V?)PANDNrm", - "(V?)PANDrm", - "VPBLENDDrmi", - "(V?)PORrm", - "(V?)PXORrm")>; + "VPBLENDDrmi")>; def HWWriteResGroup17_1 : SchedWriteRes<[HWPort23,HWPort015]> { let Latency = 6; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 429d6ad..ee09461 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -149,6 +149,8 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -160,7 +162,7 @@ def : WriteRes { let Latency = 6; } def : WriteRes; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // TODO this is probably wrong for 256/512-bit for the "generic" model @@ -451,11 +453,7 @@ def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr", "MOVDQArr", //TODO: Why are these separated from their VEX equivalent "MOVDQUrr", // TODO: Why are these separated from their VEX equivalent "(V?)MOVPQI2QIrr", - "(V?)MOVZPQILo2PQIrr", - "(V?)PANDNrr", - "(V?)PANDrr", - "(V?)PORrr", - "(V?)PXORrr")>; + "(V?)MOVZPQILo2PQIrr")>; def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> { let Latency = 2; @@ -967,7 +965,11 @@ def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> { let ResourceCycles = [1,1]; } def: InstRW<[SBWriteResGroup52], (instregex "LODSL", - "LODSQ")>; + "LODSQ", + "MMX_PANDirm", + "MMX_PANDNirm", + "MMX_PORirm", + "MMX_PXORirm")>; def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> { let Latency = 6; @@ -1012,18 +1014,12 @@ def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup56], (instregex "(V?)ANDNPDrm", - "(V?)ANDNPSrm", - "(V?)ANDPDrm", - "(V?)ANDPSrm", - "VBROADCASTF128", +def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128", "(V?)INSERTPSrm", "(V?)MOVHPDrm", "(V?)MOVHPSrm", "(V?)MOVLPDrm", "(V?)MOVLPSrm", - "(V?)ORPDrm", - "(V?)ORPSrm", "VPERMILPDmi", "VPERMILPDrm", "VPERMILPSmi", @@ -1033,9 +1029,7 @@ def: InstRW<[SBWriteResGroup56], (instregex "(V?)ANDNPDrm", "(V?)UNPCKHPDrm", "(V?)UNPCKHPSrm", "(V?)UNPCKLPDrm", - "(V?)UNPCKLPSrm", - "(V?)XORPDrm", - "(V?)XORPSrm")>; + "(V?)UNPCKLPSrm")>; def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 7; @@ -1130,16 +1124,6 @@ def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm", "(V?)PUNPCKLQDQrm", "(V?)PUNPCKLWDrm")>; -def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup60], (instregex "(V?)PANDNrm", - "(V?)PANDrm", - "(V?)PORrm", - "(V?)PXORrm")>; - def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index d09e670..72d15a3 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -160,6 +160,8 @@ defm : SKLWriteResPair; // Floating point square ro defm : SKLWriteResPair; // Floating point reciprocal estimate. defm : SKLWriteResPair; // Floating point reciprocal square root estimate. defm : SKLWriteResPair; // Fused Multiply Add. +defm : SKLWriteResPair; // Floating point fabs/fchs. +defm : SKLWriteResPair; // Floating point and/or/xor logicals. defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector blends. @@ -174,6 +176,7 @@ def : WriteRes; def : WriteRes; defm : SKLWriteResPair; // Vector integer ALU op, no logicals. +defm : SKLWriteResPair; // Vector integer and/or/xor. defm : SKLWriteResPair; // Vector integer shifts. defm : SKLWriteResPair; // Vector integer multiply. defm : SKLWriteResPair; @@ -184,10 +187,6 @@ defm : SKLWriteResPair; // Vector variable b defm : SKLWriteResPair; // Vector MPSAD. defm : SKLWriteResPair; // Vector PSADBW. -// Vector bitwise operations. -// These are often used on both floating point and integer vectors. -defm : SKLWriteResPair; // Vector and/or/xor. - // Conversion between integer and float. defm : SKLWriteResPair; // Float -> Integer. defm : SKLWriteResPair; // Integer -> Float. @@ -565,11 +564,7 @@ def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup9], (instregex "(V?)ANDNPD(Y?)rr", - "(V?)ANDNPS(Y?)rr", - "(V?)ANDPD(Y?)rr", - "(V?)ANDPS(Y?)rr", - "(V?)BLENDPD(Y?)rri", +def: InstRW<[SKLWriteResGroup9], (instregex "(V?)BLENDPD(Y?)rri", "(V?)BLENDPS(Y?)rri", "(V?)MOVAPD(Y?)rr", "(V?)MOVAPS(Y?)rr", @@ -579,23 +574,15 @@ def: InstRW<[SKLWriteResGroup9], (instregex "(V?)ANDNPD(Y?)rr", "(V?)MOVUPD(Y?)rr", "(V?)MOVUPS(Y?)rr", "(V?)MOVZPQILo2PQIrr", - "(V?)ORPD(Y?)rr", - "(V?)ORPS(Y?)rr", "(V?)PADDB(Y?)rr", "(V?)PADDD(Y?)rr", "(V?)PADDQ(Y?)rr", "(V?)PADDW(Y?)rr", - "(V?)PANDN(Y?)rr", - "(V?)PAND(Y?)rr", "VPBLENDD(Y?)rri", - "(V?)POR(Y?)rr", "(V?)PSUBB(Y?)rr", "(V?)PSUBD(Y?)rr", "(V?)PSUBQ(Y?)rr", - "(V?)PSUBW(Y?)rr", - "(V?)PXOR(Y?)rr", - "(V?)XORPD(Y?)rr", - "(V?)XORPS(Y?)rr")>; + "(V?)PSUBW(Y?)rr")>; def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> { let Latency = 1; @@ -1605,35 +1592,23 @@ def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup91], (instregex "(V?)ANDNPDrm", - "(V?)ANDNPSrm", - "(V?)ANDPDrm", - "(V?)ANDPSrm", - "(V?)BLENDPDrmi", +def: InstRW<[SKLWriteResGroup91], (instregex "(V?)BLENDPDrmi", "(V?)BLENDPSrmi", "(V?)INSERTF128rm", "(V?)INSERTI128rm", "(V?)MASKMOVPDrm", "(V?)MASKMOVPSrm", - "(V?)ORPDrm", - "(V?)ORPSrm", "(V?)PADDBrm", "(V?)PADDDrm", "(V?)PADDQrm", "(V?)PADDWrm", - "(V?)PANDNrm", - "(V?)PANDrm", "(V?)PBLENDDrmi", "(V?)PMASKMOVDrm", "(V?)PMASKMOVQrm", - "(V?)PORrm", "(V?)PSUBBrm", "(V?)PSUBDrm", "(V?)PSUBQrm", - "(V?)PSUBWrm", - "(V?)PXORrm", - "(V?)XORPDrm", - "(V?)XORPSrm")>; + "(V?)PSUBWrm")>; def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index df34920..aa2a8b2 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -160,6 +160,8 @@ defm : SKXWriteResPair; // Floating point square ro defm : SKXWriteResPair; // Floating point reciprocal estimate. defm : SKXWriteResPair; // Floating point reciprocal square root estimate. defm : SKXWriteResPair; // Fused Multiply Add. +defm : SKXWriteResPair; // Floating point fabs/fchs. +defm : SKXWriteResPair; // Floating point and/or/xor logicals. defm : SKXWriteResPair; // Floating point vector shuffles. defm : SKXWriteResPair; // Floating point vector variable shuffles. defm : SKXWriteResPair; // Floating point vector blends. @@ -174,6 +176,7 @@ def : WriteRes; def : WriteRes; defm : SKXWriteResPair; // Vector integer ALU op, no logicals. +defm : SKXWriteResPair; // Vector integer and/or/xor. defm : SKXWriteResPair; // Vector integer shifts. defm : SKXWriteResPair; // Vector integer multiply. defm : SKXWriteResPair; // Vector integer multiply. @@ -184,10 +187,6 @@ defm : SKXWriteResPair; // Vector variable b defm : SKXWriteResPair; // Vector MPSAD. defm : SKXWriteResPair; // Vector PSADBW. -// Vector bitwise operations. -// These are often used on both floating point and integer vectors. -defm : SKXWriteResPair; // Vector and/or/xor. - // Conversion between integer and float. defm : SKXWriteResPair; // Float -> Integer. defm : SKXWriteResPair; // Integer -> Float. @@ -1026,11 +1025,7 @@ def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr", - "ANDNPSrr", - "ANDPDrr", - "ANDPSrr", - "BLENDPDrri", +def: InstRW<[SKXWriteResGroup9], (instregex "BLENDPDrri", "BLENDPSrri", "MOVAPDrr", "MOVAPSrr", @@ -1039,40 +1034,14 @@ def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr", "MOVPQI2QIrr", "MOVUPDrr", "MOVUPSrr", - "ORPDrr", - "ORPSrr", "PADDBrr", "PADDDrr", "PADDQrr", "PADDWrr", - "PANDNrr", - "PANDrr", - "PORrr", "PSUBBrr", "PSUBDrr", "PSUBQrr", "PSUBWrr", - "PXORrr", - "VANDNPDYrr", - "VANDNPDZ128rr", - "VANDNPDZ256rr", - "VANDNPDZrr", - "VANDNPDrr", - "VANDNPSYrr", - "VANDNPSZ128rr", - "VANDNPSZ256rr", - "VANDNPSZrr", - "VANDNPSrr", - "VANDPDYrr", - "VANDPDZ128rr", - "VANDPDZ256rr", - "VANDPDZrr", - "VANDPDrr", - "VANDPSYrr", - "VANDPSZ128rr", - "VANDPSZ256rr", - "VANDPSZrr", - "VANDPSrr", "VBLENDMPDZ128rr", "VBLENDMPDZ256rr", "VBLENDMPDZrr", @@ -1128,16 +1097,6 @@ def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr", "VMOVUPSYrr", "VMOVUPSrr", "VMOVZPQILo2PQIrr", - "VORPDYrr", - "VORPDZ128rr", - "VORPDZ256rr", - "VORPDZrr", - "VORPDrr", - "VORPSYrr", - "VORPSZ128rr", - "VORPSZ256rr", - "VORPSZrr", - "VORPSrr", "VPADDBYrr", "VPADDBZ128rr", "VPADDBZ256rr", @@ -1158,22 +1117,6 @@ def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr", "VPADDWZ256rr", "VPADDWZrr", "VPADDWrr", - "VPANDDZ128rr", - "VPANDDZ256rr", - "VPANDDZrr", - "VPANDNDZ128rr", - "VPANDNDZ256rr", - "VPANDNDZrr", - "VPANDNQZ128rr", - "VPANDNQZ256rr", - "VPANDNQZrr", - "VPANDNYrr", - "VPANDNrr", - "VPANDQZ128rr", - "VPANDQZ256rr", - "VPANDQZrr", - "VPANDYrr", - "VPANDrr", "VPBLENDDYrri", "VPBLENDDrri", "VPBLENDMBZ128rr", @@ -1188,14 +1131,6 @@ def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr", "VPBLENDMWZ128rr", "VPBLENDMWZ256rr", "VPBLENDMWZrr", - "VPORDZ128rr", - "VPORDZ256rr", - "VPORDZrr", - "VPORQZ128rr", - "VPORQZ256rr", - "VPORQZrr", - "VPORYrr", - "VPORrr", "VPSUBBYrr", "VPSUBBZ128rr", "VPSUBBZ256rr", @@ -1220,27 +1155,7 @@ def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr", "VPTERNLOGDZrri", "VPTERNLOGQZ128rri", "VPTERNLOGQZ256rri", - "VPTERNLOGQZrri", - "VPXORDZ128rr", - "VPXORDZ256rr", - "VPXORDZrr", - "VPXORQZ128rr", - "VPXORQZ256rr", - "VPXORQZrr", - "VPXORYrr", - "VPXORrr", - "VXORPDYrr", - "VXORPDZ128rr", - "VXORPDZ256rr", - "VXORPDZrr", - "VXORPDrr", - "VXORPSYrr", - "VXORPSZ128rr", - "VXORPSZ256rr", - "VXORPSZrr", - "VXORPSrr", - "XORPDrr", - "XORPSrr")>; + "VPTERNLOGQZrri")>; def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> { let Latency = 1; @@ -3346,34 +3261,16 @@ def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm", - "ANDNPSrm", - "ANDPDrm", - "ANDPSrm", - "BLENDPDrmi", +def: InstRW<[SKXWriteResGroup95], (instregex "BLENDPDrmi", "BLENDPSrmi", - "ORPDrm", - "ORPSrm", "PADDBrm", "PADDDrm", "PADDQrm", "PADDWrm", - "PANDNrm", - "PANDrm", - "PORrm", "PSUBBrm", "PSUBDrm", "PSUBQrm", "PSUBWrm", - "PXORrm", - "VANDNPDZ128rm(b?)", - "VANDNPDrm", - "VANDNPSZ128rm(b?)", - "VANDNPSrm", - "VANDPDZ128rm(b?)", - "VANDPDrm", - "VANDPSZ128rm(b?)", - "VANDPSrm", "VBLENDMPDZ128rm(b?)", "VBLENDMPSZ128rm(b?)", "VBLENDPDrmi", @@ -3398,10 +3295,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm", "VMOVSLDUPZ128rm(b?)", "VMOVUPDZ128rm(b?)", "VMOVUPSZ128rm(b?)", - "VORPDZ128rm(b?)", - "VORPDrm", - "VORPSZ128rm(b?)", - "VORPSrm", "VPADDBZ128rm(b?)", "VPADDBrm", "VPADDDZ128rm(b?)", @@ -3410,12 +3303,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm", "VPADDQrm", "VPADDWZ128rm(b?)", "VPADDWrm", - "VPANDDZ128rm(b?)", - "VPANDNDZ128rm(b?)", - "VPANDNQZ128rm(b?)", - "VPANDNrm", - "VPANDQZ128rm(b?)", - "VPANDrm", "VPBLENDDrmi", "VPBLENDMBZ128rm(b?)", "VPBLENDMDZ128rm(b?)", @@ -3425,8 +3312,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm", "VPBROADCASTQZ128m(b?)", "VPMASKMOVDrm", "VPMASKMOVQrm", - "VPORDZ128rm(b?)", - "VPORQZ128rm(b?)", "VPORrm", "VPSUBBZ128rm(b?)", "VPSUBBrm", @@ -3437,16 +3322,7 @@ def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm", "VPSUBWZ128rm(b?)", "VPSUBWrm", "VPTERNLOGDZ128rm(b?)i", - "VPTERNLOGQZ128rm(b?)i", - "VPXORDZ128rm(b?)", - "VPXORQZ128rm(b?)", - "VPXORrm", - "VXORPDZ128rm(b?)", - "VXORPDrm", - "VXORPSZ128rm(b?)", - "VXORPSrm", - "XORPDrm", - "XORPSrm")>; + "VPTERNLOGQZ128rm(b?)i")>; def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index dd2ffed..3ded412 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -87,6 +87,8 @@ defm WriteFSqrt : X86SchedWritePair; // Floating point square root. defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. +defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. @@ -104,6 +106,7 @@ def WriteVecLoad : SchedWrite; def WriteVecStore : SchedWrite; def WriteVecMove : SchedWrite; defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. defm WritePMULLD : X86SchedWritePair; // PMULLD @@ -114,10 +117,6 @@ defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. -// Vector bitwise operations. -// These are often used on both floating point and integer vectors. -defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. - // MOVMSK operations. def WriteFMOVMSK : SchedWrite; def WriteVecMOVMSK : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index a76cdce..94c88c5 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -210,6 +210,8 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. @@ -351,8 +353,7 @@ def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> { let ResourceCycles = [1]; } def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>; -def : InstRW<[AtomWrite1_1], (instregex "ABS_F", "CHS_F", - "UCOM_F(P|PP)?r", +def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r", "BT(C|R|S)?(16|32|64)(rr|ri8)")>; def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> { diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 681d8fc..14d60e5 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -300,6 +300,8 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; @@ -532,25 +534,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; // AVX instructions. //////////////////////////////////////////////////////////////////////////////// -def JWriteFLogic: SchedWriteRes<[JFPU01, JFPX]> { -} -def : InstRW<[JWriteFLogic], (instrs ORPDrr, ORPSrr, VORPDrr, VORPSrr, - XORPDrr, XORPSrr, VXORPDrr, VXORPSrr, - ANDPDrr, ANDPSrr, VANDPDrr, VANDPSrr, - ANDNPDrr, ANDNPSrr, VANDNPDrr, VANDNPSrr)>; - -def JWriteFLogicLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 6; -} -def : InstRW<[JWriteFLogicLd, ReadAfterLd], (instrs ORPDrm, ORPSrm, - VORPDrm, VORPSrm, - XORPDrm, XORPSrm, - VXORPDrm, VXORPSrm, - ANDPDrm, ANDPSrm, - VANDPDrm, VANDPSrm, - ANDNPDrm, ANDNPSrm, - VANDNPDrm, VANDNPSrm)>; - def JWriteFLogicY: SchedWriteRes<[JFPU01, JFPX]> { let ResourceCycles = [2, 2]; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index badf5e8..d62286d 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -135,6 +135,8 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 051b2e4..1df972f 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -202,6 +202,8 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -778,10 +780,6 @@ def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>; //-- Arithmetic instructions --// -def ZnWriteFPU3Lat2 : SchedWriteRes<[ZnFPU3]> { - let Latency = 2; -} - def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ; def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ; @@ -790,9 +788,6 @@ def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { let Latency = 8; } -// FABS. -def : InstRW<[ZnWriteFPU3Lat2], (instregex "ABS_F")>; - // FCHS. def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>; @@ -1672,15 +1667,6 @@ def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { } def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm")>; -//-- Logic instructions --// - -// AND, ANDN, OR, XOR PS/PD. -// x,x / v,v,v. -def : InstRW<[WriteVecLogic], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>; -// x,m / v,v,m. -def : InstRW<[WriteVecLogicLd], - (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>; - //-- Other instructions --// // VZEROUPPER. diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index e7a2f75..f7e8ccf 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -654,7 +654,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsb: @@ -704,7 +704,7 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsd: @@ -754,7 +754,7 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsw: @@ -1367,8 +1367,8 @@ define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pand: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1416,8 +1416,8 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pandn: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5038,8 +5038,8 @@ declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnon define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_por: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5225,7 +5225,7 @@ define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [6:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufhw: @@ -5274,7 +5274,7 @@ define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [6:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshuflw: @@ -7064,8 +7064,8 @@ define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index e76e719..cd35308 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -400,7 +400,7 @@ define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -418,7 +418,7 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %ma define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -436,7 +436,7 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %m define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -455,7 +455,7 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -473,7 +473,7 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -492,7 +492,7 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -657,7 +657,7 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: orq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: orq_broadcast: @@ -671,7 +671,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; GENERIC-LABEL: andd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andd512fold: @@ -687,7 +687,7 @@ entry: define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { ; GENERIC-LABEL: andqbrst: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andqbrst: @@ -705,7 +705,7 @@ entry: define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, ; GENERIC-LABEL: test_mask_vaddps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -725,7 +725,7 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmulps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -743,7 +743,7 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vminps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -762,7 +762,7 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vminpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -781,7 +781,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x d define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmaxps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -800,7 +800,7 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmaxpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -819,7 +819,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x d define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vsubps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -837,7 +837,7 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vdivps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -855,7 +855,7 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -873,7 +873,7 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x d define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { ; GENERIC-LABEL: test_maskz_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -891,7 +891,7 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i6 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_fold_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -910,7 +910,7 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, < define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_maskz_fold_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -947,7 +947,7 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -971,7 +971,7 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, ; GENERIC-LABEL: test_maskz_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -994,7 +994,7 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, define <16 x float> @test_fxor(<16 x float> %a) { ; GENERIC-LABEL: test_fxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor: @@ -1009,7 +1009,7 @@ define <16 x float> @test_fxor(<16 x float> %a) { define <8 x float> @test_fxor_8f32(<8 x float> %a) { ; GENERIC-LABEL: test_fxor_8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor_8f32: @@ -1023,7 +1023,7 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { define <8 x double> @fabs_v8f64(<8 x double> %p) ; GENERIC-LABEL: fabs_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v8f64: @@ -1039,7 +1039,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) define <16 x float> @fabs_v16f32(<16 x float> %p) ; GENERIC-LABEL: fabs_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v16f32: @@ -2829,7 +2829,7 @@ define <8 x float> @ubto8f32(<8 x i32> %a) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto8f32: @@ -2869,7 +2869,7 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto4f32: @@ -4394,7 +4394,7 @@ define i16 @trunc_i32_to_i1(i32 %a) { ; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] ; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4809,7 +4809,7 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; GENERIC-LABEL: test_x86_fnmsub_ps_z: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5029,7 +5029,7 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; GENERIC-LABEL: vpandd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandd: @@ -5049,7 +5049,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readno ; GENERIC-LABEL: vpandnd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandnd: @@ -5071,7 +5071,7 @@ define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ; GENERIC-LABEL: vpord: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpord: @@ -5091,7 +5091,7 @@ define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; GENERIC-LABEL: vpxord: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpxord: @@ -5111,7 +5111,7 @@ define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone s ; GENERIC-LABEL: vpandq: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandq: @@ -5130,7 +5130,7 @@ define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ; GENERIC-LABEL: vpandnq: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpandnq: @@ -5150,7 +5150,7 @@ define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ss ; GENERIC-LABEL: vporq: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vporq: @@ -5169,7 +5169,7 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone s ; GENERIC-LABEL: vpxorq: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpxorq: @@ -5187,7 +5187,7 @@ entry: define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: and_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: and_v64i8: @@ -5201,7 +5201,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: andn_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andn_v64i8: @@ -5219,7 +5219,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: or_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: or_v64i8: @@ -5233,7 +5233,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: xor_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: xor_v64i8: @@ -5247,7 +5247,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: and_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: and_v32i16: @@ -5261,7 +5261,7 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: andn_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andn_v32i16: @@ -5277,7 +5277,7 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: or_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: or_v32i16: @@ -5291,7 +5291,7 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: xor_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: xor_v32i16: @@ -5306,7 +5306,7 @@ define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; GENERIC-LABEL: masked_and_v16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5331,7 +5331,7 @@ define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x fl ; GENERIC-LABEL: masked_or_v16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5356,7 +5356,7 @@ define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; GENERIC-LABEL: masked_xor_v16f32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5381,7 +5381,7 @@ define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; GENERIC-LABEL: masked_and_v8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5406,7 +5406,7 @@ define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x doub ; GENERIC-LABEL: masked_or_v8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5431,7 +5431,7 @@ define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; GENERIC-LABEL: masked_xor_v8f64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5456,7 +5456,7 @@ define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, ; GENERIC-LABEL: test_mm512_mask_and_epi32: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_and_epi32: @@ -5478,7 +5478,7 @@ define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, < ; GENERIC-LABEL: test_mm512_mask_or_epi32: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_or_epi32: @@ -5500,7 +5500,7 @@ define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, ; GENERIC-LABEL: test_mm512_mask_xor_epi32: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_xor_epi32: @@ -5522,7 +5522,7 @@ define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, ; GENERIC-LABEL: test_mm512_mask_xor_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_xor_pd: @@ -5544,7 +5544,7 @@ define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, ; GENERIC-LABEL: test_mm512_maskz_xor_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_xor_pd: @@ -5566,7 +5566,7 @@ define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, ; GENERIC-LABEL: test_mm512_mask_xor_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_xor_ps: @@ -5588,7 +5588,7 @@ define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A ; GENERIC-LABEL: test_mm512_maskz_xor_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_xor_ps: @@ -5610,7 +5610,7 @@ define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, < ; GENERIC-LABEL: test_mm512_mask_or_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_or_pd: @@ -5632,7 +5632,7 @@ define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, ; GENERIC-LABEL: test_mm512_maskz_or_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_or_pd: @@ -5654,7 +5654,7 @@ define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, ; GENERIC-LABEL: test_mm512_mask_or_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_or_ps: @@ -5676,7 +5676,7 @@ define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, ; GENERIC-LABEL: test_mm512_maskz_or_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_or_ps: @@ -5698,7 +5698,7 @@ define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, ; GENERIC-LABEL: test_mm512_mask_and_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_and_pd: @@ -5720,7 +5720,7 @@ define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, ; GENERIC-LABEL: test_mm512_maskz_and_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_and_pd: @@ -5742,7 +5742,7 @@ define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, ; GENERIC-LABEL: test_mm512_mask_and_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_and_ps: @@ -5764,7 +5764,7 @@ define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A ; GENERIC-LABEL: test_mm512_maskz_and_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_and_ps: @@ -5786,7 +5786,7 @@ define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__ ; GENERIC-LABEL: test_mm512_mask_andnot_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_andnot_pd: @@ -5809,7 +5809,7 @@ define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %_ ; GENERIC-LABEL: test_mm512_maskz_andnot_pd: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_andnot_pd: @@ -5832,7 +5832,7 @@ define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %_ ; GENERIC-LABEL: test_mm512_mask_andnot_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] +; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_mask_andnot_ps: @@ -5855,7 +5855,7 @@ define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> % ; GENERIC-LABEL: test_mm512_maskz_andnot_ps: ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mm512_maskz_andnot_ps: @@ -6349,7 +6349,7 @@ define <16 x float> @mov_test31(i8 * %addr) { define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6368,7 +6368,7 @@ define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test33: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6387,7 +6387,7 @@ define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test34: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6406,7 +6406,7 @@ define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test35: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6425,7 +6425,7 @@ define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test36: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6444,7 +6444,7 @@ define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test37: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6463,7 +6463,7 @@ define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test38: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6482,7 +6482,7 @@ define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test39: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6744,7 +6744,7 @@ define void @mask16_mem(i16* %ptr) { ; GENERIC-LABEL: mask16_mem: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6766,7 +6766,7 @@ define void @mask8_mem(i8* %ptr) { ; GENERIC-LABEL: mask8_mem: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6816,9 +6816,9 @@ define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] ; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50] -; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:0.33] +; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] +; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7053,7 +7053,7 @@ define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] ; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.2: -; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7230,10 +7230,10 @@ define <64 x i8> @vmov_test16(i64 %x) { ; GENERIC-NEXT: movb $1, %al # sched: [1:0.33] ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7264,10 +7264,10 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { ; GENERIC-NEXT: setg %al # sched: [1:0.50] ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7299,14 +7299,14 @@ define <8 x i1> @vmov_test18(i8 %a, i16 %y) { ; GENERIC-NEXT: kshiftrw $8, %k2, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrw $9, %k2, %k2 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrb $6, %k1, %k3 # sched: [1:1.00] -; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00] +; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:0.33] ; GENERIC-NEXT: kshiftlb $7, %k2, %k2 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrb $1, %k2, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00] +; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7393,8 +7393,8 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { ; GENERIC-LABEL: store_v1i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:0.33] +; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7415,7 +7415,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7436,7 +7436,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7457,7 +7457,7 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7478,7 +7478,7 @@ define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7607,7 +7607,7 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [6:0.50] ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] @@ -7675,7 +7675,7 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33] ; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] @@ -7962,7 +7962,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { ; GENERIC-LABEL: test_bitcast_v8i1_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -7986,7 +7986,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { ; GENERIC-LABEL: test_bitcast_v16i1_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -8011,7 +8011,7 @@ define i16 @test_v16i1_add(i16 %x, i16 %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8036,7 +8036,7 @@ define i16 @test_v16i1_sub(i16 %x, i16 %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8061,7 +8061,7 @@ define i16 @test_v16i1_mul(i16 %x, i16 %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8086,7 +8086,7 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8111,7 +8111,7 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8136,7 +8136,7 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8218,7 +8218,7 @@ define <16 x float> @_inreg16xfloat(float %a) { define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8239,7 +8239,7 @@ define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %m define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8274,7 +8274,7 @@ define <16 x float> @_ss16xfloat_load(float* %a.ptr) { define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_mask_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8294,7 +8294,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_maskz_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8329,7 +8329,7 @@ define <8 x double> @_inreg8xdouble(double %a) { define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8350,7 +8350,7 @@ define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %m define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8385,7 +8385,7 @@ define <8 x double> @_sd8xdouble_load(double* %a.ptr) { define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_mask_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8405,7 +8405,7 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_maskz_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8456,7 +8456,7 @@ define <16 x i32> @test_vbroadcast() { ; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index 325dad9..d1af9d4 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -23,7 +23,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -45,7 +45,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -64,7 +64,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -86,7 +86,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -105,7 +105,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -127,7 +127,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -161,7 +161,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -183,7 +183,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -218,7 +218,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -239,7 +239,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -260,7 +260,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -281,7 +281,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -302,7 +302,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -323,7 +323,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -360,7 +360,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -381,7 +381,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -417,7 +417,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -439,7 +439,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -458,7 +458,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -480,7 +480,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -499,7 +499,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -521,7 +521,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -555,7 +555,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -577,7 +577,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -612,7 +612,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -633,7 +633,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -654,7 +654,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -675,7 +675,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -696,7 +696,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -717,7 +717,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -754,7 +754,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -775,7 +775,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -811,7 +811,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -833,7 +833,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -852,7 +852,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -874,7 +874,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -893,7 +893,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -915,7 +915,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -949,7 +949,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -971,7 +971,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1006,7 +1006,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1027,7 +1027,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1048,7 +1048,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1069,7 +1069,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1090,7 +1090,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1111,7 +1111,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1148,7 +1148,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1169,7 +1169,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1205,7 +1205,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1227,7 +1227,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1246,7 +1246,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1268,7 +1268,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1287,7 +1287,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1309,7 +1309,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1343,7 +1343,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1365,7 +1365,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1400,7 +1400,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1421,7 +1421,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1442,7 +1442,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1463,7 +1463,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1484,7 +1484,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1505,7 +1505,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1542,7 +1542,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1563,7 +1563,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1596,7 +1596,7 @@ define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) { define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1616,7 +1616,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1633,7 +1633,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1653,7 +1653,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1670,7 +1670,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1690,7 +1690,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1720,7 +1720,7 @@ define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) { define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1740,7 +1740,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1771,7 +1771,7 @@ define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1790,7 +1790,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1809,7 +1809,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> % define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1828,7 +1828,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1847,7 +1847,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> % define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1866,7 +1866,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1899,7 +1899,7 @@ define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1918,7 +1918,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1953,7 +1953,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1975,7 +1975,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1993,7 +1993,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2013,7 +2013,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2031,7 +2031,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2053,7 +2053,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2084,7 +2084,7 @@ define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) { define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2104,7 +2104,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2122,7 +2122,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2144,7 +2144,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2162,7 +2162,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2182,7 +2182,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2215,7 +2215,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2237,7 +2237,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2255,7 +2255,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2275,7 +2275,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2309,7 +2309,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2330,7 +2330,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2350,7 +2350,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2369,7 +2369,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2389,7 +2389,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2410,7 +2410,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2444,7 +2444,7 @@ define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2463,7 +2463,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2483,7 +2483,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2504,7 +2504,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2524,7 +2524,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2543,7 +2543,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2579,7 +2579,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2600,7 +2600,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2620,7 +2620,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2639,7 +2639,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2674,7 +2674,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2696,7 +2696,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x i32> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2715,7 +2715,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2737,7 +2737,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x i64> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2756,7 +2756,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2778,7 +2778,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x i32> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2812,7 +2812,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2834,7 +2834,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x i32> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2869,7 +2869,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2890,7 +2890,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2911,7 +2911,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2932,7 +2932,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2953,7 +2953,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2974,7 +2974,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3011,7 +3011,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3032,7 +3032,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3068,7 +3068,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3090,7 +3090,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3109,7 +3109,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3131,7 +3131,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3150,7 +3150,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3172,7 +3172,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3206,7 +3206,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3228,7 +3228,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3263,7 +3263,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3284,7 +3284,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3305,7 +3305,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3326,7 +3326,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3347,7 +3347,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3368,7 +3368,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3405,7 +3405,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3426,7 +3426,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3459,7 +3459,7 @@ define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3479,7 +3479,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3496,7 +3496,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3516,7 +3516,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3533,7 +3533,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3553,7 +3553,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3583,7 +3583,7 @@ define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3603,7 +3603,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3634,7 +3634,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3653,7 +3653,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3672,7 +3672,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3691,7 +3691,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3710,7 +3710,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3729,7 +3729,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3762,7 +3762,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3781,7 +3781,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3816,7 +3816,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3838,7 +3838,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3856,7 +3856,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3876,7 +3876,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3894,7 +3894,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3916,7 +3916,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3947,7 +3947,7 @@ define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3967,7 +3967,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3985,7 +3985,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4007,7 +4007,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4025,7 +4025,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4045,7 +4045,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4078,7 +4078,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4100,7 +4100,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4118,7 +4118,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4138,7 +4138,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4172,7 +4172,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4193,7 +4193,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4213,7 +4213,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4232,7 +4232,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4252,7 +4252,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4273,7 +4273,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4307,7 +4307,7 @@ define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4326,7 +4326,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4346,7 +4346,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4367,7 +4367,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4387,7 +4387,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4406,7 +4406,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4442,7 +4442,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4463,7 +4463,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4483,7 +4483,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4502,7 +4502,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4534,7 +4534,7 @@ define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4554,7 +4554,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4571,7 +4571,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4591,7 +4591,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4608,7 +4608,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4628,7 +4628,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4658,7 +4658,7 @@ define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4678,7 +4678,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4712,7 +4712,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4733,7 +4733,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4754,7 +4754,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4775,7 +4775,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4796,7 +4796,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4817,7 +4817,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4854,7 +4854,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4875,7 +4875,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4908,7 +4908,7 @@ define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4928,7 +4928,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4945,7 +4945,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4965,7 +4965,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4982,7 +4982,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5002,7 +5002,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5032,7 +5032,7 @@ define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5052,7 +5052,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5086,7 +5086,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5107,7 +5107,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5128,7 +5128,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5149,7 +5149,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5170,7 +5170,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5191,7 +5191,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5228,7 +5228,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5249,7 +5249,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5282,7 +5282,7 @@ define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5302,7 +5302,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5319,7 +5319,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5339,7 +5339,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5356,7 +5356,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5376,7 +5376,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5406,7 +5406,7 @@ define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5426,7 +5426,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5460,7 +5460,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5481,7 +5481,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5502,7 +5502,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5523,7 +5523,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5544,7 +5544,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5565,7 +5565,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5602,7 +5602,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5623,7 +5623,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5656,7 +5656,7 @@ define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) { define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5676,7 +5676,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5693,7 +5693,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5713,7 +5713,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5730,7 +5730,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> % define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5750,7 +5750,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5780,7 +5780,7 @@ define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5800,7 +5800,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5817,7 +5817,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> % define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5837,7 +5837,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5854,7 +5854,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5874,7 +5874,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5904,7 +5904,7 @@ define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5924,7 +5924,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5941,7 +5941,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5961,7 +5961,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5992,7 +5992,7 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6011,7 +6011,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6030,7 +6030,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6049,7 +6049,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6068,7 +6068,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i1 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6087,7 +6087,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6120,7 +6120,7 @@ define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6139,7 +6139,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6158,7 +6158,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i1 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6177,7 +6177,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6196,7 +6196,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6215,7 +6215,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6248,7 +6248,7 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6267,7 +6267,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6286,7 +6286,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6305,7 +6305,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6337,7 +6337,7 @@ define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6357,7 +6357,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6374,7 +6374,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6394,7 +6394,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6411,7 +6411,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i1 define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6431,7 +6431,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6461,7 +6461,7 @@ define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6481,7 +6481,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6498,7 +6498,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i1 define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6518,7 +6518,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6535,7 +6535,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6555,7 +6555,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6585,7 +6585,7 @@ define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6605,7 +6605,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6622,7 +6622,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6642,7 +6642,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6673,7 +6673,7 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6692,7 +6692,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6711,7 +6711,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6730,7 +6730,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6749,7 +6749,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6768,7 +6768,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6801,7 +6801,7 @@ define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6820,7 +6820,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6839,7 +6839,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6858,7 +6858,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6877,7 +6877,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6896,7 +6896,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6929,7 +6929,7 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6948,7 +6948,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6967,7 +6967,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6986,7 +6986,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7018,7 +7018,7 @@ define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7038,7 +7038,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7055,7 +7055,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7075,7 +7075,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7092,7 +7092,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i1 define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7112,7 +7112,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7142,7 +7142,7 @@ define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7162,7 +7162,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7179,7 +7179,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i1 define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7199,7 +7199,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7216,7 +7216,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7236,7 +7236,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7266,7 +7266,7 @@ define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7286,7 +7286,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7303,7 +7303,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7323,7 +7323,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7354,7 +7354,7 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7373,7 +7373,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7392,7 +7392,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7411,7 +7411,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7430,7 +7430,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7449,7 +7449,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7482,7 +7482,7 @@ define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7501,7 +7501,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7520,7 +7520,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7539,7 +7539,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7559,7 +7559,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7580,7 +7580,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7614,7 +7614,7 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7633,7 +7633,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7652,7 +7652,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7671,7 +7671,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7703,7 +7703,7 @@ define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) { define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7723,7 +7723,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7740,7 +7740,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7760,7 +7760,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7777,7 +7777,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7797,7 +7797,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7827,7 +7827,7 @@ define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) { define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7847,7 +7847,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7878,7 +7878,7 @@ define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7897,7 +7897,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7916,7 +7916,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> % define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7935,7 +7935,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7954,7 +7954,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> % define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7973,7 +7973,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8006,7 +8006,7 @@ define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8025,7 +8025,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8057,7 +8057,7 @@ define <8 x i32> @test2_8xi32_perm_mask0(<8 x i32> %vec) { define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8077,7 +8077,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8094,7 +8094,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mas define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8114,7 +8114,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8131,7 +8131,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mas define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8151,7 +8151,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8181,7 +8181,7 @@ define <8 x i32> @test2_8xi32_perm_mask3(<8 x i32> %vec) { define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8201,7 +8201,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8232,7 +8232,7 @@ define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) { define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8251,7 +8251,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8270,7 +8270,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8289,7 +8289,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8308,7 +8308,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8327,7 +8327,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8360,7 +8360,7 @@ define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) { define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8379,7 +8379,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8411,7 +8411,7 @@ define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8431,7 +8431,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8448,7 +8448,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8468,7 +8468,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8485,7 +8485,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8505,7 +8505,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8535,7 +8535,7 @@ define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8555,7 +8555,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8586,7 +8586,7 @@ define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8605,7 +8605,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8624,7 +8624,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8643,7 +8643,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8662,7 +8662,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8681,7 +8681,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8714,7 +8714,7 @@ define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8733,7 +8733,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8765,7 +8765,7 @@ define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %ve define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8785,7 +8785,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x flo define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8802,7 +8802,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8822,7 +8822,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x flo define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8839,7 +8839,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8859,7 +8859,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x flo define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8889,7 +8889,7 @@ define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %ve define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8909,7 +8909,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x flo define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8940,7 +8940,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8961,7 +8961,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8980,7 +8980,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9001,7 +9001,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9020,7 +9020,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9041,7 +9041,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9074,7 +9074,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9095,7 +9095,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9127,7 +9127,7 @@ define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9147,7 +9147,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9164,7 +9164,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9184,7 +9184,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9201,7 +9201,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9221,7 +9221,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9251,7 +9251,7 @@ define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9271,7 +9271,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9302,7 +9302,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9323,7 +9323,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9342,7 +9342,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9363,7 +9363,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9382,7 +9382,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9403,7 +9403,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9436,7 +9436,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9457,7 +9457,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9489,7 +9489,7 @@ define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9509,7 +9509,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9526,7 +9526,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9546,7 +9546,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9563,7 +9563,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9583,7 +9583,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9613,7 +9613,7 @@ define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9633,7 +9633,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9664,7 +9664,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9685,7 +9685,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9704,7 +9704,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9725,7 +9725,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9744,7 +9744,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9765,7 +9765,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9798,7 +9798,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9819,7 +9819,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9851,7 +9851,7 @@ define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9871,7 +9871,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9888,7 +9888,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9908,7 +9908,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9925,7 +9925,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9945,7 +9945,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9975,7 +9975,7 @@ define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9995,7 +9995,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10026,7 +10026,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10047,7 +10047,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10066,7 +10066,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10087,7 +10087,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10106,7 +10106,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10127,7 +10127,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10160,7 +10160,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10181,7 +10181,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10213,7 +10213,7 @@ define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) { define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10233,7 +10233,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10250,7 +10250,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10270,7 +10270,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10287,7 +10287,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10307,7 +10307,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10337,7 +10337,7 @@ define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) { define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10357,7 +10357,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10388,7 +10388,7 @@ define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10409,7 +10409,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10428,7 +10428,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10449,7 +10449,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10468,7 +10468,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10489,7 +10489,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10522,7 +10522,7 @@ define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10543,7 +10543,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10575,7 +10575,7 @@ define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10595,7 +10595,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10612,7 +10612,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10632,7 +10632,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10649,7 +10649,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10669,7 +10669,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10699,7 +10699,7 @@ define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10719,7 +10719,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10750,7 +10750,7 @@ define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %ve define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10771,7 +10771,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10790,7 +10790,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10811,7 +10811,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10830,7 +10830,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10851,7 +10851,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10884,7 +10884,7 @@ define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %ve define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10905,7 +10905,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10937,7 +10937,7 @@ define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) { define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10957,7 +10957,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10974,7 +10974,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10994,7 +10994,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11011,7 +11011,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11031,7 +11031,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11061,7 +11061,7 @@ define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) { define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11081,7 +11081,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11112,7 +11112,7 @@ define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11133,7 +11133,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11152,7 +11152,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11173,7 +11173,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11192,7 +11192,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11213,7 +11213,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11246,7 +11246,7 @@ define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11267,7 +11267,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11299,7 +11299,7 @@ define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11319,7 +11319,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11336,7 +11336,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11356,7 +11356,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11373,7 +11373,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11393,7 +11393,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11423,7 +11423,7 @@ define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) { define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11443,7 +11443,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11474,7 +11474,7 @@ define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11495,7 +11495,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11514,7 +11514,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11535,7 +11535,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11554,7 +11554,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11575,7 +11575,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11608,7 +11608,7 @@ define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11629,7 +11629,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11661,7 +11661,7 @@ define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11681,7 +11681,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11698,7 +11698,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11718,7 +11718,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11735,7 +11735,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11755,7 +11755,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11785,7 +11785,7 @@ define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11805,7 +11805,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11836,7 +11836,7 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x fl define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11857,7 +11857,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11876,7 +11876,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %v define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11897,7 +11897,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11916,7 +11916,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %v define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11937,7 +11937,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11970,7 +11970,7 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x fl define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11991,7 +11991,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12023,7 +12023,7 @@ define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12043,7 +12043,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12060,7 +12060,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12080,7 +12080,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12097,7 +12097,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12117,7 +12117,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12147,7 +12147,7 @@ define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12167,7 +12167,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12198,7 +12198,7 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x fl define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12219,7 +12219,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12238,7 +12238,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %v define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12259,7 +12259,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12278,7 +12278,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %v define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12299,7 +12299,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12332,7 +12332,7 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x fl define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12353,7 +12353,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12385,7 +12385,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x fl define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12405,7 +12405,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12422,7 +12422,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %ve define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12442,7 +12442,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12459,7 +12459,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %ve define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12479,7 +12479,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12509,7 +12509,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x fl define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12529,7 +12529,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12560,7 +12560,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12581,7 +12581,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12600,7 +12600,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12621,7 +12621,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12640,7 +12640,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12661,7 +12661,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12694,7 +12694,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12715,7 +12715,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12747,7 +12747,7 @@ define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x dou define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12767,7 +12767,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, < define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12784,7 +12784,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %ve define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12804,7 +12804,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, < define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12835,7 +12835,7 @@ define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12856,7 +12856,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12875,7 +12875,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12896,7 +12896,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12928,7 +12928,7 @@ define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x dou define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12948,7 +12948,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12965,7 +12965,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %ve define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12985,7 +12985,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13002,7 +13002,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %ve define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13022,7 +13022,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13052,7 +13052,7 @@ define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x dou define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13072,7 +13072,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13103,7 +13103,7 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13124,7 +13124,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13143,7 +13143,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13164,7 +13164,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13183,7 +13183,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13204,7 +13204,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13237,7 +13237,7 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13258,7 +13258,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13290,7 +13290,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x dou define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13310,7 +13310,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13327,7 +13327,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %ve define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13347,7 +13347,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13364,7 +13364,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %ve define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13384,7 +13384,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13414,7 +13414,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x dou define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13434,7 +13434,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13465,7 +13465,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13486,7 +13486,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13505,7 +13505,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13526,7 +13526,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13545,7 +13545,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13566,7 +13566,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13599,7 +13599,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13620,7 +13620,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13652,7 +13652,7 @@ define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13672,7 +13672,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13689,7 +13689,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1 define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13709,7 +13709,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13726,7 +13726,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1 define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13746,7 +13746,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13776,7 +13776,7 @@ define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13796,7 +13796,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13827,7 +13827,7 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x f define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13848,7 +13848,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13867,7 +13867,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> % define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13888,7 +13888,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13907,7 +13907,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> % define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13928,7 +13928,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13961,7 +13961,7 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x f define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13982,7 +13982,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14014,7 +14014,7 @@ define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14034,7 +14034,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14051,7 +14051,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1 define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14071,7 +14071,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14088,7 +14088,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1 define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14108,7 +14108,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14138,7 +14138,7 @@ define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14158,7 +14158,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14189,7 +14189,7 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x f define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14210,7 +14210,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14229,7 +14229,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> % define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14250,7 +14250,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14269,7 +14269,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> % define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14290,7 +14290,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14323,7 +14323,7 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x f define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14344,7 +14344,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14376,7 +14376,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x f define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14396,7 +14396,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14413,7 +14413,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %v define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14433,7 +14433,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14450,7 +14450,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %v define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14470,7 +14470,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14500,7 +14500,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x f define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14520,7 +14520,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14551,7 +14551,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14572,7 +14572,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14591,7 +14591,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14612,7 +14612,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14631,7 +14631,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14652,7 +14652,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14685,7 +14685,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14706,7 +14706,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14738,7 +14738,7 @@ define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x do define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14758,7 +14758,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14775,7 +14775,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %v define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14795,7 +14795,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14826,7 +14826,7 @@ define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14847,7 +14847,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %ve define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14866,7 +14866,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14887,7 +14887,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %ve define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14919,7 +14919,7 @@ define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x do define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14939,7 +14939,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14956,7 +14956,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %v define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14976,7 +14976,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14993,7 +14993,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %v define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15013,7 +15013,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15043,7 +15043,7 @@ define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x do define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15063,7 +15063,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15094,7 +15094,7 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15115,7 +15115,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15134,7 +15134,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15155,7 +15155,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15174,7 +15174,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15195,7 +15195,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15228,7 +15228,7 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15249,7 +15249,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15281,7 +15281,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x do define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15301,7 +15301,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15318,7 +15318,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %v define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15338,7 +15338,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15355,7 +15355,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %v define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15375,7 +15375,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15405,7 +15405,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x do define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15425,7 +15425,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15456,7 +15456,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15477,7 +15477,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15496,7 +15496,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15517,7 +15517,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15536,7 +15536,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15557,7 +15557,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15590,7 +15590,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15611,7 +15611,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 4651b80..5fa5a15 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -16,7 +16,7 @@ define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -40,7 +40,7 @@ define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; SANDY-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -261,7 +261,7 @@ define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] -; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] ; GENERIC-NEXT: movq %mm1, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -285,7 +285,7 @@ define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] ; SANDY-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] -; SANDY-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] ; SANDY-NEXT: movq %mm1, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -350,7 +350,7 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -374,7 +374,7 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; SANDY-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -439,7 +439,7 @@ define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] -; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] ; GENERIC-NEXT: movq %mm1, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -463,7 +463,7 @@ define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] ; SANDY-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] -; SANDY-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] ; SANDY-NEXT: movq %mm1, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2198,8 +2198,8 @@ declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pand: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] +; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:0.33] +; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2219,8 +2219,8 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; SANDY-LABEL: test_pand: ; SANDY: # %bb.0: -; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] +; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:0.33] +; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2276,8 +2276,8 @@ declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pandn: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] +; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] +; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2297,8 +2297,8 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; SANDY-LABEL: test_pandn: ; SANDY: # %bb.0: -; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] +; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] +; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -4503,8 +4503,8 @@ declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_por: ; GENERIC: # %bb.0: -; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:1.00] +; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4524,8 +4524,8 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; SANDY-LABEL: test_por: ; SANDY: # %bb.0: -; SANDY-NEXT: por %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:1.00] +; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -6861,8 +6861,8 @@ declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-LABEL: test_pxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] +; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] +; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6882,8 +6882,8 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; SANDY-LABEL: test_pxor: ; SANDY: # %bb.0: -; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] +; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] +; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll index bd58c18..b21b834 100644 --- a/llvm/test/CodeGen/X86/x87-schedule.ll +++ b/llvm/test/CodeGen/X86/x87-schedule.ll @@ -124,28 +124,28 @@ define void @test_fabs() optsize { ; BROADWELL-LABEL: test_fabs: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fabs # sched: [1:0.33] +; BROADWELL-NEXT: fabs # sched: [1:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fabs: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fabs # sched: [1:0.33] +; SKYLAKE-NEXT: fabs # sched: [1:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fabs: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fabs # sched: [1:0.33] +; SKX-NEXT: fabs # sched: [1:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fabs: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fabs # sched: [1:0.50] +; BTVER2-NEXT: fabs # sched: [2:1.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; @@ -540,28 +540,28 @@ define void @test_fchs() optsize { ; BROADWELL-LABEL: test_fchs: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fchs # sched: [1:0.33] +; BROADWELL-NEXT: fchs # sched: [1:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fchs: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fchs # sched: [1:0.33] +; SKYLAKE-NEXT: fchs # sched: [1:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fchs: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fchs # sched: [1:0.33] +; SKX-NEXT: fchs # sched: [1:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fchs: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fchs # sched: [1:0.50] +; BTVER2-NEXT: fchs # sched: [2:1.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ;