From: Simon Pilgrim Date: Wed, 2 May 2018 17:58:50 +0000 (+0000) Subject: [X86] Cleanup WriteFShuffle/WriteFVarShuffle (+256 variants) scheduler classes with... X-Git-Tag: llvmorg-7.0.0-rc1~6910 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=819f218f079077973d6b9473e2af01d5dd1a5e00;p=platform%2Fupstream%2Fllvm.git [X86] Cleanup WriteFShuffle/WriteFVarShuffle (+256 variants) scheduler classes with more common default values llvm-svn: 331380 --- diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 3de79f4..999ecd3 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -205,8 +205,8 @@ defm : BWWriteResPair; // Vector integ defm : BWWriteResPair; // Vector integer shifts. defm : BWWriteResPair; // Vector integer multiply. defm : BWWriteResPair; // PMULLD -defm : BWWriteResPair; // Vector shuffles. -defm : BWWriteResPair; // Vector variable shuffles. +defm : BWWriteResPair; // Vector shuffles. +defm : BWWriteResPair; // Vector variable shuffles. defm : BWWriteResPair; // Vector blends. defm : BWWriteResPair; // Vector variable blends. defm : BWWriteResPair; // Vector MPSAD. @@ -334,11 +334,11 @@ defm : BWWriteResPair; def : WriteRes { let Latency = 100; } // def WriteSystem : SchedWrite; // AVX2. -defm : BWWriteResPair; // Fp 256-bit width vector shuffles. -defm : BWWriteResPair; // Fp 256-bit width vector variable shuffles. -defm : BWWriteResPair; // 256-bit width vector shuffles. -defm : BWWriteResPair; // 256-bit width vector variable shuffles. -defm : BWWriteResPair; // Variable vector shifts. +defm : BWWriteResPair; // Fp 256-bit width vector shuffles. +defm : BWWriteResPair; // Fp 256-bit width vector variable shuffles. +defm : BWWriteResPair; // 256-bit width vector shuffles. +defm : BWWriteResPair; // 256-bit width vector variable shuffles. +defm : BWWriteResPair; // Variable vector shifts. // Old microcoded instructions that nobody use. def : WriteRes { let Latency = 100; } // def WriteMicrocoded : SchedWrite; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 5a942d8c..8473c65 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -172,14 +172,14 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -200,11 +200,11 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -221,6 +221,7 @@ def : WriteRes { let Latency = 6; let NumMicroOps = 2; } +def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>; def : WriteRes { let Latency = 2; @@ -874,14 +875,11 @@ def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm", - "(V?)PACKSSDWrm", +def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm", "(V?)PACKSSWBrm", "(V?)PACKUSDWrm", "(V?)PACKUSWBrm", "(V?)PALIGNRrmi", - "VPERMILPDmi", - "VPERMILPSmi", "(V?)PSHUFBrm", "(V?)PSHUFDmi", "(V?)PSHUFHWmi", @@ -893,13 +891,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm", "(V?)PUNPCKLBWrm", "(V?)PUNPCKLDQrm", "(V?)PUNPCKLQDQrm", - "(V?)PUNPCKLWDrm", - "(V?)SHUFPDrmi", - "(V?)SHUFPSrmi", - "(V?)UNPCKHPDrm", - "(V?)UNPCKHPSrm", - "(V?)UNPCKLPDrm", - "(V?)UNPCKLPSrm")>; + "(V?)PUNPCKLWDrm")>; def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 8; @@ -1415,13 +1407,7 @@ def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup53], (instregex "VPERM2F128rm", - "VPERM2I128rm", - "VPERMDYrm", - "VPERMPDYmi", - "VPERMPSYrm", - "VPERMQYmi", - "VPMOVZXBDYrm", +def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm", "VPMOVZXBQYrm", "VPMOVZXBWYrm", "VPMOVZXDQYrm", @@ -1798,8 +1784,8 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> { let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr", - "MUL_FPrST0", - "MUL_FST0r", + "MUL_FPrST0", + "MUL_FST0r", "MUL_FrST0")>; def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index b44ee05..0b0817c 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -159,10 +159,10 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -180,8 +180,8 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // TODO this is probably wrong for 256/512-bit for the "generic" model -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -322,10 +322,10 @@ def : WriteRes; // AVX2/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -372,9 +372,6 @@ def: InstRW<[SBWriteResGroup2], (instregex "FFREE", "RETQ", "ST_FPrr", "ST_Frr", - "VEXTRACTF128rr", - "VINSERTF128rr", - "VPERM2F128rr", "(V?)MOV64toPQIrr", "(V?)MOVDI2PDIrr")>; @@ -936,28 +933,6 @@ def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm", "VTESTPDrm", "VTESTPSrm")>; -def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128", - "(V?)INSERTPSrm", - "(V?)MOVHPDrm", - "(V?)MOVHPSrm", - "(V?)MOVLPDrm", - "(V?)MOVLPSrm", - "VPERMILPDmi", - "VPERMILPDrm", - "VPERMILPSmi", - "VPERMILPSrm", - "(V?)SHUFPDrmi", - "(V?)SHUFPSrmi", - "(V?)UNPCKHPDrm", - "(V?)UNPCKHPSrm", - "(V?)UNPCKLPDrm", - "(V?)UNPCKLPSrm")>; - def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 7; let NumMicroOps = 2; @@ -1135,15 +1110,6 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>; -def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm", - "VPERMILPDYrm", - "VPERMILPSYrm")>; - def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 8; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 3128ce6..b76369d 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -172,10 +172,10 @@ defm : SKLWriteResPair; // Fused Multipl defm : SKLWriteResPair; // Floating point fabs/fchs. defm : SKLWriteResPair; // Floating point and/or/xor logicals. defm : SKLWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). -defm : SKLWriteResPair; // Floating point vector shuffles. +defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector shuffles (YMM/ZMM). -defm : SKLWriteResPair; // Floating point vector shuffles. -defm : SKLWriteResPair; // Floating point vector shuffles. +defm : SKLWriteResPair; // Floating point vector shuffles. +defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector blends. defm : SKLWriteResPair; // Floating point vector blends. defm : SKLWriteResPair; // Fp vector variable blends. @@ -201,8 +201,8 @@ defm : SKLWriteResPair; // Vector int defm : SKLWriteResPair; // Vector integer shifts. defm : SKLWriteResPair; // Vector integer multiply. defm : SKLWriteResPair; -defm : SKLWriteResPair; // Vector shuffles. -defm : SKLWriteResPair; // Vector shuffles. +defm : SKLWriteResPair; // Vector shuffles. +defm : SKLWriteResPair; // Vector shuffles. defm : SKLWriteResPair; // Vector blends. defm : SKLWriteResPair; // Vector variable blends. defm : SKLWriteResPair; // Vector MPSAD. @@ -219,6 +219,7 @@ def : WriteRes { let Latency = 6; let NumMicroOps = 2; } +def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>; def : WriteRes { let Latency = 3; @@ -339,10 +340,10 @@ def : WriteRes { def : WriteRes { let Latency = 100; } // def WriteSystem : SchedWrite; // AVX2. -defm : SKLWriteResPair; // Fp 256-bit width vector shuffles. -defm : SKLWriteResPair; // Fp 256-bit width vector variable shuffles. -defm : SKLWriteResPair; // 256-bit width vector shuffles. -defm : SKLWriteResPair; // 256-bit width vector variable shuffles. +defm : SKLWriteResPair; // Fp 256-bit width vector shuffles. +defm : SKLWriteResPair; // Fp 256-bit width vector variable shuffles. +defm : SKLWriteResPair; // 256-bit width vector shuffles. +defm : SKLWriteResPair; // 256-bit width vector variable shuffles. defm : SKLWriteResPair; // Variable vector shifts. // Old microcoded instructions that nobody use. @@ -1260,18 +1261,13 @@ def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm", - "(V?)PACKSSDWrm", +def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm", "(V?)PACKSSWBrm", "(V?)PACKUSDWrm", "(V?)PACKUSWBrm", "(V?)PALIGNRrmi", "VPBROADCASTBrm", "VPBROADCASTWrm", - "VPERMILPDmi", - "VPERMILPDrm", - "VPERMILPSmi", - "VPERMILPSrm", "(V?)PSHUFBrm", "(V?)PSHUFDmi", "(V?)PSHUFHWmi", @@ -1283,13 +1279,7 @@ def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm", "(V?)PUNPCKLBWrm", "(V?)PUNPCKLDQrm", "(V?)PUNPCKLQDQrm", - "(V?)PUNPCKLWDrm", - "(V?)SHUFPDrmi", - "(V?)SHUFPSrmi", - "(V?)UNPCKHPDrm", - "(V?)UNPCKHPSrm", - "(V?)UNPCKLPDrm", - "(V?)UNPCKLPSrm")>; + "(V?)PUNPCKLWDrm")>; def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 7; @@ -1514,8 +1504,6 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m", "VPBLENDWYrmi", "VPBROADCASTBYrm", "VPBROADCASTWYrm", - "VPERMILPDYrm", - "VPERMILPSYrm", "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXWQYrm", @@ -1791,12 +1779,6 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> { def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m", "VPCMPGTQYrm", - "VPERM2F128rm", - "VPERM2I128rm", - "VPERMDYrm", - "VPERMPDYmi", - "VPERMPSYrm", - "VPERMQYmi", "VPMOVZXBDYrm", "VPMOVZXBQYrm", "VPMOVZXBWYrm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 00c38b3..667b519 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -172,10 +172,10 @@ defm : SKXWriteResPair; // Fused Multipl defm : SKXWriteResPair; // Floating point fabs/fchs. defm : SKXWriteResPair; // Floating point and/or/xor logicals. defm : SKXWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). -defm : SKXWriteResPair; // Floating point vector shuffles. +defm : SKXWriteResPair; // Floating point vector shuffles. defm : SKXWriteResPair; // Floating point vector shuffles (YMM/ZMM). -defm : SKXWriteResPair; // Floating point vector variable shuffles. -defm : SKXWriteResPair; // Floating point vector variable shuffles. +defm : SKXWriteResPair; // Floating point vector variable shuffles. +defm : SKXWriteResPair; // Floating point vector variable shuffles. defm : SKXWriteResPair; // Floating point vector blends. defm : SKXWriteResPair; // Floating point vector blends. defm : SKXWriteResPair; // Fp vector variable blends. @@ -201,8 +201,8 @@ defm : SKXWriteResPair; // Vector int defm : SKXWriteResPair; // Vector integer shifts. defm : SKXWriteResPair; // Vector integer multiply. defm : SKXWriteResPair; // Vector integer multiply. -defm : SKXWriteResPair; // Vector shuffles. -defm : SKXWriteResPair; // Vector variable shuffles. +defm : SKXWriteResPair; // Vector shuffles. +defm : SKXWriteResPair; // Vector variable shuffles. defm : SKXWriteResPair; // Vector blends. defm : SKXWriteResPair; // Vector variable blends. defm : SKXWriteResPair; // Vector MPSAD. @@ -219,6 +219,7 @@ def : WriteRes { let Latency = 6; let NumMicroOps = 2; } +def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>; def : WriteRes { let Latency = 3; @@ -339,10 +340,10 @@ def : WriteRes { def : WriteRes { let Latency = 100; } // def WriteSystem : SchedWrite; // AVX2. -defm : SKXWriteResPair; // Fp 256-bit width vector shuffles. -defm : SKXWriteResPair; // Fp 256-bit width vector variable shuffles. -defm : SKXWriteResPair; // 256-bit width vector shuffles. -defm : SKXWriteResPair; // 256-bit width vector variable shuffles. +defm : SKXWriteResPair; // Fp 256-bit width vector shuffles. +defm : SKXWriteResPair; // Fp 256-bit width vector variable shuffles. +defm : SKXWriteResPair; // 256-bit width vector shuffles. +defm : SKXWriteResPair; // 256-bit width vector variable shuffles. defm : SKXWriteResPair; // Variable vector shifts. // Old microcoded instructions that nobody use. @@ -1284,57 +1285,18 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0", "VPMINUQZ128rr", "VPMINUQZ256rr", "VPMINUQZrr", - "VPMOVQDZ128rr", - "VPMOVQDZ256rr", - "VPMOVQDZrr", "VPMOVSXBDYrr", - "VPMOVSXBDZ128rr", - "VPMOVSXBDZ256rr", - "VPMOVSXBDZrr", "VPMOVSXBQYrr", - "VPMOVSXBQZ128rr", - "VPMOVSXBQZ256rr", - "VPMOVSXBQZrr", "VPMOVSXBWYrr", - "VPMOVSXBWZ128rr", - "VPMOVSXBWZ256rr", - "VPMOVSXBWZrr", "VPMOVSXDQYrr", - "VPMOVSXDQZ128rr", - "VPMOVSXDQZ256rr", - "VPMOVSXDQZrr", "VPMOVSXWDYrr", - "VPMOVSXWDZ128rr", - "VPMOVSXWDZ256rr", - "VPMOVSXWDZrr", "VPMOVSXWQYrr", - "VPMOVSXWQZ128rr", - "VPMOVSXWQZ256rr", - "VPMOVSXWQZrr", "VPMOVZXBDYrr", - "VPMOVZXBDZ128rr", - "VPMOVZXBDZ256rr", - "VPMOVZXBDZrr", "VPMOVZXBQYrr", - "VPMOVZXBQZ128rr", - "VPMOVZXBQZ256rr", - "VPMOVZXBQZrr", "VPMOVZXBWYrr", - "VPMOVZXBWZ128rr", - "VPMOVZXBWZ256rr", - "VPMOVZXBWZrr", "VPMOVZXDQYrr", - "VPMOVZXDQZ128rr", - "VPMOVZXDQZ256rr", - "VPMOVZXDQZrr", "VPMOVZXWDYrr", - "VPMOVZXWDZ128rr", - "VPMOVZXWDZ256rr", - "VPMOVZXWDZrr", "VPMOVZXWQYrr", - "VPMOVZXWQZ128rr", - "VPMOVZXWQZ256rr", - "VPMOVZXWQZrr", "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined. "VPTESTMBZ128rr", "VPTESTMBZ256rr", @@ -2189,9 +2151,7 @@ def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)", - "(V?)INSERTPSrm", - "VMOVSDZrm(b?)", +def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)", "VMOVSSZrm(b?)", "VPACKSSDWZ128rm(b?)", "(V?)PACKSSDWrm", @@ -2207,14 +2167,6 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)", "VPBROADCASTBrm", "VPBROADCASTWZ128m(b?)", "VPBROADCASTWrm", - "VPERMILPDZ128m(b?)i", - "VPERMILPDZ128rm(b?)", - "VPERMILPDmi", - "VPERMILPDrm", - "VPERMILPSZ128m(b?)i", - "VPERMILPSZ128rm(b?)", - "VPERMILPSmi", - "VPERMILPSrm", "VPSHUFBZ128rm(b?)", "(V?)PSHUFBrm", "VPSHUFDZ128m(b?)i", @@ -2240,19 +2192,7 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)", "VPUNPCKLQDQZ128rm(b?)", "(V?)PUNPCKLQDQrm", "VPUNPCKLWDZ128rm(b?)", - "(V?)PUNPCKLWDrm", - "VSHUFPDZ128rm(b?)i", - "(V?)SHUFPDrmi", - "VSHUFPSZ128rm(b?)i", - "(V?)SHUFPSrmi", - "VUNPCKHPDZ128rm(b?)", - "(V?)UNPCKHPDrm", - "VUNPCKHPSZ128rm(b?)", - "(V?)UNPCKHPSrm", - "VUNPCKLPDZ128rm(b?)", - "(V?)UNPCKLPDrm", - "VUNPCKLPSZ128rm(b?)", - "(V?)UNPCKLPSrm")>; + "(V?)PUNPCKLWDrm")>; def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 7; @@ -2711,12 +2651,6 @@ def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m", "VPBROADCASTWYrm", "VPBROADCASTWZ256m(b?)", "VPBROADCASTWZm(b?)", - "VPERMILPDYrm", - "VPERMILPDZ256rm(b?)", - "VPERMILPDZrm(b?)", - "VPERMILPSYrm", - "VPERMILPSZ256rm(b?)", - "VPERMILPSZrm(b?)", "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXWQYrm", @@ -3367,40 +3301,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "VPCMPUWZrmi(b?)", "VPCMPWZ256rmi(b?)", "VPCMPWZrmi(b?)", - "VPERM2F128rm", - "VPERM2I128rm", - "VPERMDYrm", - "VPERMDZ256rm(b?)", - "VPERMDZrm(b?)", - "VPERMI2D256rm(b?)", - "VPERMI2Drm(b?)", - "VPERMI2PD256rm(b?)", - "VPERMI2PDrm(b?)", - "VPERMI2PS256rm(b?)", - "VPERMI2PSrm(b?)", - "VPERMI2Q256rm(b?)", - "VPERMI2Qrm(b?)", - "VPERMPDYmi", - "VPERMPDZ256m(b?)i", - "VPERMPDZ256rm(b?)", - "VPERMPDZm(b?)i", - "VPERMPDZrm(b?)", - "VPERMPSYrm", - "VPERMPSZ256rm(b?)", - "VPERMPSZrm(b?)", - "VPERMQYmi", - "VPERMQZ256m(b?)i", - "VPERMQZ256rm(b?)", - "VPERMQZm(b?)i", - "VPERMQZrm(b?)", - "VPERMT2D256rm(b?)", - "VPERMT2Drm(b?)", - "VPERMT2PD256rm(b?)", - "VPERMT2PDrm(b?)", - "VPERMT2PS256rm(b?)", - "VPERMT2PSrm(b?)", - "VPERMT2Q256rm(b?)", - "VPERMT2Qrm(b?)", "VPMAXSQZ256rm(b?)", "VPMAXSQZrm(b?)", "VPMAXUQZ256rm(b?)", @@ -3409,35 +3309,11 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "VPMINSQZrm(b?)", "VPMINUQZ256rm(b?)", "VPMINUQZrm(b?)", - "VPMOVSXBDZ256rm(b?)", - "VPMOVSXBDZrm(b?)", - "VPMOVSXBQZ256rm(b?)", - "VPMOVSXBQZrm(b?)", - "VPMOVSXBWZ256rm(b?)", - "VPMOVSXBWZrm(b?)", - "VPMOVSXDQZ256rm(b?)", - "VPMOVSXDQZrm(b?)", - "VPMOVSXWDZ256rm(b?)", - "VPMOVSXWDZrm(b?)", - "VPMOVSXWQZ256rm(b?)", - "VPMOVSXWQZrm(b?)", "VPMOVZXBDYrm", - "VPMOVZXBDZ256rm(b?)", - "VPMOVZXBDZrm(b?)", "VPMOVZXBQYrm", - "VPMOVZXBQZ256rm(b?)", - "VPMOVZXBQZrm(b?)", "VPMOVZXBWYrm", - "VPMOVZXBWZ256rm(b?)", - "VPMOVZXBWZrm(b?)", "VPMOVZXDQYrm", - "VPMOVZXDQZ256rm(b?)", - "VPMOVZXDQZrm(b?)", - "VPMOVZXWDZ256rm(b?)", - "VPMOVZXWDZrm(b?)", "VPMOVZXWQYrm", - "VPMOVZXWQZ256rm(b?)", - "VPMOVZXWQZrm(b?)", "VPSADBWYrm", "VPSADBWZ256rm(b?)", "VPSADBWZrm(b?)", @@ -3456,15 +3332,7 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "VPTESTNMQZ256rm(b?)", "VPTESTNMQZrm(b?)", "VPTESTNMWZ256rm(b?)", - "VPTESTNMWZrm(b?)", - "VSHUFF32X4Z256rm(b?)i", - "VSHUFF32X4Zrm(b?)i", - "VSHUFF64X2Z256rm(b?)i", - "VSHUFF64X2Zrm(b?)i", - "VSHUFI32X4Z256rm(b?)i", - "VSHUFI32X4Zrm(b?)i", - "VSHUFI64X2Z256rm(b?)i", - "VSHUFI64X2Zrm(b?)i")>; + "VPTESTNMWZrm(b?)")>; def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 10; diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index fee3c83..14da72a 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -523,7 +523,7 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; GENERIC-LABEL: test_inserti128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2506,7 +2506,7 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_perm2i128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2555,7 +2555,7 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; GENERIC-LABEL: test_permd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2605,7 +2605,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { ; GENERIC-LABEL: test_permpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00] ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2654,7 +2654,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2 ; GENERIC-LABEL: test_permps: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2704,7 +2704,7 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { ; GENERIC-LABEL: test_permq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index e28ac79..4f3b7f1 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -2957,7 +2957,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x16: @@ -2977,7 +2977,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x16: @@ -2998,7 +2998,7 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8mem_to_16x16: @@ -3018,7 +3018,7 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8mem_to_16x16: @@ -3104,7 +3104,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_32x8mem_to_32x16: @@ -3124,7 +3124,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_32x8mem_to_32x16: @@ -3210,7 +3210,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x32: @@ -3230,7 +3230,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x32: @@ -3250,7 +3250,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x32: @@ -3270,7 +3270,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x32: @@ -3290,7 +3290,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8mem_to_16x32: @@ -3310,7 +3310,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8mem_to_16x32: @@ -3396,7 +3396,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x8mem_to_2x64: @@ -3415,7 +3415,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: @@ -3449,7 +3449,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x64: @@ -3469,7 +3469,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: @@ -3504,7 +3504,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x64: @@ -3524,7 +3524,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x64mask: @@ -3542,7 +3542,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x64: @@ -3559,7 +3559,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x32: @@ -3579,7 +3579,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: @@ -3615,7 +3615,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16mem_to_8x32: @@ -3635,7 +3635,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x32mask: @@ -3703,7 +3703,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x16mem_to_16x32: @@ -3723,7 +3723,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x16mem_to_16x32mask: @@ -3741,7 +3741,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_16x16mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x16mem_to_16x32: @@ -3790,7 +3790,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x16mem_to_2x64: @@ -3810,7 +3810,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: @@ -3845,7 +3845,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x64: @@ -3865,7 +3865,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: @@ -3900,7 +3900,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16mem_to_8x64: @@ -3920,7 +3920,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x64mask: @@ -3938,7 +3938,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x64: @@ -3988,7 +3988,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x32mem_to_2x64: @@ -4008,7 +4008,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: @@ -4043,7 +4043,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x32mem_to_4x64: @@ -4063,7 +4063,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: @@ -4131,7 +4131,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x32mem_to_8x64: @@ -4151,7 +4151,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32mem_to_8x64mask: @@ -4169,7 +4169,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x32mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32mem_to_8x64: @@ -4473,7 +4473,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { ; GENERIC-LABEL: extload_v8i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8258,7 +8258,7 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { define <16 x float> @_ss16xfloat_load(float* %a.ptr) { ; GENERIC-LABEL: _ss16xfloat_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _ss16xfloat_load: @@ -8275,7 +8275,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 ; GENERIC-LABEL: _ss16xfloat_mask_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _ss16xfloat_mask_load: @@ -8295,7 +8295,7 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) ; GENERIC-LABEL: _ss16xfloat_maskz_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _ss16xfloat_maskz_load: @@ -8369,7 +8369,7 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { define <8 x double> @_sd8xdouble_load(double* %a.ptr) { ; GENERIC-LABEL: _sd8xdouble_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _sd8xdouble_load: @@ -8386,7 +8386,7 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 ; GENERIC-LABEL: _sd8xdouble_mask_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _sd8xdouble_mask_load: @@ -8406,7 +8406,7 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) ; GENERIC-LABEL: _sd8xdouble_maskz_load: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: _sd8xdouble_maskz_load: @@ -8700,7 +8700,7 @@ define <16 x float> @broadcast_ss_spill(float %x) { ; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; GENERIC-NEXT: callq func_f32 -; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00] +; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] ; GENERIC-NEXT: .cfi_def_cfa_offset 8 ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8732,7 +8732,7 @@ define <8 x double> @broadcast_sd_spill(double %x) { ; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; GENERIC-NEXT: callq func_f64 -; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00] +; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] ; GENERIC-NEXT: .cfi_def_cfa_offset 8 ; GENERIC-NEXT: retq # sched: [1:1.00] diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index 90da799..3d627ce 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -202,7 +202,7 @@ define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_mem_mask0: @@ -219,7 +219,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask0: @@ -240,7 +240,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0: @@ -261,7 +261,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask1: @@ -282,7 +282,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1: @@ -303,7 +303,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask2: @@ -324,7 +324,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2: @@ -344,7 +344,7 @@ define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi16_perm_mem_mask3: @@ -361,7 +361,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask3: @@ -382,7 +382,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3: @@ -596,7 +596,7 @@ define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] -; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_mem_mask0: @@ -613,7 +613,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask0: @@ -634,7 +634,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0: @@ -655,7 +655,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask1: @@ -676,7 +676,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1: @@ -697,7 +697,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask2: @@ -718,7 +718,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2: @@ -738,7 +738,7 @@ define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] -; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_32xi16_perm_mem_mask3: @@ -755,7 +755,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask3: @@ -776,7 +776,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] ; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3: @@ -990,7 +990,7 @@ define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; GENERIC-LABEL: test_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_perm_mem_mask0: @@ -1007,7 +1007,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask0: @@ -1028,7 +1028,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0: @@ -1049,7 +1049,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask1: @@ -1070,7 +1070,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1: @@ -1091,7 +1091,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask2: @@ -1112,7 +1112,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2: @@ -1132,7 +1132,7 @@ define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; GENERIC-LABEL: test_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_perm_mem_mask3: @@ -1149,7 +1149,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask3: @@ -1170,7 +1170,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3: @@ -1384,7 +1384,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; GENERIC-LABEL: test_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_perm_mem_mask0: @@ -1401,7 +1401,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask0: @@ -1422,7 +1422,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0: @@ -1443,7 +1443,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask1: @@ -1464,7 +1464,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1: @@ -1485,7 +1485,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask2: @@ -1506,7 +1506,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2: @@ -1526,7 +1526,7 @@ define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; GENERIC-LABEL: test_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_perm_mem_mask3: @@ -1543,7 +1543,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask3: @@ -1564,7 +1564,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i3 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3: @@ -1757,7 +1757,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { ; GENERIC-LABEL: test_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_perm_mem_mask0: @@ -1772,7 +1772,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask0: @@ -1791,7 +1791,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0: @@ -1810,7 +1810,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask1: @@ -1829,7 +1829,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1: @@ -1848,7 +1848,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask2: @@ -1867,7 +1867,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2: @@ -1885,7 +1885,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> % define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { ; GENERIC-LABEL: test_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_perm_mem_mask3: @@ -1900,7 +1900,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %ve ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask3: @@ -1919,7 +1919,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> % ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3: @@ -2293,7 +2293,7 @@ define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_perm_mem_mask0: @@ -2310,7 +2310,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask0: @@ -2331,7 +2331,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0: @@ -2351,7 +2351,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1: @@ -2370,7 +2370,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: @@ -2390,7 +2390,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask2: @@ -2411,7 +2411,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2: @@ -2430,7 +2430,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_perm_imm_mem_mask3: @@ -2445,7 +2445,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3: @@ -2464,7 +2464,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: @@ -2484,7 +2484,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask4: @@ -2505,7 +2505,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4: @@ -2525,7 +2525,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5: @@ -2544,7 +2544,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: @@ -2563,7 +2563,7 @@ define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_perm_mem_mask6: @@ -2580,7 +2580,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask6: @@ -2601,7 +2601,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6: @@ -2621,7 +2621,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7: @@ -2640,7 +2640,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i6 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: @@ -2853,7 +2853,7 @@ define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) { ; GENERIC-LABEL: test_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_perm_mem_mask0: @@ -2870,7 +2870,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0: @@ -2891,7 +2891,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0: @@ -2912,7 +2912,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1: @@ -2933,7 +2933,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1: @@ -2954,7 +2954,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2: @@ -2975,7 +2975,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2: @@ -2995,7 +2995,7 @@ define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mas ; GENERIC-LABEL: test_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_perm_mem_mask3: @@ -3012,7 +3012,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x fl ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3: @@ -3033,7 +3033,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] ; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3: @@ -3247,7 +3247,7 @@ define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) { ; GENERIC-LABEL: test_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_perm_mem_mask0: @@ -3264,7 +3264,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0: @@ -3285,7 +3285,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0: @@ -3306,7 +3306,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1: @@ -3327,7 +3327,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1: @@ -3348,7 +3348,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2: @@ -3369,7 +3369,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2: @@ -3389,7 +3389,7 @@ define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) { ; GENERIC-LABEL: test_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_perm_mem_mask3: @@ -3406,7 +3406,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3: @@ -3427,7 +3427,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <1 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3: @@ -3620,7 +3620,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { ; GENERIC-LABEL: test_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_perm_mem_mask0: @@ -3635,7 +3635,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0: @@ -3654,7 +3654,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0: @@ -3673,7 +3673,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1: @@ -3692,7 +3692,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1: @@ -3711,7 +3711,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2: @@ -3730,7 +3730,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2: @@ -3748,7 +3748,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { ; GENERIC-LABEL: test_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_perm_mem_mask3: @@ -3763,7 +3763,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3: @@ -3782,7 +3782,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3: @@ -4156,7 +4156,7 @@ define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_perm_mem_mask0: @@ -4173,7 +4173,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0: @@ -4194,7 +4194,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0: @@ -4214,7 +4214,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: @@ -4233,7 +4233,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: @@ -4253,7 +4253,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2: @@ -4274,7 +4274,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2: @@ -4293,7 +4293,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3: @@ -4308,7 +4308,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: @@ -4327,7 +4327,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: @@ -4347,7 +4347,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4: @@ -4368,7 +4368,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4: @@ -4388,7 +4388,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: @@ -4407,7 +4407,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: @@ -4426,7 +4426,7 @@ define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [6:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_perm_mem_mask6: @@ -4443,7 +4443,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6: @@ -4464,7 +4464,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50] ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6: @@ -4484,7 +4484,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: @@ -4503,7 +4503,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: @@ -8941,7 +8941,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8962,7 +8962,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: @@ -8981,7 +8981,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9002,7 +9002,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: @@ -9021,7 +9021,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9042,7 +9042,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: @@ -9075,7 +9075,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9096,7 +9096,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: @@ -9288,7 +9288,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, < define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask0: @@ -9303,7 +9303,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9324,7 +9324,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: @@ -9343,7 +9343,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9364,7 +9364,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: @@ -9383,7 +9383,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9404,7 +9404,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: @@ -9422,7 +9422,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask3: @@ -9437,7 +9437,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9458,7 +9458,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: @@ -9665,7 +9665,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9686,7 +9686,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: @@ -9705,7 +9705,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9726,7 +9726,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: @@ -9745,7 +9745,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9766,7 +9766,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: @@ -9799,7 +9799,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9820,7 +9820,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: @@ -10012,7 +10012,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, < define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask0: @@ -10027,7 +10027,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10048,7 +10048,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: @@ -10067,7 +10067,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10088,7 +10088,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: @@ -10107,7 +10107,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10128,7 +10128,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: @@ -10146,7 +10146,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask3: @@ -10161,7 +10161,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10182,7 +10182,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: @@ -10374,7 +10374,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; GENERIC-LABEL: test_8xi32_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_shuff_mem_mask0: @@ -10389,7 +10389,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10410,7 +10410,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: @@ -10429,7 +10429,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10450,7 +10450,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: @@ -10469,7 +10469,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10490,7 +10490,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: @@ -10508,7 +10508,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; GENERIC-LABEL: test_8xi32_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_shuff_mem_mask3: @@ -10523,7 +10523,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10544,7 +10544,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: @@ -10736,7 +10736,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; GENERIC-LABEL: test_16xi32_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_shuff_mem_mask0: @@ -10751,7 +10751,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10772,7 +10772,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: @@ -10791,7 +10791,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10812,7 +10812,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: @@ -10831,7 +10831,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10852,7 +10852,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: @@ -10870,7 +10870,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; GENERIC-LABEL: test_16xi32_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_shuff_mem_mask3: @@ -10885,7 +10885,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i3 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10906,7 +10906,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: @@ -11098,7 +11098,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; GENERIC-LABEL: test_4xi64_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_shuff_mem_mask0: @@ -11113,7 +11113,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11134,7 +11134,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: @@ -11153,7 +11153,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11174,7 +11174,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: @@ -11193,7 +11193,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11214,7 +11214,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: @@ -11232,7 +11232,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; GENERIC-LABEL: test_4xi64_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_shuff_mem_mask3: @@ -11247,7 +11247,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11268,7 +11268,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: @@ -11460,7 +11460,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; GENERIC-LABEL: test_8xi64_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_shuff_mem_mask0: @@ -11475,7 +11475,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11496,7 +11496,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: @@ -11515,7 +11515,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11536,7 +11536,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: @@ -11555,7 +11555,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11576,7 +11576,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: @@ -11594,7 +11594,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; GENERIC-LABEL: test_8xi64_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_shuff_mem_mask3: @@ -11609,7 +11609,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11630,7 +11630,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: @@ -11837,7 +11837,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11858,7 +11858,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: @@ -11877,7 +11877,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11898,7 +11898,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: @@ -11917,7 +11917,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11938,7 +11938,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: @@ -11971,7 +11971,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11992,7 +11992,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %v ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: @@ -12836,7 +12836,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12857,7 +12857,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: @@ -12876,7 +12876,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12897,7 +12897,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: @@ -13828,7 +13828,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13849,7 +13849,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: @@ -13868,7 +13868,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13889,7 +13889,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: @@ -13908,7 +13908,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13929,7 +13929,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: @@ -13962,7 +13962,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13983,7 +13983,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> % ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: @@ -14827,7 +14827,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %ve ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14848,7 +14848,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: @@ -14867,7 +14867,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %ve ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14888,7 +14888,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: diff --git a/llvm/test/CodeGen/X86/xop-schedule.ll b/llvm/test/CodeGen/X86/xop-schedule.ll index 9d9f2c5..70f73a8 100644 --- a/llvm/test/CodeGen/X86/xop-schedule.ll +++ b/llvm/test/CodeGen/X86/xop-schedule.ll @@ -212,8 +212,8 @@ define void @test_vpermil2pd_128(<2 x double> %a0, <2 x double> %a1, <2 x double ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -234,8 +234,8 @@ define void @test_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -258,8 +258,8 @@ define void @test_vpermil2ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -280,8 +280,8 @@ define void @test_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> % ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00] +; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00]