From: Simon Pilgrim Date: Thu, 10 May 2018 17:06:09 +0000 (+0000) Subject: [X86] Split WriteVecALU/WriteVecLogic/WriteShuffle/WriteVarShuffle/WritePSADBW/WriteP... X-Git-Tag: llvmorg-7.0.0-rc1~6327 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=38ac0e9c6b1ed09bae3ebb11414fa9908a6b10a3;p=platform%2Fupstream%2Fllvm.git [X86] Split WriteVecALU/WriteVecLogic/WriteShuffle/WriteVarShuffle/WritePSADBW/WritePHAdd scheduler classes Split off XMM classes from the default (MMX) classes. llvm-svn: 331999 --- diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 958ec92..cf2e33c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1333,7 +1333,7 @@ multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, avx512_int_broadcast_rm_lowering<_.info256, _.info256>, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteShuffleXLd, _.info128, _.info128>, EVEX_V128; } } @@ -1353,7 +1353,8 @@ multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", (_Dst.VT (X86SubVBroadcast (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>, - AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>; + Sched<[SchedWriteShuffle.YMM.Folded]>, + AVX5128IBase, EVEX; } // This should be used for the AVX512DQ broadcast instructions. It disables @@ -1367,7 +1368,8 @@ multiclass avx512_subvec_broadcast_rm_dq opc, string OpcodeStr, (null_frag), (_Dst.VT (X86SubVBroadcast (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>, - AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>; + Sched<[SchedWriteShuffle.YMM.Folded]>, + AVX5128IBase, EVEX; } let Predicates = [HasAVX512] in { @@ -1646,7 +1648,7 @@ multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split, EVEX_V128; } diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index a2f9b14..47b2f5d 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -14,11 +14,11 @@ multiclass xop2op opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rr : IXOP, XOP, Sched<[WritePHAdd]>; + [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>; def rm : IXOP, XOP, - Sched<[WritePHAdd.Folded, ReadAfterLd]>; + Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 764decf..fcda188 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -265,8 +265,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : BWWriteResPair; // Vector integer ALU op, no logicals. +defm : BWWriteResPair; // Vector integer ALU op, no logicals. defm : BWWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). defm : BWWriteResPair; // Vector integer and/or/xor. +defm : BWWriteResPair; // Vector integer and/or/xor. defm : BWWriteResPair; // Vector integer and/or/xor (YMM/ZMM). defm : BWWriteResPair; // Vector integer TEST instructions. defm : BWWriteResPair; // Vector integer TEST instructions (YMM/ZMM). @@ -276,8 +278,10 @@ defm : BWWriteResPair; // Vector intege defm : BWWriteResPair; // Vector PMULLD. defm : BWWriteResPair; // Vector PMULLD (YMM/ZMM). defm : BWWriteResPair; // Vector shuffles. +defm : BWWriteResPair; // Vector shuffles. defm : BWWriteResPair; // Vector shuffles (YMM/ZMM). defm : BWWriteResPair; // Vector variable shuffles. +defm : BWWriteResPair; // Vector variable shuffles. defm : BWWriteResPair; // Vector variable shuffles (YMM/ZMM). defm : BWWriteResPair; // Vector blends. defm : BWWriteResPair; // Vector blends (YMM/ZMM). @@ -286,6 +290,7 @@ defm : BWWriteResPair; // Vector variab defm : BWWriteResPair; // Vector MPSAD. defm : BWWriteResPair; // Vector MPSAD. defm : BWWriteResPair; // Vector PSADBW. +defm : BWWriteResPair; // Vector PSADBW. defm : BWWriteResPair; // Vector PSADBW (YMM/ZMM). defm : BWWriteResPair; // Vector PHMINPOS. @@ -448,6 +453,7 @@ def : WriteRes; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; +defm : BWWriteResPair; defm : BWWriteResPair; // Remaining instrs. diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 1e431e0..1dda56a 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -255,11 +255,13 @@ defm : X86WriteRes; defm : X86WriteRes; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -267,8 +269,10 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -278,7 +282,8 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -684,7 +689,8 @@ def : InstRW<[HWWriteFXTRACT], (instrs FXTRACT)>; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; //=== Floating Point XMM and YMM Instructions ===// @@ -913,26 +919,16 @@ def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 7; + let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm", - "(V?)PACKSSWBrm", - "(V?)PACKUSDWrm", - "(V?)PACKUSWBrm", - "(V?)PALIGNRrmi", - "(V?)PSHUFDmi", - "(V?)PSHUFHWmi", - "(V?)PSHUFLWmi", - "(V?)PUNPCKHBWrm", - "(V?)PUNPCKHDQrm", - "(V?)PUNPCKHQDQrm", - "(V?)PUNPCKHWDrm", - "(V?)PUNPCKLBWrm", - "(V?)PUNPCKLDQrm", - "(V?)PUNPCKLQDQrm", - "(V?)PUNPCKLWDrm")>; +def: InstRW<[HWWriteResGroup13], (instregex "(V?)PMOV(SX|ZX)BDrm", + "(V?)PMOV(SX|ZX)BQrm", + "(V?)PMOV(SX|ZX)BWrm", + "(V?)PMOV(SX|ZX)DQrm", + "(V?)PMOV(SX|ZX)WDrm", + "(V?)PMOV(SX|ZX)WQrm")>; def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 8; @@ -943,13 +939,6 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXWQYrm")>; -def HWWriteResGroup13_2 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PSHUFBrm")>; - def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> { let Latency = 6; let NumMicroOps = 2; @@ -974,14 +963,7 @@ def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm", "BLSI(32|64)rm", "BLSMSK(32|64)rm", "BLSR(32|64)rm", - "MOVBE(16|32|64)rm", - "MMX_PABS(B|D|W)rm", - "MMX_P(ADD|SUB)(B|D|W|Q)irm", - "MMX_P(ADD|SUB)(U?)S(B|W)irm", - "MMX_PAVG(B|W)irm", - "MMX_PCMP(EQ|GT)(B|D|W)irm", - "MMX_P(MAX|MIN)(SW|UB)irm", - "MMX_PSIGN(B|D|W)rm")>; + "MOVBE(16|32|64)rm")>; def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> { let Latency = 7; @@ -992,16 +974,6 @@ def: InstRW<[HWWriteResGroup17], (instregex "VINSERTF128rm", "VINSERTI128rm", "VPBLENDDrmi")>; -def HWWriteResGroup17_1 : SchedWriteRes<[HWPort23,HWPort015]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PANDNirm", - "MMX_PANDirm", - "MMX_PORirm", - "MMX_PXORirm")>; - def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> { let Latency = 8; let NumMicroOps = 2; @@ -1356,13 +1328,6 @@ def HWWriteResGroup62 : SchedWriteRes<[HWPort1,HWPort4,HWPort237]> { def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m", "IST_F(16|32)m")>; -def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup64], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>; - def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> { let Latency = 8; let NumMicroOps = 4; @@ -1594,13 +1559,6 @@ def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> { def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m", "VPCMPGTQYrm")>; -def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PSADBWirm")>; - def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> { let Latency = 5; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 295c724..821f657 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -235,11 +235,13 @@ defm : X86WriteRes; defm : X86WriteRes; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -247,8 +249,10 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // TODO this is probably wrong for 256/512-bit for the "generic" model defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -256,7 +260,8 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -295,7 +300,8 @@ def : WriteRes { defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; //////////////////////////////////////////////////////////////////////////////// @@ -471,37 +477,10 @@ def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNRrri", - "(V?)PACKSSDWrr", - "(V?)PACKSSWBrr", - "(V?)PACKUSDWrr", - "(V?)PACKUSWBrr", - "(V?)PALIGNRrri", - "(V?)PMOVSXBDrr", - "(V?)PMOVSXBQrr", - "(V?)PMOVSXBWrr", - "(V?)PMOVSXDQrr", - "(V?)PMOVSXWDrr", - "(V?)PMOVSXWQrr", - "(V?)PMOVZXBDrr", - "(V?)PMOVZXBQrr", - "(V?)PMOVZXBWrr", - "(V?)PMOVZXDQrr", - "(V?)PMOVZXWDrr", - "(V?)PMOVZXWQrr", - "(V?)PSHUFDri", - "(V?)PSHUFHWri", - "(V?)PSHUFLWri", - "(V?)PSLLDQri", - "(V?)PSRLDQri", - "(V?)PUNPCKHBWrr", - "(V?)PUNPCKHDQrr", - "(V?)PUNPCKHQDQrr", - "(V?)PUNPCKHWDrr", - "(V?)PUNPCKLBWrr", - "(V?)PUNPCKLDQrr", - "(V?)PUNPCKLQDQrr", - "(V?)PUNPCKLWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr", + "MMX_PADDQirr", + "MMX_PALIGNRrri", + "MMX_PSIGN(B|D|W)rr")>; def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { let Latency = 1; @@ -608,12 +587,6 @@ def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { let ResourceCycles = [1]; } def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr", - "MMX_PADD(B|D|W)irr", - "MMX_P(ADD|SUB)(U?)S(B|W)irr", - "MMX_PAVG(B|W)irr", - "MMX_PCMP(EQ|GT)(B|D|W)irr", - "MMX_P(MAX|MIN)(SW|UB)irr", - "MMX_PSUB(B|D|Q|W)irr", "PUSHFS64", "(V?)CVTDQ2PS(Y?)rr")>; @@ -884,7 +857,6 @@ def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> { } def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABS(B|D|W)rm", "MMX_PALIGNRrmi", - "MMX_PSHUFBrm", "MMX_PSIGN(B|D|W)rm")>; def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> { @@ -893,11 +865,7 @@ def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> { let ResourceCycles = [1,1]; } def: InstRW<[SBWriteResGroup52], (instregex "LODSL", - "LODSQ", - "MMX_PANDirm", - "MMX_PANDNirm", - "MMX_PORirm", - "MMX_PXORirm")>; + "LODSQ")>; def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> { let Latency = 6; @@ -944,46 +912,7 @@ def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup59], (instregex "(V?)PACKSSDWrm", - "(V?)PACKSSWBrm", - "(V?)PACKUSDWrm", - "(V?)PACKUSWBrm", - "(V?)PALIGNRrmi", - "(V?)PMOVSXBDrm", - "(V?)PMOVSXBQrm", - "(V?)PMOVSXBWrm", - "(V?)PMOVSXDQrm", - "(V?)PMOVSXWDrm", - "(V?)PMOVSXWQrm", - "(V?)PMOVZXBDrm", - "(V?)PMOVZXBQrm", - "(V?)PMOVZXBWrm", - "(V?)PMOVZXDQrm", - "(V?)PMOVZXWDrm", - "(V?)PMOVZXWQrm", - "(V?)PSHUFDmi", - "(V?)PSHUFHWmi", - "(V?)PSHUFLWmi", - "(V?)PUNPCKHBWrm", - "(V?)PUNPCKHDQrm", - "(V?)PUNPCKHQDQrm", - "(V?)PUNPCKHWDrm", - "(V?)PUNPCKLBWrm", - "(V?)PUNPCKLDQrm", - "(V?)PUNPCKLQDQrm", - "(V?)PUNPCKLWDrm")>; - -def SBWriteResGroup59a : SchedWriteRes<[SBPort23,SBPort1]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm", - "MMX_P(ADD|SUB)(U?)S(B|W)irm", - "MMX_PAVG(B|W)irm", - "MMX_PCMP(EQ|GT)(B|D|W)irm", - "MMX_P(MAX|MIN)(SW|UB)irm", - "MMX_PSUB(B|D|Q|W)irm")>; +def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>; def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> { let Latency = 7; @@ -1060,13 +989,6 @@ def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>; -def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>; - def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> { let Latency = 8; let NumMicroOps = 4; @@ -1134,13 +1056,6 @@ def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8", "SHRD(16|32|64)mri8")>; -def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup89_2], (instregex "MMX_PSADBWirm")>; - def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> { let Latency = 9; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index d521864..9c5f7de 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -256,9 +256,11 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : SKLWriteResPair; // Vector integer ALU op, no logicals. +defm : SKLWriteResPair; // Vector integer ALU op, no logicals. +defm : SKLWriteResPair; // Vector integer ALU op, no logicals (XMM). defm : SKLWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). -defm : SKLWriteResPair; // Vector integer and/or/xor. +defm : SKLWriteResPair; // Vector integer and/or/xor. +defm : SKLWriteResPair; // Vector integer and/or/xor (XMM). defm : SKLWriteResPair; // Vector integer and/or/xor (YMM/ZMM). defm : SKLWriteResPair; // Vector integer TEST instructions. defm : SKLWriteResPair; // Vector integer TEST instructions (YMM/ZMM). @@ -268,17 +270,20 @@ defm : SKLWriteResPair; // Vector int defm : SKLWriteResPair; // Vector PMULLD. defm : SKLWriteResPair; // Vector PMULLD (YMM/ZMM). defm : SKLWriteResPair; // Vector shuffles. +defm : SKLWriteResPair; // Vector shuffles (XMM). defm : SKLWriteResPair; // Vector shuffles (YMM/ZMM). -defm : SKLWriteResPair; // Vector shuffles. +defm : SKLWriteResPair; // Vector shuffles. +defm : SKLWriteResPair; // Vector shuffles (XMM). defm : SKLWriteResPair; // Vector shuffles (YMM/ZMM). defm : SKLWriteResPair; // Vector blends. defm : SKLWriteResPair; // Vector blends (YMM/ZMM). defm : SKLWriteResPair; // Vector variable blends. defm : SKLWriteResPair; // Vector variable blends (YMM/ZMM). defm : SKLWriteResPair; // Vector MPSAD. -defm : SKLWriteResPair; // Vector MPSAD. -defm : SKLWriteResPair; // Vector PSADBW. -defm : SKLWriteResPair; // Vector PSADBW. +defm : SKLWriteResPair; // Vector MPSAD (YMM/ZMM). +defm : SKLWriteResPair; // Vector PSADBW. +defm : SKLWriteResPair; // Vector PSADBW (XMM). +defm : SKLWriteResPair; // Vector PSADBW (YMM/ZMM). defm : SKLWriteResPair; // Vector PHMINPOS. // Vector integer shifts. @@ -450,7 +455,8 @@ def : WriteRes; defm : SKLWriteResPair; defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : SKLWriteResPair; // Remaining instrs. @@ -497,15 +503,7 @@ def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup6], (instrs FINCSTP, FNOP)>; -def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr", - "MMX_PABS(B|D|W)rr", - "MMX_PADD(B|D|Q|W)irr", - "MMX_PANDNirr", - "MMX_PANDirr", - "MMX_PORirr", - "MMX_PSIGN(B|D|W)rr", - "MMX_PSUB(B|D|Q|W)irr", - "MMX_PXORirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr")>; def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { let Latency = 1; @@ -768,13 +766,6 @@ def SKLWriteResGroup36 : SchedWriteRes<[SKLPort5,SKLPort01]> { def: InstRW<[SKLWriteResGroup36], (instregex "(V?)PHADDSW(Y?)rr", "(V?)PHSUBSW(Y?)rr")>; -def SKLWriteResGroup37 : SchedWriteRes<[SKLPort5,SKLPort05]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>; - def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; @@ -1037,20 +1028,6 @@ def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> { def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64", "JMP(16|32|64)m")>; -def SKLWriteResGroup73 : SchedWriteRes<[SKLPort23,SKLPort05]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABS(B|D|W)rm", - "MMX_PADD(B|D|Q|W)irm", - "MMX_PANDNirm", - "MMX_PANDirm", - "MMX_PORirm", - "MMX_PSIGN(B|D|W)rm", - "MMX_PSUB(B|D|Q|W)irm", - "MMX_PXORirm")>; - def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> { let Latency = 6; let NumMicroOps = 2; @@ -1165,35 +1142,16 @@ def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>; def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm", - "(V?)PACKSSWBrm", - "(V?)PACKUSDWrm", - "(V?)PACKUSWBrm", - "(V?)PALIGNRrmi", - "VPBROADCASTBrm", - "VPBROADCASTWrm", - "(V?)PSHUFDmi", - "(V?)PSHUFHWmi", - "(V?)PSHUFLWmi", - "(V?)PUNPCKHBWrm", - "(V?)PUNPCKHDQrm", - "(V?)PUNPCKHQDQrm", - "(V?)PUNPCKHWDrm", - "(V?)PUNPCKLBWrm", - "(V?)PUNPCKLDQrm", - "(V?)PUNPCKLQDQrm", - "(V?)PUNPCKLWDrm")>; - -def SKLWriteResGroup88a : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup88a], (instregex "MMX_PSHUFBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PMOV(SX|ZX)BDrm", + "(V?)PMOV(SX|ZX)BQrm", + "(V?)PMOV(SX|ZX)BWrm", + "(V?)PMOV(SX|ZX)DQrm", + "(V?)PMOV(SX|ZX)WDrm", + "(V?)PMOV(SX|ZX)WQrm")>; def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 7; @@ -1326,7 +1284,6 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m", "FCOM64m", "FCOMP32m", "FCOMP64m", - "MMX_PSADBWirm", // TODO - SKLWriteResGroup120?? "VPBROADCASTBYrm", "VPBROADCASTWYrm", "VPMOVSXBDYrm", @@ -1349,13 +1306,6 @@ def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>; -def SKLWriteResGroup113 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PH(ADD|SUB)(D|W)rm")>; - def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> { let Latency = 8; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 3f0b51d..59c773e 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -256,9 +256,11 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : SKXWriteResPair; // Vector integer ALU op, no logicals. +defm : SKXWriteResPair; // Vector integer ALU op, no logicals. +defm : SKXWriteResPair; // Vector integer ALU op, no logicals (XMM). defm : SKXWriteResPair; // Vector integer ALU op, no logicals (YMM/ZMM). -defm : SKXWriteResPair; // Vector integer and/or/xor. +defm : SKXWriteResPair; // Vector integer and/or/xor. +defm : SKXWriteResPair; // Vector integer and/or/xor (XMM). defm : SKXWriteResPair; // Vector integer and/or/xor (YMM/ZMM). defm : SKXWriteResPair; // Vector integer TEST instructions. defm : SKXWriteResPair; // Vector integer TEST instructions (YMM/ZMM). @@ -268,8 +270,10 @@ defm : SKXWriteResPair; // Vector in defm : SKXWriteResPair; // Vector PMULLD. defm : SKXWriteResPair; // Vector PMULLD (YMM/ZMM). defm : SKXWriteResPair; // Vector shuffles. +defm : SKXWriteResPair; // Vector shuffles (XMM). defm : SKXWriteResPair; // Vector shuffles (YMM/ZMM). -defm : SKXWriteResPair; // Vector variable shuffles. +defm : SKXWriteResPair; // Vector variable shuffles. +defm : SKXWriteResPair; // Vector variable shuffles (XMM). defm : SKXWriteResPair; // Vector variable shuffles (YMM/ZMM). defm : SKXWriteResPair; // Vector blends. defm : SKXWriteResPair; // Vector blends (YMM/ZMM). @@ -277,7 +281,8 @@ defm : SKXWriteResPair; // Vector var defm : SKXWriteResPair; // Vector variable blends (YMM/ZMM). defm : SKXWriteResPair; // Vector MPSAD. defm : SKXWriteResPair; // Vector MPSAD. -defm : SKXWriteResPair; // Vector PSADBW. +defm : SKXWriteResPair; // Vector PSADBW. +defm : SKXWriteResPair; // Vector PSADBW. defm : SKXWriteResPair; // Vector PSADBW. defm : SKXWriteResPair; // Vector PHMINPOS. @@ -450,7 +455,8 @@ def : WriteRes; defm : SKXWriteResPair; defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Remaining instrs. @@ -511,15 +517,7 @@ def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>; -def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr", - "MMX_PABS(B|D|W)rr", - "MMX_PADD(B|D|Q|W)irr", - "MMX_PANDNirr", - "MMX_PANDirr", - "MMX_PORirr", - "MMX_PSIGN(B|D|W)rr", - "MMX_PSUB(B|D|Q|W)irr", - "MMX_PXORirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr")>; def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> { let Latency = 1; @@ -601,7 +599,6 @@ def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm", "MMX_MOVD64mr", "MMX_MOVNTQmr", "MMX_MOVQ64mr", - "MOVNTDQmr", "MOVNTI_64mr", "MOVNTImr", "ST_FP32m", @@ -847,13 +844,6 @@ def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> { } def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>; -def SKXWriteResGroup39 : SchedWriteRes<[SKXPort5,SKXPort05]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>; - def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> { let Latency = 3; let NumMicroOps = 3; @@ -1250,20 +1240,6 @@ def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> { def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64", "JMP(16|32|64)m")>; -def SKXWriteResGroup77 : SchedWriteRes<[SKXPort23,SKXPort05]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PABS(B|D|W)rm", - "MMX_PADD(B|D|Q|W)irm", - "MMX_PANDNirm", - "MMX_PANDirm", - "MMX_PORirm", - "MMX_PSIGN(B|D|W)rm", - "MMX_PSUB(B|D|Q|W)irm", - "MMX_PXORirm")>; - def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> { let Latency = 6; let NumMicroOps = 2; @@ -1394,52 +1370,19 @@ def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)", - "VMOVSSZrm(b?)", - "VPACKSSDWZ128rm(b?)", - "(V?)PACKSSDWrm", - "VPACKSSWBZ128rm(b?)", - "(V?)PACKSSWBrm", - "VPACKUSDWZ128rm(b?)", - "(V?)PACKUSDWrm", - "VPACKUSWBZ128rm(b?)", - "(V?)PACKUSWBrm", - "VPALIGNRZ128rmi(b?)", - "(V?)PALIGNRrmi", - "VPBROADCASTBZ128m(b?)", - "VPBROADCASTBrm", - "VPBROADCASTWZ128m(b?)", - "VPBROADCASTWrm", - "VPSHUFDZ128m(b?)i", - "(V?)PSHUFDmi", - "VPSHUFHWZ128mi(b?)", - "(V?)PSHUFHWmi", - "VPSHUFLWZ128mi(b?)", - "(V?)PSHUFLWmi", - "VPSLLDQZ128rm(b?)", - "VPSRLDQZ128rm(b?)", - "VPUNPCKHBWZ128rm(b?)", - "(V?)PUNPCKHBWrm", - "VPUNPCKHDQZ128rm(b?)", - "(V?)PUNPCKHDQrm", - "VPUNPCKHQDQZ128rm(b?)", - "(V?)PUNPCKHQDQrm", - "VPUNPCKHWDZ128rm(b?)", - "(V?)PUNPCKHWDrm", - "VPUNPCKLBWZ128rm(b?)", - "(V?)PUNPCKLBWrm", - "VPUNPCKLDQZ128rm(b?)", - "(V?)PUNPCKLDQrm", - "VPUNPCKLQDQZ128rm(b?)", - "(V?)PUNPCKLQDQrm", - "VPUNPCKLWDZ128rm(b?)", - "(V?)PUNPCKLWDrm")>; + "VMOVSSZrm(b?)")>; def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup92a], (instregex "MMX_PSHUFBrm")>; +def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm", + "(V?)PMOV(SX|ZX)BQrm", + "(V?)PMOV(SX|ZX)BWrm", + "(V?)PMOV(SX|ZX)DQrm", + "(V?)PMOV(SX|ZX)WDrm", + "(V?)PMOV(SX|ZX)WQrm")>; def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 7; @@ -1676,7 +1619,6 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> { } def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)32m", "FCOM(P?)64m", - "MMX_PSADBWirm", "VFPCLASSSDrm(b?)", "VPBROADCASTBYrm", "VPBROADCASTB(Z|Z256)m(b?)", @@ -1751,13 +1693,6 @@ def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { } def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>; -def SKXWriteResGroup124 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort05]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PH(ADD|SUB)(D|W)rm")>; - def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> { let Latency = 8; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 3a5f324..937c349 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -194,6 +194,7 @@ class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } defm WriteFHAdd : X86SchedWritePair; defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM. defm WritePHAdd : X86SchedWritePair; +defm WritePHAddX : X86SchedWritePair; // XMM. defm WritePHAddY : X86SchedWritePair; // YMM/ZMM. // Vector integer operations. @@ -205,10 +206,12 @@ def WriteVecMaskedStore : SchedWrite; def WriteVecMaskedStoreY : SchedWrite; def WriteVecMove : SchedWrite; -defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. -defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM). -defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. -defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM). +defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. +defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM). +defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions. defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM). defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default). @@ -223,14 +226,17 @@ defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM). defm WritePMULLD : X86SchedWritePair; // Vector PMULLD. defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM). defm WriteShuffle : X86SchedWritePair; // Vector shuffles. +defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM). defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM). defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. +defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM). defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM). defm WriteBlend : X86SchedWritePair; // Vector blends. defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM). defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM). defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. +defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM). defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM). defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM). @@ -356,11 +362,11 @@ def SchedWriteFVarBlend WriteFVarBlendY, WriteFVarBlendY>; def SchedWriteVecALU - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWritePHAdd - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteVecLogic - : X86SchedWriteWidths; def SchedWriteVecTest : X86SchedWriteWidths; def SchedWritePSADBW - : X86SchedWriteWidths; def SchedWriteShuffle - : X86SchedWriteWidths; def SchedWriteVarShuffle - : X86SchedWriteWidths; def SchedWriteBlend : X86SchedWriteWidths; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 91ebc5f..00721ec 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -281,8 +281,10 @@ def : WriteRes; def : WriteRes; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; @@ -300,11 +302,14 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. @@ -355,7 +360,8 @@ defm : AtomWriteResPair; // NOTE: Do defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; //////////////////////////////////////////////////////////////////////////////// @@ -387,7 +393,6 @@ def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr, MOVSX64rr32, MMX_MOVD64rr, MMX_MOVD64to64rr, - MMX_PSHUFBrr, MMX_PSHUFBrm, MOVDI2PDIrr, MOVDI2SSrr, MOV64toPQIrr, @@ -492,7 +497,7 @@ def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm, POP16rmm, POP32rmm, POP64rmm)>; def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm", "XCHG(8|16|32|64)rm", - "(MMX_)?PH(ADD|SUB)Drr", + "PH(ADD|SUB)Drr", "MOV(S|Z)X16rm8", "MMX_P(ADD|SUB)Qirm", "MOV(UPS|UPD|DQU)rm", @@ -506,9 +511,8 @@ def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO, JCXZ, JECXZ, JRCXZ, SHLD32mrCL, SHRD32mrCL, SHLD32mri8, SHRD32mri8, - LD_F80m, - MMX_PSADBWirr, MMX_PSADBWirm)>; -def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm", + LD_F80m)>; +def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm", "(MMX_)?PEXTRWrr(_REV)?")>; def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> { diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 4004eff..18d4d324 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -407,6 +407,7 @@ defm : X86WriteRes; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; @@ -422,17 +423,21 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; defm : JWriteResYMMPair; @@ -482,6 +487,7 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 9243074..ef5a0f3 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -219,10 +219,12 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -233,13 +235,16 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -260,6 +265,7 @@ def : WriteRes { defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; // String instructions. diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index b7abbee..6f2448f 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -281,10 +281,12 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -292,14 +294,17 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; // FIXME defm : ZnWriteResFpuPair; // FIXME defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -1046,6 +1051,8 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>; // PHADD|PHSUB (S) W/D. def : SchedAlias; def : SchedAlias; +def : SchedAlias; +def : SchedAlias; def : SchedAlias; def : SchedAlias; diff --git a/llvm/test/CodeGen/X86/3dnow-schedule.ll b/llvm/test/CodeGen/X86/3dnow-schedule.ll index 6de1626..2c33d68 100644 --- a/llvm/test/CodeGen/X86/3dnow-schedule.ll +++ b/llvm/test/CodeGen/X86/3dnow-schedule.ll @@ -14,8 +14,8 @@ declare void @llvm.x86.mmx.femms() nounwind readnone define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pavgusb: ; CHECK: # %bb.0: -; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [1:0.50] -; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [7:0.50] +; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [3:1.00] +; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [8:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 60dc340..26c318e 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -1761,8 +1761,8 @@ define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pbroadcastb: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { ; GENERIC-LABEL: test_pbroadcastb_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1859,8 +1859,8 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_pbroadcastd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1909,7 +1909,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { ; GENERIC-LABEL: test_pbroadcastd_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1957,8 +1957,8 @@ define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC-LABEL: test_pbroadcastq: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2007,7 +2007,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { ; GENERIC-LABEL: test_pbroadcastq_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2055,8 +2055,8 @@ define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pbroadcastw: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2105,7 +2105,7 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { ; GENERIC-LABEL: test_pbroadcastw_ymm: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00] +; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50] ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index 5c9dfad..a210b63 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -5657,7 +5657,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %v ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5677,7 +5677,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0: @@ -5694,7 +5694,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %ve ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5714,7 +5714,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> % ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1: @@ -5731,7 +5731,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %v ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5751,7 +5751,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2: @@ -5781,7 +5781,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %ve ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5801,7 +5801,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> % ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3: @@ -5818,7 +5818,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %v ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5838,7 +5838,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4: @@ -5855,7 +5855,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %ve ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5875,7 +5875,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> % ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5: @@ -5905,7 +5905,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %v ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5925,7 +5925,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6: @@ -5942,7 +5942,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %ve ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5962,7 +5962,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> % ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7: @@ -5993,7 +5993,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0: @@ -6012,7 +6012,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: @@ -6031,7 +6031,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1: @@ -6050,7 +6050,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: @@ -6069,7 +6069,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2: @@ -6088,7 +6088,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: @@ -6121,7 +6121,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3: @@ -6140,7 +6140,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: @@ -6159,7 +6159,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4: @@ -6178,7 +6178,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: @@ -6197,7 +6197,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5: @@ -6216,7 +6216,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: @@ -6249,7 +6249,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6: @@ -6268,7 +6268,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: @@ -6287,7 +6287,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7: @@ -6306,7 +6306,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i1 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: @@ -7704,7 +7704,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC-LABEL: test_masked_4xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7724,7 +7724,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask0: @@ -7741,7 +7741,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC-LABEL: test_masked_4xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7761,7 +7761,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask1: @@ -7778,7 +7778,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC-LABEL: test_masked_4xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7798,7 +7798,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask2: @@ -7828,7 +7828,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC-LABEL: test_masked_4xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7848,7 +7848,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask3: @@ -7879,7 +7879,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask0: @@ -7898,7 +7898,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0: @@ -7917,7 +7917,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask1: @@ -7936,7 +7936,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1: @@ -7955,7 +7955,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask2: @@ -7974,7 +7974,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2: @@ -8007,7 +8007,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %ve ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask3: @@ -8026,7 +8026,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> % ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3: diff --git a/llvm/test/CodeGen/X86/xop-schedule.ll b/llvm/test/CodeGen/X86/xop-schedule.ll index 7179cd4..9a314e2 100644 --- a/llvm/test/CodeGen/X86/xop-schedule.ll +++ b/llvm/test/CodeGen/X86/xop-schedule.ll @@ -101,9 +101,9 @@ define void @test_vpcmov_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i ; GENERIC-LABEL: test_vpcmov_128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ;