From 8a937e00d81d2d44b3fa81511052862b410c9599 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 27 Apr 2018 18:19:48 +0000 Subject: [PATCH] [X86] Split WriteFBlend/WriteFVarBlend/WriteFVarShuffle into XMM and YMM/ZMM scheduler classes This removes all the WriteFBlend/WriteFVarBlend InstRW overrides - some WriteFVarShuffle remain to be fixed. llvm-svn: 331065 --- llvm/lib/Target/X86/X86InstrAVX512.td | 55 +++++++++++++++++----------- llvm/lib/Target/X86/X86InstrSSE.td | 32 +++++++++------- llvm/lib/Target/X86/X86SchedBroadwell.td | 17 ++++----- llvm/lib/Target/X86/X86SchedHaswell.td | 17 +++------ llvm/lib/Target/X86/X86SchedSandyBridge.td | 15 ++------ llvm/lib/Target/X86/X86SchedSkylakeClient.td | 15 ++------ llvm/lib/Target/X86/X86SchedSkylakeServer.td | 13 ++----- llvm/lib/Target/X86/X86Schedule.td | 3 ++ llvm/lib/Target/X86/X86ScheduleAtom.td | 3 ++ llvm/lib/Target/X86/X86ScheduleBtVer2.td | 23 +++--------- llvm/lib/Target/X86/X86ScheduleSLM.td | 3 ++ llvm/lib/Target/X86/X86ScheduleZnver1.td | 3 ++ 12 files changed, 93 insertions(+), 106 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c39429e..02497ce 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1932,37 +1932,45 @@ multiclass WriteFVarBlendask_rmb opc, string OpcodeStr, } multiclass blendmask_dq opc, string OpcodeStr, - X86FoldableSchedWrite sched, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256, AVX512VLVectorVTInfo VTInfo> { - defm Z : WriteFVarBlendask , - WriteFVarBlendask_rmb , EVEX_V512; + defm Z : WriteFVarBlendask , + WriteFVarBlendask_rmb , EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : WriteFVarBlendask, - WriteFVarBlendask_rmb, EVEX_V256; - defm Z128 : WriteFVarBlendask, - WriteFVarBlendask_rmb, EVEX_V128; + defm Z256 : WriteFVarBlendask, + WriteFVarBlendask_rmb, EVEX_V256; + defm Z128 : WriteFVarBlendask, + WriteFVarBlendask_rmb, EVEX_V128; } } multiclass blendmask_bw opc, string OpcodeStr, - X86FoldableSchedWrite sched, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm Z : WriteFVarBlendask, EVEX_V512; + defm Z : WriteFVarBlendask, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm Z256 : WriteFVarBlendask, EVEX_V256; - defm Z128 : WriteFVarBlendask, EVEX_V128; + defm Z256 : WriteFVarBlendask, EVEX_V256; + defm Z128 : WriteFVarBlendask, EVEX_V128; } } -defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", WriteFVarBlend, avx512vl_f32_info>; -defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", WriteFVarBlend, avx512vl_f64_info>, VEX_W; -defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", WriteVarBlend, avx512vl_i32_info>; -defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", WriteVarBlend, avx512vl_i64_info>, VEX_W; -defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", WriteVarBlend, avx512vl_i8_info>; -defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", WriteVarBlend, avx512vl_i16_info>, VEX_W; +defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", WriteFVarBlend, WriteFVarBlendY, + avx512vl_f32_info>; +defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", WriteFVarBlend, WriteFVarBlendY, + avx512vl_f64_info>, VEX_W; +defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", WriteVarBlend, WriteVarBlend, + avx512vl_i32_info>; +defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", WriteVarBlend, WriteVarBlend, + avx512vl_i64_info>, VEX_W; +defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", WriteVarBlend, WriteVarBlend, + avx512vl_i8_info>; +defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", WriteVarBlend, WriteVarBlend, + avx512vl_i16_info>, VEX_W; //===----------------------------------------------------------------------===// // Compare Instructions @@ -5967,23 +5975,26 @@ multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, } multiclass avx512_permil_vec_common OpcVar, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> { let Predicates = [HasAVX512] in { - defm Z : avx512_permil_vec, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_permil_vec, EVEX_V128; - defm Z256 : avx512_permil_vec, EVEX_V256; } } multiclass avx512_permil OpcImm, bits<8> OpcVar, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ - defm NAME: avx512_permil_vec_common; + defm NAME: avx512_permil_vec_common; defm NAME: avx512_shift_rmi_sizes, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 6f6afe7..8f88b9b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6049,7 +6049,7 @@ let Predicates = [HasAVX] in { VEX_4V, VEX_WIG; defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32, VR256, loadv8f32, f256mem, 0, SSEPackedSingle, - WriteFBlend, BlendCommuteImm8>, + WriteFBlendY, BlendCommuteImm8>, VEX_4V, VEX_L, VEX_WIG; defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64, VR128, loadv2f64, f128mem, 0, SSEPackedDouble, @@ -6057,7 +6057,7 @@ let Predicates = [HasAVX] in { VEX_4V, VEX_WIG; defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64, VR256, loadv4f64, f256mem, 0, SSEPackedDouble, - WriteFBlend, BlendCommuteImm4>, + WriteFBlendY, BlendCommuteImm4>, VEX_4V, VEX_L, VEX_WIG; defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16, VR128, loadv2i64, i128mem, 0, SSEPackedInt, @@ -6130,7 +6130,7 @@ defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem, WriteFVarBlend>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem, loadv4f64, int_x86_avx_blendv_pd_256, - WriteFVarBlend>, VEX_L; + WriteFVarBlendY>, VEX_L; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, @@ -6138,7 +6138,7 @@ defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, WriteFVarBlend>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem, loadv8f32, int_x86_avx_blendv_ps_256, - WriteFVarBlend>, VEX_L; + WriteFVarBlendY>, VEX_L; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, loadv2i64, int_x86_sse41_pblendvb, @@ -7156,45 +7156,51 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, X86MemOperand x86memop_i, PatFrag i_frag, - ValueType f_vt, ValueType i_vt> { + ValueType f_vt, ValueType i_vt, + X86FoldableSchedWrite sched, + X86FoldableSchedWrite varsched> { let Predicates = [HasAVX, NoVLX] in { def rr : AVX8I, VEX_4V, - Sched<[WriteFVarShuffle]>; + Sched<[varsched]>; def rm : AVX8I, VEX_4V, - Sched<[WriteFVarShuffleLd, ReadAfterLd]>; + Sched<[varsched.Folded, ReadAfterLd]>; def ri : AVXAIi8, VEX, - Sched<[WriteFShuffle]>; + Sched<[sched]>; def mi : AVXAIi8, VEX, - Sched<[WriteFShuffleLd]>; + Sched<[sched.Folded]>; }// Predicates = [HasAVX, NoVLX] } let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - loadv2i64, v4f32, v4i32>; + loadv2i64, v4f32, v4i32, WriteFShuffle, + WriteFVarShuffle>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - loadv4i64, v8f32, v8i32>, VEX_L; + loadv4i64, v8f32, v8i32, WriteFShuffle, + WriteFVarShuffleY>, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - loadv2i64, v2f64, v2i64>; + loadv2i64, v2f64, v2i64, WriteFShuffle, + WriteFVarShuffle>; defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - loadv4i64, v4f64, v4i64>, VEX_L; + loadv4i64, v4f64, v4i64, WriteFShuffle, + WriteFVarShuffleY>, VEX_L; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 2c180f5..18a32a8 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -169,9 +169,12 @@ defm : BWWriteResPair; // Floating point fabs/fch defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). defm : BWWriteResPair; // Floating point vector shuffles. -defm : BWWriteResPair; // Floating point vector variable shuffles. -defm : BWWriteResPair; // Floating point vector blends. +defm : BWWriteResPair; // Floating point vector variable shuffles. +defm : BWWriteResPair; // Floating point vector variable shuffles. +defm : BWWriteResPair; // Floating point vector blends. +defm : BWWriteResPair; // Floating point vector blends. defm : BWWriteResPair; // Fp vector variable blends. +defm : BWWriteResPair; // Fp vector variable blends. def : WriteRes { let Latency = 4; @@ -1099,9 +1102,7 @@ def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm", "VPALIGNRYrmi", "VPBLENDWYrmi", "VPERMILPDYmi", - "VPERMILPDYrm", "VPERMILPSYmi", - "VPERMILPSYrm", "VPSHUFBYrm", "VPSHUFDYmi", "VPSHUFHWYmi", @@ -1175,9 +1176,7 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup77], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi", - "VPANDNYrm", +def: InstRW<[BWWriteResGroup77], (instregex "VPANDNYrm", "VPANDYrm", "VPBLENDDYrmi", "VPORYrm", @@ -1334,9 +1333,7 @@ def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[BWWriteResGroup94], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm", - "VMASKMOVPDYrm", +def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm", "VPBLENDVBYrm", "VPMASKMOVDYrm", diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index e2c1853..59d2063 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -166,11 +166,14 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; def : WriteRes { let Latency = 5; @@ -885,9 +888,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "PUNPCKLWDrm", "(V?)PACKUSWBrm", "(V?)PALIGNRrmi", "VPERMILPDmi", - "VPERMILPDrm", "VPERMILPSmi", - "VPERMILPSrm", "(V?)PSHUFBrm", "(V?)PSHUFDmi", "(V?)PSHUFHWmi", @@ -919,9 +920,7 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm", "VPALIGNRYrmi", "VPBLENDWYrmi", "VPERMILPDYmi", - "VPERMILPDYrm", "VPERMILPSYmi", - "VPERMILPSYrm", "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXWQYrm", @@ -1092,9 +1091,7 @@ def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup17_2], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi", - "VPANDNYrm", +def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDNYrm", "VPANDYrm", "VPBLENDDYrmi", "VPORYrm", @@ -1272,9 +1269,7 @@ def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[HWWriteResGroup36_1], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm", - "VMASKMOVPDYrm", +def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm", "VPBLENDVBYrm", "VPMASKMOVDYrm", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index f59bd57..abd4d72 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -154,8 +154,11 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; def : WriteRes { let Latency = 4; } // Vector integer operations. @@ -1156,14 +1159,6 @@ def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm", "VUNPCKLPDYrm", "VUNPCKLPSYrm")>; -def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort05]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi")>; - def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 8; let NumMicroOps = 3; @@ -1335,9 +1330,7 @@ def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> { let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm", - "VMASKMOVPDYrm", +def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm")>; def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ee4e4dd..e1a8e40 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -167,8 +167,11 @@ defm : SKLWriteResPair; // Floating po defm : SKLWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector shuffles. +defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector blends. +defm : SKLWriteResPair; // Floating point vector blends. defm : SKLWriteResPair; // Fp vector variable blends. +defm : SKLWriteResPair; // Fp vector variable blends. def : WriteRes { let Latency = 6; @@ -1626,9 +1629,7 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi", - "VMASKMOVPDYrm", +def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm", "VPADDBYrm", "VPADDDYrm", @@ -1768,14 +1769,6 @@ def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort01]> { } def: InstRW<[SKLWriteResGroup124], (instregex "(V?)DPPDrri")>; -def SKLWriteResGroup125 : SchedWriteRes<[SKLPort23,SKLPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm")>; - def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 085f440..e5d4b1f 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -167,8 +167,11 @@ defm : SKXWriteResPair; // Floating poi defm : SKXWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). defm : SKXWriteResPair; // Floating point vector shuffles. defm : SKXWriteResPair; // Floating point vector variable shuffles. +defm : SKXWriteResPair; // Floating point vector variable shuffles. defm : SKXWriteResPair; // Floating point vector blends. +defm : SKXWriteResPair; // Floating point vector blends. defm : SKXWriteResPair; // Fp vector variable blends. +defm : SKXWriteResPair; // Fp vector variable blends. def : WriteRes { let Latency = 6; @@ -3171,8 +3174,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VBLENDMPDZrm(b?)", "VBLENDMPSZ256rm(b?)", "VBLENDMPSZrm(b?)", - "VBLENDPDYrmi", - "VBLENDPSYrmi", "VBROADCASTF32X2Z256m(b?)", "VBROADCASTF32X2Zm(b?)", "VBROADCASTF32X4Z256rm(b?)", @@ -3530,14 +3531,6 @@ def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5,SKXPort015]> { } def: InstRW<[SKXWriteResGroup139], (instregex "(V?)DPPDrri")>; -def SKXWriteResGroup140 : SchedWriteRes<[SKXPort23,SKXPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup140], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm")>; - def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 20241bf..ac2c56e 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -94,8 +94,11 @@ defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. +defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM). defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. +defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM). defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. +defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM). // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 51d046f..f706bc2 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -215,11 +215,14 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 09cb530..ffa8e09 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -331,8 +331,11 @@ defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. @@ -681,26 +684,11 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> { } def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; -def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> { - let Latency = 3; - let ResourceCycles = [2, 6]; - let NumMicroOps = 6; -} -def : InstRW<[JWriteVPERMY], (instrs VBLENDVPDYrr, VBLENDVPSYrr, VPERMILPDYrr, VPERMILPSYrr)>; - -def JWriteVPERMYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 8; - let ResourceCycles = [2, 2, 6]; - let NumMicroOps = 6; -} -def : InstRW<[JWriteVPERMYLd, ReadAfterLd], (instrs VBLENDVPDYrm, VBLENDVPSYrm, VPERMILPDYrm, VPERMILPSYrm)>; - def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> { let ResourceCycles = [2, 2]; let NumMicroOps = 2; } -def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri, - VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, +def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>; @@ -710,8 +698,7 @@ def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let ResourceCycles = [2, 2, 2]; let NumMicroOps = 2; } -def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi, - VMOVDDUPYrm, VMOVSHDUPYrm, +def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm, VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi, VUNPCKHPDYrm, diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index c682130..6d2b7d1 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -145,6 +145,7 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; def : WriteRes; @@ -278,8 +279,10 @@ def : WriteRes; // AVX/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. def : WriteRes; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index f5fdf14..4076240 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -197,7 +197,9 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -208,6 +210,7 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -- 2.7.4