From c708868cb15bbeaecc10ca9e92c7060fc3d3390a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 1 May 2018 18:06:07 +0000 Subject: [PATCH] [X86] Split WriteFRcp/WriteFRsqrt/WriteFSqrt into XMM and YMM/ZMM scheduler classes llvm-svn: 331290 --- llvm/lib/Target/X86/X86InstrAVX512.td | 73 +++++++++++++++------------- llvm/lib/Target/X86/X86InstrSSE.td | 56 ++++++++++----------- llvm/lib/Target/X86/X86SchedBroadwell.td | 9 ++-- llvm/lib/Target/X86/X86SchedHaswell.td | 9 ++-- llvm/lib/Target/X86/X86SchedSandyBridge.td | 6 ++- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 9 ++-- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 13 ++--- llvm/lib/Target/X86/X86Schedule.td | 9 +++- llvm/lib/Target/X86/X86ScheduleAtom.td | 3 ++ llvm/lib/Target/X86/X86ScheduleBtVer2.td | 21 ++------ llvm/lib/Target/X86/X86ScheduleSLM.td | 3 ++ llvm/lib/Target/X86/X86ScheduleZnver1.td | 3 ++ 12 files changed, 117 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e27357e..2175c46 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7863,14 +7863,18 @@ multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, WriteFRcp, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, WriteFRcp, f64x_info>, - VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, WriteFRsqrt, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, WriteFRsqrt, f64x_info>, - VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, + f32x_info>, EVEX_CD8<32, CD8VT1>, + T8PD, NotMemoryFoldable; +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, + f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, + T8PD, NotMemoryFoldable; +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, + SchedWriteFRsqrt.Scl, f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, + SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, + EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, @@ -7895,31 +7899,31 @@ multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { - defm PSZ : avx512_fp14_p { + defm PSZ : avx512_fp14_p, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp14_p, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp14_p, + OpNode, sched.XMM, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp14_p, + OpNode, sched.YMM, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp14_p, + OpNode, sched.XMM, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp14_p, + OpNode, sched.YMM, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, WriteFRsqrt>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, WriteFRcp>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, @@ -8065,32 +8069,34 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, } } -multiclass avx512_sqrt_packed_all opc, string OpcodeStr> { - defm PSZ : avx512_sqrt_packed, +multiclass avx512_sqrt_packed_all opc, string OpcodeStr, + X86SchedWriteWidths sched> { + defm PSZ : avx512_sqrt_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_sqrt_packed, + defm PDZ : avx512_sqrt_packed, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_sqrt_packed, + sched.XMM, v4f32x_info>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_sqrt_packed, + sched.YMM, v8f32x_info>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_sqrt_packed, + sched.XMM, v2f64x_info>, EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_sqrt_packed, + sched.YMM, v4f64x_info>, EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; } } -multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr> { - defm PSZ : avx512_sqrt_packed_round opc, string OpcodeStr, + X86SchedWriteWidths sched> { + defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; } @@ -8153,20 +8159,21 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, X86FoldableSchedWri } } -multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { - defm SSZ : avx512_sqrt_scalar opc, string OpcodeStr, + X86SchedWriteWidths sched> { + defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable; - defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable; } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">, - avx512_sqrt_packed_all_round<0x51, "vsqrt">; +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrt>; -defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; +defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrt>, VEX_LIG; multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 90a62c5..550076d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2836,114 +2836,114 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, /// sse1_fp_unop_p - SSE1 unops in packed form. multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, list prds> { + X86SchedWriteWidths sched, list prds> { let Predicates = prds in { def V#NAME#PSr : PSI, - VEX, Sched<[sched]>, VEX_WIG; + VEX, Sched<[sched.XMM]>, VEX_WIG; def V#NAME#PSm : PSI, - VEX, Sched<[sched.Folded]>, VEX_WIG; + VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; def V#NAME#PSYr : PSI, - VEX, VEX_L, Sched<[sched]>, VEX_WIG; + VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; def V#NAME#PSYm : PSI, - VEX, VEX_L, Sched<[sched.Folded]>, VEX_WIG; + VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; } def PSr : PSI, - Sched<[sched]>; + Sched<[sched.XMM]>; def PSm : PSI, - Sched<[sched.Folded]>; + Sched<[sched.XMM.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, - SDNode OpNode, X86FoldableSchedWrite sched> { + SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX, NoVLX] in { def V#NAME#PDr : PDI, - VEX, Sched<[sched]>, VEX_WIG; + VEX, Sched<[sched.XMM]>, VEX_WIG; def V#NAME#PDm : PDI, - VEX, Sched<[sched.Folded]>, VEX_WIG; + VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; def V#NAME#PDYr : PDI, - VEX, VEX_L, Sched<[sched]>, VEX_WIG; + VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; def V#NAME#PDYm : PDI, - VEX, VEX_L, Sched<[sched.Folded]>, VEX_WIG; + VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; } def PDr : PDI, - Sched<[sched]>; + Sched<[sched.XMM]>; def PDm : PDI, - Sched<[sched.Folded]>; + Sched<[sched.XMM.Folded]>; } multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, Predicate AVXTarget> { + X86SchedWriteWidths sched, Predicate AVXTarget> { defm SS : sse_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, sched, UseSSE1, "SS">, XS; + SSEPackedSingle, sched.Scl, UseSSE1, "SS">, XS; defm V#NAME#SS : avx_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, sched, AVXTarget, "SS">, XS, VEX_4V, + SSEPackedSingle, sched.Scl, AVXTarget, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; } multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, Predicate AVXTarget> { + X86SchedWriteWidths sched, Predicate AVXTarget> { defm SD : sse_fp_unop_s("int_x86_sse2_"##OpcodeStr##_sd), - OpNode, SSEPackedDouble, sched, UseSSE2, "SD">, XD; + OpNode, SSEPackedDouble, sched.Scl, UseSSE2, "SD">, XD; defm V#NAME#SD : avx_fp_unop_s("int_x86_sse2_"##OpcodeStr##_sd), - OpNode, SSEPackedDouble, sched, AVXTarget, "SD">, + OpNode, SSEPackedDouble, sched.Scl, AVXTarget, "SD">, XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; } // Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, WriteFSqrt, UseAVX>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, WriteFSqrt, [HasAVX, NoVLX]>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, WriteFSqrt, UseAVX>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, WriteFSqrt>; +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, WriteFRsqrt, HasAVX>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, WriteFRsqrt, [HasAVX]>; -defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, WriteFRcp, HasAVX>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, WriteFRcp, [HasAVX]>; +defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>; +defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, + sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>; // There is no f64 version of the reciprocal approximation instructions. diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 067cc9f..50c2818d 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -161,9 +161,12 @@ defm : BWWriteResPair; // Floating point c defm : BWWriteResPair; // Floating point compare to flags. defm : BWWriteResPair; // Floating point multiplication. defm : BWWriteResPair; // 10-14 cycles. // Floating point division. -defm : BWWriteResPair; // Floating point square root. -defm : BWWriteResPair; // Floating point reciprocal estimate. -defm : BWWriteResPair; // Floating point reciprocal square root estimate. +defm : BWWriteResPair; // Floating point square root. +defm : BWWriteResPair; // Floating point square root (YMM/ZMM). +defm : BWWriteResPair; // Floating point reciprocal estimate. +defm : BWWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : BWWriteResPair; // Floating point reciprocal square root estimate. +defm : BWWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). defm : BWWriteResPair; // Fused Multiply Add. defm : BWWriteResPair; // Fused Multiply Add (Scalar). defm : BWWriteResPair; // Fused Multiply Add (YMM/ZMM). diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 4e9fe82..c7634fb 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -155,9 +155,12 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // 10-14 cycles. -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index cadaf54..49b4d28 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -146,8 +146,11 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -1525,6 +1528,7 @@ def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { let ResourceCycles = [2,1]; } def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; + def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> { let Latency = 14; let NumMicroOps = 1; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index a423779..0b8e350 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -158,9 +158,12 @@ defm : SKLWriteResPair; // Floating point defm : SKLWriteResPair; // Floating point compare to flags. defm : SKLWriteResPair; // Floating point multiplication. defm : SKLWriteResPair; // 10-14 cycles. // Floating point division. -defm : SKLWriteResPair; // Floating point square root. -defm : SKLWriteResPair; // Floating point reciprocal estimate. -defm : SKLWriteResPair; // Floating point reciprocal square root estimate. +defm : SKLWriteResPair; // Floating point square root. +defm : SKLWriteResPair; // Floating point square root (YMM/ZMM). +defm : SKLWriteResPair; // Floating point reciprocal estimate. +defm : SKLWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKLWriteResPair; // Floating point reciprocal square root estimate. +defm : SKLWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). defm : SKLWriteResPair; // Fused Multiply Add. defm : SKLWriteResPair; // Fused Multiply Add (Scalar). defm : SKLWriteResPair; // Fused Multiply Add (YMM/ZMM). diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 1fe3faa..29de898 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -158,9 +158,12 @@ defm : SKXWriteResPair; // Floating point defm : SKXWriteResPair; // Floating point compare to flags. defm : SKXWriteResPair; // Floating point multiplication. defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. -defm : SKXWriteResPair; // Floating point square root. +defm : SKXWriteResPair; // Floating point square root. +defm : SKXWriteResPair; // Floating point square root (YMM/ZMM). defm : SKXWriteResPair; // Floating point reciprocal estimate. +defm : SKXWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). defm : SKXWriteResPair; // Floating point reciprocal square root estimate. +defm : SKXWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). defm : SKXWriteResPair; // Fused Multiply Add. defm : SKXWriteResPair; // Fused Multiply Add (Scalar). defm : SKXWriteResPair; // Fused Multiply Add (YMM/ZMM). @@ -3622,13 +3625,7 @@ def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m", - "VRCP14PDZ256m(b?)", - "VRCP14PSZ256m(b?)", - "VRCPPSYm", - "VRSQRT14PDZ256m(b?)", - "VRSQRT14PSZ256m(b?)", - "VRSQRTPSYm")>; +def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>; def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 11; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index ad2974d..85dcfc1 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -98,8 +98,11 @@ defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM/ZMM). defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM). defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM). defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar). defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). @@ -210,10 +213,12 @@ def SchedWriteFMul : X86SchedWriteWidths; def SchedWriteFDiv : X86SchedWriteWidths; +def SchedWriteFSqrt + : X86SchedWriteWidths; def SchedWriteFRcp - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRsqrt - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFLogic : X86SchedWriteWidths; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 85f0b23..8e64bbd 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -209,9 +209,12 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 5edc060..a1b6358 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -325,9 +325,12 @@ defm : JWriteResFpuPair; // NOTE: Doesn't defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; @@ -587,14 +590,14 @@ def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> { let ResourceCycles = [2, 2]; let NumMicroOps = 2; } -def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>; +def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr)>; def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 7; let ResourceCycles = [2, 2, 2]; let NumMicroOps = 2; } -def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>; +def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm)>; def JWriteVMULPD: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 4; @@ -744,20 +747,6 @@ def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { } def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>; -def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> { - let Latency = 42; - let ResourceCycles = [2, 42]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>; - -def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { - let Latency = 47; - let ResourceCycles = [2, 2, 42]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>; - def JWriteJVZEROALL: SchedWriteRes<[]> { let Latency = 90; let NumMicroOps = 73; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index a130788..cfd626f 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -137,8 +137,11 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 6265164..96947da 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -219,8 +219,11 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; def : WriteRes; // Vector integer operations which uses FPU units -- 2.7.4