From dbd1ae7ddd96c0b194117d93842b458abd10a75d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 25 Apr 2018 13:07:58 +0000 Subject: [PATCH] [X86] Split WriteFMA into XMM, Scalar and YMM/ZMM scheduler classes This removes all the FMA InstRW overrides. If we ever get PR36924, then we can remove many of these declarations from models. llvm-svn: 330820 --- llvm/lib/Target/X86/X86InstrAVX512.td | 82 ++++++---- llvm/lib/Target/X86/X86InstrFMA.td | 233 ++++++++++++++------------- llvm/lib/Target/X86/X86SchedBroadwell.td | 12 +- llvm/lib/Target/X86/X86SchedHaswell.td | 10 +- llvm/lib/Target/X86/X86SchedSandyBridge.td | 2 + llvm/lib/Target/X86/X86SchedSkylakeClient.td | 8 +- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 14 +- llvm/lib/Target/X86/X86Schedule.td | 2 + llvm/lib/Target/X86/X86ScheduleAtom.td | 2 + llvm/lib/Target/X86/X86ScheduleBtVer2.td | 2 + llvm/lib/Target/X86/X86ScheduleSLM.td | 2 + llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 + 12 files changed, 201 insertions(+), 170 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 3c5ae58..6046541 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6123,19 +6123,20 @@ let Predicates = [HasAVX512] in { // multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm r: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA]>; + AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, - AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in defm rb: AVX512_maskable_3src, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_213_rm, - avx512_fma3_213_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + defm Z : avx512_fma3p_213_rm, + avx512_fma3_213_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_213_rm, + defm Z256 : avx512_fma3p_213_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_213_rm, + defm Z128 : avx512_fma3p_213_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } @@ -6190,19 +6196,20 @@ defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubR multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm r: AVX512_maskable_3src, AVX512FMA3Base, Sched<[WriteFMA]>; + vselect, 1>, AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src2, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, - Sched<[WriteFMALd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1, vselect, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_231_rm, - avx512_fma3_231_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + defm Z : avx512_fma3p_231_rm, + avx512_fma3_231_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_231_rm, + defm Z256 : avx512_fma3p_231_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_231_rm, + defm Z128 : avx512_fma3p_231_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } @@ -6258,13 +6270,14 @@ defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddR defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm r: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA]>; + AVX512FMA3Base, Sched<[sched]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -6272,7 +6285,7 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, - AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -6282,11 +6295,12 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), _.RC:$src1, _.RC:$src2)), 1, 0>, - AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 1, 1, vselect, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_132_rm, - avx512_fma3_132_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + defm Z : avx512_fma3p_132_rm, + avx512_fma3_132_round, + EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_132_rm, + defm Z256 : avx512_fma3p_132_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_132_rm, + defm Z128 : avx512_fma3p_132_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } @@ -6336,29 +6354,29 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in { defm r_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, Sched<[WriteFMA]>; + AVX512FMA3Base, Sched<[WriteFMAS]>; defm m_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; + AVX512FMA3Base, Sched<[WriteFMASLd, ReadAfterLd]>; defm rb_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMAS]>; let isCodeGenOnly = 1, isCommutable = 1 in { def r : AVX512FMA3S, Sched<[WriteFMA]>; + !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMAS]>; def m : AVX512FMA3S, Sched<[WriteFMALd, ReadAfterLd]>; + [RHS_m]>, Sched<[WriteFMASLd, ReadAfterLd]>; }// isCodeGenOnly = 1 }// Constraints = "$src1 = $dst" } diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 3928f0f..e3b44c9 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -36,13 +36,13 @@ multiclass fma3p_rm_213 opc, string OpcodeStr, RegisterClass RC, ValueType VT, X86MemOperand x86memop, PatFrag MemFrag, - SDNode Op> { + SDNode Op, X86FoldableSchedWrite sched> { def r : FMA3, - Sched<[WriteFMA]>; + Sched<[sched]>; let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, (MemFrag addr:$src3))))]>, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, ValueType VT, X86MemOperand x86memop, PatFrag MemFrag, - SDNode Op> { + SDNode Op, X86FoldableSchedWrite sched> { let hasSideEffects = 0 in def r : FMA3, Sched<[WriteFMA]>; + []>, Sched<[sched]>; let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3), RC:$src1)))]>, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, ValueType VT, X86MemOperand x86memop, PatFrag MemFrag, - SDNode Op> { + SDNode Op, X86FoldableSchedWrite sched> { let hasSideEffects = 0 in def r : FMA3, Sched<[WriteFMA]>; + []>, Sched<[sched]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -93,71 +93,77 @@ multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1, RC:$src2)))]>, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, string Suff, PatFrag MemFrag128, PatFrag MemFrag256, - SDNode Op, ValueType OpTy128, ValueType OpTy256> { + SDNode Op, ValueType OpTy128, ValueType OpTy256, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256> { defm NAME#213#Suff : fma3p_rm_213; + VR128, OpTy128, f128mem, MemFrag128, Op, sched128>; defm NAME#231#Suff : fma3p_rm_231; + VR128, OpTy128, f128mem, MemFrag128, Op, sched128>; defm NAME#132#Suff : fma3p_rm_132; + VR128, OpTy128, f128mem, MemFrag128, Op, sched128>; defm NAME#213#Suff#Y : fma3p_rm_213, + VR256, OpTy256, f256mem, MemFrag256, Op, sched256>, VEX_L; defm NAME#231#Suff#Y : fma3p_rm_231, + VR256, OpTy256, f256mem, MemFrag256, Op, sched256>, VEX_L; defm NAME#132#Suff#Y : fma3p_rm_132, + VR256, OpTy256, f256mem, MemFrag256, Op, sched256>, VEX_L; } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS", - loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32>; + loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32, + WriteFMA, WriteFMAY>; defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS", - loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32>; + loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32, + WriteFMA, WriteFMAY>; defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS", - loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32>; + loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32, + WriteFMA, WriteFMAY>; defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS", - loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32>; + loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32, + WriteFMA, WriteFMAY>; } let ExeDomain = SSEPackedDouble in { defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD", loadv2f64, loadv4f64, X86Fmadd, v2f64, - v4f64>, VEX_W; + v4f64, WriteFMA, WriteFMAY>, VEX_W; defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD", loadv2f64, loadv4f64, X86Fmsub, v2f64, - v4f64>, VEX_W; + v4f64, WriteFMA, WriteFMAY>, VEX_W; defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD", loadv2f64, loadv4f64, X86Fmaddsub, - v2f64, v4f64>, VEX_W; + v2f64, v4f64, WriteFMA, WriteFMAY>, VEX_W; defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD", loadv2f64, loadv4f64, X86Fmsubadd, - v2f64, v4f64>, VEX_W; + v2f64, v4f64, WriteFMA, WriteFMAY>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32, - loadv8f32, X86Fnmadd, v4f32, v8f32>; + loadv8f32, X86Fnmadd, v4f32, v8f32, WriteFMA, WriteFMAY>; defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32, - loadv8f32, X86Fnmsub, v4f32, v8f32>; + loadv8f32, X86Fnmsub, v4f32, v8f32, WriteFMA, WriteFMAY>; } let ExeDomain = SSEPackedDouble in { defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64, - loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; + loadv4f64, X86Fnmadd, v2f64, v4f64, WriteFMA, WriteFMAY>, VEX_W; defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64, - loadv4f64, X86Fnmsub, v2f64, v4f64>, VEX_W; + loadv4f64, X86Fnmsub, v2f64, v4f64, WriteFMA, WriteFMAY>, VEX_W; } // All source register operands of FMA opcodes defined in fma3s_rm multiclass @@ -171,13 +177,14 @@ let ExeDomain = SSEPackedDouble in { // defining FMA3 opcodes above. multiclass fma3s_rm_213 opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, + X86FoldableSchedWrite sched> { def r : FMA3S, - Sched<[WriteFMA]>; + Sched<[sched]>; let mayLoad = 1 in def m : FMA3S opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } multiclass fma3s_rm_231 opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, X86FoldableSchedWrite sched> { let hasSideEffects = 0 in def r : FMA3S, Sched<[WriteFMA]>; + []>, Sched<[sched]>; let mayLoad = 1 in def m : FMA3S opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } multiclass fma3s_rm_132 opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, X86FoldableSchedWrite sched> { let hasSideEffects = 0 in def r : FMA3S, Sched<[WriteFMA]>; + []>, Sched<[sched]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -228,20 +235,20 @@ multiclass fma3s_rm_132 opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, string OpStr, string PackTy, string Suff, SDNode OpNode, RegisterClass RC, - X86MemOperand x86memop> { + X86MemOperand x86memop, X86FoldableSchedWrite sched> { defm NAME#213#Suff : fma3s_rm_213; + x86memop, RC, OpNode, sched>; defm NAME#231#Suff : fma3s_rm_231; + x86memop, RC, OpNode, sched>; defm NAME#132#Suff : fma3s_rm_132; + x86memop, RC, OpNode, sched>; } // These FMA*_Int instructions are defined specially for being used when @@ -260,19 +267,20 @@ multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1, hasSideEffects = 0 in multiclass fma3s_rm_int opc, string OpcodeStr, - Operand memopr, RegisterClass RC> { + Operand memopr, RegisterClass RC, + X86FoldableSchedWrite sched> { def r_Int : FMA3S_Int, Sched<[WriteFMA]>; + []>, Sched<[sched]>; let mayLoad = 1 in def m_Int : FMA3S_Int, Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -286,28 +294,30 @@ multiclass fma3s_rm_int opc, string OpcodeStr, // such analysis will be implemented eventually. multiclass fma3s_int_forms opc132, bits<8> opc213, bits<8> opc231, string OpStr, string PackTy, string Suff, - RegisterClass RC, Operand memop> { + RegisterClass RC, Operand memop, + X86FoldableSchedWrite sched> { defm NAME#132#Suff : fma3s_rm_int; + memop, RC, sched>; defm NAME#213#Suff : fma3s_rm_int; + memop, RC, sched>; defm NAME#231#Suff : fma3s_rm_int; + memop, RC, sched>; } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, - string OpStr, SDNode OpNodeIntrin, SDNode OpNode> { + string OpStr, SDNode OpNodeIntrin, SDNode OpNode, + X86FoldableSchedWrite sched> { let ExeDomain = SSEPackedSingle in defm NAME : fma3s_forms, + FR32, f32mem, sched>, fma3s_int_forms; + VR128, ssmem, sched>; let ExeDomain = SSEPackedDouble in defm NAME : fma3s_forms, + FR64, f64mem, sched>, fma3s_int_forms, VEX_W; + VR128, sdmem, sched>, VEX_W; // These patterns use the 123 ordering, instead of 213, even though // they match the intrinsic to the 213 version of the instruction. @@ -345,23 +355,23 @@ multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, } } -defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG; -defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG; - -defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>, - VEX_LIG; -defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>, - VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd, + WriteFMAS>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub, + WriteFMAS>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd, + WriteFMAS>, VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub, + WriteFMAS>, VEX_LIG; //===----------------------------------------------------------------------===// // FMA4 - AMD 4 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// - multiclass fma4s opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, - PatFrag mem_frag> { + PatFrag mem_frag, X86FoldableSchedWrite sched> { let isCommutable = 1 in def rr : FMA4S opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG, - Sched<[WriteFMA]>; + Sched<[sched]>; def rm : FMA4S, VEX_W, VEX_LIG, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; def mr : FMA4S, VEX_LIG, - Sched<[WriteFMALd, ReadAfterLd, + Sched<[sched.Folded, ReadAfterLd, // x86memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -395,11 +405,12 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - VEX_LIG, FoldGenData, Sched<[WriteFMA]>; + VEX_LIG, FoldGenData, Sched<[sched]>; } multiclass fma4s_int opc, string OpcodeStr, Operand memop, - ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> { + ValueType VT, ComplexPattern mem_cpat, SDNode OpNode, + X86FoldableSchedWrite sched> { let isCodeGenOnly = 1 in { def rr_Int : FMA4S_Int, VEX_W, - VEX_LIG, Sched<[WriteFMA]>; + VEX_LIG, Sched<[sched]>; def rm_Int : FMA4S_Int, VEX_W, VEX_LIG, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; def mr_Int : FMA4S_Int, - VEX_LIG, Sched<[WriteFMALd, ReadAfterLd, + VEX_LIG, Sched<[sched.Folded, ReadAfterLd, // memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -432,13 +443,15 @@ let hasSideEffects = 0 in (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_LIG, FoldGenData, Sched<[WriteFMA]>; + []>, VEX_LIG, FoldGenData, Sched<[sched]>; } // isCodeGenOnly = 1 } multiclass fma4p opc, string OpcodeStr, SDNode OpNode, ValueType OpVT128, ValueType OpVT256, - PatFrag ld_frag128, PatFrag ld_frag256> { + PatFrag ld_frag128, PatFrag ld_frag256, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256> { let isCommutable = 1 in def rr : FMA4 opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, - VEX_W, Sched<[WriteFMA]>; + VEX_W, Sched<[sched128]>; def rm : FMA4, VEX_W, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched128.Folded, ReadAfterLd, ReadAfterLd]>; def mr : FMA4, - Sched<[WriteFMALd, ReadAfterLd, + Sched<[sched128.Folded, ReadAfterLd, // f128mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -473,21 +486,21 @@ multiclass fma4p opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, - VEX_W, VEX_L, Sched<[WriteFMA]>; + VEX_W, VEX_L, Sched<[sched256]>; def Yrm : FMA4, VEX_W, VEX_L, - Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>; + Sched<[sched256.Folded, ReadAfterLd, ReadAfterLd]>; def Ymr : FMA4, VEX_L, - Sched<[WriteFMALd, ReadAfterLd, + Sched<[sched256.Folded, ReadAfterLd, // f256mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -499,74 +512,78 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - Sched<[WriteFMA]>, FoldGenData; + Sched<[sched128]>, FoldGenData; def Yrr_REV : FMA4, - VEX_L, Sched<[WriteFMA]>, FoldGenData; + VEX_L, Sched<[sched256]>, FoldGenData; } // isCodeGenOnly = 1 } let ExeDomain = SSEPackedSingle in { // Scalar Instructions - defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32, + WriteFMAS>, fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32, - X86Fmadd4s>; - defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + X86Fmadd4s, WriteFMAS>; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32, + WriteFMAS>, fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32, - X86Fmsub4s>; + X86Fmsub4s, WriteFMAS>; defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, - X86Fnmadd, loadf32>, + X86Fnmadd, loadf32, WriteFMAS>, fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32, - X86Fnmadd4s>; + X86Fnmadd4s, WriteFMAS>; defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, - X86Fnmsub, loadf32>, + X86Fnmsub, loadf32, WriteFMAS>, fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32, - X86Fnmsub4s>; + X86Fnmsub4s, WriteFMAS>; // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, - loadv4f32, loadv8f32>; + loadv4f32, loadv8f32, WriteFMA, WriteFMAY>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, - loadv4f32, loadv8f32>; + loadv4f32, loadv8f32, WriteFMA, WriteFMAY>; defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32, - loadv4f32, loadv8f32>; + loadv4f32, loadv8f32, WriteFMA, WriteFMAY>; defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32, - loadv4f32, loadv8f32>; + loadv4f32, loadv8f32, WriteFMA, WriteFMAY>; defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32, - loadv4f32, loadv8f32>; + loadv4f32, loadv8f32, WriteFMA, WriteFMAY>; defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32, - loadv4f32, loadv8f32>; + loadv4f32, loadv8f32, WriteFMA, WriteFMAY>; } let ExeDomain = SSEPackedDouble in { // Scalar Instructions - defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64, + WriteFMAS>, fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64, - X86Fmadd4s>; - defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + X86Fmadd4s, WriteFMAS>; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64, + WriteFMAS>, fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64, - X86Fmsub4s>; + X86Fmsub4s, WriteFMAS>; defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, - X86Fnmadd, loadf64>, + X86Fnmadd, loadf64, WriteFMAS>, fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64, - X86Fnmadd4s>; + X86Fnmadd4s, WriteFMAS>; defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, - X86Fnmsub, loadf64>, + X86Fnmsub, loadf64, WriteFMAS>, fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64, - X86Fnmsub4s>; + X86Fnmsub4s, WriteFMAS>; // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, - loadv2f64, loadv4f64>; + loadv2f64, loadv4f64, WriteFMA, WriteFMAY>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, - loadv2f64, loadv4f64>; + loadv2f64, loadv4f64, WriteFMA, WriteFMAY>; defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64, - loadv2f64, loadv4f64>; + loadv2f64, loadv4f64, WriteFMA, WriteFMAY>; defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64, - loadv2f64, loadv4f64>; + loadv2f64, loadv4f64, WriteFMA, WriteFMAY>; defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64, - loadv2f64, loadv4f64>; + loadv2f64, loadv4f64, WriteFMA, WriteFMAY>; defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64, - loadv2f64, loadv4f64>; + loadv2f64, loadv4f64, WriteFMA, WriteFMAY>; } diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 15b07c3..bc20bd8 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -162,7 +162,9 @@ defm : BWWriteResPair; // 10-14 cycles. // Floating defm : BWWriteResPair; // Floating point square root. defm : BWWriteResPair; // Floating point reciprocal estimate. defm : BWWriteResPair; // Floating point reciprocal square root estimate. -defm : BWWriteResPair; // Fused Multiply Add. +defm : BWWriteResPair; // Fused Multiply Add. +defm : BWWriteResPair; // Fused Multiply Add (Scalar). +defm : BWWriteResPair; // Fused Multiply Add (YMM/ZMM). defm : BWWriteResPair; // Floating point fabs/fchs. defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point vector shuffles. @@ -1616,14 +1618,6 @@ def: InstRW<[BWWriteResGroup123], (instregex "MUL_F32m", "VPMULUDQYrm", "VPSADBWYrm")>; -def BWWriteResGroup124 : SchedWriteRes<[BWPort01,BWPort23]> { - let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup124], - (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym")>; - def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> { let Latency = 11; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index a9ffba0..2c0d8d8 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -159,7 +159,9 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -1927,8 +1929,7 @@ def HWWriteResGroup92 : SchedWriteRes<[HWPort01,HWPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup92], (instregex "(V?)MULPDrm", - "(V?)MULPSrm", - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m")>; + "(V?)MULPSrm")>; def HWWriteResGroup92_1 : SchedWriteRes<[HWPort01,HWPort23]> { let Latency = 12; @@ -1936,8 +1937,7 @@ def HWWriteResGroup92_1 : SchedWriteRes<[HWPort01,HWPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup92_1], (instregex "VMULPDYrm", - "VMULPSYrm", - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym")>; + "VMULPSYrm")>; def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> { let Latency = 10; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 8b1324c..426d8fa 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -314,6 +314,8 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; // Remaining SNB instrs. diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index e05f58c..fb16fa8 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -159,7 +159,9 @@ defm : SKLWriteResPair; // 10-14 cycles. // Floatin defm : SKLWriteResPair; // Floating point square root. defm : SKLWriteResPair; // Floating point reciprocal estimate. defm : SKLWriteResPair; // Floating point reciprocal square root estimate. -defm : SKLWriteResPair; // Fused Multiply Add. +defm : SKLWriteResPair; // Fused Multiply Add. +defm : SKLWriteResPair; // Fused Multiply Add (Scalar). +defm : SKLWriteResPair; // Fused Multiply Add (YMM/ZMM). defm : SKLWriteResPair; // Floating point fabs/fchs. defm : SKLWriteResPair; // Floating point and/or/xor logicals. defm : SKLWriteResPair; // Floating point vector shuffles. @@ -1893,8 +1895,6 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm", "(V?)PMULUDQrm", "(V?)SUBPDrm", "(V?)SUBPSrm")>; -def: InstRW<[SKLWriteResGroup134], - (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m")>; def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 10; @@ -2006,8 +2006,6 @@ def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm", "VPMULUDQYrm", "VSUBPDYrm", "VSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup147], - (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym")>; def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 11; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 441926b..509ebf2 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -159,7 +159,9 @@ defm : SKXWriteResPair; // 10-14 cycles. // Floatin defm : SKXWriteResPair; // Floating point square root. defm : SKXWriteResPair; // Floating point reciprocal estimate. defm : SKXWriteResPair; // Floating point reciprocal square root estimate. -defm : SKXWriteResPair; // Fused Multiply Add. +defm : SKXWriteResPair; // Fused Multiply Add. +defm : SKXWriteResPair; // Fused Multiply Add (Scalar). +defm : SKXWriteResPair; // Fused Multiply Add (YMM/ZMM). defm : SKXWriteResPair; // Floating point fabs/fchs. defm : SKXWriteResPair; // Floating point and/or/xor logicals. defm : SKXWriteResPair; // Floating point vector shuffles. @@ -3770,11 +3772,6 @@ def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup149], - (instregex - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Z128m(b?)", - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m", - "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)Zm")>; def: InstRW<[SKXWriteResGroup149], (instregex "CVTDQ2PSrm", "CVTPS2DQrm", "CVTSS2SDrm", @@ -3932,11 +3929,6 @@ def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup161], - (instregex - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym", - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Z256m(b?)", - "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Zm(b?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VADDPDYrm", "VADDPDZ256rm(b?)", "VADDPDZrm(b?)", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 379bf80..31d14f6 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -87,6 +87,8 @@ defm WriteFSqrt : X86SchedWritePair; // Floating point square root. defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar). +defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index fcb32ef..87b43ee 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -215,6 +215,8 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 44f8b8b..6f8eb9b 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -301,6 +301,8 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. +defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. +defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 7b0c085..8663d2f 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -284,6 +284,8 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; // Instruction overrides diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index f65aa75..ee1a004 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -208,6 +208,8 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -- 2.7.4