From 97160be53d46833b38b1d9310a9800fe85f83055 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 27 Nov 2017 10:41:32 +0000 Subject: [PATCH] [X86][FMA] Tag all FMA/FMA4 instructions with WriteFMA schedule class As mentioned on PR17367, many instructions are missing scheduling tags, preventing us from setting 'CompleteModel = 1' for better instruction analysis. This patch deals with FMA/FMA4, which is one of the bigger offenders (along with AVX512 in general). Annoyingly, all scheduler models need to define WriteFMA (now that it's actually used), even for older targets without FMA/FMA4 support, but that is an existing problem shared by other schedule classes. Differential Revision: https://reviews.llvm.org/D40351 llvm-svn: 319016 --- llvm/lib/Target/X86/X86InstrAVX512.td | 38 +++++++------- llvm/lib/Target/X86/X86InstrFMA.td | 74 +++++++++++++++++----------- llvm/lib/Target/X86/X86SchedBroadwell.td | 2 +- llvm/lib/Target/X86/X86SchedHaswell.td | 1 + llvm/lib/Target/X86/X86SchedSandyBridge.td | 3 +- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 2 +- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 2 +- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 1 + llvm/lib/Target/X86/X86ScheduleSLM.td | 3 +- llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 + 10 files changed, 75 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1f2e719..c30faed 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5790,13 +5790,13 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, - AVX512FMA3Base; + AVX512FMA3Base, Sched<[WriteFMA]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base; + AVX512FMA3Base, Sched<[WriteFMA, ReadAfterLd]>; defm mb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, !strconcat("$src2, ${src3}", 
_.BroadcastStr ), (OpNode _.RC:$src2, _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, - AVX512FMA3Base, EVEX_B; + AVX512FMA3Base, EVEX_B, Sched<[WriteFMA, ReadAfterLd]>; } } @@ -5815,7 +5815,7 @@ multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; } multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, @@ -5857,13 +5857,13 @@ multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1, vselect, 1>, - AVX512FMA3Base; + AVX512FMA3Base, Sched<[WriteFMA]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base; + AVX512FMA3Base, Sched<[WriteFMA, ReadAfterLd]>; defm mb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src2, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B; + _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, + Sched<[WriteFMA, ReadAfterLd]>; } } @@ -5883,7 +5884,7 @@ multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1, vselect, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; } multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, @@ -5924,7 +5925,7 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>, - AVX512FMA3Base; + AVX512FMA3Base, 
Sched<[WriteFMA]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -5932,7 +5933,7 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, - AVX512FMA3Base; + AVX512FMA3Base, Sched<[WriteFMA, ReadAfterLd]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -5941,7 +5942,8 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B; + _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B, + Sched<[WriteFMA, ReadAfterLd]>; } } @@ -5953,7 +5955,7 @@ multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 1, 1, vselect, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC; + AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; } multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, @@ -5994,28 +5996,30 @@ multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, let Constraints = "$src1 = $dst", hasSideEffects = 0 in { defm r_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base; + "$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base, + Sched<[WriteFMA]>; defm m_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base; + "$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base, + Sched<[WriteFMA, ReadAfterLd]>; defm rb_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, EVEX_B, EVEX_RC; + AVX512FMA3Base, EVEX_B, EVEX_RC, 
Sched<[WriteFMA, ReadAfterLd]>; let isCodeGenOnly = 1, isCommutable = 1 in { def r : AVX512FMA3S; + !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>; def m : AVX512FMA3S; + [RHS_m]>, Sched<[WriteFMA, ReadAfterLd]>; }// isCodeGenOnly = 1 }// Constraints = "$src1 = $dst" } diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index dd6a61d..1b70667 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -41,7 +41,8 @@ multiclass fma3p_rm_213 opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>; + [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>, + Sched<[WriteFMA]>; let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, - (MemFrag addr:$src3))))]>; + (MemFrag addr:$src3))))]>, + Sched<[WriteFMA, ReadAfterLd]>; } multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, @@ -60,7 +62,7 @@ multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; + []>, Sched<[WriteFMA]>; let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3), - RC:$src1)))]>; + RC:$src1)))]>, Sched<[WriteFMA, ReadAfterLd]>; } multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, @@ -79,7 +81,7 @@ multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; + []>, Sched<[WriteFMA]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 
patterns this helps tablegen's duplicate pattern detection. @@ -89,7 +91,7 @@ multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1, - RC:$src2)))]>; + RC:$src2)))]>, Sched<[WriteFMA, ReadAfterLd]>; } let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in @@ -172,7 +174,8 @@ multiclass fma3s_rm_213 opc, string OpcodeStr, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>; + [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>, + Sched<[WriteFMA]>; let mayLoad = 1 in def m : FMA3S opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, - (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>; + (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>, + Sched<[WriteFMA, ReadAfterLd]>; } multiclass fma3s_rm_231 opc, string OpcodeStr, @@ -191,7 +195,7 @@ multiclass fma3s_rm_231 opc, string OpcodeStr, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; + []>, Sched<[WriteFMA]>; let mayLoad = 1 in def m : FMA3S opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, - (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>; + (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>, + Sched<[WriteFMA, ReadAfterLd]>; } multiclass fma3s_rm_132 opc, string OpcodeStr, @@ -210,7 +215,7 @@ multiclass fma3s_rm_132 opc, string OpcodeStr, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; + []>, Sched<[WriteFMA]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 
@@ -220,7 +225,8 @@ multiclass fma3s_rm_132 opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, - (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>; + (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>, + Sched<[WriteFMA, ReadAfterLd]>; } let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in @@ -257,14 +263,14 @@ multiclass fma3s_rm_int opc, string OpcodeStr, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; + []>, Sched<[WriteFMA]>; let mayLoad = 1 in def m_Int : FMA3S_Int; + []>, Sched<[WriteFMA, ReadAfterLd]>; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -360,26 +366,29 @@ multiclass fma4s opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, - (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG; + (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG, + Sched<[WriteFMA]>; def rm : FMA4S, VEX_W, VEX_LIG; + (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, + Sched<[WriteFMA, ReadAfterLd]>; def mr : FMA4S, VEX_LIG; + (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG, + Sched<[WriteFMA, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : FMA4S, - VEX_LIG, FoldGenData; + VEX_LIG, FoldGenData, Sched<[WriteFMA]>; } multiclass fma4s_int opc, string OpcodeStr, Operand memop, @@ -391,26 +400,27 @@ let isCodeGenOnly = 1 in { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W, - VEX_LIG; + VEX_LIG, Sched<[WriteFMA]>; def rm_Int : FMA4S_Int, VEX_W, VEX_LIG; + mem_cpat:$src3)))]>, VEX_W, VEX_LIG, + Sched<[WriteFMA, ReadAfterLd]>; def mr_Int : FMA4S_Int, - VEX_LIG; + VEX_LIG, Sched<[WriteFMA, ReadAfterLd]>; let hasSideEffects = 0 in def rr_Int_REV : 
FMA4S_Int, VEX_LIG, FoldGenData; + []>, VEX_LIG, FoldGenData, Sched<[WriteFMA]>; } // isCodeGenOnly = 1 } @@ -424,19 +434,21 @@ multiclass fma4p opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, - VEX_W; + VEX_W, Sched<[WriteFMA]>; def rm : FMA4, VEX_W; + (ld_frag128 addr:$src3)))]>, VEX_W, + Sched<[WriteFMA, ReadAfterLd]>; def mr : FMA4; + (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>, + Sched<[WriteFMA, ReadAfterLd]>; let isCommutable = 1 in def Yrr : FMA4 opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, - VEX_W, VEX_L; + VEX_W, VEX_L, Sched<[WriteFMA]>; def Yrm : FMA4, VEX_W, VEX_L; + (ld_frag256 addr:$src3)))]>, VEX_W, VEX_L, + Sched<[WriteFMA, ReadAfterLd]>; def Ymr : FMA4, VEX_L; + (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L, + Sched<[WriteFMA, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4, - FoldGenData; + Sched<[WriteFMA]>, FoldGenData; def Yrr_REV : FMA4, - VEX_L, FoldGenData; + VEX_L, Sched<[WriteFMA]>, FoldGenData; } // isCodeGenOnly = 1 } diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index c70af22..7fef01c 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -131,7 +131,7 @@ defm : BWWriteResPair; // 10-14 cycles. // Floating po defm : BWWriteResPair; // Floating point square root. defm : BWWriteResPair; // Floating point reciprocal estimate. defm : BWWriteResPair; // Floating point reciprocal square root estimate. -// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm : BWWriteResPair; // Fused Multiply Add. defm : BWWriteResPair; // Floating point vector shuffles. 
defm : BWWriteResPair; // Floating point vector blends. def : WriteRes { // Fp vector variable blends. diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index c2b188b..5b92234 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -134,6 +134,7 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index c86c48c..c6c60bf 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -276,11 +276,12 @@ def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; -// AVX2 is not supported on that architecture, but we should define the basic +// AVX2/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Remaining SNB instrs. diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index aabb45b..eeeffdf 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -132,7 +132,7 @@ defm : SKLWriteResPair; // 10-14 cycles. // Floating defm : SKLWriteResPair; // Floating point square root. defm : SKLWriteResPair; // Floating point reciprocal estimate. defm : SKLWriteResPair; // Floating point reciprocal square root estimate. -// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm : SKLWriteResPair; // Fused Multiply Add. defm : SKLWriteResPair; // Floating point vector shuffles. defm : SKLWriteResPair; // Floating point vector blends. def : WriteRes { // Fp vector variable blends. 
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 50f6379..8ba1ac0 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -132,7 +132,7 @@ defm : SKXWriteResPair; // 10-14 cycles. // Floating defm : SKXWriteResPair; // Floating point square root. defm : SKXWriteResPair; // Floating point reciprocal estimate. defm : SKXWriteResPair; // Floating point reciprocal square root estimate. -// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm : SKXWriteResPair; // Fused Multiply Add. defm : SKXWriteResPair; // Floating point vector shuffles. defm : SKXWriteResPair; // Floating point vector blends. def : WriteRes { // Fp vector variable blends. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index edfb5805..a2f0296 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -192,6 +192,7 @@ defm : JWriteResIntPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 03ed2db..6a2a998 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -249,7 +249,7 @@ def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; -// AVX is not supported on that architecture, but we should define the basic +// AVX/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. 
def : WriteRes; defm : SMWriteResPair; @@ -257,4 +257,5 @@ defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; +defm : SMWriteResPair; } // SchedModel diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index c72880b..5ebe8a2 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -175,6 +175,7 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -- 2.7.4