[X86] Split WriteFDiv schedule classes to support single/double scalar, XMM and YMM...
author Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 7 May 2018 16:15:46 +0000 (16:15 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 7 May 2018 16:15:46 +0000 (16:15 +0000)
This removes all InstrRW overrides for these instructions - some x87 overrides remain but most use default (and realistic) values.

llvm-svn: 331643

17 files changed:
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/lib/Target/X86/X86SchedSandyBridge.td
llvm/lib/Target/X86/X86SchedSkylakeClient.td
llvm/lib/Target/X86/X86SchedSkylakeServer.td
llvm/lib/Target/X86/X86Schedule.td
llvm/lib/Target/X86/X86ScheduleAtom.td
llvm/lib/Target/X86/X86ScheduleBtVer2.td
llvm/lib/Target/X86/X86ScheduleSLM.td
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/test/CodeGen/X86/avx512-schedule.ll
llvm/test/CodeGen/X86/recip-fastmath.ll
llvm/test/CodeGen/X86/x87-schedule.ll
llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s
llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s

index ea1e4e2..94065de 100644 (file)
@@ -4910,42 +4910,42 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
 }
 
 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                SDNode VecNode, X86FoldableSchedWrite sched,
+                                SDNode VecNode, X86SchedWriteSizes sched,
                                 bit IsCommutable> {
   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
-                              sched, IsCommutable>,
+                              sched.PS.Scl, IsCommutable>,
              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
-                              sched, IsCommutable>,
+                              sched.PS.Scl, IsCommutable>,
                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
-                              sched, IsCommutable>,
+                              sched.PD.Scl, IsCommutable>,
              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
-                              sched, IsCommutable>,
+                              sched.PD.Scl, IsCommutable>,
                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
 }
 
 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode VecNode, SDNode SaeNode,
-                              X86FoldableSchedWrite sched, bit IsCommutable> {
+                              X86SchedWriteSizes sched, bit IsCommutable> {
   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
-                              VecNode, SaeNode, sched, IsCommutable>,
+                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
-                              VecNode, SaeNode, sched, IsCommutable>,
+                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
 }
 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
-                                 SchedWriteFAdd.Scl, 1>;
+                                 SchedWriteFAddSizes, 1>;
 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
-                                 SchedWriteFMul.Scl, 1>;
+                                 SchedWriteFMulSizes, 1>;
 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
-                                 SchedWriteFAdd.Scl, 0>;
+                                 SchedWriteFAddSizes, 0>;
 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
-                                 SchedWriteFDiv.Scl, 0>;
+                                 SchedWriteFDivSizes, 0>;
 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
-                               SchedWriteFCmp.Scl, 0>;
+                               SchedWriteFCmpSizes, 0>;
 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
-                               SchedWriteFCmp.Scl, 0>;
+                               SchedWriteFCmpSizes, 0>;
 
 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
@@ -5034,86 +5034,86 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
 }
 
 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
-                             Predicate prd, X86SchedWriteWidths sched,
+                             Predicate prd, X86SchedWriteSizes sched,
                              bit IsCommutable = 0> {
   let Predicates = [prd] in {
   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
-                              sched.ZMM, IsCommutable>, EVEX_V512, PS,
+                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                               EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
-                              sched.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
+                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                               EVEX_CD8<64, CD8VF>;
   }
 
     // Define only if AVX512VL feature is present.
   let Predicates = [prd, HasVLX] in {
     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
-                                   sched.XMM, IsCommutable>, EVEX_V128, PS,
+                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                    EVEX_CD8<32, CD8VF>;
     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
-                                   sched.YMM, IsCommutable>, EVEX_V256, PS,
+                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                    EVEX_CD8<32, CD8VF>;
     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
-                                   sched.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
+                                   sched.PD.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
                                    EVEX_CD8<64, CD8VF>;
     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
-                                   sched.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
+                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                    EVEX_CD8<64, CD8VF>;
   }
 }
 
 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
-                                   X86SchedWriteWidths sched> {
-  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+                                   X86SchedWriteSizes sched> {
+  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                     v16f32_info>,
                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                     v8f64_info>,
                                     EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
 }
 
 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
-                                 X86SchedWriteWidths sched> {
-  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+                                 X86SchedWriteSizes sched> {
+  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                   v16f32_info>,
                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                   v8f64_info>,
                                   EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
 }
 
 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
-                              SchedWriteFAdd, 1>,
-            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAdd>;
+                              SchedWriteFAddSizes, 1>,
+            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
-                              SchedWriteFMul, 1>,
-            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMul>;
+                              SchedWriteFMulSizes, 1>,
+            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
-                              SchedWriteFAdd>,
-            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAdd>;
+                              SchedWriteFAddSizes>,
+            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
-                              SchedWriteFDiv>,
-            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDiv>;
+                              SchedWriteFDivSizes>,
+            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
-                              SchedWriteFCmp, 0>,
-            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmp>;
+                              SchedWriteFCmpSizes, 0>,
+            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
-                              SchedWriteFCmp, 0>,
-            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmp>;
+                              SchedWriteFCmpSizes, 0>,
+            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
 let isCodeGenOnly = 1 in {
   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
-                                 SchedWriteFCmp, 1>;
+                                 SchedWriteFCmpSizes, 1>;
   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
-                                 SchedWriteFCmp, 1>;
+                                 SchedWriteFCmpSizes, 1>;
 }
 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
-                               SchedWriteFLogic, 1>;
+                               SchedWriteFLogicSizes, 1>;
 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
-                               SchedWriteFLogic, 0>;
+                               SchedWriteFLogicSizes, 0>;
 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
-                               SchedWriteFLogic, 1>;
+                               SchedWriteFLogicSizes, 1>;
 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
-                               SchedWriteFLogic, 1>;
+                               SchedWriteFLogicSizes, 1>;
 
 // Patterns catch floating point selects with bitcasted integer logic ops.
 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
@@ -9960,9 +9960,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr
 //===----------------------------------------------------------------------===//
 
 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
-                                 SchedWriteFShuffle>;
+                                 SchedWriteFShuffleSizes>;
 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
-                                 SchedWriteFShuffle>;
+                                 SchedWriteFShuffleSizes>;
 
 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                        SchedWriteShuffle, HasBWI>;
index 1ad7d4c..4913bd4 100644 (file)
@@ -2510,99 +2510,99 @@ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
 /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
 /// classes below
 multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
-                                  SDNode OpNode, X86SchedWriteWidths sched> {
+                                  SDNode OpNode, X86SchedWriteSizes sched> {
   let Predicates = [HasAVX, NoVLX] in {
   defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                                VR128, v4f32, f128mem, loadv4f32,
-                               SSEPackedSingle, sched.XMM, 0>, PS, VEX_4V, VEX_WIG;
+                               SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
   defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                                VR128, v2f64, f128mem, loadv2f64,
-                               SSEPackedDouble, sched.XMM, 0>, PD, VEX_4V, VEX_WIG;
+                               SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;
 
   defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                         OpNode, VR256, v8f32, f256mem, loadv8f32,
-                        SSEPackedSingle, sched.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
+                        SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
   defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                         OpNode, VR256, v4f64, f256mem, loadv4f64,
-                        SSEPackedDouble, sched.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
+                        SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
   }
 
   let Constraints = "$src1 = $dst" in {
     defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
                               v4f32, f128mem, memopv4f32, SSEPackedSingle,
-                              sched.XMM>, PS;
+                              sched.PS.XMM>, PS;
     defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
                               v2f64, f128mem, memopv2f64, SSEPackedDouble,
-                              sched.XMM>, PD;
+                              sched.PD.XMM>, PD;
   }
 }
 
 multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                  X86SchedWriteWidths sched> {
+                                  X86SchedWriteSizes sched> {
   defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
-                         OpNode, FR32, f32mem, SSEPackedSingle, sched.Scl, 0>,
+                         OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
                          XS, VEX_4V, VEX_LIG, VEX_WIG;
   defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
-                         OpNode, FR64, f64mem, SSEPackedDouble, sched.Scl, 0>,
+                         OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
                          XD, VEX_4V, VEX_LIG, VEX_WIG;
 
   let Constraints = "$src1 = $dst" in {
     defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                               OpNode, FR32, f32mem, SSEPackedSingle,
-                              sched.Scl>, XS;
+                              sched.PS.Scl>, XS;
     defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                               OpNode, FR64, f64mem, SSEPackedDouble,
-                              sched.Scl>, XD;
+                              sched.PD.Scl>, XD;
   }
 }
 
 multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                       SDPatternOperator OpNode,
-                                      X86SchedWriteWidths sched> {
+                                      X86SchedWriteSizes sched> {
   defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                    !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
-                   SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+                   SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
   defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                    !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
-                   SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
+                   SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
 
   let Constraints = "$src1 = $dst" in {
     defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                    !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
-                   SSEPackedSingle, sched.Scl>, XS;
+                   SSEPackedSingle, sched.PS.Scl>, XS;
     defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                    !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
-                   SSEPackedDouble, sched.Scl>, XD;
+                   SSEPackedDouble, sched.PD.Scl>, XD;
   }
 }
 
 // Binary Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAdd>,
-           basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAdd>,
-           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAdd>;
-defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMul>,
-           basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMul>,
-           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMul>;
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
+           basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
+           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
+           basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
+           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
 let isCommutable = 0 in {
-  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAdd>,
-             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAdd>,
-             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAdd>;
-  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDiv>,
-             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDiv>,
-             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDiv>;
-  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmp>,
-             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmp>,
-             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmp>;
-  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmp>,
-             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmp>,
-             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmp>;
+  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
+             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
+             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
+  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
+             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
+             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
+  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
+             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
+             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
+  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
+             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
+             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
 }
 
 let isCodeGenOnly = 1 in {
-  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmp>,
-             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmp>;
-  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmp>,
-             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmp>;
+  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
+             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
+  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
+             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
 }
 
 // Patterns used to select SSE scalar fp arithmetic instructions from
index 93adb10..936dd6e 100755 (executable)
@@ -162,8 +162,15 @@ defm : BWWriteResPair<WriteFCmpY,  [BWPort1],  3, [1], 1, 6>; // Floating point
 defm : BWWriteResPair<WriteFCom,   [BWPort1],  3>; // Floating point compare to flags.
 defm : BWWriteResPair<WriteFMul,   [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
 defm : BWWriteResPair<WriteFMulY,  [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
-defm : BWWriteResPair<WriteFDiv,   [BWPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
-defm : BWWriteResPair<WriteFDivY,  [BWPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
+
+//defm : BWWriteResPair<WriteFDiv,     [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division.
+defm : BWWriteResPair<WriteFDivX,    [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM).
+defm : BWWriteResPair<WriteFDivY,    [BWPort0,BWPort015,BWFPDivider], 17, [2,1,10], 3, 6>; // Floating point division (YMM).
+defm : BWWriteResPair<WriteFDivZ,    [BWPort0,BWPort015,BWFPDivider], 17, [2,1,10], 3, 6>; // Floating point division (ZMM).
+//defm : BWWriteResPair<WriteFDiv64,   [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; // Floating point division.
+defm : BWWriteResPair<WriteFDiv64X,  [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; // Floating point division (XMM).
+defm : BWWriteResPair<WriteFDiv64Y,  [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point division (YMM).
+defm : BWWriteResPair<WriteFDiv64Z,  [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point division (ZMM).
 
 defm : X86WriteRes<WriteFSqrt,       [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root.
 defm : X86WriteRes<WriteFSqrtLd,     [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>;
@@ -1394,19 +1401,12 @@ def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
 }
 def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
 
-def BWWriteResGroup122 : SchedWriteRes<[BWPort0,BWFPDivider]> {
-  let Latency = 11;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,5];
-}
-def: InstRW<[BWWriteResGroup122], (instregex "(V?)DIVPSrr")>;
-
 def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
   let Latency = 11;
   let NumMicroOps = 1;
   let ResourceCycles = [1,3]; // Really 2.5 cycle throughput
 }
-def: InstRW<[BWWriteResGroup122_1], (instregex "(V?)DIVSSrr")>;
+def : SchedAlias<WriteFDiv, BWWriteResGroup122_1>; // TODO - convert to BWWriteResPair
 
 def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
   let Latency = 11;
@@ -1461,19 +1461,12 @@ def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> {
 }
 def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
 
-def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
-  let Latency = 14;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,8];
-}
-def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr")>;
-
 def BWWriteResGroup139_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
   let Latency = 14;
   let NumMicroOps = 1;
   let ResourceCycles = [1,4];
 }
-def: InstRW<[BWWriteResGroup139_1], (instregex "(V?)DIVSDrr")>;
+def : SchedAlias<WriteFDiv64, BWWriteResGroup139_1>; // TODO - convert to BWWriteResPair
 
 def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
   let Latency = 14;
@@ -1524,8 +1517,7 @@ def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,5];
 }
-def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm",
-                                             "(V?)DIVSSrm")>;
+def : SchedAlias<WriteFDivLd, BWWriteResGroup150>; // TODO - convert to BWWriteResPair
 
 def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
   let Latency = 16;
@@ -1541,13 +1533,6 @@ def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> {
 }
 def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>;
 
-def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
-  let Latency = 17;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,10];
-}
-def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>;
-
 def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
   let Latency = 18;
   let NumMicroOps = 8;
@@ -1568,8 +1553,7 @@ def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,8];
 }
-def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm",
-                                             "(V?)DIVSDrm")>;
+def : SchedAlias<WriteFDiv64Ld, BWWriteResGroup161>; // TODO - convert to BWWriteResPair
 
 def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> {
   let Latency = 20;
@@ -1608,20 +1592,6 @@ def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
 }
 def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>;
 
-def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
-  let Latency = 23;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,16];
-}
-def: InstRW<[BWWriteResGroup173], (instregex "VDIVPDYrr")>;
-
-def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
-  let Latency = 23;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,10];
-}
-def: InstRW<[BWWriteResGroup174], (instregex "VDIVPSYrm")>;
-
 def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
   let Latency = 23;
   let NumMicroOps = 19;
@@ -1650,13 +1620,6 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
 }
 def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
 
-def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
-  let Latency = 29;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,16];
-}
-def: InstRW<[BWWriteResGroup183], (instregex "VDIVPDYrm")>;
-
 def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
   let Latency = 22;
   let NumMicroOps = 7;
index 2e2535e..b7de192 100644 (file)
@@ -156,8 +156,15 @@ defm : HWWriteResPair<WriteFCmpY,  [HWPort1],  3, [1], 1, 7>;
 defm : HWWriteResPair<WriteFCom,   [HWPort1],  3>;
 defm : HWWriteResPair<WriteFMul,  [HWPort01],  5, [1], 1, 6>;
 defm : HWWriteResPair<WriteFMulY, [HWPort01],  5, [1], 1, 7>;
-defm : HWWriteResPair<WriteFDiv,   [HWPort0], 12, [1], 1, 5>; // 10-14 cycles.
-defm : HWWriteResPair<WriteFDivY,  [HWPort0], 12, [1], 1, 7>; // 10-14 cycles.
+
+defm : HWWriteResPair<WriteFDiv,    [HWPort0,HWFPDivider], 13, [1,7], 1, 5>;
+defm : HWWriteResPair<WriteFDivX,   [HWPort0,HWFPDivider], 13, [1,7], 1, 6>;
+defm : HWWriteResPair<WriteFDivY,   [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
+defm : HWWriteResPair<WriteFDivZ,   [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
+defm : HWWriteResPair<WriteFDiv64,  [HWPort0,HWFPDivider], 20, [1,14], 1, 5>;
+defm : HWWriteResPair<WriteFDiv64X, [HWPort0,HWFPDivider], 20, [1,14], 1, 6>;
+defm : HWWriteResPair<WriteFDiv64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
+defm : HWWriteResPair<WriteFDiv64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
 
 defm : HWWriteResPair<WriteFRcp,   [HWPort0],  5, [1], 1, 5>;
 defm : HWWriteResPair<WriteFRcpX,  [HWPort0],  5, [1], 1, 6>;
@@ -1652,13 +1659,6 @@ def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
                                             "MUL_FST0r",
                                             "MUL_FrST0")>;
 
-def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
-  let Latency = 18;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,7];
-}
-def: InstRW<[HWWriteResGroup91_4], (instregex "(V?)DIVSSrm")>;
-
 def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
   let Latency = 11;
   let NumMicroOps = 2;
@@ -1828,14 +1828,6 @@ def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo
 }
 def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>;
 
-def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWFPDivider]> {
-  let Latency = 13;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,7];
-}
-def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr",
-                                             "(V?)DIVSSrr")>;
-
 def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
   let Latency = 11;
   let NumMicroOps = 7;
@@ -1865,13 +1857,6 @@ def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPo
 }
 def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>;
 
-def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
-  let Latency = 19;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,7];
-}
-def: InstRW<[HWWriteResGroup134], (instregex "(V?)DIVPSrm")>;
-
 def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
   let Latency = 19;
   let NumMicroOps = 11;
@@ -1945,14 +1930,6 @@ def: InstRW<[HWWriteResGroup154], (instregex "DIV_FPrST0",
                                              "DIV_FST0r",
                                              "DIV_FrST0")>;
 
-def HWWriteResGroup154_1 : SchedWriteRes<[HWPort0,HWFPDivider]> {
-  let Latency = 20;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,14];
-}
-def: InstRW<[HWWriteResGroup154_1], (instregex "(V?)DIVPDrr",
-                                               "(V?)DIVSDrr")>;
-
 def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> {
   let Latency = 27;
   let NumMicroOps = 2;
@@ -1960,20 +1937,6 @@ def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> {
 }
 def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F(32|64)m")>;
 
-def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
-  let Latency = 26;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,14];
-}
-def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>;
-
-def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
-  let Latency = 25;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,14];
-}
-def: InstRW<[HWWriteResGroup155_4], (instregex "(V?)DIVSDrm")>;
-
 def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> {
   let Latency = 20;
   let NumMicroOps = 10;
@@ -1981,20 +1944,6 @@ def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> {
 }
 def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>;
 
-def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
-  let Latency = 21;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,14];
-}
-def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>;
-
-def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
-  let Latency = 28;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,14];
-}
-def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>;
-
 def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
   let Latency = 30;
   let NumMicroOps = 3;
@@ -2055,20 +2004,6 @@ def HWWriteResGroup171 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort237,HWPor
 def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir",
                                              "OUT(8|16|32)rr")>;
 
-def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
-  let Latency = 35;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,28];
-}
-def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>;
-
-def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
-  let Latency = 42;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,28];
-}
-def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>;
-
 def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> {
   let Latency = 41;
   let NumMicroOps = 18;
index c3ef44b..fcd5fb9 100644 (file)
@@ -146,8 +146,15 @@ defm : SBWriteResPair<WriteFCmpY,  [SBPort1],  3, [1], 1, 7>;
 defm : SBWriteResPair<WriteFCom,   [SBPort1],  3>;
 defm : SBWriteResPair<WriteFMul,   [SBPort0],  5, [1], 1, 6>;
 defm : SBWriteResPair<WriteFMulY,  [SBPort0],  5, [1], 1, 7>;
-defm : SBWriteResPair<WriteFDiv,   [SBPort0], 24, [1], 1, 5>;
-defm : SBWriteResPair<WriteFDivY,  [SBPort0], 24, [1], 1, 7>;
+
+defm : SBWriteResPair<WriteFDiv,    [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
+defm : SBWriteResPair<WriteFDivX,   [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
+defm : SBWriteResPair<WriteFDivY,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
+defm : SBWriteResPair<WriteFDivZ,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
+defm : SBWriteResPair<WriteFDiv64,  [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
+defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
+defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
+defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
 
 defm : SBWriteResPair<WriteFRcp,   [SBPort0],  5, [1], 1, 6>;
 defm : SBWriteResPair<WriteFRcpX,  [SBPort0],  5, [1], 1, 6>;
@@ -1361,14 +1368,6 @@ def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
 }
 def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
 
-def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> {
-  let Latency = 14;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,14];
-}
-def: InstRW<[SBWriteResGroup116], (instregex "(V?)DIVPSrr",
-                                             "(V?)DIVSSrr")>;
-
 def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
   let Latency = 15;
   let NumMicroOps = 3;
@@ -1376,37 +1375,6 @@ def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
 }
 def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;
 
-def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
-  let Latency = 20;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,14];
-}
-def: InstRW<[SBWriteResGroup123], (instregex "(V?)DIVPSrm",
-                                             "(V?)DIVSSrm")>;
-
-def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> {
-  let Latency = 22;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,22];
-}
-def: InstRW<[SBWriteResGroup126], (instregex "(V?)DIVPDrr",
-                                             "(V?)DIVSDrr")>;
-
-def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
-  let Latency = 28;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,22];
-}
-def: InstRW<[SBWriteResGroup128], (instregex "(V?)DIVPDrm",
-                                             "(V?)DIVSDrm")>;
-
-def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
-  let Latency = 29;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,28];
-}
-def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
-
 def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
   let Latency = 31;
   let NumMicroOps = 2;
@@ -1421,25 +1389,4 @@ def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
 }
 def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
 
-def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> {
-  let Latency = 36;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,28];
-}
-def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
-
-def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
-  let Latency = 45;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,44];
-}
-def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
-
-def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> {
-  let Latency = 52;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,44];
-}
-def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
-
 } // SchedModel
index 077eeed..9875ce3 100644 (file)
@@ -159,8 +159,15 @@ defm : SKLWriteResPair<WriteFCmpY, [SKLPort01],  4, [1], 1, 7>; // Floating poin
 defm : SKLWriteResPair<WriteFCom,   [SKLPort0],  2>; // Floating point compare to flags.
 defm : SKLWriteResPair<WriteFMul,  [SKLPort01],  4, [1], 1, 6>; // Floating point multiplication.
 defm : SKLWriteResPair<WriteFMulY, [SKLPort01],  4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
-defm : SKLWriteResPair<WriteFDiv,   [SKLPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
-defm : SKLWriteResPair<WriteFDivY,  [SKLPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
+
+defm : SKLWriteResPair<WriteFDiv,     [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division.
+//defm : SKLWriteResPair<WriteFDivX,    [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM).
+defm : SKLWriteResPair<WriteFDivY,    [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM).
+defm : SKLWriteResPair<WriteFDivZ,    [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (ZMM).
+//defm : SKLWriteResPair<WriteFDiv64,   [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division.
+//defm : SKLWriteResPair<WriteFDiv64X,  [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM).
+//defm : SKLWriteResPair<WriteFDiv64Y,  [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM).
+defm : SKLWriteResPair<WriteFDiv64Z,  [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (ZMM).
 
 defm : SKLWriteResPair<WriteFSqrt,    [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
 defm : SKLWriteResPair<WriteFSqrtX,   [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
@@ -1611,15 +1618,7 @@ def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SKLWriteResGroup145], (instregex "(V?)DIVPSrr",
-                                              "(V?)DIVSSrr")>;
-
-def SKLWriteResGroup145_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
-  let Latency = 11;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,5];
-}
-def: InstRW<[SKLWriteResGroup145_1], (instregex "VDIVPSYrr")>;
+def : SchedAlias<WriteFDivX, SKLWriteResGroup145>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 11;
@@ -1736,15 +1735,15 @@ def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SKLWriteResGroup166], (instregex "(V?)DIVPDrr",
-                                              "(V?)DIVSDrr")>;
+def : SchedAlias<WriteFDiv64,  SKLWriteResGroup166>; // TODO - convert to SKLWriteResPair
+def : SchedAlias<WriteFDiv64X, SKLWriteResGroup166>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
   let Latency = 14;
   let NumMicroOps = 1;
   let ResourceCycles = [1,5];
 }
-def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>;
+def : SchedAlias<WriteFDiv64Y, SKLWriteResGroup166_1>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
   let Latency = 14;
@@ -1776,13 +1775,6 @@ def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06
 }
 def: InstRW<[SKLWriteResGroup174], (instregex "RCL(8|16|32|64)mCL")>;
 
-def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
-  let Latency = 16;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup175], (instregex "(V?)DIVSSrm")>;
-
 def SKLWriteResGroup177 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
   let Latency = 16;
   let NumMicroOps = 14;
@@ -1802,7 +1794,7 @@ def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,5];
 }
-def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>;
+def : SchedAlias<WriteFDivXLd, SKLWriteResGroup179>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
   let Latency = 17;
@@ -1811,13 +1803,6 @@ def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKL
 }
 def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>;
 
-def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
-  let Latency = 18;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,5];
-}
-def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>;
-
 def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
   let Latency = 18;
   let NumMicroOps = 8;
@@ -1837,7 +1822,7 @@ def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,4];
 }
-def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>;
+def : SchedAlias<WriteFDiv64Ld,  SKLWriteResGroup186>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
   let Latency = 20;
@@ -1853,7 +1838,7 @@ def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,4];
 }
-def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>;
+def : SchedAlias<WriteFDiv64XLd, SKLWriteResGroup190>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
   let Latency = 20;
@@ -1874,7 +1859,7 @@ def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,8];
 }
-def: InstRW<[SKLWriteResGroup195], (instregex "VDIVPDYrm")>;
+def : SchedAlias<WriteFDiv64YLd, SKLWriteResGroup195>; // TODO - convert to SKLWriteResPair
 
 def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 22;
index bd58687..7bff6d0 100755 (executable)
@@ -159,8 +159,15 @@ defm : SKXWriteResPair<WriteFCmpY,[SKXPort015],  4, [1], 1, 7>; // Floating poin
 defm : SKXWriteResPair<WriteFCom,   [SKXPort0],  2>; // Floating point compare to flags.
 defm : SKXWriteResPair<WriteFMul, [SKXPort015],  4, [1], 1, 6>; // Floating point multiplication.
 defm : SKXWriteResPair<WriteFMulY,[SKXPort015],  4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
-defm : SKXWriteResPair<WriteFDiv,   [SKXPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
-defm : SKXWriteResPair<WriteFDivY,  [SKXPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
+
+defm : SKXWriteResPair<WriteFDiv,     [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // Floating point division.
+//defm : SKXWriteResPair<WriteFDivX,    [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM).
+defm : SKXWriteResPair<WriteFDivY,    [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM).
+defm : SKXWriteResPair<WriteFDivZ,    [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // Floating point division (ZMM).
+//defm : SKXWriteResPair<WriteFDiv64,   [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // Floating point double division.
+//defm : SKXWriteResPair<WriteFDiv64X,  [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM).
+//defm : SKXWriteResPair<WriteFDiv64Y,  [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM).
+defm : SKXWriteResPair<WriteFDiv64Z,  [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // Floating point double division (ZMM).
 
 defm : SKXWriteResPair<WriteFSqrt,    [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
 defm : SKXWriteResPair<WriteFSqrtX,   [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
@@ -2708,15 +2715,7 @@ def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SKXWriteResGroup159], (instregex "(V?)DIVPS(Z128)?rr",
-                                              "(V?)DIVSS(Z?)rr")>;
-
-def SKXWriteResGroup159_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
-  let Latency = 11;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,5];
-}
-def: InstRW<[SKXWriteResGroup159_1], (instregex "VDIVPS(Y|Z256)rr")>;
+def : SchedAlias<WriteFDivX,  SKXWriteResGroup159>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
   let Latency = 11;
@@ -2958,15 +2957,15 @@ def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SKXWriteResGroup184], (instregex "(V?)DIVPDrr",
-                                              "(V?)DIVSD(Z?)rr")>;
+def : SchedAlias<WriteFDiv64,  SKXWriteResGroup184>; // TODO - convert to SKXWriteResPair
+def : SchedAlias<WriteFDiv64X, SKXWriteResGroup184>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
   let Latency = 14;
   let NumMicroOps = 1;
   let ResourceCycles = [1,5];
 }
-def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>;
+def : SchedAlias<WriteFDiv64Y, SKXWriteResGroup184_1>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
   let Latency = 14;
@@ -3028,13 +3027,6 @@ def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06
 }
 def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
 
-def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
-  let Latency = 16;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup196], (instregex "(V?)DIVSS(Z?)rm")>;
-
 def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
   let Latency = 16;
   let NumMicroOps = 4;
@@ -3064,7 +3056,7 @@ def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,5];
 }
-def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>;
+def : SchedAlias<WriteFDivXLd, SKXWriteResGroup201>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
   let Latency = 17;
@@ -3073,13 +3065,6 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX
 }
 def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
 
-def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
-  let Latency = 18;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1,5];
-}
-def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>;
-
 def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
   let Latency = 18;
   let NumMicroOps = 4;
@@ -3106,7 +3091,7 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,4];
 }
-def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>;
+def : SchedAlias<WriteFDiv64Ld,  SKXWriteResGroup209>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
   let Latency = 19;
@@ -3138,7 +3123,7 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,4];
 }
-def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>;
+def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
   let Latency = 20;
@@ -3169,7 +3154,7 @@ def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1,8];
 }
-def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPD(Y|Z256)rm")>;
+def : SchedAlias<WriteFDiv64YLd, SKXWriteResGroup222>; // TODO - convert to SKXWriteResPair
 
 def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
   let Latency = 22;
@@ -3238,20 +3223,6 @@ def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
 def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
                                               "VPCONFLICTQZ256rr")>;
 
-def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
-  let Latency = 23;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,16];
-}
-def: InstRW<[SKXWriteResGroup227], (instregex "VDIVPDZrr")>;
-
-def SKXWriteResGroup227_1 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
-  let Latency = 18;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1,10];
-}
-def: InstRW<[SKXWriteResGroup227_1], (instregex "VDIVPSZrr")>;
-
 def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
   let Latency = 23;
   let NumMicroOps = 19;
@@ -3259,13 +3230,6 @@ def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SK
 }
 def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>;
 
-def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
-  let Latency = 25;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,10];
-}
-def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>;
-
 def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
   let Latency = 25;
   let NumMicroOps = 3;
@@ -3330,13 +3294,6 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
 }
 def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
 
-def SKXWriteResGroup244 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
-  let Latency = 30;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1,16];
-}
-def: InstRW<[SKXWriteResGroup244], (instregex "VDIVPDZrm(b?)")>;
-
 def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
   let Latency = 30;
   let NumMicroOps = 5;
index cc933c8..22c6719 100644 (file)
@@ -115,8 +115,14 @@ defm WriteFCmpY  : X86SchedWritePair; // Floating point compare (YMM/ZMM).
 defm WriteFCom   : X86SchedWritePair; // Floating point compare to flags.
 defm WriteFMul   : X86SchedWritePair; // Floating point multiplication.
 defm WriteFMulY  : X86SchedWritePair; // Floating point multiplication (YMM/ZMM).
-defm WriteFDiv   : X86SchedWritePair; // Floating point division.
-defm WriteFDivY  : X86SchedWritePair; // Floating point division (YMM/ZMM).
+defm WriteFDiv    : X86SchedWritePair; // Floating point division.
+defm WriteFDivX   : X86SchedWritePair; // Floating point division (XMM).
+defm WriteFDivY   : X86SchedWritePair; // Floating point division (YMM).
+defm WriteFDivZ   : X86SchedWritePair; // Floating point division (ZMM).
+defm WriteFDiv64  : X86SchedWritePair; // Floating point double division.
+defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
 defm WriteFSqrt  : X86SchedWritePair; // Floating point square root.
 defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
 defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
@@ -268,6 +274,8 @@ def SchedWriteFCmp
  : X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmpY, WriteFCmpY>;
 def SchedWriteFMul
  : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
+def SchedWriteFMul64
+ : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
 def SchedWriteFMA
  : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>;
 def SchedWriteDPPD
@@ -275,7 +283,9 @@ def SchedWriteDPPD
 def SchedWriteDPPS
  : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>;
 def SchedWriteFDiv
- : X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>;
+ : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
+def SchedWriteFDiv64
+ : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
 def SchedWriteFSqrt
  : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
                        WriteFSqrtY, WriteFSqrtZ>;
@@ -347,12 +357,18 @@ def SchedWriteVarBlend
 // Vector size wrappers.
 def SchedWriteFAddSizes
  : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd>;
+def SchedWriteFCmpSizes
+ : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp>;
 def SchedWriteFMulSizes
- : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul>;
+ : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
 def SchedWriteFDivSizes
- : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv>;
+ : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
 def SchedWriteFSqrtSizes
  : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
+def SchedWriteFLogicSizes
+ : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
+def SchedWriteFShuffleSizes
+ : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
 
 //===----------------------------------------------------------------------===//
 // Generic Processor Scheduler Models.
index 8ffa9e6..77c8ae7 100644 (file)
@@ -217,7 +217,13 @@ defm : AtomWriteResPair<WriteFRsqrt,         [AtomPort0],  [AtomPort0],  4,  4,
 defm : AtomWriteResPair<WriteFRsqrtX,       [AtomPort01], [AtomPort01],  9, 10,  [9], [10]>;
 defm : AtomWriteResPair<WriteFRsqrtY,       [AtomPort01], [AtomPort01],  9, 10,  [9], [10]>;
 defm : AtomWriteResPair<WriteFDiv,          [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFDivY,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFDivX,         [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDivY,         [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDivZ,         [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDiv64,        [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteFDiv64X,       [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFDiv64Y,       [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFDiv64Z,       [AtomPort01], [AtomPort01],125,125,[125],[125]>;
 defm : AtomWriteResPair<WriteFSqrt,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
 defm : AtomWriteResPair<WriteFSqrtX,        [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
 defm : AtomWriteResPair<WriteFSqrtY,        [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
@@ -702,12 +708,6 @@ def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
 }
 def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
 
-def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> {
-  let Latency = 62;
-  let ResourceCycles = [62];
-}
-def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?")>;
-
 def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
   let Latency = 63;
   let ResourceCycles = [63];
@@ -720,12 +720,6 @@ def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
 }
 def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
 
-def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> {
-  let Latency = 70;
-  let ResourceCycles = [70];
-}
-def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm)>;
-
 def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
   let Latency = 71;
   let ResourceCycles = [71];
@@ -788,12 +782,6 @@ def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
 }
 def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
 
-def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> {
-  let Latency = 125;
-  let ResourceCycles = [125];
-}
-def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm)>;
-
 def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
   let Latency = 127;
   let ResourceCycles = [127];
index 31e26b4..685ea3b 100644 (file)
@@ -336,7 +336,13 @@ defm : JWriteResFpuPair<WriteFRsqrt,       [JFPU1, JFPM],  2>;
 defm : JWriteResFpuPair<WriteFRsqrtX,      [JFPU1, JFPM],  2>;
 defm : JWriteResYMMPair<WriteFRsqrtY,      [JFPU1, JFPM],  2, [2,2], 2>;
 defm : JWriteResFpuPair<WriteFDiv,         [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResFpuPair<WriteFDivX,        [JFPU1, JFPM], 19, [1, 19]>;
 defm : JWriteResYMMPair<WriteFDivY,        [JFPU1, JFPM], 38, [2, 38], 2>;
+defm : JWriteResYMMPair<WriteFDivZ,        [JFPU1, JFPM], 38, [2, 38], 2>;
+defm : JWriteResFpuPair<WriteFDiv64,       [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResFpuPair<WriteFDiv64X,      [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResYMMPair<WriteFDiv64Y,      [JFPU1, JFPM], 38, [2, 38], 2>;
+defm : JWriteResYMMPair<WriteFDiv64Z,      [JFPU1, JFPM], 38, [2, 38], 2>;
 defm : JWriteResFpuPair<WriteFSqrt,        [JFPU1, JFPM], 21, [1, 21]>;
 defm : JWriteResFpuPair<WriteFSqrtX,       [JFPU1, JFPM], 21, [1, 21]>;
 defm : JWriteResYMMPair<WriteFSqrtY,       [JFPU1, JFPM], 42, [2, 42], 2>;
index 9d1787f..93de36b 100644 (file)
@@ -137,8 +137,14 @@ defm : SLMWriteResPair<WriteFCmpY,  [SLM_FPC_RSV1], 3>;
 defm : SLMWriteResPair<WriteFCom,   [SLM_FPC_RSV1], 3>;
 defm : SLMWriteResPair<WriteFMul,   [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
 defm : SLMWriteResPair<WriteFMulY,  [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFDiv,   [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
-defm : SLMWriteResPair<WriteFDivY,  [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
+defm : SLMWriteResPair<WriteFDiv,     [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
+defm : SLMWriteResPair<WriteFDivX,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivY,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivZ,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDiv64,   [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
+defm : SLMWriteResPair<WriteFDiv64X,  [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64Y,  [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64Z,  [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
 defm : SLMWriteResPair<WriteFRcp,     [SLM_FPC_RSV0], 5>;
 defm : SLMWriteResPair<WriteFRcpX,    [SLM_FPC_RSV0], 5>;
 defm : SLMWriteResPair<WriteFRcpY,    [SLM_FPC_RSV0], 5>;
@@ -333,62 +339,4 @@ defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFMAX, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0],  1>;
 
-// Instruction overrides
-
-def SLMriteResGroup1 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 69;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,69];
-}
-def: InstRW<[SLMriteResGroup1], (instregex "(V?)DIVPDrr")>;
-
-def SLMriteResGroup2 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 39;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,39];
-}
-def: InstRW<[SLMriteResGroup2], (instregex "(V?)DIVPSrr")>;
-
-def SLMriteResGroup3 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 34;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,32];
-}
-def: InstRW<[SLMriteResGroup3], (instregex "(V?)DIVSDrr")>;
-
-def SLMriteResGroup4 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 19;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,17];
-}
-def: InstRW<[SLMriteResGroup4], (instregex "(V?)DIVSSrr")>;
-
-def SLMriteResGroup5 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 72;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,1,69];
-}
-def: InstRW<[SLMriteResGroup5], (instregex "(V?)DIVPDrm")>;
-
-def SLMriteResGroup6 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 42;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,1,39];
-}
-def: InstRW<[SLMriteResGroup6], (instregex "(V?)DIVPSrm")>;
-
-def SLMriteResGroup7 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 37;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,1,32];
-}
-def: InstRW<[SLMriteResGroup7], (instregex "(V?)DIVSDrm")>;
-
-def SLMriteResGroup8 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
-  let Latency = 22;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1,1,17];
-}
-def: InstRW<[SLMriteResGroup8], (instregex "(V?)DIVSSrm")>;
-
 } // SchedModel
index f5a0e9c..5c3408b 100644 (file)
@@ -207,7 +207,13 @@ defm : ZnWriteResFpuPair<WriteCvtI2F,    [ZnFPU3],  5>;
 defm : ZnWriteResFpuPair<WriteCvtF2F,    [ZnFPU3],  5>;
 defm : ZnWriteResFpuPair<WriteCvtF2I,    [ZnFPU3],  5>;
 defm : ZnWriteResFpuPair<WriteFDiv,      [ZnFPU3], 15>;
-defm : ZnWriteResFpuPair<WriteFDivY,     [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDivX,     [ZnFPU3], 15>;
+//defm : ZnWriteResFpuPair<WriteFDivY,     [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDivZ,     [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDiv64,    [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDiv64X,   [ZnFPU3], 15>;
+//defm : ZnWriteResFpuPair<WriteFDiv64Y,   [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDiv64Z,   [ZnFPU3], 15>;
 defm : ZnWriteResFpuPair<WriteFSign,     [ZnFPU3],  2>;
 defm : ZnWriteResFpuPair<WriteFRnd,      [ZnFPU3],  4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
 defm : ZnWriteResFpuPair<WriteFRndY,     [ZnFPU3],  4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
@@ -1481,12 +1487,13 @@ def ZnWriteMULYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
 def : InstRW<[ZnWriteMULYLd], (instregex "(V?)MUL(P|S)(S|D)Yrm")>;
 
 // VDIVPS.
+// TODO - convert to ZnWriteResFpuPair
 // y,y,y.
 def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
   let Latency = 12;
   let ResourceCycles = [12];
 }
-def : InstRW<[ZnWriteVDIVPSYr], (instregex "VDIVPSYrr")>;
+def : SchedAlias<WriteFDivY,   ZnWriteVDIVPSYr>;
 
 // y,y,m256.
 def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
@@ -1494,15 +1501,16 @@ def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1, 19];
 }
-def : InstRW<[ZnWriteVDIVPSYLd], (instregex "VDIVPSYrm")>;
+def : SchedAlias<WriteFDivYLd,  ZnWriteVDIVPSYLd>;
 
 // VDIVPD.
+// TODO - convert to ZnWriteResFpuPair
 // y,y,y.
 def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
   let Latency = 15;
   let ResourceCycles = [15];
 }
-def : InstRW<[ZnWriteVDIVPDY], (instregex "VDIVPDYrr")>;
+def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>;
 
 // y,y,m256.
 def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
@@ -1510,7 +1518,7 @@ def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,22];
 }
-def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>;
+def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>;
 
 // VRCPPS.
 // TODO - convert to ZnWriteResFpuPair
index 78439fc..3a282aa 100755 (executable)
@@ -231,7 +231,7 @@ entry:
 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
 ; GENERIC-LABEL: divpd512:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vdivpd %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
+; GENERIC-NEXT:    vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: divpd512:
@@ -246,7 +246,7 @@ entry:
 define <8 x double> @divpd512fold(<8 x double> %y) {
 ; GENERIC-LABEL: divpd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [31:1.00]
+; GENERIC-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: divpd512fold:
@@ -261,7 +261,7 @@ entry:
 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
 ; GENERIC-LABEL: divps512:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
+; GENERIC-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: divps512:
@@ -276,7 +276,7 @@ entry:
 define <16 x float> @divps512fold(<16 x float> %y) {
 ; GENERIC-LABEL: divps512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [31:1.00]
+; GENERIC-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: divps512fold:
@@ -838,7 +838,7 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x
 ; GENERIC-LABEL: test_mask_vdivps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
+; GENERIC-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mask_vdivps:
index 2e6c99a..29254f5 100644 (file)
@@ -895,7 +895,7 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
 ; KNL-LABEL: v16f32_no_estimate:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
-; KNL-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [12:1.00]
+; KNL-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00]
 ; KNL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: v16f32_no_estimate:
index 639ac51..e8113e1 100644 (file)
@@ -1397,10 +1397,10 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdiv %st(0), %st(1) # sched: [34:34.00]
-; SLM-NEXT:    fdiv %st(2) # sched: [34:34.00]
-; SLM-NEXT:    fdivs (%ecx) # sched: [37:34.00]
-; SLM-NEXT:    fdivl (%eax) # sched: [37:34.00]
+; SLM-NEXT:    fdiv %st(0), %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdiv %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fdivs (%ecx) # sched: [22:17.00]
+; SLM-NEXT:    fdivl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
@@ -1409,8 +1409,8 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdiv %st(0), %st(1) # sched: [24:1.00]
-; SANDY-NEXT:    fdiv %st(2) # sched: [24:1.00]
+; SANDY-NEXT:    fdiv %st(0), %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdiv %st(2) # sched: [14:14.00]
 ; SANDY-NEXT:    fdivs (%ecx) # sched: [31:1.00]
 ; SANDY-NEXT:    fdivl (%eax) # sched: [31:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1521,10 +1521,10 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdivp %st(1) # sched: [34:34.00]
-; SLM-NEXT:    fdivp %st(2) # sched: [34:34.00]
-; SLM-NEXT:    fidivs (%ecx) # sched: [37:34.00]
-; SLM-NEXT:    fidivl (%eax) # sched: [37:34.00]
+; SLM-NEXT:    fdivp %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdivp %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fidivs (%ecx) # sched: [22:17.00]
+; SLM-NEXT:    fidivl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
@@ -1533,8 +1533,8 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdivp %st(1) # sched: [24:1.00]
-; SANDY-NEXT:    fdivp %st(2) # sched: [24:1.00]
+; SANDY-NEXT:    fdivp %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdivp %st(2) # sched: [14:14.00]
 ; SANDY-NEXT:    fidivs (%ecx) # sched: [34:1.00]
 ; SANDY-NEXT:    fidivl (%eax) # sched: [34:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1645,10 +1645,10 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdivr %st(0), %st(1) # sched: [34:34.00]
-; SLM-NEXT:    fdivr %st(2) # sched: [34:34.00]
-; SLM-NEXT:    fdivrs (%ecx) # sched: [37:34.00]
-; SLM-NEXT:    fdivrl (%eax) # sched: [37:34.00]
+; SLM-NEXT:    fdivr %st(0), %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdivr %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fdivrs (%ecx) # sched: [22:17.00]
+; SLM-NEXT:    fdivrl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
@@ -1657,8 +1657,8 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdivr %st(0), %st(1) # sched: [24:1.00]
-; SANDY-NEXT:    fdivr %st(2) # sched: [24:1.00]
+; SANDY-NEXT:    fdivr %st(0), %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdivr %st(2) # sched: [14:14.00]
 ; SANDY-NEXT:    fdivrs (%ecx) # sched: [31:1.00]
 ; SANDY-NEXT:    fdivrl (%eax) # sched: [31:1.00]
 ; SANDY-NEXT:    #NO_APP
@@ -1769,10 +1769,10 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
 ; SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
 ; SLM-NEXT:    #APP
-; SLM-NEXT:    fdivrp %st(1) # sched: [34:34.00]
-; SLM-NEXT:    fdivrp %st(2) # sched: [34:34.00]
-; SLM-NEXT:    fidivrs (%ecx) # sched: [37:34.00]
-; SLM-NEXT:    fidivrl (%eax) # sched: [37:34.00]
+; SLM-NEXT:    fdivrp %st(1) # sched: [19:17.00]
+; SLM-NEXT:    fdivrp %st(2) # sched: [19:17.00]
+; SLM-NEXT:    fidivrs (%ecx) # sched: [22:17.00]
+; SLM-NEXT:    fidivrl (%eax) # sched: [22:17.00]
 ; SLM-NEXT:    #NO_APP
 ; SLM-NEXT:    retl # sched: [4:1.00]
 ;
@@ -1781,8 +1781,8 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
 ; SANDY-NEXT:    #APP
-; SANDY-NEXT:    fdivrp %st(1) # sched: [24:1.00]
-; SANDY-NEXT:    fdivrp %st(2) # sched: [24:1.00]
+; SANDY-NEXT:    fdivrp %st(1) # sched: [14:14.00]
+; SANDY-NEXT:    fdivrp %st(2) # sched: [14:14.00]
 ; SANDY-NEXT:    fidivrs (%ecx) # sched: [34:1.00]
 ; SANDY-NEXT:    fidivrl (%eax) # sched: [34:1.00]
 ; SANDY-NEXT:    #NO_APP
index 7baab7c..579530c 100644 (file)
@@ -241,22 +241,22 @@ fyl2xp1
 # CHECK-NEXT:  1      3     1.00                  *    fcompi  %st(3)
 # CHECK-NEXT:  1      100   1.00                  *    fcos
 # CHECK-NEXT:  1      100   1.00                  *    fdecstp
-# CHECK-NEXT:  1      34    34.00                 *    fdiv    %st(0), %st(1)
-# CHECK-NEXT:  1      34    34.00                 *    fdiv    %st(2)
-# CHECK-NEXT:  1      37    34.00   *             *    fdivs   (%ecx)
-# CHECK-NEXT:  1      37    34.00   *             *    fdivl   (%eax)
-# CHECK-NEXT:  1      34    34.00                 *    fdivp   %st(1)
-# CHECK-NEXT:  1      34    34.00                 *    fdivp   %st(2)
-# CHECK-NEXT:  1      37    34.00   *             *    fidivs  (%ecx)
-# CHECK-NEXT:  1      37    34.00   *             *    fidivl  (%eax)
-# CHECK-NEXT:  1      34    34.00                 *    fdivr   %st(0), %st(1)
-# CHECK-NEXT:  1      34    34.00                 *    fdivr   %st(2)
-# CHECK-NEXT:  1      37    34.00   *             *    fdivrs  (%ecx)
-# CHECK-NEXT:  1      37    34.00   *             *    fdivrl  (%eax)
-# CHECK-NEXT:  1      34    34.00                 *    fdivrp  %st(1)
-# CHECK-NEXT:  1      34    34.00                 *    fdivrp  %st(2)
-# CHECK-NEXT:  1      37    34.00   *             *    fidivrs (%ecx)
-# CHECK-NEXT:  1      37    34.00   *             *    fidivrl (%eax)
+# CHECK-NEXT:  1      19    17.00                 *    fdiv    %st(0), %st(1)
+# CHECK-NEXT:  1      19    17.00                 *    fdiv    %st(2)
+# CHECK-NEXT:  1      22    17.00   *             *    fdivs   (%ecx)
+# CHECK-NEXT:  1      22    17.00   *             *    fdivl   (%eax)
+# CHECK-NEXT:  1      19    17.00                 *    fdivp   %st(1)
+# CHECK-NEXT:  1      19    17.00                 *    fdivp   %st(2)
+# CHECK-NEXT:  1      22    17.00   *             *    fidivs  (%ecx)
+# CHECK-NEXT:  1      22    17.00   *             *    fidivl  (%eax)
+# CHECK-NEXT:  1      19    17.00                 *    fdivr   %st(0), %st(1)
+# CHECK-NEXT:  1      19    17.00                 *    fdivr   %st(2)
+# CHECK-NEXT:  1      22    17.00   *             *    fdivrs  (%ecx)
+# CHECK-NEXT:  1      22    17.00   *             *    fdivrl  (%eax)
+# CHECK-NEXT:  1      19    17.00                 *    fdivrp  %st(1)
+# CHECK-NEXT:  1      19    17.00                 *    fdivrp  %st(2)
+# CHECK-NEXT:  1      22    17.00   *             *    fidivrs (%ecx)
+# CHECK-NEXT:  1      22    17.00   *             *    fidivrl (%eax)
 # CHECK-NEXT:  1      100   1.00                  *    ffree   %st(0)
 # CHECK-NEXT:  1      6     1.00                  *    ficoms  (%ecx)
 # CHECK-NEXT:  1      6     1.00                  *    ficoml  (%eax)
@@ -367,7 +367,7 @@ fyl2xp1
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -     584.00 16.00  64.00  55.00  9.50   9.50   52.00
+# CHECK-NEXT:  -     312.00 16.00  64.00  55.00  9.50   9.50   52.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]     Instructions:
@@ -406,22 +406,22 @@ fyl2xp1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -      fcompi  %st(3)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      fcos
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      fdecstp
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdiv    %st(0), %st(1)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdiv    %st(2)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fdivs   (%ecx)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fdivl   (%eax)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdivp   %st(1)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdivp   %st(2)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fidivs  (%ecx)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fidivl  (%eax)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdivr   %st(0), %st(1)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdivr   %st(2)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fdivrs  (%ecx)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fdivrl  (%eax)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdivrp  %st(1)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -      -      fdivrp  %st(2)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fidivrs (%ecx)
-# CHECK-NEXT:  -     34.00   -     1.00    -      -      -     1.00    fidivrl (%eax)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdiv    %st(0), %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdiv    %st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fdivs   (%ecx)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fdivl   (%eax)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdivp   %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdivp   %st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fidivs  (%ecx)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fidivl  (%eax)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdivr   %st(0), %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdivr   %st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fdivrs  (%ecx)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fdivrl  (%eax)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdivrp  %st(1)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -      fdivrp  %st(2)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fidivrs (%ecx)
+# CHECK-NEXT:  -     17.00   -     1.00    -      -      -     1.00    fidivrl (%eax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      ffree   %st(0)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00    ficoms  (%ecx)
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00    ficoml  (%eax)
index 8a2130d..e6e1404 100644 (file)
@@ -241,20 +241,20 @@ fyl2xp1
 # CHECK-NEXT:  3      3     1.00                  *    fcompi  %st(3)
 # CHECK-NEXT:  1      100   0.33                  *    fcos
 # CHECK-NEXT:  1      1     1.00                  *    fdecstp
-# CHECK-NEXT:  1      24    1.00                  *    fdiv    %st(0), %st(1)
-# CHECK-NEXT:  1      24    1.00                  *    fdiv    %st(2)
+# CHECK-NEXT:  1      14    14.00                 *    fdiv    %st(0), %st(1)
+# CHECK-NEXT:  1      14    14.00                 *    fdiv    %st(2)
 # CHECK-NEXT:  2      31    1.00    *             *    fdivs   (%ecx)
 # CHECK-NEXT:  2      31    1.00    *             *    fdivl   (%eax)
-# CHECK-NEXT:  1      24    1.00                  *    fdivp   %st(1)
-# CHECK-NEXT:  1      24    1.00                  *    fdivp   %st(2)
+# CHECK-NEXT:  1      14    14.00                 *    fdivp   %st(1)
+# CHECK-NEXT:  1      14    14.00                 *    fdivp   %st(2)
 # CHECK-NEXT:  3      34    1.00    *             *    fidivs  (%ecx)
 # CHECK-NEXT:  3      34    1.00    *             *    fidivl  (%eax)
-# CHECK-NEXT:  1      24    1.00                  *    fdivr   %st(0), %st(1)
-# CHECK-NEXT:  1      24    1.00                  *    fdivr   %st(2)
+# CHECK-NEXT:  1      14    14.00                 *    fdivr   %st(0), %st(1)
+# CHECK-NEXT:  1      14    14.00                 *    fdivr   %st(2)
 # CHECK-NEXT:  2      31    1.00    *             *    fdivrs  (%ecx)
 # CHECK-NEXT:  2      31    1.00    *             *    fdivrl  (%eax)
-# CHECK-NEXT:  1      24    1.00                  *    fdivrp  %st(1)
-# CHECK-NEXT:  1      24    1.00                  *    fdivrp  %st(2)
+# CHECK-NEXT:  1      14    14.00                 *    fdivrp  %st(1)
+# CHECK-NEXT:  1      14    14.00                 *    fdivrp  %st(2)
 # CHECK-NEXT:  3      34    1.00    *             *    fidivrs (%ecx)
 # CHECK-NEXT:  3      34    1.00    *             *    fidivrl (%eax)
 # CHECK-NEXT:  1      1     1.00                  *    ffree   %st(0)
@@ -367,7 +367,7 @@ fyl2xp1
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     24.00  48.33  87.33  17.00  54.33  34.00  34.00
+# CHECK-NEXT:  -     136.00 48.33  87.33  17.00  54.33  34.00  34.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]   Instructions:
@@ -406,20 +406,20 @@ fyl2xp1
 # CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -      -      fcompi  %st(3)
 # CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -      fcos
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      fdecstp
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdiv    %st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdiv    %st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdiv    %st(0), %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdiv    %st(2)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50    fdivs   (%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50    fdivl   (%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdivp   %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdivp   %st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdivp   %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdivp   %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50    fidivs  (%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50    fidivl  (%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdivr   %st(0), %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdivr   %st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdivr   %st(0), %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdivr   %st(2)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50    fdivrs  (%ecx)
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50    fdivrl  (%eax)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdivrp  %st(1)
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      fdivrp  %st(2)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdivrp  %st(1)
+# CHECK-NEXT:  -     14.00  1.00    -      -      -      -      -      fdivrp  %st(2)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50    fidivrs (%ecx)
 # CHECK-NEXT:  -      -     1.00   1.00    -      -     0.50   0.50    fidivrl (%eax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      ffree   %st(0)