This removes all InstRW overrides for these instructions; some x87 overrides remain, but most now use the default (and realistic) values.
llvm-svn: 331643
}
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode VecNode, X86FoldableSchedWrite sched,
+ SDNode VecNode, X86SchedWriteSizes sched,
bit IsCommutable> {
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
- sched, IsCommutable>,
+ sched.PS.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
- sched, IsCommutable>,
+ sched.PS.Scl, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
- sched, IsCommutable>,
+ sched.PD.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
- sched, IsCommutable>,
+ sched.PD.Scl, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode, SDNode SaeNode,
- X86FoldableSchedWrite sched, bit IsCommutable> {
+ X86SchedWriteSizes sched, bit IsCommutable> {
defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
- VecNode, SaeNode, sched, IsCommutable>,
+ VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
- VecNode, SaeNode, sched, IsCommutable>,
+ VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
- SchedWriteFAdd.Scl, 1>;
+ SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
- SchedWriteFMul.Scl, 1>;
+ SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
- SchedWriteFAdd.Scl, 0>;
+ SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
- SchedWriteFDiv.Scl, 0>;
+ SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
- SchedWriteFCmp.Scl, 0>;
+ SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
- SchedWriteFCmp.Scl, 0>;
+ SchedWriteFCmpSizes, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- Predicate prd, X86SchedWriteWidths sched,
+ Predicate prd, X86SchedWriteSizes sched,
bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
- sched.ZMM, IsCommutable>, EVEX_V512, PS,
+ sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
- sched.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
+ sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
- sched.XMM, IsCommutable>, EVEX_V128, PS,
+ sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
- sched.YMM, IsCommutable>, EVEX_V256, PS,
+ sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
- sched.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
+ sched.PD.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
- sched.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
+ sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+ X86SchedWriteSizes sched> {
+ defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+ defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+ X86SchedWriteSizes sched> {
+ defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
+ defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
- SchedWriteFAdd, 1>,
- avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAdd>;
+ SchedWriteFAddSizes, 1>,
+ avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
- SchedWriteFMul, 1>,
- avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMul>;
+ SchedWriteFMulSizes, 1>,
+ avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
- SchedWriteFAdd>,
- avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAdd>;
+ SchedWriteFAddSizes>,
+ avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
- SchedWriteFDiv>,
- avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDiv>;
+ SchedWriteFDivSizes>,
+ avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
- SchedWriteFCmp, 0>,
- avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmp>;
+ SchedWriteFCmpSizes, 0>,
+ avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
- SchedWriteFCmp, 0>,
- avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmp>;
+ SchedWriteFCmpSizes, 0>,
+ avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
- SchedWriteFCmp, 1>;
+ SchedWriteFCmpSizes, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
- SchedWriteFCmp, 1>;
+ SchedWriteFCmpSizes, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
- SchedWriteFLogic, 1>;
+ SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
- SchedWriteFLogic, 0>;
+ SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
- SchedWriteFLogic, 1>;
+ SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
- SchedWriteFLogic, 1>;
+ SchedWriteFLogicSizes, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
//===----------------------------------------------------------------------===//
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
- SchedWriteFShuffle>;
+ SchedWriteFShuffleSizes>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
- SchedWriteFShuffle>;
+ SchedWriteFShuffleSizes>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
SchedWriteShuffle, HasBWI>;
/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86SchedWriteWidths sched> {
+ SDNode OpNode, X86SchedWriteSizes sched> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
- SSEPackedSingle, sched.XMM, 0>, PS, VEX_4V, VEX_WIG;
+ SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64,
- SSEPackedDouble, sched.XMM, 0>, PD, VEX_4V, VEX_WIG;
+ SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, VR256, v8f32, f256mem, loadv8f32,
- SSEPackedSingle, sched.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
- SSEPackedDouble, sched.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
v4f32, f128mem, memopv4f32, SSEPackedSingle,
- sched.XMM>, PS;
+ sched.PS.XMM>, PS;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
v2f64, f128mem, memopv2f64, SSEPackedDouble,
- sched.XMM>, PD;
+ sched.PD.XMM>, PD;
}
}
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86SchedWriteWidths sched> {
+ X86SchedWriteSizes sched> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, SSEPackedSingle, sched.Scl, 0>,
+ OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
XS, VEX_4V, VEX_LIG, VEX_WIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, SSEPackedDouble, sched.Scl, 0>,
+ OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle,
- sched.Scl>, XS;
+ sched.PS.Scl>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble,
- sched.Scl>, XD;
+ sched.PD.Scl>, XD;
}
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
- X86SchedWriteWidths sched> {
+ X86SchedWriteSizes sched> {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
+ SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.Scl>, XS;
+ SSEPackedSingle, sched.PS.Scl>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.Scl>, XD;
+ SSEPackedDouble, sched.PD.Scl>, XD;
}
}
// Binary Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAdd>,
- basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAdd>,
- basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAdd>;
-defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMul>,
- basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMul>,
- basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMul>;
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
+ basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
+ basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
+ basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAdd>,
- basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAdd>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAdd>;
- defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDiv>,
- basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDiv>,
- basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDiv>;
- defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmp>,
- basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmp>,
- basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmp>;
- defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmp>,
- basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmp>,
- basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmp>;
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
+ basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
+ basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}
let isCodeGenOnly = 1 in {
- defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmp>,
- basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmp>;
- defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmp>,
- basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmp>;
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}
// Patterns used to select SSE scalar fp arithmetic instructions from
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
-defm : BWWriteResPair<WriteFDiv, [BWPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
-defm : BWWriteResPair<WriteFDivY, [BWPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
+
+//defm : BWWriteResPair<WriteFDiv, [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division.
+defm : BWWriteResPair<WriteFDivX, [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM).
+defm : BWWriteResPair<WriteFDivY, [BWPort0,BWPort015,BWFPDivider], 17, [2,1,10], 3, 6>; // Floating point division (YMM).
+defm : BWWriteResPair<WriteFDivZ, [BWPort0,BWPort015,BWFPDivider], 17, [2,1,10], 3, 6>; // Floating point division (ZMM).
+//defm : BWWriteResPair<WriteFDiv64, [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; // Floating point double division.
+defm : BWWriteResPair<WriteFDiv64X, [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; // Floating point double division (XMM).
+defm : BWWriteResPair<WriteFDiv64Y, [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point double division (YMM).
+defm : BWWriteResPair<WriteFDiv64Z, [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point double division (ZMM).
defm : X86WriteRes<WriteFSqrt, [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root.
defm : X86WriteRes<WriteFSqrtLd, [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>;
}
def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
-def BWWriteResGroup122 : SchedWriteRes<[BWPort0,BWFPDivider]> {
- let Latency = 11;
- let NumMicroOps = 1;
- let ResourceCycles = [1,5];
-}
-def: InstRW<[BWWriteResGroup122], (instregex "(V?)DIVPSrr")>;
-
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
let ResourceCycles = [1,3]; // Really 2.5 cycle throughput
}
-def: InstRW<[BWWriteResGroup122_1], (instregex "(V?)DIVSSrr")>;
+def : SchedAlias<WriteFDiv, BWWriteResGroup122_1>; // TODO - convert to BWWriteResPair
def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 11;
}
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
-def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
- let Latency = 14;
- let NumMicroOps = 1;
- let ResourceCycles = [1,8];
-}
-def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr")>;
-
def BWWriteResGroup139_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
let ResourceCycles = [1,4];
}
-def: InstRW<[BWWriteResGroup139_1], (instregex "(V?)DIVSDrr")>;
+def : SchedAlias<WriteFDiv64, BWWriteResGroup139_1>; // TODO - convert to BWWriteResPair
def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 14;
let NumMicroOps = 2;
let ResourceCycles = [1,1,5];
}
-def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm",
- "(V?)DIVSSrm")>;
+def : SchedAlias<WriteFDivLd, BWWriteResGroup150>; // TODO - convert to BWWriteResPair
def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
let Latency = 16;
}
def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>;
-def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
- let Latency = 17;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,10];
-}
-def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>;
-
def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1,8];
}
-def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm",
- "(V?)DIVSDrm")>;
+def : SchedAlias<WriteFDiv64Ld, BWWriteResGroup161>; // TODO - convert to BWWriteResPair
def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> {
let Latency = 20;
}
def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>;
-def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
- let Latency = 23;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,16];
-}
-def: InstRW<[BWWriteResGroup173], (instregex "VDIVPDYrr")>;
-
-def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
- let Latency = 23;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,10];
-}
-def: InstRW<[BWWriteResGroup174], (instregex "VDIVPSYrm")>;
-
def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
}
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
-def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
- let Latency = 29;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,16];
-}
-def: InstRW<[BWWriteResGroup183], (instregex "VDIVPDYrm")>;
-
def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 22;
let NumMicroOps = 7;
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>;
-defm : HWWriteResPair<WriteFDiv, [HWPort0], 12, [1], 1, 5>; // 10-14 cycles.
-defm : HWWriteResPair<WriteFDivY, [HWPort0], 12, [1], 1, 7>; // 10-14 cycles.
+
+defm : HWWriteResPair<WriteFDiv, [HWPort0,HWFPDivider], 13, [1,7], 1, 5>;
+defm : HWWriteResPair<WriteFDivX, [HWPort0,HWFPDivider], 13, [1,7], 1, 6>;
+defm : HWWriteResPair<WriteFDivY, [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
+defm : HWWriteResPair<WriteFDivZ, [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
+defm : HWWriteResPair<WriteFDiv64, [HWPort0,HWFPDivider], 20, [1,14], 1, 5>;
+defm : HWWriteResPair<WriteFDiv64X, [HWPort0,HWFPDivider], 20, [1,14], 1, 6>;
+defm : HWWriteResPair<WriteFDiv64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
+defm : HWWriteResPair<WriteFDiv64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFRcpX, [HWPort0], 5, [1], 1, 6>;
"MUL_FST0r",
"MUL_FrST0")>;
-def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,7];
-}
-def: InstRW<[HWWriteResGroup91_4], (instregex "(V?)DIVSSrm")>;
-
def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 11;
let NumMicroOps = 2;
}
def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>;
-def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWFPDivider]> {
- let Latency = 13;
- let NumMicroOps = 1;
- let ResourceCycles = [1,7];
-}
-def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr",
- "(V?)DIVSSrr")>;
-
def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
}
def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>;
-def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 19;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,7];
-}
-def: InstRW<[HWWriteResGroup134], (instregex "(V?)DIVPSrm")>;
-
def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 19;
let NumMicroOps = 11;
"DIV_FST0r",
"DIV_FrST0")>;
-def HWWriteResGroup154_1 : SchedWriteRes<[HWPort0,HWFPDivider]> {
- let Latency = 20;
- let NumMicroOps = 1;
- let ResourceCycles = [1,14];
-}
-def: InstRW<[HWWriteResGroup154_1], (instregex "(V?)DIVPDrr",
- "(V?)DIVSDrr")>;
-
def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 27;
let NumMicroOps = 2;
}
def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F(32|64)m")>;
-def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 26;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,14];
-}
-def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>;
-
-def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 25;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,14];
-}
-def: InstRW<[HWWriteResGroup155_4], (instregex "(V?)DIVSDrm")>;
-
def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> {
let Latency = 20;
let NumMicroOps = 10;
}
def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>;
-def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
- let Latency = 21;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,14];
-}
-def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>;
-
-def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
- let Latency = 28;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,14];
-}
-def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>;
-
def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
let Latency = 30;
let NumMicroOps = 3;
def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
-def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
- let Latency = 35;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,28];
-}
-def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>;
-
-def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
- let Latency = 42;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,28];
-}
-def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>;
-
def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> {
let Latency = 41;
let NumMicroOps = 18;
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
-defm : SBWriteResPair<WriteFDiv, [SBPort0], 24, [1], 1, 5>;
-defm : SBWriteResPair<WriteFDivY, [SBPort0], 24, [1], 1, 7>;
+
+defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
+defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
+defm : SBWriteResPair<WriteFDivY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
+defm : SBWriteResPair<WriteFDivZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
+defm : SBWriteResPair<WriteFDiv64, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
+defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
+defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
+defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
}
def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
-def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> {
- let Latency = 14;
- let NumMicroOps = 1;
- let ResourceCycles = [1,14];
-}
-def: InstRW<[SBWriteResGroup116], (instregex "(V?)DIVPSrr",
- "(V?)DIVSSrr")>;
-
def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 15;
let NumMicroOps = 3;
}
def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;
-def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
- let Latency = 20;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,14];
-}
-def: InstRW<[SBWriteResGroup123], (instregex "(V?)DIVPSrm",
- "(V?)DIVSSrm")>;
-
-def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> {
- let Latency = 22;
- let NumMicroOps = 1;
- let ResourceCycles = [1,22];
-}
-def: InstRW<[SBWriteResGroup126], (instregex "(V?)DIVPDrr",
- "(V?)DIVSDrr")>;
-
-def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
- let Latency = 28;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,22];
-}
-def: InstRW<[SBWriteResGroup128], (instregex "(V?)DIVPDrm",
- "(V?)DIVSDrm")>;
-
-def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
- let Latency = 29;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,28];
-}
-def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
-
def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 31;
let NumMicroOps = 2;
}
def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
-def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> {
- let Latency = 36;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,28];
-}
-def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
-
-def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
- let Latency = 45;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,44];
-}
-def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
-
-def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> {
- let Latency = 52;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,44];
-}
-def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
-
} // SchedModel
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
-defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
-defm : SKLWriteResPair<WriteFDivY, [SKLPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
+
+defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division.
+//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM).
+defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM).
+defm : SKLWriteResPair<WriteFDivZ, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (ZMM).
+//defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division.
+//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM).
+//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM).
+defm : SKLWriteResPair<WriteFDiv64Z, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (ZMM).
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
let NumMicroOps = 1;
let ResourceCycles = [1,3];
}
-def: InstRW<[SKLWriteResGroup145], (instregex "(V?)DIVPSrr",
- "(V?)DIVSSrr")>;
-
-def SKLWriteResGroup145_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
- let Latency = 11;
- let NumMicroOps = 1;
- let ResourceCycles = [1,5];
-}
-def: InstRW<[SKLWriteResGroup145_1], (instregex "VDIVPSYrr")>;
+def : SchedAlias<WriteFDivX, SKLWriteResGroup145>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 11;
let NumMicroOps = 1;
let ResourceCycles = [1,3];
}
-def: InstRW<[SKLWriteResGroup166], (instregex "(V?)DIVPDrr",
- "(V?)DIVSDrr")>;
+def : SchedAlias<WriteFDiv64, SKLWriteResGroup166>; // TODO - convert to SKLWriteResPair
+def : SchedAlias<WriteFDiv64X, SKLWriteResGroup166>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
let ResourceCycles = [1,5];
}
-def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>;
+def : SchedAlias<WriteFDiv64Y, SKLWriteResGroup166_1>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 14;
}
def: InstRW<[SKLWriteResGroup174], (instregex "RCL(8|16|32|64)mCL")>;
-def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup175], (instregex "(V?)DIVSSrm")>;
-
def SKLWriteResGroup177 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
let NumMicroOps = 2;
let ResourceCycles = [1,1,5];
}
-def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>;
+def : SchedAlias<WriteFDivXLd, SKLWriteResGroup179>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
let Latency = 17;
}
def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>;
-def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,5];
-}
-def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>;
-
def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1,4];
}
-def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>;
+def : SchedAlias<WriteFDiv64Ld, SKLWriteResGroup186>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
let Latency = 20;
let NumMicroOps = 2;
let ResourceCycles = [1,1,4];
}
-def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>;
+def : SchedAlias<WriteFDiv64XLd, SKLWriteResGroup190>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 20;
let NumMicroOps = 2;
let ResourceCycles = [1,1,8];
}
-def: InstRW<[SKLWriteResGroup195], (instregex "VDIVPDYrm")>;
+def : SchedAlias<WriteFDiv64YLd, SKLWriteResGroup195>; // TODO - convert to SKLWriteResPair
def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 22;
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulY,[SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
-defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
-defm : SKXWriteResPair<WriteFDivY, [SKXPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
+
+defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
+//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
+defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM).
+defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 18 cycles. // Floating point division (ZMM).
+//defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
+//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
+//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM).
+defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 23 cycles. // Floating point division (ZMM).
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
let NumMicroOps = 1;
let ResourceCycles = [1,3];
}
-def: InstRW<[SKXWriteResGroup159], (instregex "(V?)DIVPS(Z128)?rr",
- "(V?)DIVSS(Z?)rr")>;
-
-def SKXWriteResGroup159_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
- let Latency = 11;
- let NumMicroOps = 1;
- let ResourceCycles = [1,5];
-}
-def: InstRW<[SKXWriteResGroup159_1], (instregex "VDIVPS(Y|Z256)rr")>;
+def : SchedAlias<WriteFDivX, SKXWriteResGroup159>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 11;
let NumMicroOps = 1;
let ResourceCycles = [1,3];
}
-def: InstRW<[SKXWriteResGroup184], (instregex "(V?)DIVPDrr",
- "(V?)DIVSD(Z?)rr")>;
+def : SchedAlias<WriteFDiv64, SKXWriteResGroup184>; // TODO - convert to SKXWriteResPair
+def : SchedAlias<WriteFDiv64X, SKXWriteResGroup184>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
let ResourceCycles = [1,5];
}
-def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>;
+def : SchedAlias<WriteFDiv64Y, SKXWriteResGroup184_1>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 14;
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
-def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup196], (instregex "(V?)DIVSS(Z?)rm")>;
-
def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
let Latency = 16;
let NumMicroOps = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1,5];
}
-def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>;
+def : SchedAlias<WriteFDivXLd, SKXWriteResGroup201>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
let Latency = 17;
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
-def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,5];
-}
-def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>;
-
def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 18;
let NumMicroOps = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1,4];
}
-def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>;
+def : SchedAlias<WriteFDiv64Ld, SKXWriteResGroup209>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 19;
let NumMicroOps = 2;
let ResourceCycles = [1,1,4];
}
-def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>;
+def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 20;
let NumMicroOps = 2;
let ResourceCycles = [1,1,8];
}
-def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPD(Y|Z256)rm")>;
+def : SchedAlias<WriteFDiv64YLd, SKXWriteResGroup222>; // TODO - convert to SKXWriteResPair
def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 22;
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
"VPCONFLICTQZ256rr")>;
-def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
- let Latency = 23;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,16];
-}
-def: InstRW<[SKXWriteResGroup227], (instregex "VDIVPDZrr")>;
-
-def SKXWriteResGroup227_1 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,10];
-}
-def: InstRW<[SKXWriteResGroup227_1], (instregex "VDIVPSZrr")>;
-
def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
}
def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>;
-def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
- let Latency = 25;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,10];
-}
-def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>;
-
def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 25;
let NumMicroOps = 3;
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
-def SKXWriteResGroup244 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
- let Latency = 30;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,16];
-}
-def: InstRW<[SKXWriteResGroup244], (instregex "VDIVPDZrm(b?)")>;
-
def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 30;
let NumMicroOps = 5;
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM).
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM/ZMM).
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
+defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
+defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
+defm WriteFDiv64 : X86SchedWritePair; // Floating point division.
+defm WriteFDiv64X : X86SchedWritePair; // Floating point division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair; // Floating point division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair; // Floating point division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
: X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmpY, WriteFCmpY>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
+def SchedWriteFMul64
+ : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>;
def SchedWriteDPPD
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>;
def SchedWriteFDiv
- : X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>;
+ : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
+def SchedWriteFDiv64
+ : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
: X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
WriteFSqrtY, WriteFSqrtZ>;
// Vector size wrappers.
def SchedWriteFAddSizes
: X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd>;
+def SchedWriteFCmpSizes
+ : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp>;
def SchedWriteFMulSizes
- : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul>;
+ : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
- : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv>;
+ : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
: X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
+def SchedWriteFLogicSizes
+ : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
+def SchedWriteFShuffleSizes
+ : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDivZ, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFDiv64Y, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFDiv64Z, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
-def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> {
- let Latency = 62;
- let ResourceCycles = [62];
-}
-def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?")>;
-
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
let ResourceCycles = [63];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
-def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> {
- let Latency = 70;
- let ResourceCycles = [70];
-}
-def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm)>;
-
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
let ResourceCycles = [71];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
-def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> {
- let Latency = 125;
- let ResourceCycles = [125];
-}
-def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm)>;
-
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
let ResourceCycles = [127];
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
+defm : JWriteResYMMPair<WriteFDivZ, [JFPU1, JFPM], 38, [2, 38], 2>;
+defm : JWriteResFpuPair<WriteFDiv64, [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>;
+defm : JWriteResYMMPair<WriteFDiv64Z, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
-defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
+defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
+defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivZ, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
+defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64Z, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFMAX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0], 1>;
-// Instruction overrides
-
-def SLMriteResGroup1 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 69;
- let NumMicroOps = 1;
- let ResourceCycles = [1,69];
-}
-def: InstRW<[SLMriteResGroup1], (instregex "(V?)DIVPDrr")>;
-
-def SLMriteResGroup2 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 39;
- let NumMicroOps = 1;
- let ResourceCycles = [1,39];
-}
-def: InstRW<[SLMriteResGroup2], (instregex "(V?)DIVPSrr")>;
-
-def SLMriteResGroup3 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 34;
- let NumMicroOps = 1;
- let ResourceCycles = [1,32];
-}
-def: InstRW<[SLMriteResGroup3], (instregex "(V?)DIVSDrr")>;
-
-def SLMriteResGroup4 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 19;
- let NumMicroOps = 1;
- let ResourceCycles = [1,17];
-}
-def: InstRW<[SLMriteResGroup4], (instregex "(V?)DIVSSrr")>;
-
-def SLMriteResGroup5 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 72;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,69];
-}
-def: InstRW<[SLMriteResGroup5], (instregex "(V?)DIVPDrm")>;
-
-def SLMriteResGroup6 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 42;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,39];
-}
-def: InstRW<[SLMriteResGroup6], (instregex "(V?)DIVPSrm")>;
-
-def SLMriteResGroup7 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 37;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,32];
-}
-def: InstRW<[SLMriteResGroup7], (instregex "(V?)DIVSDrm")>;
-
-def SLMriteResGroup8 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 22;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,17];
-}
-def: InstRW<[SLMriteResGroup8], (instregex "(V?)DIVSSrm")>;
-
} // SchedModel
defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
-defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
+//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDivZ, [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 15>;
+//defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFDiv64Z, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
def : InstRW<[ZnWriteMULYLd], (instregex "(V?)MUL(P|S)(S|D)Yrm")>;
// VDIVPS.
+// TODO - convert to ZnWriteResFpuPair
// y,y,y.
def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
let Latency = 12;
let ResourceCycles = [12];
}
-def : InstRW<[ZnWriteVDIVPSYr], (instregex "VDIVPSYrr")>;
+def : SchedAlias<WriteFDivY, ZnWriteVDIVPSYr>;
// y,y,m256.
def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let NumMicroOps = 2;
let ResourceCycles = [1, 19];
}
-def : InstRW<[ZnWriteVDIVPSYLd], (instregex "VDIVPSYrm")>;
+def : SchedAlias<WriteFDivYLd, ZnWriteVDIVPSYLd>;
// VDIVPD.
+// TODO - convert to ZnWriteResFpuPair
// y,y,y.
def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
let Latency = 15;
let ResourceCycles = [15];
}
-def : InstRW<[ZnWriteVDIVPDY], (instregex "VDIVPDYrr")>;
+def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>;
// y,y,m256.
def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let NumMicroOps = 2;
let ResourceCycles = [1,22];
}
-def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>;
+def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>;
// VRCPPS.
// TODO - convert to ZnWriteResFpuPair
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: divpd512:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
+; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512:
define <8 x double> @divpd512fold(<8 x double> %y) {
; GENERIC-LABEL: divpd512fold:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [31:1.00]
+; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512fold:
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: divps512:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
+; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divps512:
define <16 x float> @divps512fold(<16 x float> %y) {
; GENERIC-LABEL: divps512fold:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [31:1.00]
+; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divps512fold:
; GENERIC-LABEL: test_mask_vdivps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
+; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vdivps:
; KNL-LABEL: v16f32_no_estimate:
; KNL: # %bb.0:
; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
-; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [12:1.00]
+; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00]
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: v16f32_no_estimate:
; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
-; SLM-NEXT: fdiv %st(0), %st(1) # sched: [34:34.00]
-; SLM-NEXT: fdiv %st(2) # sched: [34:34.00]
-; SLM-NEXT: fdivs (%ecx) # sched: [37:34.00]
-; SLM-NEXT: fdivl (%eax) # sched: [37:34.00]
+; SLM-NEXT: fdiv %st(0), %st(1) # sched: [19:17.00]
+; SLM-NEXT: fdiv %st(2) # sched: [19:17.00]
+; SLM-NEXT: fdivs (%ecx) # sched: [22:17.00]
+; SLM-NEXT: fdivl (%eax) # sched: [22:17.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; SANDY-NEXT: #APP
-; SANDY-NEXT: fdiv %st(0), %st(1) # sched: [24:1.00]
-; SANDY-NEXT: fdiv %st(2) # sched: [24:1.00]
+; SANDY-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00]
+; SANDY-NEXT: fdiv %st(2) # sched: [14:14.00]
; SANDY-NEXT: fdivs (%ecx) # sched: [31:1.00]
; SANDY-NEXT: fdivl (%eax) # sched: [31:1.00]
; SANDY-NEXT: #NO_APP
; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
-; SLM-NEXT: fdivp %st(1) # sched: [34:34.00]
-; SLM-NEXT: fdivp %st(2) # sched: [34:34.00]
-; SLM-NEXT: fidivs (%ecx) # sched: [37:34.00]
-; SLM-NEXT: fidivl (%eax) # sched: [37:34.00]
+; SLM-NEXT: fdivp %st(1) # sched: [19:17.00]
+; SLM-NEXT: fdivp %st(2) # sched: [19:17.00]
+; SLM-NEXT: fidivs (%ecx) # sched: [22:17.00]
+; SLM-NEXT: fidivl (%eax) # sched: [22:17.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; SANDY-NEXT: #APP
-; SANDY-NEXT: fdivp %st(1) # sched: [24:1.00]
-; SANDY-NEXT: fdivp %st(2) # sched: [24:1.00]
+; SANDY-NEXT: fdivp %st(1) # sched: [14:14.00]
+; SANDY-NEXT: fdivp %st(2) # sched: [14:14.00]
; SANDY-NEXT: fidivs (%ecx) # sched: [34:1.00]
; SANDY-NEXT: fidivl (%eax) # sched: [34:1.00]
; SANDY-NEXT: #NO_APP
; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
-; SLM-NEXT: fdivr %st(0), %st(1) # sched: [34:34.00]
-; SLM-NEXT: fdivr %st(2) # sched: [34:34.00]
-; SLM-NEXT: fdivrs (%ecx) # sched: [37:34.00]
-; SLM-NEXT: fdivrl (%eax) # sched: [37:34.00]
+; SLM-NEXT: fdivr %st(0), %st(1) # sched: [19:17.00]
+; SLM-NEXT: fdivr %st(2) # sched: [19:17.00]
+; SLM-NEXT: fdivrs (%ecx) # sched: [22:17.00]
+; SLM-NEXT: fdivrl (%eax) # sched: [22:17.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; SANDY-NEXT: #APP
-; SANDY-NEXT: fdivr %st(0), %st(1) # sched: [24:1.00]
-; SANDY-NEXT: fdivr %st(2) # sched: [24:1.00]
+; SANDY-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00]
+; SANDY-NEXT: fdivr %st(2) # sched: [14:14.00]
; SANDY-NEXT: fdivrs (%ecx) # sched: [31:1.00]
; SANDY-NEXT: fdivrl (%eax) # sched: [31:1.00]
; SANDY-NEXT: #NO_APP
; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
-; SLM-NEXT: fdivrp %st(1) # sched: [34:34.00]
-; SLM-NEXT: fdivrp %st(2) # sched: [34:34.00]
-; SLM-NEXT: fidivrs (%ecx) # sched: [37:34.00]
-; SLM-NEXT: fidivrl (%eax) # sched: [37:34.00]
+; SLM-NEXT: fdivrp %st(1) # sched: [19:17.00]
+; SLM-NEXT: fdivrp %st(2) # sched: [19:17.00]
+; SLM-NEXT: fidivrs (%ecx) # sched: [22:17.00]
+; SLM-NEXT: fidivrl (%eax) # sched: [22:17.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; SANDY-NEXT: #APP
-; SANDY-NEXT: fdivrp %st(1) # sched: [24:1.00]
-; SANDY-NEXT: fdivrp %st(2) # sched: [24:1.00]
+; SANDY-NEXT: fdivrp %st(1) # sched: [14:14.00]
+; SANDY-NEXT: fdivrp %st(2) # sched: [14:14.00]
; SANDY-NEXT: fidivrs (%ecx) # sched: [34:1.00]
; SANDY-NEXT: fidivrl (%eax) # sched: [34:1.00]
; SANDY-NEXT: #NO_APP
# CHECK-NEXT: 1 3 1.00 * fcompi %st(3)
# CHECK-NEXT: 1 100 1.00 * fcos
# CHECK-NEXT: 1 100 1.00 * fdecstp
-# CHECK-NEXT: 1 34 34.00 * fdiv %st(0), %st(1)
-# CHECK-NEXT: 1 34 34.00 * fdiv %st(2)
-# CHECK-NEXT: 1 37 34.00 * * fdivs (%ecx)
-# CHECK-NEXT: 1 37 34.00 * * fdivl (%eax)
-# CHECK-NEXT: 1 34 34.00 * fdivp %st(1)
-# CHECK-NEXT: 1 34 34.00 * fdivp %st(2)
-# CHECK-NEXT: 1 37 34.00 * * fidivs (%ecx)
-# CHECK-NEXT: 1 37 34.00 * * fidivl (%eax)
-# CHECK-NEXT: 1 34 34.00 * fdivr %st(0), %st(1)
-# CHECK-NEXT: 1 34 34.00 * fdivr %st(2)
-# CHECK-NEXT: 1 37 34.00 * * fdivrs (%ecx)
-# CHECK-NEXT: 1 37 34.00 * * fdivrl (%eax)
-# CHECK-NEXT: 1 34 34.00 * fdivrp %st(1)
-# CHECK-NEXT: 1 34 34.00 * fdivrp %st(2)
-# CHECK-NEXT: 1 37 34.00 * * fidivrs (%ecx)
-# CHECK-NEXT: 1 37 34.00 * * fidivrl (%eax)
+# CHECK-NEXT: 1 19 17.00 * fdiv %st(0), %st(1)
+# CHECK-NEXT: 1 19 17.00 * fdiv %st(2)
+# CHECK-NEXT: 1 22 17.00 * * fdivs (%ecx)
+# CHECK-NEXT: 1 22 17.00 * * fdivl (%eax)
+# CHECK-NEXT: 1 19 17.00 * fdivp %st(1)
+# CHECK-NEXT: 1 19 17.00 * fdivp %st(2)
+# CHECK-NEXT: 1 22 17.00 * * fidivs (%ecx)
+# CHECK-NEXT: 1 22 17.00 * * fidivl (%eax)
+# CHECK-NEXT: 1 19 17.00 * fdivr %st(0), %st(1)
+# CHECK-NEXT: 1 19 17.00 * fdivr %st(2)
+# CHECK-NEXT: 1 22 17.00 * * fdivrs (%ecx)
+# CHECK-NEXT: 1 22 17.00 * * fdivrl (%eax)
+# CHECK-NEXT: 1 19 17.00 * fdivrp %st(1)
+# CHECK-NEXT: 1 19 17.00 * fdivrp %st(2)
+# CHECK-NEXT: 1 22 17.00 * * fidivrs (%ecx)
+# CHECK-NEXT: 1 22 17.00 * * fidivrl (%eax)
# CHECK-NEXT: 1 100 1.00 * ffree %st(0)
# CHECK-NEXT: 1 6 1.00 * ficoms (%ecx)
# CHECK-NEXT: 1 6 1.00 * ficoml (%eax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - 584.00 16.00 64.00 55.00 9.50 9.50 52.00
+# CHECK-NEXT: - 312.00 16.00 64.00 55.00 9.50 9.50 52.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
# CHECK-NEXT: - - - - 1.00 - - - fcompi %st(3)
# CHECK-NEXT: - - - 1.00 - - - - fcos
# CHECK-NEXT: - - - 1.00 - - - - fdecstp
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdiv %st(0), %st(1)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdiv %st(2)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fdivs (%ecx)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fdivl (%eax)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdivp %st(1)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdivp %st(2)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fidivs (%ecx)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fidivl (%eax)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdivr %st(0), %st(1)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdivr %st(2)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fdivrs (%ecx)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fdivrl (%eax)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdivrp %st(1)
-# CHECK-NEXT: - 34.00 - 1.00 - - - - fdivrp %st(2)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fidivrs (%ecx)
-# CHECK-NEXT: - 34.00 - 1.00 - - - 1.00 fidivrl (%eax)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdiv %st(0), %st(1)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdiv %st(2)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivs (%ecx)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivl (%eax)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivp %st(1)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivp %st(2)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivs (%ecx)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivl (%eax)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivr %st(0), %st(1)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivr %st(2)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivrs (%ecx)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fdivrl (%eax)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivrp %st(1)
+# CHECK-NEXT: - 17.00 - 1.00 - - - - fdivrp %st(2)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivrs (%ecx)
+# CHECK-NEXT: - 17.00 - 1.00 - - - 1.00 fidivrl (%eax)
# CHECK-NEXT: - - - 1.00 - - - - ffree %st(0)
# CHECK-NEXT: - - - - 1.00 - - 1.00 ficoms (%ecx)
# CHECK-NEXT: - - - - 1.00 - - 1.00 ficoml (%eax)
# CHECK-NEXT: 3 3 1.00 * fcompi %st(3)
# CHECK-NEXT: 1 100 0.33 * fcos
# CHECK-NEXT: 1 1 1.00 * fdecstp
-# CHECK-NEXT: 1 24 1.00 * fdiv %st(0), %st(1)
-# CHECK-NEXT: 1 24 1.00 * fdiv %st(2)
+# CHECK-NEXT: 1 14 14.00 * fdiv %st(0), %st(1)
+# CHECK-NEXT: 1 14 14.00 * fdiv %st(2)
# CHECK-NEXT: 2 31 1.00 * * fdivs (%ecx)
# CHECK-NEXT: 2 31 1.00 * * fdivl (%eax)
-# CHECK-NEXT: 1 24 1.00 * fdivp %st(1)
-# CHECK-NEXT: 1 24 1.00 * fdivp %st(2)
+# CHECK-NEXT: 1 14 14.00 * fdivp %st(1)
+# CHECK-NEXT: 1 14 14.00 * fdivp %st(2)
# CHECK-NEXT: 3 34 1.00 * * fidivs (%ecx)
# CHECK-NEXT: 3 34 1.00 * * fidivl (%eax)
-# CHECK-NEXT: 1 24 1.00 * fdivr %st(0), %st(1)
-# CHECK-NEXT: 1 24 1.00 * fdivr %st(2)
+# CHECK-NEXT: 1 14 14.00 * fdivr %st(0), %st(1)
+# CHECK-NEXT: 1 14 14.00 * fdivr %st(2)
# CHECK-NEXT: 2 31 1.00 * * fdivrs (%ecx)
# CHECK-NEXT: 2 31 1.00 * * fdivrl (%eax)
-# CHECK-NEXT: 1 24 1.00 * fdivrp %st(1)
-# CHECK-NEXT: 1 24 1.00 * fdivrp %st(2)
+# CHECK-NEXT: 1 14 14.00 * fdivrp %st(1)
+# CHECK-NEXT: 1 14 14.00 * fdivrp %st(2)
# CHECK-NEXT: 3 34 1.00 * * fidivrs (%ecx)
# CHECK-NEXT: 3 34 1.00 * * fidivrl (%eax)
# CHECK-NEXT: 1 1 1.00 * ffree %st(0)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 24.00 48.33 87.33 17.00 54.33 34.00 34.00
+# CHECK-NEXT: - 136.00 48.33 87.33 17.00 54.33 34.00 34.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcos
# CHECK-NEXT: - - - - - 1.00 - - fdecstp
-# CHECK-NEXT: - - 1.00 - - - - - fdiv %st(0), %st(1)
-# CHECK-NEXT: - - 1.00 - - - - - fdiv %st(2)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(0), %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivs (%ecx)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivl (%eax)
-# CHECK-NEXT: - - 1.00 - - - - - fdivp %st(1)
-# CHECK-NEXT: - - 1.00 - - - - - fdivp %st(2)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(2)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivs (%ecx)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivl (%eax)
-# CHECK-NEXT: - - 1.00 - - - - - fdivr %st(0), %st(1)
-# CHECK-NEXT: - - 1.00 - - - - - fdivr %st(2)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(0), %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrs (%ecx)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrl (%eax)
-# CHECK-NEXT: - - 1.00 - - - - - fdivrp %st(1)
-# CHECK-NEXT: - - 1.00 - - - - - fdivrp %st(2)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(1)
+# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(2)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrs (%ecx)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrl (%eax)
# CHECK-NEXT: - - - - - 1.00 - - ffree %st(0)