// Scalar AVX-512 FP compares (VCMPSSZ / VCMPSDZ). The only change in this
// hunk is the scheduling argument: the bare WriteFCmp class is replaced by
// the .Scl member of the SchedWriteFCmp bundle.
// NOTE(review): .Scl presumably still resolves to WriteFCmp -- confirm the
// field order of X86SchedWriteWidths.
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
- WriteFCmp>, AVX512XSIi8Base;
+ SchedWriteFCmp.Scl>, AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
- WriteFCmp>, AVX512XDIi8Base, VEX_W;
+ SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
}
}
// Packed AVX-512 FP compare multiclass. The `sched` parameter changes type
// from a single X86FoldableSchedWrite to an X86SchedWriteWidths bundle, so
// each vector width now selects its own member: .ZMM for the 512-bit forms
// (both the common and the SAE variant), .XMM for Z128 and .YMM for Z256.
// Callers must therefore pass a bundle (e.g. SchedWriteFCmp), not a bare
// write class.
-multiclass avx512_vcmp<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
+multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcmp_common<sched, _.info512>,
- avx512_vcmp_sae<sched, _.info512>, EVEX_V512;
+ defm Z : avx512_vcmp_common<sched.ZMM, _.info512>,
+ avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512,HasVLX] in {
- defm Z128 : avx512_vcmp_common<sched, _.info128>, EVEX_V128;
- defm Z256 : avx512_vcmp_common<sched, _.info256>, EVEX_V256;
+ defm Z128 : avx512_vcmp_common<sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx512_vcmp_common<sched.YMM, _.info256>, EVEX_V256;
}
}
// Instantiations of avx512_vcmp for packed double / packed single. They now
// pass the whole SchedWriteFCmp bundle rather than the single WriteFCmp
// class, matching the multiclass's new X86SchedWriteWidths parameter.
-defm VCMPPD : avx512_vcmp<WriteFCmp, avx512vl_f64_info>,
+defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-defm VCMPPS : avx512_vcmp<WriteFCmp, avx512vl_f32_info>,
+defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
}
}
// Commutable scalar min/max (X86fminc / X86fmaxc) for AVX-512. Each of the
// four definitions swaps WriteFCmp for SchedWriteFCmp.Scl; the encoding
// mix-ins (XS/XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8) are the same set as
// before and are only re-wrapped across lines to fit the longer argument.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
- WriteFCmp>, XS, EVEX_4V, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
+ SchedWriteFCmp.Scl>, XS, EVEX_4V,
+ VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
- WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
+ SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
+ VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
- WriteFCmp>, XS, EVEX_4V, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
+ SchedWriteFCmp.Scl>, XS, EVEX_4V,
+ VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
- WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
+ SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
+ VEX_LIG, EVEX_CD8<64, CD8VT1>;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
// VEX-encoded scalar compares (three-operand AVX forms). Only the scheduling
// argument changes: WriteFCmp -> SchedWriteFCmp.Scl. Both asm strings per
// definition are left untouched.
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFCmp>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ SchedWriteFCmp.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFCmp>, // same latency as 32 bit compare
+ SchedWriteFCmp.Scl>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG, VEX_WIG;
// Legacy SSE scalar compares (two-operand, destination tied to $src1 via the
// Constraints string). The scheduling argument moves to its own line because
// SchedWriteFCmp.Scl no longer fits after the asm string; nothing else
// changes.
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XS;
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SchedWriteFCmp.Scl>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XD;
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SchedWriteFCmp.Scl>, XD;
}
multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
// Intrinsic-based VEX scalar compares (int_x86_sse_cmp_ss /
// int_x86_sse2_cmp_sd). The scheduling argument precedes the load pattern
// here, and it alone changes: WriteFCmp -> SchedWriteFCmp.Scl.
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- WriteFCmp, sse_load_f32>, XS, VEX_4V;
+ SchedWriteFCmp.Scl, sse_load_f32>, XS, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- WriteFCmp, sse_load_f64>, // same latency as f32
+ SchedWriteFCmp.Scl, sse_load_f64>, // same latency as f32
XD, VEX_4V;
// Intrinsic-based legacy SSE scalar compares, with the destination tied to
// $src1. As in the VEX forms, only WriteFCmp -> SchedWriteFCmp.Scl changes.
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- WriteFCmp, sse_load_f32>, XS;
+ SchedWriteFCmp.Scl, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- WriteFCmp, sse_load_f64>, XD;
+ SchedWriteFCmp.Scl, sse_load_f64>, XD;
}
}
// VEX-encoded packed compares. This is where the width split matters: the
// VR128 definitions (VCMPPS / VCMPPD) take SchedWriteFCmp.XMM, while the
// VR256 definitions (VCMPPSY / VCMPPDY) take SchedWriteFCmp.YMM, so 256-bit
// compares can be scheduled differently from 128-bit ones.
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFCmp, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
+ SchedWriteFCmp.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFCmp, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
+ SchedWriteFCmp.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFCmp, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFCmp.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- WriteFCmp, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFCmp.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
// Legacy SSE packed compares (VR128 only, destination tied to $src1). Both
// definitions switch from WriteFCmp to the 128-bit member
// SchedWriteFCmp.XMM; the memop load fragments are unchanged.
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- WriteFCmp, SSEPackedSingle, memopv4f32>, PS;
+ SchedWriteFCmp.XMM, SSEPackedSingle, memopv4f32>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- WriteFCmp, SSEPackedDouble, memopv2f64>, PD;
+ SchedWriteFCmp.XMM, SSEPackedDouble, memopv2f64>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
// Broadwell model. WriteFCmp goes from the short three-argument form to an
// explicit "[1], 1, 5" tail, and a new WriteFCmpY (YMM/ZMM) entry is added
// that differs only in its last argument (7). This mirrors the existing
// WriteFAdd / WriteFAddY pair directly above.
// NOTE(review): the tail is presumably (ResourceCycles, NumMicroOps,
// folded-load latency) -- confirm against the BWWriteResPair multiclass,
// which is not visible in this hunk.
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
-defm : BWWriteResPair<WriteFCmp, [BWPort1], 3>; // Floating point compare.
+defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare.
+defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 7>; // Floating point compare (YMM/ZMM).
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort0], 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFDiv, [BWPort0], 12>; // 10-14 cycles. // Floating point division.
// Haswell model. WriteFCmp is unchanged; the added WriteFCmpY entry uses the
// same port and first numeric argument, with a last argument of 7 instead
// of 6 (the same value WriteFAddY uses).
// NOTE(review): last argument is presumably the folded-load latency --
// confirm against the HWWriteResPair multiclass.
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>;
+defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort0], 5>;
defm : HWWriteResPair<WriteFDiv, [HWPort0], 12>; // 10-14 cycles.
}
// Haswell per-instruction override list. The YMM load forms of VCMPP{D,S}
// and VMAX/VMIN(C?)P{D,S} are removed from the regex list, so
// VCVTTPS2DQYrm becomes the last entry and now carries the closing `)>;`.
// NOTE(review): the removed instructions presumably fall back to the
// WriteFCmpY class from their definitions -- confirm no other InstRW still
// matches them.
def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
- "VCMPPDYrmi",
- "VCMPPSYrmi",
"VCVTDQ2PSYrm",
"VCVTPS2DQYrm",
- "VCVTTPS2DQYrm",
- "VMAX(C?)PDYrm",
- "VMAX(C?)PSYrm",
- "VMIN(C?)PDYrm",
- "VMIN(C?)PSYrm")>;
+ "VCVTTPS2DQYrm")>;
def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 10;
// Sandy Bridge model. Adds WriteFCmpY next to the unchanged WriteFCmp; the
// two entries differ only in the last argument (7 vs 6), the same value
// WriteFAddY uses.
// NOTE(review): last argument is presumably the folded-load latency --
// confirm against the SBWriteResPair multiclass.
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFDiv, [SBPort0], 24>;
}
// Sandy Bridge per-instruction override list. The same six YMM load-form
// patterns (VCMPP*Yrmi, VMAX/VMIN(C?)P*Yrm) are removed as in the Haswell
// model, and VCVTTPS2DQYrm becomes the terminating entry.
// NOTE(review): the removed instructions presumably now rely on the
// WriteFCmpY class -- confirm.
def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
- "VCMPPDYrmi",
- "VCMPPSYrmi",
"VCVTDQ2PSYrm",
"VCVTPS2DQYrm",
- "VCVTTPS2DQYrm",
- "VMAX(C?)PDYrm",
- "VMAX(C?)PSYrm",
- "VMIN(C?)PDYrm",
- "VMIN(C?)PSYrm")>;
+ "VCVTTPS2DQYrm")>;
def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 10;
// Skylake (client) model. WriteFCmpY is added on the same port group as
// WriteFCmp (SKLPort01) with the same first numeric argument (4); only the
// last argument differs (7 vs 6).
// NOTE(review): last argument is presumably the folded-load latency --
// confirm against the SKLWriteResPair multiclass.
defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : SKLWriteResPair<WriteFAddY, [SKLPort1], 3, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare.
+defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
defm : SKLWriteResPair<WriteFMul, [SKLPort0], 5>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12>; // 10-14 cycles. // Floating point division.
"VADDPSYrm",
"VADDSUBPDYrm",
"VADDSUBPSYrm",
- "VCMPPDYrmi",
- "VCMPPSYrmi",
"VCVTDQ2PSYrm",
"VCVTPS2DQYrm",
"VCVTPS2PDYrm",
"VCVTTPS2DQYrm",
- "VMAX(C?)PDYrm",
- "VMAX(C?)PSYrm",
- "VMIN(C?)PDYrm",
- "VMIN(C?)PSYrm",
"VMULPDYrm",
"VMULPSYrm",
"VPMADDUBSWYrm",
// Skylake server (AVX-512) model. WriteFCmpY is added with the same ports
// and first numeric argument as WriteFCmp; only the last argument differs
// (7 vs 6), matching the WriteFAdd / WriteFAddY pair above it.
// NOTE(review): last argument is presumably the folded-load latency --
// confirm against the SKXWriteResPair multiclass.
defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddY,[SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 6>; // Floating point compare.
+defm : SKXWriteResPair<WriteFCmpY,[SKXPort015], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12>; // 10-14 cycles. // Floating point division.
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup161], (instregex "VCMPPDYrmi",
- "VCMPPSYrmi",
- "VCVTDQ2PDZ256rm(b?)",
+def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PDZ256rm(b?)",
"VCVTDQ2PDZrm(b?)",
"VCVTDQ2PSYrm",
"VCVTDQ2PSZ256rm(b?)",
"VCVTUQQ2PDZ256rm(b?)",
"VCVTUQQ2PDZrm(b?)",
"VCVTUQQ2PSZ256rm(b?)",
- "VMAX(C?)PDYrm",
- "VMAX(C?)PDZ256rm(b?)",
- "VMAX(C?)PDZrm(b?)",
- "VMAX(C?)PSYrm",
- "VMAX(C?)PSZ256rm(b?)",
- "VMAX(C?)PSZrm(b?)",
- "VMIN(C?)PDYrm",
- "VMIN(C?)PDZ256rm(b?)",
- "VMIN(C?)PDZrm(b?)",
- "VMIN(C?)PSYrm",
- "VMIN(C?)PSZ256rm(b?)",
- "VMIN(C?)PSZrm(b?)",
"VMULPDYrm",
"VMULPDZ256rm(b?)",
"VMULPDZrm(b?)",
// Generic scheduling-class declarations. WriteFCmpY is introduced here as a
// new X86SchedWritePair for YMM/ZMM compares. Every CPU model touched by
// this change adds a matching WriteFCmpY resource entry.
// NOTE(review): models not shown here presumably need one too -- confirm
// whether the scheduler requires complete coverage.
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
+defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFDiv : X86SchedWritePair; // Floating point division.
// Per-width scheduling bundles. SchedWriteFCmp's last two members switch
// from WriteFCmp to WriteFCmpY, giving it the same shape as SchedWriteFAdd.
// SchedWriteFMul still uses a single class for all four members.
// NOTE(review): the four positions are presumably (Scl, XMM, YMM, ZMM),
// given the .Scl/.XMM/.YMM/.ZMM accessors used by the instruction
// definitions -- confirm in the X86SchedWriteWidths class.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAdd, WriteFAddY, WriteFAddY>;
def SchedWriteFCmp
- : X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmp, WriteFCmp>;
+ : X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmpY, WriteFCmpY>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMul, WriteFMul>;
def SchedWriteFDiv
// Atom model. The new WriteFCmpY entry is given exactly the same arguments
// as WriteFCmp (and WriteFAdd / WriteFAddY), so it adds the class without
// any timing difference in this model.
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFCmpY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
// Jaguar model. WriteFCmpY uses the YMM-specific multiclass
// (JWriteResYMMPair) with the same resources and first numeric argument as
// WriteFCmp, plus a "[2,2], 2" tail -- the same tail WriteFAddY uses.
// NOTE(review): the tail is presumably (ResourceCycles, NumMicroOps) --
// confirm against the JWriteResYMMPair multiclass.
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
+defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
}
def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>;
// Removal of the hand-written Jaguar overrides for 256-bit compare and
// min/max. The register form's values (JFPU0+JFPA, Latency 2,
// ResourceCycles [2,2], NumMicroOps 2) match the arguments of the
// JWriteResYMMPair<WriteFCmpY, ...> entry in the Jaguar model.
// NOTE(review): the load form (extra JLAGU resource, Latency 7) is
// presumably reproduced by the folded half of that pair -- confirm against
// JWriteResYMMPair before relying on identical timing.
-def JWriteFCmpY: SchedWriteRes<[JFPU0, JFPA]> {
- let Latency = 2;
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFCmpY], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>;
-
-def JWriteFCmpYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
- let Latency = 7;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFCmpYLd, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
-
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];
// Silvermont model. WriteFCmpY is added with arguments identical to
// WriteFCmp (SLM_FPC_RSV1, 3), so the class exists here with no timing
// distinction between the two.
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
// Znver1 model. WriteFCmpY is added with arguments identical to WriteFCmp
// (ZnFPU0, 3), the same way WriteFAddY duplicates WriteFAdd in this model.
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;