let Predicates = [HasAVX512] in
+ // 512-bit vcvtph2ps: the regular forms and the {sae} form are both scheduled
+ // with the YMM/ZMM half -> float class instead of the generic WriteCvtF2F.
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
- WriteCvtF2F>,
+ WriteCvtPH2PSY>,
- avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- loadv2i64, WriteCvtF2F>, EVEX, EVEX_V256,
+ loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- loadv2i64, WriteCvtF2F>, EVEX, EVEX_V128,
+ loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
}
// AVX-512 vcvtps2ph (opcode 0x1D) in three forms:
//   rr  - maskable register destination, selected through X86cvtps2ph.
//   mr  - store to memory; no selection pattern.
//   mrk - store to memory under the $mask write mask; no selection pattern.
// RR is the scheduling class of the register form; MR is shared by both
// store forms.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
- X86MemOperand x86memop> {
+ X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2)), 0, 0>,
- AVX512AIi8Base, Sched<[WriteCvtF2F]>;
+ AVX512AIi8Base, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[WriteCvtF2FSt]>;
+ Sched<[MR]>;
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
- EVEX_K, Sched<[WriteCvtF2FSt]>;
+ EVEX_K, Sched<[MR]>;
}
}
-multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+// {sae} register form (rrb) of vcvtps2ph; assembler-only, no selection pattern.
+// `sched` is the scheduling class attached to the instruction. It is spelled
+// in lowercase so it does not shadow the Sched<...> class it is passed to.
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ SchedWrite sched> {
let hasSideEffects = 0 in
defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
- EVEX_B, AVX512AIi8Base, Sched<[WriteCvtF2F]>;
+ EVEX_B, AVX512AIi8Base, Sched<[sched]>;
}
let Predicates = [HasAVX512] in {
- defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
- avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+ // 512-bit vcvtps2ph: the {sae} form uses the same YMM/ZMM scheduling class
+ // as the 512-bit register form rather than the XMM class.
+ defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
+ WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
+ avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHY>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
- defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
+ WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
+ WriteCvtPS2PH, WriteCvtPS2PHSt>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
// Half precision conversion instructions
//
// VEX vcvtph2ps (opcode 0x13):
//   rr - converts from a VR128 source register into RC.
//   rm - converts from a loadv2i64 memory operand into RC.
// `sched` schedules the register form; sched.Folded schedules the load form.
-multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched> {
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
- T8PD, VEX, Sched<[WriteCvtF2F]>;
+ T8PD, VEX, Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (X86cvtph2ps (bc_v8i16
(loadv2i64 addr:$src))))]>,
- T8PD, VEX, Sched<[WriteCvtF2FLd]>;
+ T8PD, VEX, Sched<[sched.Folded]>;
}
// VEX vcvtps2ph (opcode 0x1D):
//   rr - converts RC:$src1 into a VR128 register, selected through X86cvtps2ph.
//   mr - stores the converted value to memory; no selection pattern.
// RR and MR are the scheduling classes of the register and store forms.
-multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
+ SchedWrite RR, SchedWrite MR> {
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
- TAPD, VEX, Sched<[WriteCvtF2F]>;
+ TAPD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TAPD, VEX, Sched<[WriteCvtF2FSt]>;
+ TAPD, VEX, Sched<[MR]>;
}
let Predicates = [HasF16C, NoVLX] in {
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
- defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
+ WriteCvtPS2PHSt>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
+ WriteCvtPS2PHYSt>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
-def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
defm : BWWriteResPair<WriteCvtF2F, [BWPort1], 3>; // Float -> Float size conversion.
+defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
- "(V?)CVTPS2PDrr",
+def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr")>;
def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
- "VCVTPS2PHrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI642SDrr",
"(V?)CVTSI2SDrr",
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PS(Y?)rm",
- "(V?)CVTPS2PDrm",
+def: InstRW<[BWWriteResGroup59], (instregex "(V?)CVTPS2PDrm",
"(V?)CVTSS2SDrm",
"VPSLLVQrm",
"VPSRLVQrm")>;
}
def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr",
"VCVTPD2PSYrr",
- "VCVTPS2PHYrr",
"VCVT(T?)PD2DQYrr")>;
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
-def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
+defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [HWPort1,HWPort4,HWPort5,HWPort237], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>;
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSrm",
- "(V?)CVTPS2PDrm")>;
+def: InstRW<[HWWriteResGroup11], (instregex "(V?)CVTPS2PDrm")>;
def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_1], (instregex "VCVTPH2PSYrm",
- "(V?)CVTSS2SDrm",
+def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm",
"VPSLLVQrm",
"VPSRLVQrm")>;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr",
- "VCVTPH2PSrr",
- "(V?)CVTPS2PDrr",
+def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr")>;
def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
- "VCVTPS2PHrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVTSI2SSrr",
}
def: InstRW<[HWWriteResGroup102], (instregex "VCVTDQ2PDYrr",
"VCVTPD2PSYrr",
- "VCVTPS2PHYrr",
"VCVT(T?)PD2DQYrr")>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL",
"SHRD(16|32|64)rrCL")>;
-def HWWriteResGroup106 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup106], (instregex "VCVTPS2PHYmr")>;
-
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
-def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
+
+defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
+defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends.
-def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion.
+defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 8, [1,1,1,1], 4>;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
"MMX_CVT(T?)PS2PIirr",
"(V?)CVT(T?)PD2DQrr",
"(V?)CVTPD2PSrr",
- "VCVTPH2PSrr",
"(V?)CVTPS2PDrr",
- "VCVTPS2PHrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI642SDrr",
"(V?)CVTSI2SDrr",
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr",
- "VCVTPH2PSYrr",
"VCVTPS2PDYrr",
- "VCVTPS2PHYrr",
"VCVT(T?)PD2DQYrr")>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup114], (instregex "VCVTPS2PHYmr")>;
-
def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
- "VCVTPH2PSrm",
"(V?)CVTPS2PDrm")>;
def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm",
- "(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm",
"(V?)CVTTPS2DQrm")>;
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends.
-def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
defm : SKXWriteResPair<WriteCvtF2F, [SKXPort1], 3>; // Float -> Float size conversion.
+defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort015], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort015], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 8, [1,1,1,1], 4>;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
"VCVTPD2PSZ128rr",
"(V?)CVTPD2PSrr",
"VCVTPD2UDQZ128rr",
- "VCVTPH2PSZ128rr",
- "VCVTPH2PSrr",
"VCVTPS2PDZ128rr",
"(V?)CVTPS2PDrr",
- "VCVTPS2PHZ128rr",
- "VCVTPS2PHrr",
"VCVTPS2QQZ128rr",
"VCVTPS2UQQZ128rr",
"VCVTQQ2PSZ128rr",
"VCVTPD2DQ(Y|Z|Z256)rr",
"VCVTPD2PS(Y|Z|Z256)rr",
"VCVTPD2UDQ(Z|Z256)rr",
- "VCVTPH2PS(Y|Z|Z256)rr",
"VCVTPS2PD(Y|Z|Z256)rr",
- "VCVTPS2PH(Y|Z|Z256)rr",
"VCVTPS2QQ(Z|Z256)rr",
"VCVTPS2UQQ(Z|Z256)rr",
"VCVTQQ2PS(Z|Z256)rr",
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup125], (instregex "VCVTPS2PHYmr")>;
-
def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
- "VCVTPH2PSrm",
"(V?)CVTPS2PDrm")>;
def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
"(V?)CVTDQ2PSrm",
"VCVTPD2QQZ128rm(b?)",
"VCVTPD2UQQZ128rm(b?)",
- "VCVTPH2PSYrm",
"VCVTPH2PSZ128rm(b?)",
"VCVTPS2DQZ128rm(b?)",
"(V?)CVTPS2DQrm",
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
-def WriteCvtF2FSt : SchedWrite; // // Float -> Float + store size conversion.
+
+defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
+defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
+
+def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
+def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM/ZMM).
+def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
+def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM/ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
-def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+defm : AtomWriteResPair<WriteCvtPH2PS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteCvtPH2PSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PH, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PHY, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PHSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PHYSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
-def : WriteRes<WriteCvtF2FSt, [JFPU1, JSTC, JSAGU]> { let Latency = 4; }
+
+defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
+defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
let Latency = 7;
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
-// F16C instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-def JWriteCVTPS2PHY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCVTPS2PHY], (instrs VCVTPS2PHYrr)>;
-
-def JWriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JSTC, JFPX, JSAGU]> {
- let Latency = 7;
- let ResourceCycles = [2, 2, 2, 1];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCVTPS2PHYSt], (instrs VCVTPS2PHYmr)>;
-
-def JWriteCVTPH2PSY: SchedWriteRes<[JFPU1, JSTC]> {
- let Latency = 3;
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTPH2PSY], (instrs VCVTPH2PSYrr)>;
-
-def JWriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
- let Latency = 8;
- let ResourceCycles = [1, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>;
-
-////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
-def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
defm : SLMWriteResPair<WriteFMAX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteCvtPH2PS, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteCvtPH2PSY, [SLM_FPC_RSV0], 1>;
+def : WriteRes<WriteCvtPS2PH, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteCvtPS2PHY, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteCvtPS2PHSt, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteCvtPS2PHYSt, [SLM_FPC_RSV0]>;
+
} // SchedModel
defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFSqrt64Z, [ZnFPU3], 40, [40], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
-def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
// Vector integer operations which uses FPU units
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
// r32,m32.
def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
-
// VCVTPS2PH.
// x,v,i.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)rr")>;
+def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
// m,v,i.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)mr")>;
+def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
// VCVTPH2PS.
// v,x.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rr")>;
+def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
// v,m.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rm")>;
+def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
//-- SSE4A instructions --//
// EXTRQ
; BROADWELL-LABEL: test_vcvtps2ph_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
+; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vcvtps2ph_128:
; BROADWELL-LABEL: test_vcvtps2ph_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
+; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
# CHECK-NEXT: 2 2 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 2 6 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 2 4 1.00 vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: 3 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: 3 5 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: 3 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: 3 7 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - BWDivider
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - -
+# CHECK-NEXT: - - - 2.00 2.00 - 12.00 3.00 - 2.00 12.00 - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - - - - - 2.00 1.00 - - 2.00 - - - vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 1.00 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2