The Broadwell and Haswell scheduler models were completely overriding the WriteCvtPD2I class definitions with per-instruction InstRW entries; by giving the classes accurate port, latency and micro-op values we can remove those overrides entirely.
Also fixes the scheduling of the YMM folded-load case, which the overrides did not cover - the port usage was confirmed as correct against Agner Fog's instruction tables and uops.info.
defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : BWWriteResPair<WriteCvtSD2I, [BWPort1,BWPort0], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2I, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr",
- "MMX_CVT(T?)PS2PIrr",
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PS2PIrr",
"(V?)CVTSI642SDrr",
"(V?)CVTSI2SDrr",
- "(V?)CVTSI2SSrr",
- "(V?)CVT(T?)PD2DQrr")>;
+ "(V?)CVTSI2SSrr")>;
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
VPSLLVQrm,
VPSRLVQrm)>;
-def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2DQYrr,
- VCVTTPD2DQYrr)>;
-
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
VCVTTPS2DQYrm)>;
-def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup107], (instrs CVTPD2DQrm, VCVTPD2DQrm,
- CVTTPD2DQrm, VCVTTPD2DQrm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm")>;
-
def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
let Latency = 9;
let NumMicroOps = 3;
// Conversion between integer and float.
defm : HWWriteResPair<WriteCvtSD2I, [HWPort1,HWPort0], 4, [1,1], 2, 5>;
-defm : HWWriteResPair<WriteCvtPD2I, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtPD2I, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2I, [HWPort1,HWPort0], 4, [1,1], 2, 5>;
defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3, [1], 1, 7>;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr,
- MMX_CVTPS2PIrr,
- MMX_CVTTPD2PIrr,
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPS2PIrr,
MMX_CVTTPS2PIrr)>;
def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTSI(64)?2SDrr",
- "(V?)CVTSI2SSrr",
- "(V?)CVT(T?)PD2DQrr")>;
+ "(V?)CVTSI2SSrr")>;
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
}
def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
-def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78], (instrs CVTPD2DQrm, VCVTPD2DQrm,
- CVTTPD2DQrm, VCVTTPD2DQrm,
- MMX_CVTPD2PIrm,
- MMX_CVTTPD2PIrm)>;
-
def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
}
def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
-def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2DQYrr,
- VCVTTPD2DQYrr)>;
-
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 13;
let NumMicroOps = 3;
# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm0, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 430.25 2.25 12.67
+# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 432.25 2.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 12 1.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm0, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 433.58 2.25 12.67
+# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 435.58 2.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %ymm0, %ymm2