None of Haswell/Broadwell/Skylake/Icelake treat CVTTSS2SI64rm differently from CVTSS2SI64rm (or their AVX variants).
Confirmed against Agner Fog's instruction tables, uops.info and the Intel AoM.
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup39], (instregex "(V?)CVT(T?)SD2SI64rr",
- "(V?)CVT(T?)SD2SIrr",
- "(V?)CVT(T?)SS2SI64rr",
- "(V?)CVT(T?)SS2SIrr")>;
+def: InstRW<[BWWriteResGroup39], (instregex "(V?)CVT(T?)SD2SI(64)?rr",
+ "(V?)CVT(T?)SS2SI(64)?rr")>;
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup105], (instregex "(V?)CVTSS2SI(64)?rm",
- "(V?)CVT(T?)SD2SI64rm",
- "(V?)CVT(T?)SD2SIrm",
- "VCVTTSS2SI64rm",
- "(V?)CVTTSS2SIrm")>;
+def: InstRW<[BWWriteResGroup105], (instregex "(V?)CVT(T?)SD2SI(64)?rm",
+ "(V?)CVT(T?)SS2SI(64)?rm")>;
def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 9;
}
def: InstRW<[BWWriteResGroup117], (instregex "FICOM(P?)(16|32)m")>;
-def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
-
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup76], (instregex "(V?)CVTSD2SI(64)?rm",
- "(V?)CVTSS2SI(64)?rm",
- "(V?)CVTTSD2SI(64)?rm",
- "VCVTTSS2SI64rm",
- "(V?)CVTTSS2SIrm")>;
+def: InstRW<[HWWriteResGroup76], (instregex "(V?)CVT(T?)SD2SI(64)?rm",
+ "(V?)CVT(T?)SS2SI(64)?rm")>;
def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
}
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
-def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
-
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[ICXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
- "(V?)CVTSS2SI64(Z?)rr",
- "(V?)CVTTSS2SI64(Z?)rr",
- "VCVTTSS2USI64Zrr")>;
+def: InstRW<[ICXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
+ "VCVT(T?)SS2USI64Zrr")>;
def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> {
let Latency = 7;
def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
"VCVT(T?)PS2UQQZrm(b?)")>;
-def ICXWriteResGroup179 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[ICXWriteResGroup179], (instregex "CVTTSS2SI64rm")>;
-
def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup151], (instregex "(V?)CVTSS2SI64rm",
- "(V?)CVT(T?)SD2SI(64)?rm",
- "VCVTTSS2SI64rm",
- "(V?)CVT(T?)SS2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "(V?)CVT(T?)SD2SI(64)?rm",
+ "(V?)CVT(T?)SS2SI(64)?rm")>;
def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 11;
}
def: InstRW<[SKLWriteResGroup156], (instrs LOOPE, LOOPNE)>;
-def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort01]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup160], (instregex "CVTTSS2SI64rm")>;
-
def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 13;
let NumMicroOps = 3;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
- "(V?)CVTSS2SI64(Z?)rr",
- "(V?)CVTTSS2SI64(Z?)rr",
- "VCVTTSS2USI64Zrr")>;
+def: InstRW<[SKXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
+ "VCVT(T?)SS2USI64Zrr")>;
def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
let Latency = 7;
def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
"VCVT(T?)PS2UQQZrm(b?)")>;
-def SKXWriteResGroup179 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup179], (instregex "CVTTSS2SI64rm")>;
-
def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 13;
let NumMicroOps = 3;
# CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx
-# CHECK-NEXT: 4 10 1.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 11 5.00 divps %xmm0, %xmm2
# CHECK-NEXT: 2 16 5.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 43.00 34.50 50.50 32.00 32.00 8.00 41.50 0.50 3.00
+# CHECK-NEXT: - 43.00 34.50 50.50 32.00 32.00 8.00 40.50 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - cvttss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 5.00 1.00 - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divss %xmm0, %xmm2
# CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx
-# CHECK-NEXT: 4 10 1.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 13 7.00 divps %xmm0, %xmm2
# CHECK-NEXT: 2 19 7.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 13 7.00 divss %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 56.00 34.50 50.50 32.00 32.00 8.00 41.50 0.50 3.00
+# CHECK-NEXT: - 56.00 34.50 50.50 32.00 32.00 8.00 40.50 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - cvttss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 7.00 1.00 - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 7.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2
# CHECK-NEXT: - 7.00 1.00 - - - - - - - divss %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
-# CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 11 3.00 divps %xmm0, %xmm2
# CHECK-NEXT: 2 17 3.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: - 24.00 66.17 27.17 32.00 32.00 8.00 36.17 0.50 3.00 - -
+# CHECK-NEXT: - 24.00 65.83 27.83 32.00 32.00 8.00 34.83 0.50 3.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - 1.00 1.00 - - - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - cvttss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 1.33 - - - - cvttss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - - - divps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - - - divss %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
-# CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 11 3.00 divps %xmm0, %xmm2
# CHECK-NEXT: 2 17 3.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 24.00 71.33 24.33 32.00 32.00 8.00 33.83 0.50 3.00
+# CHECK-NEXT: - 24.00 71.33 24.33 32.00 32.00 8.00 32.83 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvttss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - 1.00 - - cvttss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divss %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
-# CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx
+# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 11 3.00 divps %xmm0, %xmm2
# CHECK-NEXT: 2 17 3.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 24.00 65.83 25.83 32.00 32.00 8.00 37.83 0.50 3.00
+# CHECK-NEXT: - 24.00 65.50 26.50 32.00 32.00 8.00 36.50 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx
-# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 1.33 - - cvttss2si (%rax), %rcx
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divss %xmm0, %xmm2