The znver1/znver2 schedules for CVTPD2PS were incorrectly double-pumping the xmm-load variant; only the ymm variants should be double-pumped, and only on znver1.
In addition, the xmm-load variant was incorrectly using FP03 instead of just FP3.
Confirmed against the AMD Software Optimization Guide (SoG) family 17h tables, Agner Fog's instruction tables, and uops.info.
This is another step towards removing a lot of unnecessary overrides from all the x86 scheduler models - these should hopefully be convertible into regular WriteCvtPD2I classes soon.
}
def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
}
// CVTPD2PS.
// z,z.
defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
-def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
+def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU3]> {
let Latency = 11;
- let NumMicroOps = 2;
- let ResourceCycles = [1,2];
}
// x,m128.
def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
// x,m256.
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,2];
}
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// z,m512
// z,z.
defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
-def Zn2WriteCVTPD2PSLd: SchedWriteRes<[Zn2AGU,Zn2FPU03]> {
+def Zn2WriteCVTPD2PSLd: SchedWriteRes<[Zn2AGU,Zn2FPU3]> {
let Latency = 10;
- let NumMicroOps = 2;
}
// x,m128.
def : SchedAlias<WriteCvtPD2PSLd, Zn2WriteCVTPD2PSLd>;
# CHECK-NEXT: 1 5 1.00 vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 2 12 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 vcvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 2 11 1.00 * vcvtpd2psx (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 vcvtpd2ps %ymm0, %xmm2
-# CHECK-NEXT: 1 11 1.00 * vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: 1 11 1.00 * vcvtpd2psx (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 vcvtpd2ps %ymm0, %xmm2
+# CHECK-NEXT: 2 11 2.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 1 12 1.00 * vcvtsd2si (%rax), %ecx
# CHECK-NEXT: 1 12 1.00 * vcvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 11 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 11 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 175.00 175.00 - - - - - 146.25 227.25 223.75 313.75 -
+# CHECK-NEXT: 175.00 175.00 - - - - - 144.25 227.25 223.75 315.75 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - - 0.50 0.50 1.00 - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 1.00 - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - vcvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - vcvtpd2psx (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - vcvtpd2ps %ymm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vcvtpd2psx (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 2.00 - vcvtpd2ps %ymm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 2.00 - vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 1.00 - vcvtsd2si (%rax), %ecx
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 1.00 - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - - - - - - - - - 1.00 - vcvtsd2ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - vcvtsd2ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.33 0.33 - 1.33 - vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - 0.33 0.33 - 1.33 - vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: 1 12 1.00 * cvtpd2pi (%rax), %mm2
# CHECK-NEXT: 1 4 1.00 cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 2 11 1.00 * cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 1 11 1.00 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtsd2si (%rax), %ecx
# CHECK-NEXT: 1 12 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 4 1.00 cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT: 2 11 1.00 * cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 1 11 1.00 * cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: 1 5 1.00 cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 66.50 66.50 - - - - - 52.92 59.42 76.75 121.92 -
+# CHECK-NEXT: 66.50 66.50 - - - - - 50.92 59.42 76.75 121.92 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - - - - 1.00 - cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 1.00 - cvtsd2si (%rax), %ecx
# CHECK-NEXT: 0.50 0.50 - - - - - - - 1.00 1.00 - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - - - - - - - - - 1.00 - cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.33 0.33 - 1.33 - cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: - - - - - - - 0.33 0.33 - 1.33 - cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 * vcvtpd2psx (%rax), %xmm2
+# CHECK-NEXT: 1 10 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * vcvtsd2si (%rax), %ecx
# CHECK-NEXT: 1 11 1.00 * vcvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 10 0.50 * vcvtsd2ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 10 1.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 127.58 192.58 196.75 304.08 -
+# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 126.58 192.58 196.75 305.08 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - vcvtpd2psx (%rax), %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 1.00 - vcvtsd2si (%rax), %ecx
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 1.00 - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtsd2ss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - vcvtsd2ss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 1.33 - vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 1.33 - vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: 1 12 1.00 * cvtpd2pi (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 * cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 1 10 1.00 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * cvtsd2si (%rax), %ecx
# CHECK-NEXT: 1 11 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT: 2 10 0.50 * cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 1 10 1.00 * cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: 1 3 1.00 cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 51.92 60.92 78.25 132.92 -
+# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 50.92 60.92 78.25 133.92 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 1.00 - cvtsd2si (%rax), %ecx
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 1.00 1.00 - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 1.33 - cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 1.33 - cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtsi2sdl (%rax), %xmm2