The packed variants of the instructions had been modelled identically to the scalar variants.
This was reported during a run of llvm-exegesis on a cheap SLM box, and it also matches what Agner / InstLatX64 report.
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
-defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
-defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
-defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
-defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20]>;
-defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
-defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
+defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
+defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35]>;
-defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
-defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
+defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
+defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
-defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 12, [8], 5, 1>;
+defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 15, [12], 9, 1>;
+defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
# CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %rcx
-# CHECK-NEXT: 1 39 39.00 divps %xmm0, %xmm2
-# CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2
+# CHECK-NEXT: 6 39 39.00 divps %xmm0, %xmm2
+# CHECK-NEXT: 7 42 39.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2
# CHECK-NEXT: 1 22 17.00 * divss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax)
# CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * pshufw $1, (%rax), %mm2
-# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rcpps (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rcpss (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rsqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2
-# CHECK-NEXT: 1 8 1.00 * rsqrtss (%rax), %xmm2
+# CHECK-NEXT: 5 9 8.00 rcpps %xmm0, %xmm2
+# CHECK-NEXT: 6 12 8.00 * rcpps (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 rcpss %xmm0, %xmm2
+# CHECK-NEXT: 1 7 1.00 * rcpss (%rax), %xmm2
+# CHECK-NEXT: 5 9 8.00 rsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6 12 8.00 * rsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2
+# CHECK-NEXT: 1 7 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U sfence
# CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * shufps $1, (%rax), %xmm2
-# CHECK-NEXT: 1 41 40.00 sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 44 40.00 * sqrtps (%rax), %xmm2
+# CHECK-NEXT: 5 41 40.00 sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6 44 40.00 * sqrtps (%rax), %xmm2
# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 23 20.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - 232.00 8.00 80.00 37.00 0.50 0.50 67.00
+# CHECK-NEXT: - 232.00 8.00 108.00 37.00 0.50 0.50 67.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
# CHECK-NEXT: - - - 1.00 - - - 1.00 psadbw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pshufw $1, %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pshufw $1, (%rax), %mm2
-# CHECK-NEXT: - - - 1.00 - - - - rcpps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpps (%rax), %xmm2
+# CHECK-NEXT: - - - 8.00 - - - - rcpps %xmm0, %xmm2
+# CHECK-NEXT: - - - 8.00 - - - 1.00 rcpps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rcpss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpss (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - rsqrtps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - 8.00 - - - - rsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - - - 8.00 - - - 1.00 rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 sfence
# CHECK-NEXT: 1 5 1.00 cvttsd2si %xmm0, %rcx
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %ecx
# CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %rcx
-# CHECK-NEXT: 1 69 69.00 divpd %xmm0, %xmm2
-# CHECK-NEXT: 1 72 69.00 * divpd (%rax), %xmm2
+# CHECK-NEXT: 6 69 69.00 divpd %xmm0, %xmm2
+# CHECK-NEXT: 7 72 69.00 * divpd (%rax), %xmm2
# CHECK-NEXT: 1 34 32.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 1 37 32.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U lfence
# CHECK-NEXT: 1 4 1.00 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: 1 71 70.00 sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2
+# CHECK-NEXT: 5 71 70.00 sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 6 74 70.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 5 12 8.00 dppd $22, %xmm0, %xmm2
+# CHECK-NEXT: 6 15 8.00 * dppd $22, (%rax), %xmm2
+# CHECK-NEXT: 9 15 12.00 dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: 10 18 12.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 4 2.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 104.00 25.00 - - 54.00
+# CHECK-NEXT: - - - 104.00 61.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2
-# CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2
-# CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - - - 8.00 - - - dppd $22, %xmm0, %xmm2
+# CHECK-NEXT: - - - - 8.00 - - 1.00 dppd $22, (%rax), %xmm2
+# CHECK-NEXT: - - - - 12.00 - - - dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: - - - - 12.00 - - 1.00 dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - 1.00 - - - 2.00 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2