[X86] Fix SLM FP<->INT throughputs.
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 22 Jul 2021 18:26:07 +0000 (19:26 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 22 Jul 2021 18:39:04 +0000 (19:39 +0100)
Noticed while trying to clean up the shift cost model for SSE4 targets using the script in D10369: SLM double-pumps all the 128-bit vector conversion ops and only uses the FP0 pipe. Numbers taken from the Intel AOM and Agner.

llvm/lib/Target/X86/X86ScheduleSLM.td
llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s

index 9adc302..123844a 100644 (file)
@@ -284,31 +284,31 @@ defm : X86WriteResPairUnsupported<WriteFShuffle256>;
 defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
 
 // Conversion between integer and float.
-defm : SLMWriteResPair<WriteCvtSS2I,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPS2I,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPS2IY,  [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtSS2I,   [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteCvtPS2I,   [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtPS2IY,  [SLM_FPC_RSV0], 5, [2]>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : SLMWriteResPair<WriteCvtSD2I,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPD2I,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPD2IY,  [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtSD2I,   [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteCvtPD2I,   [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtPD2IY,  [SLM_FPC_RSV0], 5, [2]>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
-defm : SLMWriteResPair<WriteCvtI2SS,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtI2PS,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtI2PSY,  [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtI2SS,   [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtI2PS,   [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtI2PSY,  [SLM_FPC_RSV0], 5, [2]>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : SLMWriteResPair<WriteCvtI2SD,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtI2PD,   [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtI2PDY,  [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtI2SD,   [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtI2PD,   [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtI2PDY,  [SLM_FPC_RSV0], 5, [2]>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
-defm : SLMWriteResPair<WriteCvtSS2SD,  [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPS2PD,  [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtSS2SD,  [SLM_FPC_RSV0], 4, [2]>;
+defm : SLMWriteResPair<WriteCvtPS2PD,  [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV0], 5, [2]>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : SLMWriteResPair<WriteCvtSD2SS,  [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPD2PS,  [SLM_FPC_RSV01], 4>;
-defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteCvtSD2SS,  [SLM_FPC_RSV0], 4, [2]>;
+defm : SLMWriteResPair<WriteCvtPD2PS,  [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV0], 5, [2]>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
 
 defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
index 866ba7a..c9a3271 100644 (file)
@@ -208,24 +208,24 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   cmpeqss        (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        comiss %xmm0, %xmm1
 # CHECK-NEXT:  1      6     1.00    *                   comiss (%rax), %xmm1
-# CHECK-NEXT:  1      4     0.50                        cvtpi2ps       %mm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtpi2ps       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtps2pi       %xmm0, %mm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtps2pi       (%rax), %mm2
-# CHECK-NEXT:  1      4     0.50                        cvtsi2ss       %ecx, %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtsi2ss       %rcx, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtsi2ssl      (%rax), %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtsi2ssl      (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtss2si       %xmm0, %ecx
-# CHECK-NEXT:  1      4     0.50                        cvtss2si       %xmm0, %rcx
-# CHECK-NEXT:  1      7     1.00    *                   cvtss2si       (%rax), %ecx
-# CHECK-NEXT:  1      7     1.00    *                   cvtss2si       (%rax), %rcx
-# CHECK-NEXT:  1      4     0.50                        cvttps2pi      %xmm0, %mm2
-# CHECK-NEXT:  1      7     1.00    *                   cvttps2pi      (%rax), %mm2
-# CHECK-NEXT:  1      4     0.50                        cvttss2si      %xmm0, %ecx
-# CHECK-NEXT:  1      4     0.50                        cvttss2si      %xmm0, %rcx
-# CHECK-NEXT:  1      7     1.00    *                   cvttss2si      (%rax), %ecx
-# CHECK-NEXT:  1      7     1.00    *                   cvttss2si      (%rax), %rcx
+# CHECK-NEXT:  1      5     2.00                        cvtpi2ps       %mm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtpi2ps       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtps2pi       %xmm0, %mm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtps2pi       (%rax), %mm2
+# CHECK-NEXT:  1      5     2.00                        cvtsi2ss       %ecx, %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtsi2ss       %rcx, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtsi2ssl      (%rax), %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtsi2ssl      (%rax), %xmm2
+# CHECK-NEXT:  1      5     1.00                        cvtss2si       %xmm0, %ecx
+# CHECK-NEXT:  1      5     1.00                        cvtss2si       %xmm0, %rcx
+# CHECK-NEXT:  1      8     1.00    *                   cvtss2si       (%rax), %ecx
+# CHECK-NEXT:  1      8     1.00    *                   cvtss2si       (%rax), %rcx
+# CHECK-NEXT:  1      5     2.00                        cvttps2pi      %xmm0, %mm2
+# CHECK-NEXT:  1      8     2.00    *                   cvttps2pi      (%rax), %mm2
+# CHECK-NEXT:  1      5     1.00                        cvttss2si      %xmm0, %ecx
+# CHECK-NEXT:  1      5     1.00                        cvttss2si      %xmm0, %rcx
+# CHECK-NEXT:  1      8     1.00    *                   cvttss2si      (%rax), %ecx
+# CHECK-NEXT:  1      8     1.00    *                   cvttss2si      (%rax), %rcx
 # CHECK-NEXT:  1      39    39.00                       divps  %xmm0, %xmm2
 # CHECK-NEXT:  1      42    39.00   *                   divps  (%rax), %xmm2
 # CHECK-NEXT:  1      19    17.00                       divss  %xmm0, %xmm2
@@ -331,7 +331,7 @@ xorps       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -     232.00 8.00   61.00  46.00  0.50   0.50   67.00
+# CHECK-NEXT:  -     232.00 8.00   80.00  37.00  0.50   0.50   67.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -349,24 +349,24 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   cmpeqss  (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     comiss   %xmm0, %xmm1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   comiss   (%rax), %xmm1
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtpi2ps %mm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtpi2ps (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtps2pi %xmm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtps2pi (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsi2ss %ecx, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsi2ss %rcx, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsi2ssl        (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsi2ssl        (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtss2si %xmm0, %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtss2si %xmm0, %rcx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtss2si (%rax), %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtss2si (%rax), %rcx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttps2pi        %xmm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttps2pi        (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttss2si        %xmm0, %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttss2si        %xmm0, %rcx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttss2si        (%rax), %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttss2si        (%rax), %rcx
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtpi2ps %mm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtpi2ps (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtps2pi %xmm0, %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtps2pi (%rax), %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtsi2ss %ecx, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtsi2ss %rcx, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtsi2ssl        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtsi2ssl        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvtss2si %xmm0, %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvtss2si %xmm0, %rcx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvtss2si (%rax), %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvtss2si (%rax), %rcx
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvttps2pi        %xmm0, %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvttps2pi        (%rax), %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvttss2si        %xmm0, %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvttss2si        %xmm0, %rcx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvttss2si        (%rax), %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvttss2si        (%rax), %rcx
 # CHECK-NEXT:  -     39.00   -     1.00    -      -      -      -     divps    %xmm0, %xmm2
 # CHECK-NEXT:  -     39.00   -     1.00    -      -      -     1.00   divps    (%rax), %xmm2
 # CHECK-NEXT:  -     17.00   -     1.00    -      -      -      -     divss    %xmm0, %xmm2
index 8759dfc..8c5fff1 100644 (file)
@@ -422,44 +422,44 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   cmpeqsd        (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        comisd %xmm0, %xmm1
 # CHECK-NEXT:  1      6     1.00    *                   comisd (%rax), %xmm1
-# CHECK-NEXT:  1      4     0.50                        cvtdq2pd       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtdq2pd       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtdq2ps       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtdq2ps       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtpd2dq       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtpd2dq       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtpd2pi       %xmm0, %mm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtpd2pi       (%rax), %mm2
-# CHECK-NEXT:  1      4     0.50                        cvtpd2ps       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtpd2ps       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtpi2pd       %mm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtpi2pd       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtps2dq       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtps2dq       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtps2pd       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtps2pd       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtsd2si       %xmm0, %ecx
-# CHECK-NEXT:  1      4     0.50                        cvtsd2si       %xmm0, %rcx
-# CHECK-NEXT:  1      7     1.00    *                   cvtsd2si       (%rax), %ecx
-# CHECK-NEXT:  1      7     1.00    *                   cvtsd2si       (%rax), %rcx
-# CHECK-NEXT:  1      4     0.50                        cvtsd2ss       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtsd2ss       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtsi2sd       %ecx, %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtsi2sd       %rcx, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtsi2sdl      (%rax), %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtsi2sdl      (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvtss2sd       %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvtss2sd       (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvttpd2dq      %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvttpd2dq      (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvttpd2pi      %xmm0, %mm2
-# CHECK-NEXT:  1      7     1.00    *                   cvttpd2pi      (%rax), %mm2
-# CHECK-NEXT:  1      4     0.50                        cvttps2dq      %xmm0, %xmm2
-# CHECK-NEXT:  1      7     1.00    *                   cvttps2dq      (%rax), %xmm2
-# CHECK-NEXT:  1      4     0.50                        cvttsd2si      %xmm0, %ecx
-# CHECK-NEXT:  1      4     0.50                        cvttsd2si      %xmm0, %rcx
-# CHECK-NEXT:  1      7     1.00    *                   cvttsd2si      (%rax), %ecx
-# CHECK-NEXT:  1      7     1.00    *                   cvttsd2si      (%rax), %rcx
+# CHECK-NEXT:  1      5     2.00                        cvtdq2pd       %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtdq2pd       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtdq2ps       %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtdq2ps       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtpd2dq       %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtpd2dq       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtpd2pi       %xmm0, %mm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtpd2pi       (%rax), %mm2
+# CHECK-NEXT:  1      5     2.00                        cvtpd2ps       %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtpd2ps       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtpi2pd       %mm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtpi2pd       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtps2dq       %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtps2dq       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtps2pd       %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtps2pd       (%rax), %xmm2
+# CHECK-NEXT:  1      5     1.00                        cvtsd2si       %xmm0, %ecx
+# CHECK-NEXT:  1      5     1.00                        cvtsd2si       %xmm0, %rcx
+# CHECK-NEXT:  1      8     1.00    *                   cvtsd2si       (%rax), %ecx
+# CHECK-NEXT:  1      8     1.00    *                   cvtsd2si       (%rax), %rcx
+# CHECK-NEXT:  1      4     2.00                        cvtsd2ss       %xmm0, %xmm2
+# CHECK-NEXT:  1      7     2.00    *                   cvtsd2ss       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtsi2sd       %ecx, %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvtsi2sd       %rcx, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtsi2sdl      (%rax), %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvtsi2sdl      (%rax), %xmm2
+# CHECK-NEXT:  1      4     2.00                        cvtss2sd       %xmm0, %xmm2
+# CHECK-NEXT:  1      7     2.00    *                   cvtss2sd       (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvttpd2dq      %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvttpd2dq      (%rax), %xmm2
+# CHECK-NEXT:  1      5     2.00                        cvttpd2pi      %xmm0, %mm2
+# CHECK-NEXT:  1      8     2.00    *                   cvttpd2pi      (%rax), %mm2
+# CHECK-NEXT:  1      5     2.00                        cvttps2dq      %xmm0, %xmm2
+# CHECK-NEXT:  1      8     2.00    *                   cvttps2dq      (%rax), %xmm2
+# CHECK-NEXT:  1      5     1.00                        cvttsd2si      %xmm0, %ecx
+# CHECK-NEXT:  1      5     1.00                        cvttsd2si      %xmm0, %rcx
+# CHECK-NEXT:  1      8     1.00    *                   cvttsd2si      (%rax), %ecx
+# CHECK-NEXT:  1      8     1.00    *                   cvttsd2si      (%rax), %rcx
 # CHECK-NEXT:  1      69    69.00                       divpd  %xmm0, %xmm2
 # CHECK-NEXT:  1      72    69.00   *                   divpd  (%rax), %xmm2
 # CHECK-NEXT:  1      34    32.00                       divsd  %xmm0, %xmm2
@@ -687,7 +687,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -     412.00 12.00  184.50 96.50  3.00   3.00   134.00
+# CHECK-NEXT:  -     412.00 12.00  233.50 77.50  3.00   3.00   134.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -706,44 +706,44 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   cmpeqsd  (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     comisd   %xmm0, %xmm1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   comisd   (%rax), %xmm1
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtdq2pd (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtdq2ps %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtdq2ps (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtpd2dq %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtpd2dq (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtpd2pi %xmm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtpd2pi (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtpd2ps %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtpd2ps (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtpi2pd (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtps2dq %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtps2dq (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtps2pd %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtps2pd (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsd2si %xmm0, %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsd2si %xmm0, %rcx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsd2si (%rax), %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsd2si (%rax), %rcx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsd2ss %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsd2ss (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsi2sd %ecx, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtsi2sd %rcx, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsi2sdl        (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtsi2sdl        (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvtss2sd %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvtss2sd (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttpd2dq        %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttpd2dq        (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttpd2pi        %xmm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttpd2pi        (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttps2dq        %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttps2dq        (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttsd2si        %xmm0, %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     cvttsd2si        %xmm0, %rcx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttsd2si        (%rax), %ecx
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   cvttsd2si        (%rax), %rcx
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtdq2pd %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtdq2ps %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtdq2ps (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtpd2dq %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtpd2dq (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtpd2pi %xmm0, %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtpd2pi (%rax), %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtpd2ps %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtpd2ps (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtpi2pd %mm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtps2dq %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtps2dq (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtps2pd %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtps2pd (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvtsd2si %xmm0, %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvtsd2si %xmm0, %rcx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvtsd2si (%rax), %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvtsd2si (%rax), %rcx
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtsd2ss %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtsd2ss (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtsi2sd %ecx, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtsi2sd %rcx, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtsi2sdl        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtsi2sdl        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvtss2sd (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvttpd2dq        %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvttpd2dq        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvttpd2pi        %xmm0, %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvttpd2pi        (%rax), %mm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -      -     cvttps2dq        %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     2.00    -      -      -     1.00   cvttps2dq        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvttsd2si        %xmm0, %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     cvttsd2si        %xmm0, %rcx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvttsd2si        (%rax), %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   cvttsd2si        (%rax), %rcx
 # CHECK-NEXT:  -     69.00   -     1.00    -      -      -      -     divpd    %xmm0, %xmm2
 # CHECK-NEXT:  -     69.00   -     1.00    -      -      -     1.00   divpd    (%rax), %xmm2
 # CHECK-NEXT:  -     32.00   -     1.00    -      -      -      -     divsd    %xmm0, %xmm2