[X86] Replace unnecessary SKL CVTSI2SS/CVTSI2SD overrides with better base class definitions
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 12 Nov 2022 14:29:45 +0000 (14:29 +0000)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 12 Nov 2022 14:29:45 +0000 (14:29 +0000)
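The folded (memory-operand) patterns were missing entirely, so the load forms fell back to inaccurate defaults. The new latency and port assignments are confirmed by both Agner Fog's instruction tables and uops.info.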

llvm/lib/Target/X86/X86SchedSkylakeClient.td
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s

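diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index accd75a..a054fb0 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td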
@@ -451,13 +451,15 @@ defm : SKLWriteResPair<WriteCvtPD2I,   [SKLPort5,SKLPort01], 5, [1,1], 2, 6>;
 defm : SKLWriteResPair<WriteCvtPD2IY,  [SKLPort5,SKLPort01], 7, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
-defm : SKLWriteResPair<WriteCvtI2SS,   [SKLPort1], 4>;
+defm : X86WriteRes<WriteCvtI2SS,      [SKLPort5,SKLPort01],  5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SSLd,   [SKLPort23,SKLPort01], 10, [1,1], 2>;
 defm : SKLWriteResPair<WriteCvtI2PS,   [SKLPort01], 4, [1], 1, 6>;
 defm : SKLWriteResPair<WriteCvtI2PSY,  [SKLPort01], 4, [1], 1, 7>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : SKLWriteResPair<WriteCvtI2SD,   [SKLPort1], 4>;
-defm : SKLWriteResPair<WriteCvtI2PD,   [SKLPort0,SKLPort5], 5, [1,1], 2, 6>;
-defm : SKLWriteResPair<WriteCvtI2PDY,  [SKLPort0,SKLPort5], 7, [1,1], 2, 6>;
+defm : X86WriteRes<WriteCvtI2SD,      [SKLPort5,SKLPort01],  5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd,   [SKLPort23,SKLPort01], 10, [1,1], 2>;
+defm : SKLWriteResPair<WriteCvtI2PD,   [SKLPort0,SKLPort5],  5, [1,1], 2, 6>;
+defm : SKLWriteResPair<WriteCvtI2PDY,  [SKLPort0,SKLPort5],  7, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
 defm : X86WriteRes<WriteCvtSS2SD,     [SKLPort5,SKLPort01],  5, [1,1], 2>;
@@ -928,10 +930,7 @@ def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PS2PIrr",
-                                             "(V?)CVTSI642SDrr",
-                                             "(V?)CVTSI2SDrr",
-                                             "(V?)CVTSI2SSrr")>;
+def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PS2PIrr")>;
 
 def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> {
   let Latency = 5;
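diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index 780d9f5..b446886 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s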
@@ -1146,12 +1146,12 @@ vzeroupper
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsd2ss      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  2      5     1.00                        vcvtsi2sd      %ecx, %xmm0, %xmm2
 # CHECK-NEXT:  2      5     1.00                        vcvtsi2sd      %rcx, %xmm0, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   vcvtsi2sdl     (%rax), %xmm0, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   vcvtsi2sdq     (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   vcvtsi2sdl     (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   vcvtsi2sdq     (%rax), %xmm0, %xmm2
 # CHECK-NEXT:  2      5     1.00                        vcvtsi2ss      %ecx, %xmm0, %xmm2
 # CHECK-NEXT:  3      6     2.00                        vcvtsi2ss      %rcx, %xmm0, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   vcvtsi2ssl     (%rax), %xmm0, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   vcvtsi2ssq     (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   vcvtsi2ssl     (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   vcvtsi2ssq     (%rax), %xmm0, %xmm2
 # CHECK-NEXT:  2      5     1.00                        vcvtss2sd      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      10    0.50    *                   vcvtss2sd      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtss2si      %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     126.00 336.58 201.58 173.17 173.17 34.00  326.58 5.25   12.67
+# CHECK-NEXT:  -     126.00 338.58 199.58 173.17 173.17 34.00  326.58 5.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1856,12 +1856,12 @@ vzeroupper
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -     1.00    -      -     vcvtsd2ss  (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     vcvtsi2sd  %ecx, %xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     vcvtsi2sd  %rcx, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtsi2sdl (%rax), %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtsi2sdq (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtsi2sdl (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtsi2sdq (%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     vcvtsi2ss  %ecx, %xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     2.00    -      -     vcvtsi2ss  %rcx, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtsi2ssl (%rax), %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtsi2ssq (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtsi2ssl (%rax), %xmm0, %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtsi2ssq (%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     vcvtss2sd  %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vcvtss2sd  (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     1.50   0.50    -      -      -      -      -      -     vcvtss2si  %xmm0, %ecx
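diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
index 007eb96..b066ce3 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s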
@@ -214,8 +214,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  2      9     0.50    *                   cvtps2pi       (%rax), %mm2
 # CHECK-NEXT:  2      5     1.00                        cvtsi2ss       %ecx, %xmm2
 # CHECK-NEXT:  3      6     2.00                        cvtsi2ss       %rcx, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   cvtsi2ssl      (%rax), %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   cvtsi2ssq      (%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtsi2ssl      (%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtsi2ssq      (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        cvtss2si       %xmm0, %ecx
 # CHECK-NEXT:  3      7     1.00                        cvtss2si       %xmm0, %rcx
 # CHECK-NEXT:  3      11    1.00    *                   cvtss2si       (%rax), %ecx
@@ -333,7 +333,7 @@ xorps       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     24.00  71.83  24.83  32.00  32.00  8.00   31.83  0.50   3.00
+# CHECK-NEXT:  -     24.00  72.83  23.83  32.00  32.00  8.00   31.83  0.50   3.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -357,8 +357,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtps2pi   (%rax), %mm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtsi2ss   %ecx, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     2.00    -      -     cvtsi2ss   %rcx, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtsi2ssl  (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtsi2ssq  (%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtsi2ssl  (%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtsi2ssq  (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.50   0.50    -      -      -      -      -      -     cvtss2si   %xmm0, %ecx
 # CHECK-NEXT:  -      -     1.50   0.50    -      -      -     1.00    -      -     cvtss2si   %xmm0, %rcx
 # CHECK-NEXT:  -      -     1.50   0.50   0.50   0.50    -      -      -      -     cvtss2si   (%rax), %ecx
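diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index 31d260b..188a07e 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s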
@@ -446,8 +446,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   cvtsd2ss       (%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtsi2sd       %ecx, %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtsi2sd       %rcx, %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   cvtsi2sdl      (%rax), %xmm2
-# CHECK-NEXT:  2      9     1.00    *                   cvtsi2sdq      (%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtsi2sdl      (%rax), %xmm2
+# CHECK-NEXT:  2      10    0.50    *                   cvtsi2sdq      (%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvtss2sd       %xmm0, %xmm2
 # CHECK-NEXT:  2      10    0.50    *                   cvtss2sd       (%rax), %xmm2
 # CHECK-NEXT:  2      5     1.00                        cvttpd2dq      %xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     40.00  112.58 80.58  63.50  63.50  14.00  93.58  2.25   5.00
+# CHECK-NEXT:  -     40.00  113.58 79.58  63.50  63.50  14.00  93.58  2.25   5.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -732,8 +732,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -     1.00    -      -     cvtsd2ss   (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtsi2sd   %ecx, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtsi2sd   %rcx, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtsi2sdl  (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtsi2sdq  (%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtsi2sdl  (%rax), %xmm2
+# CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtsi2sdq  (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvtss2sd   %xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     cvtss2sd   (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     1.00    -      -     cvttpd2dq  %xmm0, %xmm2