[X86] Replace unnecessary CVTPD2PI/CVTPD2DQ overrides with better base class defs
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Wed, 9 Nov 2022 17:08:36 +0000 (17:08 +0000)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Wed, 9 Nov 2022 17:08:45 +0000 (17:08 +0000)
Broadwell/Haswell were completely overriding the WriteCvtPD2I class defs - we can remove those overrides entirely by just choosing better class defs.

Also fixes the scheduler model for the previously missing YMM folded-load case (VCVTPD2DQY/VCVTTPD2DQY with a memory operand) - confirmed with Agner + uops.info that the port usage is correct.

llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s

index c6bc775..7156c2e 100644 (file)
@@ -362,8 +362,8 @@ defm : BWWriteResPair<WriteCvtPS2I,   [BWPort1], 3>;
 defm : BWWriteResPair<WriteCvtPS2IY,  [BWPort1], 3>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
 defm : BWWriteResPair<WriteCvtSD2I,   [BWPort1,BWPort0], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtPD2I,   [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2I,   [BWPort1,BWPort5], 4, [1,1], 2, 5>;
+defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1,BWPort5], 6, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
 defm : BWWriteResPair<WriteCvtI2SS,   [BWPort1], 4>;
@@ -851,12 +851,10 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr",
-                                            "MMX_CVT(T?)PS2PIrr",
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PS2PIrr",
                                             "(V?)CVTSI642SDrr",
                                             "(V?)CVTSI2SDrr",
-                                            "(V?)CVTSI2SSrr",
-                                            "(V?)CVT(T?)PD2DQrr")>;
+                                            "(V?)CVTSI2SSrr")>;
 
 def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
   let Latency = 4;
@@ -968,14 +966,6 @@ def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
                                          VPSLLVQrm,
                                          VPSRLVQrm)>;
 
-def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2DQYrr,
-                                         VCVTTPD2DQYrr)>;
-
 def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
   let Latency = 6;
   let NumMicroOps = 2;
@@ -1188,15 +1178,6 @@ def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
 def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
                                           VCVTTPS2DQYrm)>;
 
-def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
-  let Latency = 9;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup107], (instrs CVTPD2DQrm, VCVTPD2DQrm,
-                                          CVTTPD2DQrm, VCVTTPD2DQrm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm")>;
-
 def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
   let Latency = 9;
   let NumMicroOps = 3;
index 0cd007c..7c5804f 100644 (file)
@@ -356,9 +356,9 @@ defm : HWWriteResPair<WriteFVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported
 
 // Conversion between integer and float.
 defm : HWWriteResPair<WriteCvtSD2I,   [HWPort1,HWPort0], 4, [1,1], 2, 5>;
-defm : HWWriteResPair<WriteCvtPD2I,   [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IY,  [HWPort1], 3>;
-defm : HWWriteResPair<WriteCvtPD2IZ,  [HWPort1], 3>; // Unsupported = 1
+defm : HWWriteResPair<WriteCvtPD2I,   [HWPort1,HWPort5], 4, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IY,  [HWPort1,HWPort5], 6, [1,1], 2, 6>;
+defm : HWWriteResPair<WriteCvtPD2IZ,  [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtSS2I,   [HWPort1,HWPort0], 4, [1,1], 2, 5>;
 defm : HWWriteResPair<WriteCvtPS2I,   [HWPort1], 3, [1], 1, 6>;
 defm : HWWriteResPair<WriteCvtPS2IY,  [HWPort1], 3, [1], 1, 7>;
@@ -1354,13 +1354,10 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr,
-                                         MMX_CVTPS2PIrr,
-                                         MMX_CVTTPD2PIrr,
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPS2PIrr,
                                          MMX_CVTTPS2PIrr)>;
 def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTSI(64)?2SDrr",
-                                            "(V?)CVTSI2SSrr",
-                                            "(V?)CVT(T?)PD2DQrr")>;
+                                            "(V?)CVTSI2SSrr")>;
 
 def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
   let Latency = 11;
@@ -1369,16 +1366,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
 }
 def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
 
-def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
-  let Latency = 10;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78], (instrs CVTPD2DQrm, VCVTPD2DQrm,
-                                         CVTTPD2DQrm, VCVTTPD2DQrm,
-                                         MMX_CVTPD2PIrm,
-                                         MMX_CVTTPD2PIrm)>;
-
 def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
   let Latency = 9;
   let NumMicroOps = 3;
@@ -1479,14 +1466,6 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
 }
 def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
 
-def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2DQYrr,
-                                          VCVTTPD2DQYrr)>;
-
 def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
   let Latency = 13;
   let NumMicroOps = 3;
index 27c6120..c33cc79 100644 (file)
@@ -1125,7 +1125,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2dq      %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vcvtpd2dqx     (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2dq      %ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvtpd2dqy     (%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvtpd2dqy     (%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2ps      %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vcvtpd2psx     (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2ps      %ymm0, %xmm2
@@ -1161,7 +1161,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvttpd2dq     %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vcvttpd2dqx    (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvttpd2dq     %ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvttpd2dqy    (%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvttpd2dqy    (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq     %xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   vcvttps2dq     (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq     %ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     257.00 215.25 235.25 176.17 176.17 38.00  430.25 2.25   12.67
+# CHECK-NEXT:  -     257.00 215.25 235.25 176.17 176.17 38.00  432.25 2.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1835,7 +1835,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq  %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqx (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq  %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqy (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps  %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2psx (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps  %ymm0, %xmm2
@@ -1871,7 +1871,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqx        (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttpd2dqy        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqy        (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttps2dq (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq %ymm0, %ymm2
index ea7d251..3da547d 100644 (file)
@@ -1125,7 +1125,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2dq      %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtpd2dqx     (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2dq      %ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvtpd2dqy     (%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvtpd2dqy     (%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        vcvtpd2ps      %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtpd2psx     (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtpd2ps      %ymm0, %xmm2
@@ -1161,7 +1161,7 @@ vzeroupper
 # CHECK-NEXT:  2      4     1.00                        vcvttpd2dq     %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvttpd2dqx    (%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvttpd2dq     %ymm0, %xmm2
-# CHECK-NEXT:  2      8     1.00    *                   vcvttpd2dqy    (%rax), %xmm2
+# CHECK-NEXT:  3      12    1.00    *                   vcvttpd2dqy    (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq     %xmm0, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   vcvttps2dq     (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvttps2dq     %ymm0, %ymm2
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     336.00 214.58 236.58 176.17 176.17 38.00  433.58 2.25   12.67
+# CHECK-NEXT:  -     336.00 214.58 236.58 176.17 176.17 38.00  435.58 2.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1835,7 +1835,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq  %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqx (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2dq  %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2dqy (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps  %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtpd2psx (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtpd2ps  %ymm0, %xmm2
@@ -1871,7 +1871,7 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqx        (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttpd2dqy        (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvttpd2dqy        (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvttps2dq (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvttps2dq %ymm0, %ymm2