From b31a5d7270e06920132333908907536cd848d8c1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 12 Nov 2022 12:15:56 +0000 Subject: [PATCH] [X86] Replace unnecessary SKL CVTPD2DQ overrides with better base class defs Also fixes some AVX missing folded instructions --- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 26 +++------------------- .../llvm-mca/X86/SkylakeClient/resources-avx1.s | 18 +++++++-------- 2 files changed, 12 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index bb3f3cf..accd75a 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -447,8 +447,8 @@ defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; -defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; @@ -928,9 +928,7 @@ def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PD2PIrr", - "MMX_CVT(T?)PS2PIrr", - "(V?)CVT(T?)PD2DQrr", +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PS2PIrr", "(V?)CVTSI642SDrr", "(V?)CVTSI2SDrr", "(V?)CVTSI2SSrr")>; @@ -1091,14 +1089,6 @@ def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PMOV(SX|ZX)BDrm", "(V?)PMOV(SX|ZX)WDrm", "(V?)PMOV(SX|ZX)WQrm")>; -def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup89], (instrs VCVTPD2DQYrr, - VCVTTPD2DQYrr)>; - def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> { let Latency = 7; let NumMicroOps = 2; @@ -1337,16 +1327,6 @@ def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup149], (instregex "FICOM(P?)(16|32)m")>; -def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2DQrm, - CVTTPD2DQrm, - MMX_CVTPD2PIrm, - MMX_CVTTPD2PIrm)>; - def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 11; let NumMicroOps = 7; diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index 4f7227b..780d9f5 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1123,9 +1123,9 @@ vzeroupper # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 3 13 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 @@ -1159,9 +1159,9 @@ vzeroupper # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 3 13 1.00 * vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm0, %ymm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 334.58 203.58 173.17 173.17 34.00 322.58 5.25 12.67 +# CHECK-NEXT: - 126.00 336.58 201.58 173.17 173.17 34.00 326.58 5.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1833,9 +1833,9 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 @@ -1869,9 +1869,9 @@ vzeroupper # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm0, %ymm2 -- 2.7.4