From 30498cf7c46f90db0f67ff01f0246860e55be0f2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 10 Nov 2022 11:58:45 +0000 Subject: [PATCH] [X86] SkylakeClientModel - conversion instructions don't use Port015 Fixes a lot of throughput mismatches - the more complicated conversion instructions use SKLPort5+SKLPort01, not SKLPort5+SKLPort015 (SKLPort015 is mainly used for basic Logic + blend ops) Fixing this should allow us to remove a lot of unnecessary scheduler overrides from SkylakeClientModel Confirmed by both Agner + uops.info --- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 6 +++--- .../tools/llvm-mca/X86/SkylakeClient/resources-avx1.s | 16 ++++++++-------- .../tools/llvm-mca/X86/SkylakeClient/resources-f16c.s | 6 +++--- .../tools/llvm-mca/X86/SkylakeClient/resources-sse1.s | 8 ++++---- .../tools/llvm-mca/X86/SkylakeClient/resources-sse2.s | 18 +++++++++--------- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index efd9fb5..42b81eb 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -469,14 +469,14 @@ defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteResUnsupported; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteResUnsupported; defm : X86WriteRes; @@ -928,7 +928,7 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [1,1]; diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index f28cd83..4f7227b 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 333.42 202.42 173.17 173.17 34.00 324.92 5.25 12.67 +# CHECK-NEXT: - 126.00 334.58 203.58 173.17 173.17 34.00 322.58 5.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1832,7 +1832,7 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 @@ -1844,7 +1844,7 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 @@ -1854,21 +1854,21 @@ vzeroupper # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %ecx, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2sd %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2ss %ecx, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s index 648c7b8..03325f2 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-f16c.s @@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 3.67 3.67 1.67 1.67 2.00 6.67 - 0.67 +# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s index 1c77b29..007eb96 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s @@ -333,7 +333,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 24.00 71.33 24.33 32.00 32.00 8.00 32.83 0.50 3.00 +# CHECK-NEXT: - 24.00 71.83 24.83 32.00 32.00 8.00 31.83 0.50 3.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -353,9 +353,9 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1 # CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2ss %ecx, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - cvtsi2ss %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2 @@ -363,7 +363,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s index 082346c..31d260b 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 111.25 79.25 63.50 63.50 14.00 96.25 2.25 5.00 +# CHECK-NEXT: - 40.00 112.58 80.58 63.50 63.50 14.00 93.58 2.25 5.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -712,9 +712,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 @@ -722,7 +722,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %rcx @@ -730,15 +730,15 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %rcx # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %ecx, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtss2sd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvttpd2pi (%rax), %mm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2dq (%rax), %xmm2 -- 2.7.4