From d384a4c530623c73048da040210d44fea1167321 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 31 May 2022 09:14:06 +0100 Subject: [PATCH] [X86] Adjust vector test costs to match SoG (Issue #54889) znver1/2 models were incorrectly modelling the latency/throughput/uops and znver1 ymm variants also require double pumping. Now matches what I can decipher from the AMD SoG, Agner and instlatx64 numbers vs the llvm-exegesis report provided by @fabian-r --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 8 ++--- llvm/lib/Target/X86/X86ScheduleZnver2.td | 8 ++--- .../tools/llvm-mca/X86/Znver1/resources-avx1.s | 38 +++++++++++----------- .../tools/llvm-mca/X86/Znver1/resources-sse41.s | 4 +-- .../tools/llvm-mca/X86/Znver2/resources-avx1.s | 24 +++++++------- .../tools/llvm-mca/X86/Znver2/resources-sse41.s | 4 +-- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index bfa5a14..aada3e0 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -334,8 +334,8 @@ defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -413,8 +413,8 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 7a98b94..c47d235 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -331,8 +331,8 @@ defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -408,8 +408,8 @@ defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s index 905fbe0..c6315d5 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s @@ -1604,10 +1604,10 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpsubw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vptest %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1 +# CHECK-NEXT: 1 2 1.00 vptest %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * vptest (%rax), %xmm1 +# CHECK-NEXT: 3 4 2.00 vptest %ymm0, %ymm1 +# CHECK-NEXT: 5 11 2.00 * vptest (%rax), %ymm1 # CHECK-NEXT: 1 1 0.25 vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -1683,14 +1683,14 @@ vzeroupper # CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 -# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 3 4 2.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 5 11 2.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1 2 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * vtestps (%rax), %xmm1 +# CHECK-NEXT: 3 4 2.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 5 11 2.00 * vtestps (%rax), %ymm1 # CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1 # CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1 @@ -1738,7 +1738,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 175.08 210.25 523.08 - +# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 181.08 216.25 523.08 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2318,8 +2318,8 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vptest %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vptest (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vptest %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vptest (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vptest %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vptest (%rax), %ymm1 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -2397,12 +2397,12 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vtestpd (%rax), %ymm1 # CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vtestps %ymm0, %ymm1 +# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vtestps (%rax), %ymm1 # CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomisd %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 1.00 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomiss %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s index 6c0f877..fb39f94 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s @@ -241,8 +241,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 11 1.00 * pmuldq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1 +# CHECK-NEXT: 1 2 1.00 ptest %xmm0, %xmm1 +# CHECK-NEXT: 2 9 1.00 * ptest (%rax), %xmm1 # CHECK-NEXT: 1 4 1.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 roundps $1, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s index 755a754..4fd1cfa 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s @@ -1604,10 +1604,10 @@ vzeroupper # CHECK-NEXT: 1 8 0.33 * vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.33 * vpsubw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vptest %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1 +# CHECK-NEXT: 1 3 1.00 vptest %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * vptest (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 vptest %ymm0, %ymm1 +# CHECK-NEXT: 2 10 1.00 * vptest (%rax), %ymm1 # CHECK-NEXT: 1 1 0.25 vpunpckhbw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.33 * vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpunpckhdq %xmm0, %xmm1, %xmm2 @@ -1683,14 +1683,14 @@ vzeroupper # CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 -# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 -# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1 -# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 1 3 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 2 10 1.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1 3 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * vtestps (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 2 10 1.00 * vtestps (%rax), %ymm1 # CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1 # CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s index fb26a43..d15850a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s @@ -241,8 +241,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 11 1.00 * pmuldq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1 -# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1 +# CHECK-NEXT: 1 3 1.00 ptest %xmm0, %xmm1 +# CHECK-NEXT: 2 10 1.00 * ptest (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 10 1.00 * roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2 -- 2.7.4