From 65ad09da0ea7b947ce4bfa96dee00a53230f4cf9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 11 Sep 2021 20:29:25 +0100 Subject: [PATCH] [X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput The packed variants of the instructions had been modelled as the same as the scalar variants. Reported during a run of llvm-exegesis on a cheap SLM box and matches what Agner / InstLatX64 report as well. --- llvm/lib/Target/X86/X86ScheduleSLM.td | 38 +++++++++++----------- llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s | 34 +++++++++---------- llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s | 8 ++--- llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s | 18 +++++----- 4 files changed, 49 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 36d0cad..b0a2c75 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -233,33 +233,33 @@ defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s index c9a3271..8c0023e 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s @@ -226,8 +226,8 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %ecx # CHECK-NEXT: 1 8 1.00 * cvttss2si (%rax), %rcx -# CHECK-NEXT: 1 39 39.00 divps %xmm0, %xmm2 -# CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2 +# CHECK-NEXT: 6 39 39.00 divps %xmm0, %xmm2 +# CHECK-NEXT: 7 42 39.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2 # CHECK-NEXT: 1 22 17.00 * divss (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax) @@ -290,19 +290,19 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * pshufw $1, (%rax), %mm2 -# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * rcpps (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * rcpss (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * rsqrtps (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1 8 1.00 * rsqrtss (%rax), %xmm2 +# CHECK-NEXT: 5 9 8.00 rcpps %xmm0, %xmm2 +# CHECK-NEXT: 6 12 8.00 * rcpps (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 rcpss %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * rcpss (%rax), %xmm2 +# CHECK-NEXT: 5 9 8.00 rsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6 12 8.00 * rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 rsqrtss %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * rsqrtss (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * shufps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 41 40.00 sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1 44 40.00 * sqrtps (%rax), %xmm2 +# CHECK-NEXT: 5 41 40.00 sqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6 44 40.00 * sqrtps (%rax), %xmm2 # CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2 # CHECK-NEXT: 1 23 20.00 * sqrtss (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax) @@ -331,7 +331,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - 232.00 8.00 80.00 37.00 0.50 0.50 67.00 +# CHECK-NEXT: - 232.00 8.00 108.00 37.00 0.50 0.50 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -431,12 +431,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 psadbw (%rax), %mm2 # CHECK-NEXT: - - - 1.00 - - - - pshufw $1, %mm0, %mm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 pshufw $1, (%rax), %mm2 -# CHECK-NEXT: - - - 1.00 - - - - rcpps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 rcpps (%rax), %xmm2 +# CHECK-NEXT: - - - 8.00 - - - - rcpps %xmm0, %xmm2 +# CHECK-NEXT: - - - 8.00 - - - 1.00 rcpps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - rcpss %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 rcpss (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtps (%rax), %xmm2 +# CHECK-NEXT: - - - 8.00 - - - - rsqrtps %xmm0, %xmm2 +# CHECK-NEXT: - - - 8.00 - - - 1.00 rsqrtps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - rsqrtss %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 rsqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 sfence diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s index e00613e..6c88d2b 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s @@ -460,8 +460,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 cvttsd2si %xmm0, %rcx # CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %ecx # CHECK-NEXT: 1 8 1.00 * cvttsd2si (%rax), %rcx -# CHECK-NEXT: 1 69 69.00 divpd %xmm0, %xmm2 -# CHECK-NEXT: 1 72 69.00 * divpd (%rax), %xmm2 +# CHECK-NEXT: 6 69 69.00 divpd %xmm0, %xmm2 +# CHECK-NEXT: 7 72 69.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 34 32.00 divsd %xmm0, %xmm2 # CHECK-NEXT: 1 37 32.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U lfence @@ -658,8 +658,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * pxor (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * shufpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 71 70.00 sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 5 71 70.00 sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 6 74 70.00 * sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2 # CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2 # CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s index 6fb8d19..3779e5a 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s @@ -163,10 +163,10 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2 # CHECK-NEXT: 2 4 4.00 blendvps %xmm0, %xmm0, %xmm2 # CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 5 12 8.00 dppd $22, %xmm0, %xmm2 +# CHECK-NEXT: 6 15 8.00 * dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 9 15 12.00 dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: 10 18 12.00 * dpps $22, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 2 4 2.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2 @@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 104.00 25.00 - - 54.00 +# CHECK-NEXT: - - - 104.00 61.00 - - 54.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -276,10 +276,10 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2 # CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2 # CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2 -# CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2 -# CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 dpps $22, (%rax), %xmm2 +# CHECK-NEXT: - - - - 8.00 - - - dppd $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - 8.00 - - 1.00 dppd $22, (%rax), %xmm2 +# CHECK-NEXT: - - - - 12.00 - - - dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - 12.00 - - 1.00 dpps $22, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - 1.00 - - - 2.00 extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2 -- 2.7.4