From df975e459008504db1248414718c1b936685d16f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 11 Sep 2021 11:11:37 +0100 Subject: [PATCH] [X86][SLM] Fix PSAD/MPSAD uops, latency and throughput Noticed while trying to improve generic reduction costs via the D103695 helper script. Confirmed with Intel AoM / Agner / InstLatX64. --- llvm/lib/Target/X86/X86ScheduleSLM.td | 8 ++++---- llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s | 10 +++++----- llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index bd7b56a..36d0cad 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -394,12 +394,12 @@ defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s index dfdfa13..e00613e 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s @@ -586,8 +586,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 2.00 * pmuludq (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * por (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 psadbw %xmm0, %xmm2 -# CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 psadbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * psadbw (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pshufd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * pshufd $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pshufhw $1, %xmm0, %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - 412.00 12.00 233.50 77.50 3.00 3.00 134.00 +# CHECK-NEXT: - 412.00 12.00 235.50 77.50 3.00 3.00 134.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -870,8 +870,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 2.00 - - - 1.00 pmuludq (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - - por %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - 1.00 por (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - psadbw %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 psadbw (%rax), %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - psadbw %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - 1.00 psadbw (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - pshufd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 pshufd $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - pshufhw $1, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s index e0e19e6..6fb8d19 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s @@ -172,8 +172,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * insertps $1, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 * movntdqa (%rax), %xmm2 -# CHECK-NEXT: 1 7 1.00 mpsadbw $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: 3 7 5.00 mpsadbw $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 10 5.00 * mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 packusdw %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * packusdw (%rax), %xmm2 # CHECK-NEXT: 2 4 4.00 pblendvb %xmm0, %xmm0, %xmm2 @@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 96.00 25.00 - - 54.00 +# CHECK-NEXT: - - - 104.00 25.00 - - 54.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -285,8 +285,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - insertps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 insertps $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 movntdqa (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - mpsadbw $1, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: - - - 5.00 - - - - mpsadbw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 5.00 - - - 1.00 mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - packusdw %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 packusdw (%rax), %xmm2 # CHECK-NEXT: - - - 4.00 - - - - pblendvb %xmm0, %xmm0, %xmm2 -- 2.7.4