From c9649eb9dab747c3b5c1d2b8ab6d54145fce40b2 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 22 Aug 2019 15:20:16 +0000 Subject: [PATCH] [X86][BtVer2] Fix latency/throughput of scalar integer MUL instructions. Single operand MUL instructions that implicitly set EAX have the following latency/throughput profile (see below): imul %cl # latency: 3cy - uOPs: 1 - 1 JMul imul %cx # latency: 3cy - uOPs: 3 - 3 JMul imul %ecx # latency: 3cy - uOPs: 2 - 2 JMul imul %rcx # latency: 6cy - uOPs: 2 - 4 JMul mul %cl # latency: 3cy - uOPs: 1 - 1 JMul mul %cx # latency: 3cy - uOPs: 3 - 3 JMul mul %ecx # latency: 3cy - uOPs: 2 - 2 JMul mul %rcx # latency: 6cy - uOPs: 2 - 4 JMul Excluding the 64bit variant, which has a latency of 6cy, every other instruction has a latency of 3cy. However, the number of decoded macro-opcodes (as well as the resource cycles) depends on the MUL size. The two operand MULs have a more predictable profile (see below): imul %dx, %dx # latency: 3cy - uOPs: 1 - 1 JMul imul %edx, %edx # latency: 3cy - uOPs: 1 - 1 JMul imul %rdx, %rdx # latency: 6cy - uOPs: 1 - 4 JMul imul $3, %dx, %dx # latency: 4cy - uOPs: 2 - 2 JMul imul $3, %ecx, %ecx # latency: 3cy - uOPs: 1 - 1 JMul imul $3, %rdx, %rdx # latency: 6cy - uOPs: 1 - 4 JMul This patch updates the values in the Jaguar scheduling model and regenerates llvm-mca tests. 
Differential Revision: https://reviews.llvm.org/D66547 llvm-svn: 369661 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 20 ++--- .../llvm-mca/X86/BtVer2/clear-super-register-1.s | 34 ++++----- .../llvm-mca/X86/BtVer2/cmpxchg-read-advance.s | 24 +++--- .../X86/BtVer2/dependency-breaking-sbb-2.s | 32 ++++---- .../llvm-mca/X86/BtVer2/partial-reg-update-2.s | 10 +-- .../llvm-mca/X86/BtVer2/partial-reg-update-4.s | 28 +++---- .../llvm-mca/X86/BtVer2/partial-reg-update-6.s | 32 ++++---- .../llvm-mca/X86/BtVer2/partial-reg-update-7.s | 62 ++++++++-------- .../tools/llvm-mca/X86/BtVer2/partial-reg-update.s | 12 +-- .../tools/llvm-mca/X86/BtVer2/read-advance-2.s | 19 ++--- .../tools/llvm-mca/X86/BtVer2/resources-x86_64.s | 86 +++++++++++----------- llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s | 62 ++++++++-------- llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s | 32 ++++---- llvm/test/tools/llvm-mca/X86/intel-syntax.s | 12 +-- llvm/test/tools/llvm-mca/X86/llvm-mca-markers-10.s | 24 +++--- llvm/test/tools/llvm-mca/X86/llvm-mca-markers-9.s | 24 +++--- 16 files changed, 257 insertions(+), 256 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index b5dce16..954e67c 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -197,16 +197,16 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; defm : X86WriteRes; defm : JWriteResIntPair; diff --git 
a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s index ec028a6..0681f7d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s @@ -15,13 +15,13 @@ bsf %rax, %rcx # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 656 -# CHECK-NEXT: Total uOps: 1100 +# CHECK-NEXT: Total Cycles: 655 +# CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.68 +# CHECK-NEXT: uOps Per Cycle: 1.53 # CHECK-NEXT: IPC: 0.61 -# CHECK-NEXT: Block RThroughput: 5.5 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -32,23 +32,23 @@ bsf %rax, %rcx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 6 4.00 imulq $5, %rcx, %rax +# CHECK-NEXT: 1 6 4.00 imulq $5, %rcx, %rax # CHECK-NEXT: 1 1 0.50 lzcntl %ecx, %eax # CHECK-NEXT: 1 1 0.50 andq %rcx, %rax # CHECK-NEXT: 7 4 4.00 bsfq %rax, %rcx # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . .. imulq $5, %rcx, %rax -# CHECK-NEXT: [0,1] .DeE----R . .. lzcntl %ecx, %eax -# CHECK-NEXT: [0,2] .D=eE----R. .. andq %rcx, %rax -# CHECK-NEXT: [0,3] . D=eeeeER. .. bsfq %rax, %rcx -# CHECK-NEXT: [1,0] . .D=eeeeeeER. imulq $5, %rcx, %rax -# CHECK-NEXT: [1,1] . . D=eE----R. lzcntl %ecx, %eax -# CHECK-NEXT: [1,2] . . D==eE----R andq %rcx, %rax -# CHECK-NEXT: [1,3] . . D==eeeeER bsfq %rax, %rcx +# CHECK: [0,0] DeeeeeeER . . imulq $5, %rcx, %rax +# CHECK-NEXT: [0,1] DeE-----R . . lzcntl %ecx, %eax +# CHECK-NEXT: [0,2] .DeE-----R. . andq %rcx, %rax +# CHECK-NEXT: [0,3] . DeeeeE-R. . bsfq %rax, %rcx +# CHECK-NEXT: [1,0] . D=eeeeeeER. imulq $5, %rcx, %rax +# CHECK-NEXT: [1,1] . .D=eE----R. lzcntl %ecx, %eax +# CHECK-NEXT: [1,2] . 
.D==eE----R andq %rcx, %rax +# CHECK-NEXT: [1,3] . . D==eeeeER bsfq %rax, %rcx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -58,6 +58,6 @@ bsf %rax, %rcx # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 1.5 0.5 0.0 imulq $5, %rcx, %rax -# CHECK-NEXT: 1. 2 1.5 1.0 4.0 lzcntl %ecx, %eax -# CHECK-NEXT: 2. 2 2.5 0.0 4.0 andq %rcx, %rax -# CHECK-NEXT: 3. 2 2.5 0.0 0.0 bsfq %rax, %rcx +# CHECK-NEXT: 1. 2 1.5 1.0 4.5 lzcntl %ecx, %eax +# CHECK-NEXT: 2. 2 2.0 0.0 4.5 andq %rcx, %rax +# CHECK-NEXT: 3. 2 2.0 0.0 0.5 bsfq %rax, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s index cb70b59..43dec12 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s @@ -26,10 +26,10 @@ lock cmpxchgq %rcx, (%rdx) # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 17 -# CHECK-NEXT: Total uOps: 8 +# CHECK-NEXT: Total uOps: 7 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.47 +# CHECK-NEXT: uOps Per Cycle: 0.41 # CHECK-NEXT: IPC: 0.12 # CHECK-NEXT: Block RThroughput: 4.0 @@ -42,7 +42,7 @@ lock cmpxchgq %rcx, (%rdx) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 6 4.00 imulq %rax, %rax +# CHECK-NEXT: 1 6 4.00 imulq %rax, %rax # CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rdx) # CHECK: Resources: @@ -92,10 +92,10 @@ lock cmpxchgq %rcx, (%rdx) # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 17 -# CHECK-NEXT: Total uOps: 8 +# CHECK-NEXT: Total uOps: 7 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.47 +# CHECK-NEXT: uOps Per Cycle: 0.41 # CHECK-NEXT: IPC: 0.12 # CHECK-NEXT: Block RThroughput: 4.0 @@ -108,7 +108,7 @@ lock cmpxchgq %rcx, (%rdx) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 6 4.00 
imulq %rcx, %rcx +# CHECK-NEXT: 1 6 4.00 imulq %rcx, %rcx # CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rdx) # CHECK: Resources: @@ -158,10 +158,10 @@ lock cmpxchgq %rcx, (%rdx) # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 23 -# CHECK-NEXT: Total uOps: 8 +# CHECK-NEXT: Total uOps: 7 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.35 +# CHECK-NEXT: uOps Per Cycle: 0.30 # CHECK-NEXT: IPC: 0.09 # CHECK-NEXT: Block RThroughput: 17.0 @@ -174,7 +174,7 @@ lock cmpxchgq %rcx, (%rdx) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 6 4.00 imulq %rax, %rax +# CHECK-NEXT: 1 6 4.00 imulq %rax, %rax # CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rdx) # CHECK: Resources: @@ -224,10 +224,10 @@ lock cmpxchgq %rcx, (%rdx) # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 # CHECK-NEXT: Total Cycles: 23 -# CHECK-NEXT: Total uOps: 8 +# CHECK-NEXT: Total uOps: 7 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.35 +# CHECK-NEXT: uOps Per Cycle: 0.30 # CHECK-NEXT: IPC: 0.09 # CHECK-NEXT: Block RThroughput: 17.0 @@ -240,7 +240,7 @@ lock cmpxchgq %rcx, (%rdx) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 6 4.00 imulq %rcx, %rcx +# CHECK-NEXT: 1 6 4.00 imulq %rcx, %rcx # CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rdx) # CHECK: Resources: diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s index e121941..6adf58f 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s @@ -13,13 +13,13 @@ sbb %eax, %eax # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 3007 -# CHECK-NEXT: Total uOps: 6000 +# CHECK-NEXT: Total Cycles: 3006 +# CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps 
Per Cycle: 2.00 +# CHECK-NEXT: uOps Per Cycle: 1.50 # CHECK-NEXT: IPC: 1.50 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -30,7 +30,7 @@ sbb %eax, %eax # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 1.00 imull %edx, %eax +# CHECK-NEXT: 1 3 1.00 imull %edx, %eax # CHECK-NEXT: 1 1 0.50 addl %edx, %edx # CHECK-NEXT: 1 1 1.00 sbbl %eax, %eax @@ -65,14 +65,14 @@ sbb %eax, %eax # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeER .. imull %edx, %eax -# CHECK-NEXT: [0,1] .DeE-R .. addl %edx, %edx -# CHECK-NEXT: [0,2] .D=eE-R .. sbbl %eax, %eax -# CHECK-NEXT: [1,0] . D==eeeER.. imull %edx, %eax -# CHECK-NEXT: [1,1] . DeE---R.. addl %edx, %edx -# CHECK-NEXT: [1,2] . D=eE---R. sbbl %eax, %eax -# CHECK-NEXT: [2,0] . D=eeeER. imull %edx, %eax -# CHECK-NEXT: [2,1] . D=eE--R addl %edx, %edx -# CHECK-NEXT: [2,2] . D==eE-R sbbl %eax, %eax +# CHECK-NEXT: [0,1] DeE--R .. addl %edx, %edx +# CHECK-NEXT: [0,2] .DeE--R .. sbbl %eax, %eax +# CHECK-NEXT: [1,0] .D==eeeER .. imull %edx, %eax +# CHECK-NEXT: [1,1] . DeE---R .. addl %edx, %edx +# CHECK-NEXT: [1,2] . D=eE---R.. sbbl %eax, %eax +# CHECK-NEXT: [2,0] . D==eeeER. imull %edx, %eax +# CHECK-NEXT: [2,1] . D=eE---R. addl %edx, %edx +# CHECK-NEXT: [2,2] . D=eE---R sbbl %eax, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -81,6 +81,6 @@ sbb %eax, %eax # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 2.0 0.7 0.0 imull %edx, %eax -# CHECK-NEXT: 1. 3 1.3 1.3 2.0 addl %edx, %edx -# CHECK-NEXT: 2. 3 2.3 0.0 1.7 sbbl %eax, %eax +# CHECK-NEXT: 0. 3 2.3 1.0 0.0 imull %edx, %eax +# CHECK-NEXT: 1. 3 1.3 1.0 2.7 addl %edx, %edx +# CHECK-NEXT: 2. 
3 1.7 0.0 2.7 sbbl %eax, %eax diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s index 657eb97..66e4526 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s @@ -8,10 +8,10 @@ add %ecx, %ebx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 11 -# CHECK-NEXT: Total uOps: 4 +# CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.36 +# CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 4.0 @@ -24,7 +24,7 @@ add %ecx, %ebx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 6 4.00 imulq %rax, %rbx +# CHECK-NEXT: 1 6 4.00 imulq %rax, %rbx # CHECK-NEXT: 1 1 0.50 lzcntw %ax, %bx # CHECK-NEXT: 1 1 0.50 addl %ecx, %ebx @@ -33,7 +33,7 @@ add %ecx, %ebx # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeeeER . imulq %rax, %rbx -# CHECK-NEXT: [0,1] .D=====eER. lzcntw %ax, %bx +# CHECK-NEXT: [0,1] D======eER. lzcntw %ax, %bx # CHECK-NEXT: [0,2] .D======eER addl %ecx, %ebx # CHECK: Average Wait times (based on the timeline view): @@ -44,5 +44,5 @@ add %ecx, %ebx # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 lzcntw %ax, %bx +# CHECK-NEXT: 1. 1 7.0 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 
1 7.0 0.0 0.0 addl %ecx, %ebx diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s index ad660a2..4c839d5 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s @@ -13,12 +13,12 @@ add %cx, %bx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 # CHECK-NEXT: Total Cycles: 7503 -# CHECK-NEXT: Total uOps: 6000 +# CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.80 +# CHECK-NEXT: uOps Per Cycle: 0.60 # CHECK-NEXT: IPC: 0.60 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -29,7 +29,7 @@ add %cx, %bx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 1.00 imulw %ax, %bx +# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx # CHECK-NEXT: 1 1 0.50 lzcntw %ax, %bx # CHECK-NEXT: 1 1 0.50 addw %cx, %bx @@ -64,14 +64,14 @@ add %cx, %bx # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeER . . . imulw %ax, %bx -# CHECK-NEXT: [0,1] .D==eER . . . lzcntw %ax, %bx +# CHECK-NEXT: [0,1] D===eER . . . lzcntw %ax, %bx # CHECK-NEXT: [0,2] .D===eER . . . addw %cx, %bx -# CHECK-NEXT: [1,0] . D===eeeER . . imulw %ax, %bx -# CHECK-NEXT: [1,1] . D=====eER . . lzcntw %ax, %bx -# CHECK-NEXT: [1,2] . D======eER . . addw %cx, %bx -# CHECK-NEXT: [2,0] . D======eeeER . imulw %ax, %bx -# CHECK-NEXT: [2,1] . D========eER. lzcntw %ax, %bx -# CHECK-NEXT: [2,2] . D=========eER addw %cx, %bx +# CHECK-NEXT: [1,0] .D====eeeER . . imulw %ax, %bx +# CHECK-NEXT: [1,1] . D======eER . . lzcntw %ax, %bx +# CHECK-NEXT: [1,2] . D=======eER . . addw %cx, %bx +# CHECK-NEXT: [2,0] . D=======eeeER . imulw %ax, %bx +# CHECK-NEXT: [2,1] . D==========eER. lzcntw %ax, %bx +# CHECK-NEXT: [2,2] . 
D==========eER addw %cx, %bx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -80,6 +80,6 @@ add %cx, %bx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 4.0 0.3 0.0 imulw %ax, %bx -# CHECK-NEXT: 1. 3 6.0 0.0 0.0 lzcntw %ax, %bx -# CHECK-NEXT: 2. 3 7.0 0.0 0.0 addw %cx, %bx +# CHECK-NEXT: 0. 3 4.7 0.3 0.0 imulw %ax, %bx +# CHECK-NEXT: 1. 3 7.3 0.0 0.0 lzcntw %ax, %bx +# CHECK-NEXT: 2. 3 7.7 0.0 0.0 addw %cx, %bx diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s index 9843a7c..1714dc7 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s @@ -13,11 +13,11 @@ lzcnt 2(%rsp), %cx # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 7504 -# CHECK-NEXT: Total uOps: 6000 +# CHECK-NEXT: Total Cycles: 7503 +# CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.80 +# CHECK-NEXT: uOps Per Cycle: 0.60 # CHECK-NEXT: IPC: 0.60 # CHECK-NEXT: Block RThroughput: 2.0 @@ -30,7 +30,7 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 1.00 imull %edx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx # CHECK-NEXT: 1 4 1.00 * lzcntw (%rsp), %cx # CHECK-NEXT: 1 4 1.00 * lzcntw 2(%rsp), %cx @@ -61,18 +61,18 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - lzcntw 2(%rsp), %cx # CHECK: Timeline view: -# CHECK-NEXT: 012345678 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . . . imull %edx, %ecx -# CHECK-NEXT: [0,1] .DeeeeER . . . lzcntw (%rsp), %cx -# CHECK-NEXT: [0,2] .D=eeeeER . . . lzcntw 2(%rsp), %cx -# CHECK-NEXT: [1,0] . D====eeeER . . imull %edx, %ecx -# CHECK-NEXT: [1,1] . D===eeeeER . . 
lzcntw (%rsp), %cx -# CHECK-NEXT: [1,2] . D====eeeeER . . lzcntw 2(%rsp), %cx -# CHECK-NEXT: [2,0] . D=======eeeER . imull %edx, %ecx -# CHECK-NEXT: [2,1] . D======eeeeER. lzcntw (%rsp), %cx -# CHECK-NEXT: [2,2] . D=======eeeeER lzcntw 2(%rsp), %cx +# CHECK: [0,0] DeeeER . . . imull %edx, %ecx +# CHECK-NEXT: [0,1] DeeeeER . . . lzcntw (%rsp), %cx +# CHECK-NEXT: [0,2] .DeeeeER . . . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %ecx +# CHECK-NEXT: [1,1] . D===eeeeER . . lzcntw (%rsp), %cx +# CHECK-NEXT: [1,2] . D====eeeeER . . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [2,0] . D=======eeeER . imull %edx, %ecx +# CHECK-NEXT: [2,1] . D=======eeeeER. lzcntw (%rsp), %cx +# CHECK-NEXT: [2,2] . D=======eeeeER lzcntw 2(%rsp), %cx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -82,5 +82,5 @@ lzcnt 2(%rsp), %cx # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 4.7 0.3 0.0 imull %edx, %ecx -# CHECK-NEXT: 1. 3 4.0 0.3 0.0 lzcntw (%rsp), %cx -# CHECK-NEXT: 2. 3 5.0 0.0 0.0 lzcntw 2(%rsp), %cx +# CHECK-NEXT: 1. 3 4.3 0.0 0.0 lzcntw (%rsp), %cx +# CHECK-NEXT: 2. 
3 4.7 0.0 0.0 lzcntw 2(%rsp), %cx diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s index 720a1ed..f3991dc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s @@ -10,12 +10,12 @@ cmpl $1025, %eax # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 # CHECK-NEXT: Total Cycles: 504 -# CHECK-NEXT: Total uOps: 600 +# CHECK-NEXT: Total uOps: 500 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.19 +# CHECK-NEXT: uOps Per Cycle: 0.99 # CHECK-NEXT: IPC: 0.99 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -29,7 +29,7 @@ cmpl $1025, %eax # CHECK-NEXT: 1 1 0.50 sete %r9b # CHECK-NEXT: 1 1 0.50 movzbl %al, %eax # CHECK-NEXT: 1 1 0.50 shll $2, %eax -# CHECK-NEXT: 2 3 1.00 imull %ecx, %eax +# CHECK-NEXT: 1 3 1.00 imull %ecx, %eax # CHECK-NEXT: 1 1 0.50 cmpl $1025, %eax # CHECK: Resources: @@ -67,28 +67,28 @@ cmpl $1025, %eax # CHECK: [0,0] DeER . . . . . . sete %r9b # CHECK-NEXT: [0,1] DeER . . . . . . movzbl %al, %eax # CHECK-NEXT: [0,2] .DeER. . . . . . shll $2, %eax -# CHECK-NEXT: [0,3] . DeeeER . . . . . imull %ecx, %eax -# CHECK-NEXT: [0,4] . D==eER . . . . . cmpl $1025, %eax -# CHECK-NEXT: [1,0] . D===eER. . . . . sete %r9b -# CHECK-NEXT: [1,1] . D=eE-R. . . . . movzbl %al, %eax -# CHECK-NEXT: [1,2] . D==eE-R . . . . shll $2, %eax -# CHECK-NEXT: [1,3] . D==eeeER . . . . imull %ecx, %eax -# CHECK-NEXT: [1,4] . .D====eER . . . . cmpl $1025, %eax -# CHECK-NEXT: [2,0] . .D=====eER. . . . sete %r9b -# CHECK-NEXT: [2,1] . . D===eE-R. . . . movzbl %al, %eax -# CHECK-NEXT: [2,2] . . D====eE-R . . . shll $2, %eax -# CHECK-NEXT: [2,3] . . D====eeeER . . . imull %ecx, %eax -# CHECK-NEXT: [2,4] . . D======eER . . . cmpl $1025, %eax -# CHECK-NEXT: [3,0] . . D=======eER. . . sete %r9b -# CHECK-NEXT: [3,1] . . D=====eE-R. . . 
movzbl %al, %eax -# CHECK-NEXT: [3,2] . . D======eE-R . . shll $2, %eax -# CHECK-NEXT: [3,3] . . .D======eeeER . . imull %ecx, %eax -# CHECK-NEXT: [3,4] . . . D========eER . . cmpl $1025, %eax -# CHECK-NEXT: [4,0] . . . D=========eER. . sete %r9b -# CHECK-NEXT: [4,1] . . . D=======eE-R. . movzbl %al, %eax -# CHECK-NEXT: [4,2] . . . D========eE-R . shll $2, %eax -# CHECK-NEXT: [4,3] . . . D========eeeER. imull %ecx, %eax -# CHECK-NEXT: [4,4] . . . D==========eER cmpl $1025, %eax +# CHECK-NEXT: [0,3] .D=eeeER . . . . . imull %ecx, %eax +# CHECK-NEXT: [0,4] . D===eER . . . . . cmpl $1025, %eax +# CHECK-NEXT: [1,0] . D====eER. . . . . sete %r9b +# CHECK-NEXT: [1,1] . D==eE-R. . . . . movzbl %al, %eax +# CHECK-NEXT: [1,2] . D===eE-R . . . . shll $2, %eax +# CHECK-NEXT: [1,3] . D===eeeER . . . . imull %ecx, %eax +# CHECK-NEXT: [1,4] . D======eER . . . . cmpl $1025, %eax +# CHECK-NEXT: [2,0] . D======eER. . . . sete %r9b +# CHECK-NEXT: [2,1] . D=====eE-R. . . . movzbl %al, %eax +# CHECK-NEXT: [2,2] . .D=====eE-R . . . shll $2, %eax +# CHECK-NEXT: [2,3] . .D======eeeER . . . imull %ecx, %eax +# CHECK-NEXT: [2,4] . . D========eER . . . cmpl $1025, %eax +# CHECK-NEXT: [3,0] . . D=========eER. . . sete %r9b +# CHECK-NEXT: [3,1] . . D=======eE-R. . . movzbl %al, %eax +# CHECK-NEXT: [3,2] . . D========eE-R . . shll $2, %eax +# CHECK-NEXT: [3,3] . . D========eeeER . . imull %ecx, %eax +# CHECK-NEXT: [3,4] . . D===========eER . . cmpl $1025, %eax +# CHECK-NEXT: [4,0] . . D===========eER. . sete %r9b +# CHECK-NEXT: [4,1] . . D==========eE-R. . movzbl %al, %eax +# CHECK-NEXT: [4,2] . . .D==========eE-R . shll $2, %eax +# CHECK-NEXT: [4,3] . . .D===========eeeER. imull %ecx, %eax +# CHECK-NEXT: [4,4] . . . D=============eER cmpl $1025, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -97,8 +97,8 @@ cmpl $1025, %eax # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 
5 5.8 0.2 0.0 sete %r9b -# CHECK-NEXT: 1. 5 4.2 0.2 0.8 movzbl %al, %eax -# CHECK-NEXT: 2. 5 5.0 0.0 0.8 shll $2, %eax -# CHECK-NEXT: 3. 5 5.0 0.0 0.0 imull %ecx, %eax -# CHECK-NEXT: 4. 5 7.0 0.0 0.0 cmpl $1025, %eax +# CHECK-NEXT: 0. 5 7.0 0.2 0.0 sete %r9b +# CHECK-NEXT: 1. 5 5.8 0.2 0.8 movzbl %al, %eax +# CHECK-NEXT: 2. 5 6.2 0.0 0.8 shll $2, %eax +# CHECK-NEXT: 3. 5 6.8 0.0 0.0 imull %ecx, %eax +# CHECK-NEXT: 4. 5 9.2 0.0 0.0 cmpl $1025, %eax diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s index 6bc7e75..c7ec67d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s @@ -8,12 +8,12 @@ add %ecx, %ebx # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 # CHECK-NEXT: Total Cycles: 8 -# CHECK-NEXT: Total uOps: 4 +# CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: uOps Per Cycle: 0.38 # CHECK-NEXT: IPC: 0.38 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -24,7 +24,7 @@ add %ecx, %ebx # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 1.00 imulw %ax, %cx +# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx # CHECK-NEXT: 1 1 0.50 addb %al, %cl # CHECK-NEXT: 1 1 0.50 addl %ecx, %ebx @@ -32,7 +32,7 @@ add %ecx, %ebx # CHECK-NEXT: Index 01234567 # CHECK: [0,0] DeeeER . imulw %ax, %cx -# CHECK-NEXT: [0,1] .D==eER. addb %al, %cl +# CHECK-NEXT: [0,1] D===eER. addb %al, %cl # CHECK-NEXT: [0,2] .D===eER addl %ecx, %ebx # CHECK: Average Wait times (based on the timeline view): @@ -43,5 +43,5 @@ add %ecx, %ebx # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 addb %al, %cl +# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl # CHECK-NEXT: 2. 
1 4.0 0.0 0.0 addl %ecx, %ebx diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s index 99ef892..5e199a1 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s @@ -9,13 +9,13 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Total Cycles: 11 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.40 -# CHECK-NEXT: IPC: 0.20 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: uOps Per Cycle: 0.36 +# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -26,14 +26,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 1.00 imull %esi -# CHECK-NEXT: 2 6 1.00 * imull (%rdi) +# CHECK-NEXT: 2 3 2.00 imull %esi +# CHECK-NEXT: 2 6 2.00 * imull (%rdi) # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . imull %esi -# CHECK-NEXT: [0,1] .DeeeeeeER imull (%rdi) +# CHECK: [0,0] DeeeER . imull %esi +# CHECK-NEXT: [0,1] .D=eeeeeeER imull (%rdi) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -43,4 +44,4 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi -# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi) +# CHECK-NEXT: 1. 
1 2.0 2.0 0.0 imull (%rdi) diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s index e64af22..9e20d13 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s @@ -1334,32 +1334,32 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 28 25.00 * U idivl (%rax) # CHECK-NEXT: 2 41 41.00 U idivq %rcx # CHECK-NEXT: 2 44 41.00 * U idivq (%rax) -# CHECK-NEXT: 2 3 1.00 imulb %dil -# CHECK-NEXT: 2 6 1.00 * imulb (%rax) -# CHECK-NEXT: 2 3 1.00 imulw %di -# CHECK-NEXT: 2 6 1.00 * imulw (%rax) -# CHECK-NEXT: 2 3 1.00 imulw %si, %di -# CHECK-NEXT: 2 6 1.00 * imulw (%rax), %di -# CHECK-NEXT: 2 3 1.00 imulw $511, %si, %di -# CHECK-NEXT: 2 6 1.00 * imulw $511, (%rax), %di -# CHECK-NEXT: 2 3 1.00 imulw $7, %si, %di -# CHECK-NEXT: 2 6 1.00 * imulw $7, (%rax), %di -# CHECK-NEXT: 2 3 1.00 imull %edi -# CHECK-NEXT: 2 6 1.00 * imull (%rax) -# CHECK-NEXT: 2 3 1.00 imull %esi, %edi -# CHECK-NEXT: 2 6 1.00 * imull (%rax), %edi -# CHECK-NEXT: 2 3 1.00 imull $665536, %esi, %edi -# CHECK-NEXT: 2 6 1.00 * imull $665536, (%rax), %edi -# CHECK-NEXT: 2 3 1.00 imull $7, %esi, %edi -# CHECK-NEXT: 2 6 1.00 * imull $7, (%rax), %edi +# CHECK-NEXT: 1 3 1.00 imulb %dil +# CHECK-NEXT: 1 6 1.00 * imulb (%rax) +# CHECK-NEXT: 3 3 3.00 imulw %di +# CHECK-NEXT: 3 6 3.00 * imulw (%rax) +# CHECK-NEXT: 1 3 1.00 imulw %si, %di +# CHECK-NEXT: 1 6 1.00 * imulw (%rax), %di +# CHECK-NEXT: 2 4 2.00 imulw $511, %si, %di +# CHECK-NEXT: 2 7 2.00 * imulw $511, (%rax), %di +# CHECK-NEXT: 2 4 2.00 imulw $7, %si, %di +# CHECK-NEXT: 2 7 2.00 * imulw $7, (%rax), %di +# CHECK-NEXT: 2 3 2.00 imull %edi +# CHECK-NEXT: 2 6 2.00 * imull (%rax) +# CHECK-NEXT: 1 3 1.00 imull %esi, %edi +# CHECK-NEXT: 1 6 1.00 * imull (%rax), %edi +# CHECK-NEXT: 1 3 1.00 imull $665536, %esi, %edi +# CHECK-NEXT: 1 6 1.00 * imull $665536, (%rax), %edi +# CHECK-NEXT: 1 3 1.00 imull $7, %esi, %edi +# CHECK-NEXT: 1 6 1.00 * imull 
$7, (%rax), %edi # CHECK-NEXT: 2 6 4.00 imulq %rdi # CHECK-NEXT: 2 9 4.00 * imulq (%rax) -# CHECK-NEXT: 2 6 4.00 imulq %rsi, %rdi -# CHECK-NEXT: 2 9 4.00 * imulq (%rax), %rdi -# CHECK-NEXT: 2 6 4.00 imulq $665536, %rsi, %rdi -# CHECK-NEXT: 2 9 4.00 * imulq $665536, (%rax), %rdi -# CHECK-NEXT: 2 6 4.00 imulq $7, %rsi, %rdi -# CHECK-NEXT: 2 9 4.00 * imulq $7, (%rax), %rdi +# CHECK-NEXT: 1 6 4.00 imulq %rsi, %rdi +# CHECK-NEXT: 1 9 4.00 * imulq (%rax), %rdi +# CHECK-NEXT: 1 6 4.00 imulq $665536, %rsi, %rdi +# CHECK-NEXT: 1 9 4.00 * imulq $665536, (%rax), %rdi +# CHECK-NEXT: 1 6 4.00 imulq $7, %rsi, %rdi +# CHECK-NEXT: 1 9 4.00 * imulq $7, (%rax), %rdi # CHECK-NEXT: 1 100 0.50 U inb $7, %al # CHECK-NEXT: 1 100 0.50 U inb %dx, %al # CHECK-NEXT: 1 100 0.50 U inw $7, %ax @@ -1416,12 +1416,12 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 4 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movslq %eax, %rdi # CHECK-NEXT: 1 4 1.00 * movslq (%rax), %rdi -# CHECK-NEXT: 2 3 1.00 mulb %dil -# CHECK-NEXT: 2 6 1.00 * mulb (%rax) -# CHECK-NEXT: 2 3 1.00 mulw %si -# CHECK-NEXT: 2 6 1.00 * mulw (%rax) -# CHECK-NEXT: 2 3 1.00 mull %edx -# CHECK-NEXT: 2 6 1.00 * mull (%rax) +# CHECK-NEXT: 1 3 1.00 mulb %dil +# CHECK-NEXT: 1 6 1.00 * mulb (%rax) +# CHECK-NEXT: 3 3 3.00 mulw %si +# CHECK-NEXT: 3 6 3.00 * mulw (%rax) +# CHECK-NEXT: 2 3 2.00 mull %edx +# CHECK-NEXT: 2 6 2.00 * mull (%rax) # CHECK-NEXT: 2 6 4.00 mulq %rcx # CHECK-NEXT: 2 9 4.00 * mulq (%rax) # CHECK-NEXT: 1 1 0.50 negb %dil @@ -1959,7 +1959,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 64.00 893.00 - - - - +# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 80.00 893.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -2269,16 +2269,16 @@ xorq (%rax), %rdi # CHECK-NEXT: - 1.00 41.00 - - - - 1.00 - - - 
- - - idivq (%rax) # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulb %dil # CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulb (%rax) -# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw %di -# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw (%rax) +# CHECK-NEXT: - 1.00 - - - - - - 3.00 - - - - - imulw %di +# CHECK-NEXT: - 1.00 - - - - - 1.00 3.00 - - - - - imulw (%rax) # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw %si, %di # CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw (%rax), %di -# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw $511, %si, %di -# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw $511, (%rax), %di -# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw $7, %si, %di -# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw $7, (%rax), %di -# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %edi -# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imull (%rax) +# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - imulw $511, %si, %di +# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - imulw $511, (%rax), %di +# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - imulw $7, %si, %di +# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - imulw $7, (%rax), %di +# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - imull %edi +# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - imull (%rax) # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %edi # CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imull (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull $665536, %esi, %edi @@ -2351,10 +2351,10 @@ xorq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mulb %dil # CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mulb (%rax) -# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mulw %si -# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mulw (%rax) -# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mull %edx -# CHECK-NEXT: - 1.00 - - - 
- - 1.00 1.00 - - - - - mull (%rax) +# CHECK-NEXT: - 1.00 - - - - - - 3.00 - - - - - mulw %si +# CHECK-NEXT: - 1.00 - - - - - 1.00 3.00 - - - - - mulw (%rax) +# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - mull %edx +# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - mull (%rax) # CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - mulq %rcx # CHECK-NEXT: - 1.00 - - - - - 1.00 4.00 - - - - - mulq (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - negb %dil diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s index f3671dd..a3bb2cd 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s @@ -22,12 +22,12 @@ imul %ecx, %ecx # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 10 # CHECK-NEXT: Total Cycles: 27 -# CHECK-NEXT: Total uOps: 20 +# CHECK-NEXT: Total uOps: 16 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.74 +# CHECK-NEXT: uOps Per Cycle: 0.59 # CHECK-NEXT: IPC: 0.37 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -41,8 +41,8 @@ imul %ecx, %ecx # CHECK-NEXT: 4 11 1.50 * * xaddl %ecx, (%rsp) # CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx # CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx -# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx -# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 @@ -80,12 +80,12 @@ imul %ecx, %ecx # CHECK-NEXT: [0,1] . D=eE-------R . . .. addl %ecx, %ecx # CHECK-NEXT: [0,2] . D==eE-------R. . .. addl %ecx, %ecx # CHECK-NEXT: [0,3] . D==eeeE----R. . .. imull %ecx, %ecx -# CHECK-NEXT: [0,4] . D====eeeE--R . .. imull %ecx, %ecx -# CHECK-NEXT: [1,0] . D======eeeeeeeeeeeER.. xaddl %ecx, (%rsp) -# CHECK-NEXT: [1,1] . . D=======eE-------R.. addl %ecx, %ecx -# CHECK-NEXT: [1,2] . . D========eE-------R. addl %ecx, %ecx -# CHECK-NEXT: [1,3] . . D========eeeE----R. 
imull %ecx, %ecx -# CHECK-NEXT: [1,4] . . D==========eeeE--R imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D=====eeeE--R . .. imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D=======eeeeeeeeeeeER.. xaddl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . .D========eE-------R.. addl %ecx, %ecx +# CHECK-NEXT: [1,2] . .D=========eE-------R. addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D=========eeeE----R. imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D============eeeE--R imull %ecx, %ecx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -94,21 +94,21 @@ imul %ecx, %ecx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 4.0 0.5 0.0 xaddl %ecx, (%rsp) -# CHECK-NEXT: 1. 2 5.0 0.0 7.0 addl %ecx, %ecx -# CHECK-NEXT: 2. 2 6.0 0.0 7.0 addl %ecx, %ecx -# CHECK-NEXT: 3. 2 6.0 0.0 4.0 imull %ecx, %ecx -# CHECK-NEXT: 4. 2 8.0 0.0 2.0 imull %ecx, %ecx +# CHECK-NEXT: 0. 2 4.5 0.5 0.0 xaddl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 5.5 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 6.5 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 6.5 0.0 4.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 2 9.5 0.0 2.0 imull %ecx, %ecx # CHECK: [1] Code Region # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 10 # CHECK-NEXT: Total Cycles: 38 -# CHECK-NEXT: Total uOps: 20 +# CHECK-NEXT: Total uOps: 16 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.53 +# CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.26 # CHECK-NEXT: Block RThroughput: 16.0 @@ -124,8 +124,8 @@ imul %ecx, %ecx # CHECK-NEXT: 4 16 16.00 * * lock xaddl %ecx, (%rsp) # CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx # CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx -# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx -# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 @@ -163,12 +163,12 @@ imul %ecx, %ecx # CHECK-NEXT: [0,1] . D=========eE----R . . . . . addl %ecx, %ecx # CHECK-NEXT: [0,2] . 
D==========eE----R. . . . . addl %ecx, %ecx # CHECK-NEXT: [0,3] . D==========eeeE-R. . . . . imull %ecx, %ecx -# CHECK-NEXT: [0,4] . D============eeeER . . . . imull %ecx, %ecx -# CHECK-NEXT: [1,0] . D===========eeeeeeeeeeeeeeeeER. . lock xaddl %ecx, (%rsp) -# CHECK-NEXT: [1,1] . . D====================eE----R. . addl %ecx, %ecx -# CHECK-NEXT: [1,2] . . D=====================eE----R . addl %ecx, %ecx -# CHECK-NEXT: [1,3] . . D=====================eeeE-R . imull %ecx, %ecx -# CHECK-NEXT: [1,4] . . D=======================eeeER imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D=============eeeER . . . . imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D============eeeeeeeeeeeeeeeeER. . lock xaddl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . .D=====================eE----R. . addl %ecx, %ecx +# CHECK-NEXT: [1,2] . .D======================eE----R . addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D======================eeeE-R . imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D=========================eeeER imull %ecx, %ecx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -177,8 +177,8 @@ imul %ecx, %ecx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 6.5 0.5 0.0 lock xaddl %ecx, (%rsp) -# CHECK-NEXT: 1. 2 15.5 0.0 4.0 addl %ecx, %ecx -# CHECK-NEXT: 2. 2 16.5 0.0 4.0 addl %ecx, %ecx -# CHECK-NEXT: 3. 2 16.5 0.0 1.0 imull %ecx, %ecx -# CHECK-NEXT: 4. 2 18.5 0.0 0.0 imull %ecx, %ecx +# CHECK-NEXT: 0. 2 7.0 0.5 0.0 lock xaddl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 16.0 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 17.0 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 17.0 0.0 1.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 
2 20.0 0.0 0.0 imull %ecx, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s b/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s index d6cfb04..22eddda 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s @@ -10,10 +10,10 @@ imul %ecx, %ecx # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 10 # CHECK-NEXT: Total Cycles: 38 -# CHECK-NEXT: Total uOps: 18 +# CHECK-NEXT: Total uOps: 14 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.47 +# CHECK-NEXT: uOps Per Cycle: 0.37 # CHECK-NEXT: IPC: 0.26 # CHECK-NEXT: Block RThroughput: 16.0 @@ -29,8 +29,8 @@ imul %ecx, %ecx # CHECK-NEXT: 3 16 16.00 * * xchgl %ecx, (%rsp) # CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx # CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx -# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx -# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx +# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 @@ -67,13 +67,13 @@ imul %ecx, %ecx # CHECK: [0,0] DeeeeeeeeeeeeeeeeER . . . . . xchgl %ecx, (%rsp) # CHECK-NEXT: [0,1] .D==========eE----R . . . . . addl %ecx, %ecx # CHECK-NEXT: [0,2] . D==========eE----R. . . . . addl %ecx, %ecx -# CHECK-NEXT: [0,3] . D==========eeeE-R. . . . . imull %ecx, %ecx -# CHECK-NEXT: [0,4] . D============eeeER . . . . imull %ecx, %ecx -# CHECK-NEXT: [1,0] . D===========eeeeeeeeeeeeeeeeER. . xchgl %ecx, (%rsp) -# CHECK-NEXT: [1,1] . .D=====================eE----R. . addl %ecx, %ecx -# CHECK-NEXT: [1,2] . . D=====================eE----R . addl %ecx, %ecx -# CHECK-NEXT: [1,3] . . D=====================eeeE-R . imull %ecx, %ecx -# CHECK-NEXT: [1,4] . . D=======================eeeER imull %ecx, %ecx +# CHECK-NEXT: [0,3] . D===========eeeE-R. . . . . imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D=============eeeER . . . . imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D============eeeeeeeeeeeeeeeeER. . xchgl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . D======================eE----R. . 
addl %ecx, %ecx +# CHECK-NEXT: [1,2] . .D======================eE----R . addl %ecx, %ecx +# CHECK-NEXT: [1,3] . .D=======================eeeE-R . imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D=========================eeeER imull %ecx, %ecx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -82,8 +82,8 @@ imul %ecx, %ecx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 6.5 0.5 0.0 xchgl %ecx, (%rsp) -# CHECK-NEXT: 1. 2 16.5 0.0 4.0 addl %ecx, %ecx -# CHECK-NEXT: 2. 2 16.5 0.0 4.0 addl %ecx, %ecx -# CHECK-NEXT: 3. 2 16.5 0.0 1.0 imull %ecx, %ecx -# CHECK-NEXT: 4. 2 18.5 0.0 0.0 imull %ecx, %ecx +# CHECK-NEXT: 0. 2 7.0 0.5 0.0 xchgl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 17.0 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 17.0 0.0 4.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 18.0 0.0 1.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 2 20.0 0.0 0.0 imull %ecx, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/intel-syntax.s b/llvm/test/tools/llvm-mca/X86/intel-syntax.s index 786d06b..dc6a585 100644 --- a/llvm/test/tools/llvm-mca/X86/intel-syntax.s +++ b/llvm/test/tools/llvm-mca/X86/intel-syntax.s @@ -11,13 +11,13 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 400 -# ALL-NEXT: Total Cycles: 305 -# ALL-NEXT: Total uOps: 500 +# ALL-NEXT: Total Cycles: 306 +# ALL-NEXT: Total uOps: 400 # ALL: Dispatch Width: 2 -# ALL-NEXT: uOps Per Cycle: 1.64 +# ALL-NEXT: uOps Per Cycle: 1.31 # ALL-NEXT: IPC: 1.31 -# ALL-NEXT: Block RThroughput: 2.5 +# ALL-NEXT: Block RThroughput: 2.0 # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps @@ -31,10 +31,10 @@ # ATT-NEXT: 1 1 0.50 movl $1, %eax # ATT-NEXT: 1 1 0.50 movl $255, %ebx -# ATT-NEXT: 2 3 1.00 imull %edi, %esi +# ATT-NEXT: 1 3 1.00 imull %edi, %esi # ATT-NEXT: 1 1 0.50 leal (%rsi,%rdi), %eax # INTEL-NEXT: 1 1 0.50 mov eax, 1 # INTEL-NEXT: 1 1 0.50 mov ebx, 255 -# INTEL-NEXT: 2 3 1.00 imul esi, edi +# INTEL-NEXT: 1 3 1.00 imul esi, edi # INTEL-NEXT: 1 1 0.50 lea 
eax, [rsi + rdi] diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-10.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-10.s index 8420f03b..ea5bfcc 100644 --- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-10.s +++ b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-10.s @@ -15,13 +15,13 @@ testloop: # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 205 -# CHECK-NEXT: Total uOps: 300 +# CHECK-NEXT: Total Cycles: 106 +# CHECK-NEXT: Total uOps: 200 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.46 -# CHECK-NEXT: IPC: 0.98 -# CHECK-NEXT: Block RThroughput: 1.5 +# CHECK-NEXT: uOps Per Cycle: 1.89 +# CHECK-NEXT: IPC: 1.89 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -33,7 +33,7 @@ testloop: # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax -# CHECK-NEXT: 2 3 1.00 imull %esi, %eax +# CHECK-NEXT: 1 3 1.00 imull %esi, %eax # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 @@ -64,13 +64,13 @@ testloop: # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 204 -# CHECK-NEXT: Total uOps: 300 +# CHECK-NEXT: Total Cycles: 105 +# CHECK-NEXT: Total uOps: 200 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.47 -# CHECK-NEXT: IPC: 0.98 -# CHECK-NEXT: Block RThroughput: 1.5 +# CHECK-NEXT: uOps Per Cycle: 1.90 +# CHECK-NEXT: IPC: 1.90 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -81,7 +81,7 @@ testloop: # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 1.00 imull %esi, %eax +# CHECK-NEXT: 1 3 1.00 imull %esi, %eax # CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax # CHECK: Resources: diff --git a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-9.s b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-9.s index 37b51e3..8806042 100644 --- a/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-9.s +++ 
b/llvm/test/tools/llvm-mca/X86/llvm-mca-markers-9.s @@ -15,13 +15,13 @@ testloop: # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 300 -# CHECK-NEXT: Total Cycles: 205 -# CHECK-NEXT: Total uOps: 400 +# CHECK-NEXT: Total Cycles: 156 +# CHECK-NEXT: Total uOps: 300 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.95 -# CHECK-NEXT: IPC: 1.46 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: uOps Per Cycle: 1.92 +# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -33,7 +33,7 @@ testloop: # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax -# CHECK-NEXT: 2 3 1.00 imull %esi, %eax +# CHECK-NEXT: 1 3 1.00 imull %esi, %eax # CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax # CHECK: Resources: @@ -54,23 +54,23 @@ testloop: # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 1.00 2.00 - - - - - - 1.00 - - - - - +# CHECK-NEXT: 1.49 1.51 - - - - - - 1.00 - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: -# CHECK-NEXT: - 1.00 - - - - - - - - - - - - leal 42(%rdi), %eax +# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - leal 42(%rdi), %eax # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %eax -# CHECK-NEXT: 1.00 - - - - - - - - - - - - - leal 42(%rdi), %eax +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - leal 42(%rdi), %eax # CHECK: [1] Code Region - inner # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 # CHECK-NEXT: Total Cycles: 303 -# CHECK-NEXT: Total uOps: 200 +# CHECK-NEXT: Total uOps: 100 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.66 +# CHECK-NEXT: uOps Per Cycle: 0.33 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 1.0 @@ -83,7 +83,7 @@ testloop: # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 3 
1.00 imull %esi, %eax +# CHECK-NEXT: 1 3 1.00 imull %esi, %eax # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 -- 2.7.4