[NFC][MCA] ZnVer1: add partial-reg-update tests
author Roman Lebedev <lebedev.ri@gmail.com>
Mon, 23 Jul 2018 10:10:04 +0000 (10:10 +0000)
committer Roman Lebedev <lebedev.ri@gmail.com>
Mon, 23 Jul 2018 10:10:04 +0000 (10:10 +0000)
Reviewers: andreadb, courbet, RKSimon, craig.topper, GGanesh

Reviewed By: GGanesh

Subscribers: gbedwell, llvm-commits

Differential Revision: https://reviews.llvm.org/D49392

llvm-svn: 337675

llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s [new file with mode: 0644]
llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s [new file with mode: 0644]
llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s [new file with mode: 0644]
llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s [new file with mode: 0644]
llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s [new file with mode: 0644]
llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s [new file with mode: 0644]
llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s [new file with mode: 0644]

diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s
new file mode 100644 (file)
index 0000000..c9998e7
--- /dev/null
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+imul   %rax, %rbx
+lzcnt  %ax,  %bx
+add    %ecx, %ebx
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      3
+# CHECK-NEXT: Total Cycles:      8
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               0.38
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      4     1.00                        imulq  %rax, %rbx
+# CHECK-NEXT:  1      2     0.25                        lzcntw %ax, %bx
+# CHECK-NEXT:  1      1     0.25                        addl   %ecx, %ebx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     01234567
+
+# CHECK:      [0,0]     DeeeeER.   imulq       %rax, %rbx
+# CHECK-NEXT: [0,1]     DeeE--R.   lzcntw      %ax, %bx
+# CHECK-NEXT: [0,2]     D====eER   addl        %ecx, %ebx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq       %rax, %rbx
+# CHECK-NEXT: 1.     1     1.0    1.0    2.0       lzcntw      %ax, %bx
+# CHECK-NEXT: 2.     1     5.0    0.0    0.0       addl        %ecx, %ebx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s
new file mode 100644 (file)
index 0000000..0e49de7
--- /dev/null
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=6 < %s | FileCheck %s
+
+# The mov only partially updates %dx, which can create a false dependency on
+# the add. In the timeline below, though, the mov never waits on the add.
+
+add %cx, %dx
+mov %ax, %dx
+xor %bx, %dx
+
+# CHECK:      Iterations:        1500
+# CHECK-NEXT: Instructions:      4500
+# CHECK-NEXT: Total Cycles:      1129
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               3.99
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        addw   %cx, %dx
+# CHECK-NEXT:  1      1     0.25                        movw   %ax, %dx
+# CHECK-NEXT:  1      1     0.25                        xorw   %bx, %dx
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - ZnAGU0
+# CHECK-NEXT: [1]   - ZnAGU1
+# CHECK-NEXT: [2]   - ZnALU0
+# CHECK-NEXT: [3]   - ZnALU1
+# CHECK-NEXT: [4]   - ZnALU2
+# CHECK-NEXT: [5]   - ZnALU3
+# CHECK-NEXT: [6]   - ZnDivider
+# CHECK-NEXT: [7]   - ZnFPU0
+# CHECK-NEXT: [8]   - ZnFPU1
+# CHECK-NEXT: [9]   - ZnFPU2
+# CHECK-NEXT: [10]  - ZnFPU3
+# CHECK-NEXT: [11]  - ZnMultiplier
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
+# CHECK-NEXT:  -      -     0.75   0.75   0.75   0.75    -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
+# CHECK-NEXT:  -      -      -     0.25   0.75    -      -      -      -      -      -      -     addw %cx, %dx
+# CHECK-NEXT:  -      -     0.25    -      -     0.75    -      -      -      -      -      -     movw %ax, %dx
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -      -      -     xorw %bx, %dx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeER .  .   addw       %cx, %dx
+# CHECK-NEXT: [0,1]     DeER .  .   movw       %ax, %dx
+# CHECK-NEXT: [0,2]     D=eER.  .   xorw       %bx, %dx
+# CHECK-NEXT: [1,0]     D==eER  .   addw       %cx, %dx
+# CHECK-NEXT: [1,1]     .DeE-R  .   movw       %ax, %dx
+# CHECK-NEXT: [1,2]     .D=eER  .   xorw       %bx, %dx
+# CHECK-NEXT: [2,0]     .D==eER .   addw       %cx, %dx
+# CHECK-NEXT: [2,1]     .DeE--R .   movw       %ax, %dx
+# CHECK-NEXT: [2,2]     . DeE-R .   xorw       %bx, %dx
+# CHECK-NEXT: [3,0]     . D=eER .   addw       %cx, %dx
+# CHECK-NEXT: [3,1]     . DeE-R .   movw       %ax, %dx
+# CHECK-NEXT: [3,2]     . D=eER .   xorw       %bx, %dx
+# CHECK-NEXT: [4,0]     .  D=eER.   addw       %cx, %dx
+# CHECK-NEXT: [4,1]     .  DeE-R.   movw       %ax, %dx
+# CHECK-NEXT: [4,2]     .  D=eER.   xorw       %bx, %dx
+# CHECK-NEXT: [5,0]     .  D==eER   addw       %cx, %dx
+# CHECK-NEXT: [5,1]     .   DeE-R   movw       %ax, %dx
+# CHECK-NEXT: [5,2]     .   D=eER   xorw       %bx, %dx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     6     2.3    0.2    0.0       addw        %cx, %dx
+# CHECK-NEXT: 1.     6     1.0    1.0    1.0       movw        %ax, %dx
+# CHECK-NEXT: 2.     6     1.8    0.0    0.2       xorw        %bx, %dx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s
new file mode 100644 (file)
index 0000000..94e066b
--- /dev/null
@@ -0,0 +1,90 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=7 < %s | FileCheck %s
+
+# The lzcnt only partially updates %bx, which can create a false dependency on
+# the imul. In the timeline below, though, the lzcnt never waits on the imul.
+
+imul %ax, %bx
+lzcnt %ax, %bx
+add %cx, %bx
+
+# CHECK:      Iterations:        1500
+# CHECK-NEXT: Instructions:      4500
+# CHECK-NEXT: Total Cycles:      1507
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               2.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     1.00                        imulw  %ax, %bx
+# CHECK-NEXT:  1      2     0.25                        lzcntw %ax, %bx
+# CHECK-NEXT:  1      1     0.25                        addw   %cx, %bx
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - ZnAGU0
+# CHECK-NEXT: [1]   - ZnAGU1
+# CHECK-NEXT: [2]   - ZnALU0
+# CHECK-NEXT: [3]   - ZnALU1
+# CHECK-NEXT: [4]   - ZnALU2
+# CHECK-NEXT: [5]   - ZnALU3
+# CHECK-NEXT: [6]   - ZnDivider
+# CHECK-NEXT: [7]   - ZnFPU0
+# CHECK-NEXT: [8]   - ZnFPU1
+# CHECK-NEXT: [9]   - ZnFPU2
+# CHECK-NEXT: [10]  - ZnFPU3
+# CHECK-NEXT: [11]  - ZnMultiplier
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
+# CHECK-NEXT:  -      -     0.67   1.00   0.67   0.67    -      -      -      -      -     1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -     1.00   imulw        %ax, %bx
+# CHECK-NEXT:  -      -     0.33    -     0.33   0.33    -      -      -      -      -      -     lzcntw       %ax, %bx
+# CHECK-NEXT:  -      -     0.33    -     0.33   0.33    -      -      -      -      -      -     addw %cx, %bx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeER    .  .   imulw %ax, %bx
+# CHECK-NEXT: [0,1]     DeeE-R    .  .   lzcntw        %ax, %bx
+# CHECK-NEXT: [0,2]     D==eER    .  .   addw  %cx, %bx
+# CHECK-NEXT: [1,0]     D===eeeER .  .   imulw %ax, %bx
+# CHECK-NEXT: [1,1]     .DeeE---R .  .   lzcntw        %ax, %bx
+# CHECK-NEXT: [1,2]     .D==eE--R .  .   addw  %cx, %bx
+# CHECK-NEXT: [2,0]     .D===eeeER.  .   imulw %ax, %bx
+# CHECK-NEXT: [2,1]     .DeeE----R.  .   lzcntw        %ax, %bx
+# CHECK-NEXT: [2,2]     . D=eE---R.  .   addw  %cx, %bx
+# CHECK-NEXT: [3,0]     . D===eeeER  .   imulw %ax, %bx
+# CHECK-NEXT: [3,1]     . DeeE----R  .   lzcntw        %ax, %bx
+# CHECK-NEXT: [3,2]     . D==eE---R  .   addw  %cx, %bx
+# CHECK-NEXT: [4,0]     .  D===eeeER .   imulw %ax, %bx
+# CHECK-NEXT: [4,1]     .  DeeE----R .   lzcntw        %ax, %bx
+# CHECK-NEXT: [4,2]     .  D==eE---R .   addw  %cx, %bx
+# CHECK-NEXT: [5,0]     .  D====eeeER.   imulw %ax, %bx
+# CHECK-NEXT: [5,1]     .   DeeE----R.   lzcntw        %ax, %bx
+# CHECK-NEXT: [5,2]     .   D==eE---R.   addw  %cx, %bx
+# CHECK-NEXT: [6,0]     .   D====eeeER   imulw %ax, %bx
+# CHECK-NEXT: [6,1]     .   DeeE-----R   lzcntw        %ax, %bx
+# CHECK-NEXT: [6,2]     .    D=eE----R   addw  %cx, %bx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     7     3.9    0.7    0.0       imulw       %ax, %bx
+# CHECK-NEXT: 1.     7     1.0    1.0    3.6       lzcntw      %ax, %bx
+# CHECK-NEXT: 2.     7     2.7    0.0    2.6       addw        %cx, %bx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-5.s
new file mode 100644 (file)
index 0000000..58c0819
--- /dev/null
@@ -0,0 +1,65 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=8 < %s | FileCheck %s
+
+lzcnt %ax, %bx  ## partial register stall.
+
+# CHECK:      Iterations:        1500
+# CHECK-NEXT: Instructions:      1500
+# CHECK-NEXT: Total Cycles:      379
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               3.96
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      2     0.25                        lzcntw %ax, %bx
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - ZnAGU0
+# CHECK-NEXT: [1]   - ZnAGU1
+# CHECK-NEXT: [2]   - ZnALU0
+# CHECK-NEXT: [3]   - ZnALU1
+# CHECK-NEXT: [4]   - ZnALU2
+# CHECK-NEXT: [5]   - ZnALU3
+# CHECK-NEXT: [6]   - ZnDivider
+# CHECK-NEXT: [7]   - ZnFPU0
+# CHECK-NEXT: [8]   - ZnFPU1
+# CHECK-NEXT: [9]   - ZnFPU2
+# CHECK-NEXT: [10]  - ZnFPU3
+# CHECK-NEXT: [11]  - ZnMultiplier
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
+# CHECK-NEXT:  -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
+# CHECK-NEXT:  -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -     lzcntw       %ax, %bx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345
+
+# CHECK:      [0,0]     DeeER.   lzcntw        %ax, %bx
+# CHECK-NEXT: [1,0]     DeeER.   lzcntw        %ax, %bx
+# CHECK-NEXT: [2,0]     DeeER.   lzcntw        %ax, %bx
+# CHECK-NEXT: [3,0]     DeeER.   lzcntw        %ax, %bx
+# CHECK-NEXT: [4,0]     .DeeER   lzcntw        %ax, %bx
+# CHECK-NEXT: [5,0]     .DeeER   lzcntw        %ax, %bx
+# CHECK-NEXT: [6,0]     .DeeER   lzcntw        %ax, %bx
+# CHECK-NEXT: [7,0]     .DeeER   lzcntw        %ax, %bx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     8     1.0    1.0    0.0       lzcntw      %ax, %bx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s
new file mode 100644 (file)
index 0000000..dd620b2
--- /dev/null
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=4 < %s | FileCheck %s
+
+# Each lzcnt only partially updates %ecx, which can create a false dependency:
+# for the first lzcnt on the imul, for the last lzcnt on %cx. In both cases the
+# folded load can start immediately, and the timeline below shows no wait ('=')
+# cycles for either lzcnt.
+
+imul %edx, %ecx
+lzcnt (%rsp), %cx
+lzcnt 2(%rsp), %cx
+
+# CHECK:      Iterations:        1500
+# CHECK-NEXT: Instructions:      4500
+# CHECK-NEXT: Total Cycles:      4507
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               1.00
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     1.00                        imull  %edx, %ecx
+# CHECK-NEXT:  2      6     0.50    *                   lzcntw (%rsp), %cx
+# CHECK-NEXT:  2      6     0.50    *                   lzcntw 2(%rsp), %cx
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - ZnAGU0
+# CHECK-NEXT: [1]   - ZnAGU1
+# CHECK-NEXT: [2]   - ZnALU0
+# CHECK-NEXT: [3]   - ZnALU1
+# CHECK-NEXT: [4]   - ZnALU2
+# CHECK-NEXT: [5]   - ZnALU3
+# CHECK-NEXT: [6]   - ZnDivider
+# CHECK-NEXT: [7]   - ZnFPU0
+# CHECK-NEXT: [8]   - ZnFPU1
+# CHECK-NEXT: [9]   - ZnFPU2
+# CHECK-NEXT: [10]  - ZnFPU3
+# CHECK-NEXT: [11]  - ZnMultiplier
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
+# CHECK-NEXT: 1.00   1.00   0.66   1.00   0.67   0.67    -      -      -      -      -     1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -     1.00   imull        %edx, %ecx
+# CHECK-NEXT:  -     1.00   0.33    -     0.33   0.33    -      -      -      -      -      -     lzcntw       (%rsp), %cx
+# CHECK-NEXT: 1.00    -     0.33    -     0.33   0.33    -      -      -      -      -      -     lzcntw       2(%rsp), %cx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     012345678
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeER    .    .  .   imull    %edx, %ecx
+# CHECK-NEXT: [0,1]     DeeeeeeER .    .  .   lzcntw   (%rsp), %cx
+# CHECK-NEXT: [0,2]     .DeeeeeeER.    .  .   lzcntw   2(%rsp), %cx
+# CHECK-NEXT: [1,0]     .D======eeeER  .  .   imull    %edx, %ecx
+# CHECK-NEXT: [1,1]     . DeeeeeeE--R  .  .   lzcntw   (%rsp), %cx
+# CHECK-NEXT: [1,2]     . DeeeeeeE--R  .  .   lzcntw   2(%rsp), %cx
+# CHECK-NEXT: [2,0]     .  D=======eeeER  .   imull    %edx, %ecx
+# CHECK-NEXT: [2,1]     .  DeeeeeeE----R  .   lzcntw   (%rsp), %cx
+# CHECK-NEXT: [2,2]     .   DeeeeeeE---R  .   lzcntw   2(%rsp), %cx
+# CHECK-NEXT: [3,0]     .   D=========eeeER   imull    %edx, %ecx
+# CHECK-NEXT: [3,1]     .    DeeeeeeE-----R   lzcntw   (%rsp), %cx
+# CHECK-NEXT: [3,2]     .    DeeeeeeE-----R   lzcntw   2(%rsp), %cx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     4     6.5    0.3    0.0       imull       %edx, %ecx
+# CHECK-NEXT: 1.     4     1.0    1.0    2.8       lzcntw      (%rsp), %cx
+# CHECK-NEXT: 2.     4     1.0    1.0    2.5       lzcntw      2(%rsp), %cx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s
new file mode 100644 (file)
index 0000000..5be7b42
--- /dev/null
@@ -0,0 +1,48 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+# An instruction that writes to a 32-bit register will not have any false
+# dependence on the corresponding 64-bit register because the upper part of
+# the 64-bit register is set to zero.
+
+imulq %rax, %rcx
+addl  %edx, %ecx
+addq  %rcx, %rdx
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      3
+# CHECK-NEXT: Total Cycles:      9
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               0.33
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      4     1.00                        imulq  %rax, %rcx
+# CHECK-NEXT:  1      1     0.25                        addl   %edx, %ecx
+# CHECK-NEXT:  1      1     0.25                        addq   %rcx, %rdx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeER .   imulq      %rax, %rcx
+# CHECK-NEXT: [0,1]     D====eER.   addl       %edx, %ecx
+# CHECK-NEXT: [0,2]     D=====eER   addq       %rcx, %rdx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq       %rax, %rcx
+# CHECK-NEXT: 1.     1     5.0    0.0    0.0       addl        %edx, %ecx
+# CHECK-NEXT: 2.     1     6.0    0.0    0.0       addq        %rcx, %rdx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s
new file mode 100644 (file)
index 0000000..9a7b43f
--- /dev/null
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+imul %ax, %cx
+add  %al, %cl
+add  %ecx, %ebx
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      3
+# CHECK-NEXT: Total Cycles:      8
+# CHECK-NEXT: Dispatch Width:    4
+# CHECK-NEXT: IPC:               0.38
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     1.00                        imulw  %ax, %cx
+# CHECK-NEXT:  1      1     0.25                        addb   %al, %cl
+# CHECK-NEXT:  1      1     0.25                        addl   %ecx, %ebx
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     01234567
+
+# CHECK:      [0,0]     DeeeER .   imulw       %ax, %cx
+# CHECK-NEXT: [0,1]     D===eER.   addb        %al, %cl
+# CHECK-NEXT: [0,2]     D====eER   addl        %ecx, %ebx
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulw       %ax, %cx
+# CHECK-NEXT: 1.     1     4.0    0.0    0.0       addb        %al, %cl
+# CHECK-NEXT: 2.     1     5.0    0.0    0.0       addl        %ecx, %ebx