--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+# Three different views of the same register: a 64-bit write to %rbx (imul),
+# a 16-bit write to %bx (lzcnt), then a 32-bit read-modify-write of %ebx (add).
+# In the expected timeline below, lzcntw starts executing in the same cycle as
+# imulq, while addl only starts after imulq's four execute cycles.
+imul %rax, %rbx
+lzcnt %ax, %bx
+add %ecx, %ebx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 4 1.00 imulq %rax, %rbx
+# CHECK-NEXT: 1 2 0.25 lzcntw %ax, %bx
+# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeeER. imulq %rax, %rbx
+# CHECK-NEXT: [0,1] DeeE--R. lzcntw %ax, %bx
+# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx
+# CHECK-NEXT: 1. 1 1.0 1.0 2.0 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=6 < %s | FileCheck %s
+
+# The ILP is limited by the false dependency on %dx. So, the mov cannot execute
+# in parallel with the add.
+#
+# NOTE(review): the expected timeline below shows addw and movw of iteration 0
+# both starting execution in the cycle right after dispatch, and the wait-time
+# table lists movw with identical "in queue" and "while ready" averages. That
+# does not look like a mov stalled behind the add; confirm whether the
+# statement above actually applies to the znver1 expectations.
+
+add %cx, %dx
+mov %ax, %dx
+xor %bx, %dx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 1129
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 3.99
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.25 addw %cx, %dx
+# CHECK-NEXT: 1 1 0.25 movw %ax, %dx
+# CHECK-NEXT: 1 1 0.25 xorw %bx, %dx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - ZnAGU0
+# CHECK-NEXT: [1] - ZnAGU1
+# CHECK-NEXT: [2] - ZnALU0
+# CHECK-NEXT: [3] - ZnALU1
+# CHECK-NEXT: [4] - ZnALU2
+# CHECK-NEXT: [5] - ZnALU3
+# CHECK-NEXT: [6] - ZnDivider
+# CHECK-NEXT: [7] - ZnFPU0
+# CHECK-NEXT: [8] - ZnFPU1
+# CHECK-NEXT: [9] - ZnFPU2
+# CHECK-NEXT: [10] - ZnFPU3
+# CHECK-NEXT: [11] - ZnMultiplier
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: - - 0.75 0.75 0.75 0.75 - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
+# CHECK-NEXT: - - - 0.25 0.75 - - - - - - - addw %cx, %dx
+# CHECK-NEXT: - - 0.25 - - 0.75 - - - - - - movw %ax, %dx
+# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - xorw %bx, %dx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . addw %cx, %dx
+# CHECK-NEXT: [0,1] DeER . . movw %ax, %dx
+# CHECK-NEXT: [0,2] D=eER. . xorw %bx, %dx
+# CHECK-NEXT: [1,0] D==eER . addw %cx, %dx
+# CHECK-NEXT: [1,1] .DeE-R . movw %ax, %dx
+# CHECK-NEXT: [1,2] .D=eER . xorw %bx, %dx
+# CHECK-NEXT: [2,0] .D==eER . addw %cx, %dx
+# CHECK-NEXT: [2,1] .DeE--R . movw %ax, %dx
+# CHECK-NEXT: [2,2] . DeE-R . xorw %bx, %dx
+# CHECK-NEXT: [3,0] . D=eER . addw %cx, %dx
+# CHECK-NEXT: [3,1] . DeE-R . movw %ax, %dx
+# CHECK-NEXT: [3,2] . D=eER . xorw %bx, %dx
+# CHECK-NEXT: [4,0] . D=eER. addw %cx, %dx
+# CHECK-NEXT: [4,1] . DeE-R. movw %ax, %dx
+# CHECK-NEXT: [4,2] . D=eER. xorw %bx, %dx
+# CHECK-NEXT: [5,0] . D==eER addw %cx, %dx
+# CHECK-NEXT: [5,1] . DeE-R movw %ax, %dx
+# CHECK-NEXT: [5,2] . D=eER xorw %bx, %dx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 6 2.3 0.2 0.0 addw %cx, %dx
+# CHECK-NEXT: 1. 6 1.0 1.0 1.0 movw %ax, %dx
+# CHECK-NEXT: 2. 6 1.8 0.0 0.2 xorw %bx, %dx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=7 < %s | FileCheck %s
+
+# The lzcnt cannot execute in parallel with the imul because there is a false
+# dependency on %bx.
+#
+# NOTE(review): the expected timeline below shows imulw and lzcntw of
+# iteration 0 both starting execution in the cycle right after dispatch, and
+# the wait-time table lists lzcntw with identical "in queue" and "while ready"
+# averages. That does not look like an lzcnt stalled behind the imul; confirm
+# whether the statement above actually applies to the znver1 expectations.
+
+imul %ax, %bx
+lzcnt %ax, %bx
+add %cx, %bx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 1507
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 2.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx
+# CHECK-NEXT: 1 2 0.25 lzcntw %ax, %bx
+# CHECK-NEXT: 1 1 0.25 addw %cx, %bx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - ZnAGU0
+# CHECK-NEXT: [1] - ZnAGU1
+# CHECK-NEXT: [2] - ZnALU0
+# CHECK-NEXT: [3] - ZnALU1
+# CHECK-NEXT: [4] - ZnALU2
+# CHECK-NEXT: [5] - ZnALU3
+# CHECK-NEXT: [6] - ZnDivider
+# CHECK-NEXT: [7] - ZnFPU0
+# CHECK-NEXT: [8] - ZnFPU1
+# CHECK-NEXT: [9] - ZnFPU2
+# CHECK-NEXT: [10] - ZnFPU3
+# CHECK-NEXT: [11] - ZnMultiplier
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: - - 0.67 1.00 0.67 0.67 - - - - - 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imulw %ax, %bx
+# CHECK-NEXT: - - 0.33 - 0.33 0.33 - - - - - - lzcntw %ax, %bx
+# CHECK-NEXT: - - 0.33 - 0.33 0.33 - - - - - - addw %cx, %bx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . imulw %ax, %bx
+# CHECK-NEXT: [0,1] DeeE-R . . lzcntw %ax, %bx
+# CHECK-NEXT: [0,2] D==eER . . addw %cx, %bx
+# CHECK-NEXT: [1,0] D===eeeER . . imulw %ax, %bx
+# CHECK-NEXT: [1,1] .DeeE---R . . lzcntw %ax, %bx
+# CHECK-NEXT: [1,2] .D==eE--R . . addw %cx, %bx
+# CHECK-NEXT: [2,0] .D===eeeER. . imulw %ax, %bx
+# CHECK-NEXT: [2,1] .DeeE----R. . lzcntw %ax, %bx
+# CHECK-NEXT: [2,2] . D=eE---R. . addw %cx, %bx
+# CHECK-NEXT: [3,0] . D===eeeER . imulw %ax, %bx
+# CHECK-NEXT: [3,1] . DeeE----R . lzcntw %ax, %bx
+# CHECK-NEXT: [3,2] . D==eE---R . addw %cx, %bx
+# CHECK-NEXT: [4,0] . D===eeeER . imulw %ax, %bx
+# CHECK-NEXT: [4,1] . DeeE----R . lzcntw %ax, %bx
+# CHECK-NEXT: [4,2] . D==eE---R . addw %cx, %bx
+# CHECK-NEXT: [5,0] . D====eeeER. imulw %ax, %bx
+# CHECK-NEXT: [5,1] . DeeE----R. lzcntw %ax, %bx
+# CHECK-NEXT: [5,2] . D==eE---R. addw %cx, %bx
+# CHECK-NEXT: [6,0] . D====eeeER imulw %ax, %bx
+# CHECK-NEXT: [6,1] . DeeE-----R lzcntw %ax, %bx
+# CHECK-NEXT: [6,2] . D=eE----R addw %cx, %bx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 7 3.9 0.7 0.0 imulw %ax, %bx
+# CHECK-NEXT: 1. 7 1.0 1.0 3.6 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 7 2.7 0.0 2.6 addw %cx, %bx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=8 < %s | FileCheck %s
+
+# Single 16-bit lzcnt writing %bx, repeated back to back across iterations.
+# NOTE(review): the inline remark mentions a partial register stall, but the
+# expected timeline below shows four lzcntw dispatched per cycle, each starting
+# execution in the following cycle, with an average scheduler wait of 1.0.
+# Confirm whether a stall is really expected for this CPU.
+lzcnt %ax, %bx ## partial register stall.
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 379
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 3.96
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 0.25 lzcntw %ax, %bx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - ZnAGU0
+# CHECK-NEXT: [1] - ZnAGU1
+# CHECK-NEXT: [2] - ZnALU0
+# CHECK-NEXT: [3] - ZnALU1
+# CHECK-NEXT: [4] - ZnALU2
+# CHECK-NEXT: [5] - ZnALU3
+# CHECK-NEXT: [6] - ZnDivider
+# CHECK-NEXT: [7] - ZnFPU0
+# CHECK-NEXT: [8] - ZnFPU1
+# CHECK-NEXT: [9] - ZnFPU2
+# CHECK-NEXT: [10] - ZnFPU3
+# CHECK-NEXT: [11] - ZnMultiplier
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
+# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - lzcntw %ax, %bx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [1,0] DeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [2,0] DeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [3,0] DeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [4,0] .DeeER lzcntw %ax, %bx
+# CHECK-NEXT: [5,0] .DeeER lzcntw %ax, %bx
+# CHECK-NEXT: [6,0] .DeeER lzcntw %ax, %bx
+# CHECK-NEXT: [7,0] .DeeER lzcntw %ax, %bx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 8 1.0 1.0 0.0 lzcntw %ax, %bx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1500 -timeline -timeline-max-iterations=4 < %s | FileCheck %s
+
+# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the
+# imul. However, the folded load can start immediately.
+# The last lzcnt has a false dependency on %cx. However, even in this case, the
+# folded load can start immediately.
+#
+# In the expected timeline below, every lzcntw starts executing in the cycle
+# right after its dispatch, whereas the imull of iterations 1-3 spends several
+# cycles waiting in the scheduler before it starts.
+
+imul %edx, %ecx
+lzcnt (%rsp), %cx
+lzcnt 2(%rsp), %cx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 4507
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx
+# CHECK-NEXT: 2 6 0.50 * lzcntw (%rsp), %cx
+# CHECK-NEXT: 2 6 0.50 * lzcntw 2(%rsp), %cx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - ZnAGU0
+# CHECK-NEXT: [1] - ZnAGU1
+# CHECK-NEXT: [2] - ZnALU0
+# CHECK-NEXT: [3] - ZnALU1
+# CHECK-NEXT: [4] - ZnALU2
+# CHECK-NEXT: [5] - ZnALU3
+# CHECK-NEXT: [6] - ZnDivider
+# CHECK-NEXT: [7] - ZnFPU0
+# CHECK-NEXT: [8] - ZnFPU1
+# CHECK-NEXT: [9] - ZnFPU2
+# CHECK-NEXT: [10] - ZnFPU3
+# CHECK-NEXT: [11] - ZnMultiplier
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: 1.00 1.00 0.66 1.00 0.67 0.67 - - - - - 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imull %edx, %ecx
+# CHECK-NEXT: - 1.00 0.33 - 0.33 0.33 - - - - - - lzcntw (%rsp), %cx
+# CHECK-NEXT: 1.00 - 0.33 - 0.33 0.33 - - - - - - lzcntw 2(%rsp), %cx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imull %edx, %ecx
+# CHECK-NEXT: [0,1] DeeeeeeER . . . lzcntw (%rsp), %cx
+# CHECK-NEXT: [0,2] .DeeeeeeER. . . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [1,0] .D======eeeER . . imull %edx, %ecx
+# CHECK-NEXT: [1,1] . DeeeeeeE--R . . lzcntw (%rsp), %cx
+# CHECK-NEXT: [1,2] . DeeeeeeE--R . . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [2,0] . D=======eeeER . imull %edx, %ecx
+# CHECK-NEXT: [2,1] . DeeeeeeE----R . lzcntw (%rsp), %cx
+# CHECK-NEXT: [2,2] . DeeeeeeE---R . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [3,0] . D=========eeeER imull %edx, %ecx
+# CHECK-NEXT: [3,1] . DeeeeeeE-----R lzcntw (%rsp), %cx
+# CHECK-NEXT: [3,2] . DeeeeeeE-----R lzcntw 2(%rsp), %cx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 6.5 0.3 0.0 imull %edx, %ecx
+# CHECK-NEXT: 1. 4 1.0 1.0 2.8 lzcntw (%rsp), %cx
+# CHECK-NEXT: 2. 4 1.0 1.0 2.5 lzcntw 2(%rsp), %cx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+# An instruction that writes to a 32-bit register will not have any false
+# dependence on the corresponding 64-bit register because the upper part of
+# the 64-bit register is set to zero.
+#
+# Here addl both reads and writes %ecx, and addq then reads %rcx. In the
+# expected timeline below, addl starts after imulq's four execute cycles and
+# addq starts one cycle after addl.
+
+imulq %rax, %rcx
+addl %edx, %ecx
+addq %rcx, %rdx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 4 1.00 imulq %rax, %rcx
+# CHECK-NEXT: 1 1 0.25 addl %edx, %ecx
+# CHECK-NEXT: 1 1 0.25 addq %rcx, %rdx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeER . imulq %rax, %rcx
+# CHECK-NEXT: [0,1] D====eER. addl %edx, %ecx
+# CHECK-NEXT: [0,2] D=====eER addq %rcx, %rdx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rcx
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 addl %edx, %ecx
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 addq %rcx, %rdx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+# Three different widths of the same register: a 16-bit write to %cx (imul),
+# an 8-bit read-modify-write of %cl (add), then a 32-bit read of %ecx (add).
+# In the expected timeline below the three instructions execute strictly one
+# after another: addb starts after imulw's three execute cycles and addl starts
+# one cycle after addb.
+imul %ax, %cx
+add %al, %cl
+add %ecx, %ebx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Dispatch Width: 4
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx
+# CHECK-NEXT: 1 1 0.25 addb %al, %cl
+# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . imulw %ax, %cx
+# CHECK-NEXT: [0,1] D===eER. addb %al, %cl
+# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx