--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1000 -timeline < %s | FileCheck %s
+
+# The third add reads the %eax value written by the second add. The first add
+# also reads %eax, but it precedes that write in program order, so it uses the
+# value produced by the previous iteration.
+add %eax, %ecx
+add %esi, %eax
+add %eax, %edx
+
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 1506
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 1.99
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 addl %eax, %ecx
+# CHECK-NEXT: 1 1 0.50 addl %esi, %eax
+# CHECK-NEXT: 1 1 0.50 addl %eax, %edx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %eax, %ecx
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %esi, %eax
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %eax, %edx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeER . . . . addl %eax, %ecx
+# CHECK-NEXT: [0,1] DeER . . . . addl %esi, %eax
+# CHECK-NEXT: [0,2] .DeER. . . . addl %eax, %edx
+# CHECK-NEXT: [1,0] .DeER. . . . addl %eax, %ecx
+# CHECK-NEXT: [1,1] . DeER . . . addl %esi, %eax
+# CHECK-NEXT: [1,2] . D=eER . . . addl %eax, %edx
+# CHECK-NEXT: [2,0] . DeER . . . addl %eax, %ecx
+# CHECK-NEXT: [2,1] . D=eER . . . addl %esi, %eax
+# CHECK-NEXT: [2,2] . D=eER . . . addl %eax, %edx
+# CHECK-NEXT: [3,0] . D=eER . . . addl %eax, %ecx
+# CHECK-NEXT: [3,1] . D=eER. . . addl %esi, %eax
+# CHECK-NEXT: [3,2] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [4,0] . .D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [4,1] . .D==eER . . addl %esi, %eax
+# CHECK-NEXT: [4,2] . . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [5,0] . . D===eER . . addl %eax, %ecx
+# CHECK-NEXT: [5,1] . . D=eE-R . . addl %esi, %eax
+# CHECK-NEXT: [5,2] . . D==eE-R. . addl %eax, %edx
+# CHECK-NEXT: [6,0] . . D==eER. . addl %eax, %ecx
+# CHECK-NEXT: [6,1] . . D==eE-R . addl %esi, %eax
+# CHECK-NEXT: [6,2] . . D==eER . addl %eax, %edx
+# CHECK-NEXT: [7,0] . . D===eER . addl %eax, %ecx
+# CHECK-NEXT: [7,1] . . .D=eE-R . addl %esi, %eax
+# CHECK-NEXT: [7,2] . . .D==eE-R . addl %eax, %edx
+# CHECK-NEXT: [8,0] . . . D==eER . addl %eax, %ecx
+# CHECK-NEXT: [8,1] . . . D==eE-R . addl %esi, %eax
+# CHECK-NEXT: [8,2] . . . D==eER . addl %eax, %edx
+# CHECK-NEXT: [9,0] . . . D===eER. addl %eax, %ecx
+# CHECK-NEXT: [9,1] . . . D=eE-R. addl %esi, %eax
+# CHECK-NEXT: [9,2] . . . D==eE-R addl %eax, %edx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 10 2.5 0.4 0.0 addl %eax, %ecx
+# CHECK-NEXT: 1. 10 2.1 0.7 0.5 addl %esi, %eax
+# CHECK-NEXT: 2. 10 2.6 0.0 0.3 addl %eax, %edx
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=500 -timeline < %s | FileCheck %s
+
+# Dependency chain through %xmm1 and %xmm0: vpmuldq writes %xmm1, the first
+# vpaddd reads %xmm1 and writes %xmm0, and the second vpaddd reads %xmm0.
+# The mnemonic is spelled in full so the input matches the CHECK lines below
+# and does not rely on the assembler inferring the operand-size suffix.
+vpmuldq %xmm0, %xmm0, %xmm1
+vpaddd %xmm1, %xmm1, %xmm0
+vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Iterations: 500
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 1.00 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - - - 1.50 1.50 - - - - 1.00 1.00 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - 1.00 - vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: - - - - - - 1.00 - - - - 1.00 - - vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeER. . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] D==eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [0,2] .D==eER . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [1,0] .D==eeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] . D===eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [1,2] . D====eER. . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [2,0] . D===eeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . D=====eER . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [2,2] . D=====eER . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [3,0] . D=====eeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [3,1] . D======eER. . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [3,2] . D=======eER . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [4,0] . .D======eeER . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [4,1] . .D========eER . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [4,2] . . D========eER . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [5,0] . . D========eeER. . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [5,1] . . D=========eER . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [5,2] . . D==========eER . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [6,0] . . D=========eeER . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [6,1] . . D===========eER . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [6,2] . . D===========eER. . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [7,0] . . D===========eeER . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [7,1] . . .D============eER . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [7,2] . . .D=============eER . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [8,0] . . . D============eeER . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [8,1] . . . D==============eER. . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [8,2] . . . D==============eER . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [9,0] . . . D==============eeER . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [9,1] . . . D===============eER. vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [9,2] . . . D================eER vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 10 8.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 10 9.5 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: 2. 10 10.0 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 33
-# CHECK-NEXT: Total Cycles: 70
+# CHECK-NEXT: Total Cycles: 69
# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.47
+# CHECK-NEXT: IPC: 0.48
# CHECK-NEXT: Block RThroughput: 64.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 37 (52.9%)
-# CHECK-NEXT: 1, 33 (47.1%)
+# CHECK-NEXT: 0, 36 (52.2%)
+# CHECK-NEXT: 1, 33 (47.8%)
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 66
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012345678
-# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm0, %ymm1
-# CHECK-NEXT: [0,1] .DeeeE----------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm2
-# CHECK-NEXT: [0,2] . D=eeeE---------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm3
-# CHECK-NEXT: [0,3] . D==eeeE-------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm4
-# CHECK-NEXT: [0,4] . D===eeeE------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm5
-# CHECK-NEXT: [0,5] . D====eeeE----------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm6
-# CHECK-NEXT: [0,6] . .D=====eeeE---------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm7
-# CHECK-NEXT: [0,7] . . D======eeeE-------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm8
-# CHECK-NEXT: [0,8] . . D=======eeeE------------------------R. . . . . . vaddps %ymm0, %ymm0, %ymm9
-# CHECK-NEXT: [0,9] . . D========eeeE----------------------R. . . . . . vaddps %ymm0, %ymm0, %ymm10
-# CHECK-NEXT: [0,10] . . D=========eeeE---------------------R . . . . . vaddps %ymm0, %ymm0, %ymm11
-# CHECK-NEXT: [0,11] . . .D==========eeeE-------------------R . . . . . vaddps %ymm0, %ymm0, %ymm12
-# CHECK-NEXT: [0,12] . . . D===========eeeE------------------R . . . . . vaddps %ymm0, %ymm0, %ymm13
-# CHECK-NEXT: [0,13] . . . D============eeeE----------------R . . . . . vaddps %ymm0, %ymm0, %ymm14
-# CHECK-NEXT: [0,14] . . . D=============eeeE---------------R . . . . . vaddps %ymm0, %ymm0, %ymm15
-# CHECK-NEXT: [0,15] . . . D==============eeeE-------------R . . . . . vaddps %ymm2, %ymm0, %ymm0
-# CHECK-NEXT: [0,16] . . . .D================eeeE-----------R . . . . . vaddps %ymm2, %ymm0, %ymm3
-# CHECK-NEXT: [0,17] . . . . D=================eeeE---------R . . . . . vaddps %ymm2, %ymm0, %ymm4
-# CHECK-NEXT: [0,18] . . . . D==================eeeE--------R. . . . . vaddps %ymm2, %ymm0, %ymm5
-# CHECK-NEXT: [0,19] . . . . D===================eeeE------R. . . . . vaddps %ymm2, %ymm0, %ymm6
-# CHECK-NEXT: [0,20] . . . . D====================eeeE-----R . . . . vaddps %ymm2, %ymm0, %ymm7
-# CHECK-NEXT: [0,21] . . . . .D=====================eeeE---R . . . . vaddps %ymm2, %ymm0, %ymm8
-# CHECK-NEXT: [0,22] . . . . . D======================eeeE--R . . . . vaddps %ymm2, %ymm0, %ymm9
-# CHECK-NEXT: [0,23] . . . . . D=======================eeeER . . . . vaddps %ymm2, %ymm0, %ymm10
-# CHECK-NEXT: [0,24] . . . . . D========================eeeER . . . . vaddps %ymm2, %ymm0, %ymm11
-# CHECK-NEXT: [0,25] . . . . . D=========================eeeER . . . vaddps %ymm2, %ymm0, %ymm12
-# CHECK-NEXT: [0,26] . . . . . .D==========================eeeER . . . vaddps %ymm2, %ymm0, %ymm13
-# CHECK-NEXT: [0,27] . . . . . . D===========================eeeER. . . vaddps %ymm2, %ymm0, %ymm14
-# CHECK-NEXT: [0,28] . . . . . . D============================eeeER . . vaddps %ymm2, %ymm0, %ymm15
-# CHECK-NEXT: [0,29] . . . . . . D=============================eeeER . . vaddps %ymm3, %ymm0, %ymm2
-# CHECK-NEXT: [0,30] . . . . . . D==============================eeeER . vaddps %ymm3, %ymm0, %ymm4
-# CHECK-NEXT: [0,31] . . . . . . .D===============================eeeER . vaddps %ymm3, %ymm0, %ymm5
-# CHECK-NEXT: [0,32] . . . . . . . . D========================eeeER vaddps %ymm3, %ymm0, %ymm6
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] .DeeeE----------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm2
+# CHECK-NEXT: [0,2] . D=eeeE---------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm3
+# CHECK-NEXT: [0,3] . D==eeeE-------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm4
+# CHECK-NEXT: [0,4] . D===eeeE------------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm5
+# CHECK-NEXT: [0,5] . D====eeeE----------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm6
+# CHECK-NEXT: [0,6] . .D=====eeeE---------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm7
+# CHECK-NEXT: [0,7] . . D======eeeE-------------------------R . . . . . . vaddps %ymm0, %ymm0, %ymm8
+# CHECK-NEXT: [0,8] . . D=======eeeE------------------------R. . . . . . vaddps %ymm0, %ymm0, %ymm9
+# CHECK-NEXT: [0,9] . . D========eeeE----------------------R. . . . . . vaddps %ymm0, %ymm0, %ymm10
+# CHECK-NEXT: [0,10] . . D=========eeeE---------------------R . . . . . vaddps %ymm0, %ymm0, %ymm11
+# CHECK-NEXT: [0,11] . . .D============eeeE-----------------R . . . . . vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: [0,12] . . . D=============eeeE----------------R . . . . . vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: [0,13] . . . D==============eeeE--------------R . . . . . vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: [0,14] . . . D===============eeeE-------------R . . . . . vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: [0,15] . . . D======eeeE---------------------R . . . . . vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: [0,16] . . . .D===============eeeE------------R . . . . . vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: [0,17] . . . . D================eeeE----------R . . . . . vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: [0,18] . . . . D=================eeeE---------R. . . . . vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: [0,19] . . . . D==================eeeE-------R. . . . . vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: [0,20] . . . . D===================eeeE------R . . . . vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: [0,21] . . . . .D====================eeeE----R . . . . vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: [0,22] . . . . . D=====================eeeE---R . . . . vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: [0,23] . . . . . D======================eeeE-R . . . . vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: [0,24] . . . . . D=======================eeeER . . . . vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: [0,25] . . . . . D========================eeeER. . . . vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: [0,26] . . . . . .D=========================eeeER . . . vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: [0,27] . . . . . . D==========================eeeER . . . vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: [0,28] . . . . . . D===========================eeeER . . vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: [0,29] . . . . . . D============================eeeER . . vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: [0,30] . . . . . . D=============================eeeER. . vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: [0,31] . . . . . . .D==============================eeeER . vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: [0,32] . . . . . . . . D=======================eeeER vaddps %ymm3, %ymm0, %ymm6
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: 8. 1 8.0 8.0 24.0 vaddps %ymm0, %ymm0, %ymm9
# CHECK-NEXT: 9. 1 9.0 9.0 22.0 vaddps %ymm0, %ymm0, %ymm10
# CHECK-NEXT: 10. 1 10.0 10.0 21.0 vaddps %ymm0, %ymm0, %ymm11
-# CHECK-NEXT: 11. 1 11.0 11.0 19.0 vaddps %ymm0, %ymm0, %ymm12
-# CHECK-NEXT: 12. 1 12.0 12.0 18.0 vaddps %ymm0, %ymm0, %ymm13
-# CHECK-NEXT: 13. 1 13.0 13.0 16.0 vaddps %ymm0, %ymm0, %ymm14
-# CHECK-NEXT: 14. 1 14.0 14.0 15.0 vaddps %ymm0, %ymm0, %ymm15
-# CHECK-NEXT: 15. 1 15.0 15.0 13.0 vaddps %ymm2, %ymm0, %ymm0
-# CHECK-NEXT: 16. 1 17.0 0.0 11.0 vaddps %ymm2, %ymm0, %ymm3
-# CHECK-NEXT: 17. 1 18.0 2.0 9.0 vaddps %ymm2, %ymm0, %ymm4
-# CHECK-NEXT: 18. 1 19.0 4.0 8.0 vaddps %ymm2, %ymm0, %ymm5
-# CHECK-NEXT: 19. 1 20.0 6.0 6.0 vaddps %ymm2, %ymm0, %ymm6
-# CHECK-NEXT: 20. 1 21.0 8.0 5.0 vaddps %ymm2, %ymm0, %ymm7
-# CHECK-NEXT: 21. 1 22.0 10.0 3.0 vaddps %ymm2, %ymm0, %ymm8
-# CHECK-NEXT: 22. 1 23.0 12.0 2.0 vaddps %ymm2, %ymm0, %ymm9
-# CHECK-NEXT: 23. 1 24.0 14.0 0.0 vaddps %ymm2, %ymm0, %ymm10
-# CHECK-NEXT: 24. 1 25.0 16.0 0.0 vaddps %ymm2, %ymm0, %ymm11
-# CHECK-NEXT: 25. 1 26.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm12
-# CHECK-NEXT: 26. 1 27.0 20.0 0.0 vaddps %ymm2, %ymm0, %ymm13
-# CHECK-NEXT: 27. 1 28.0 22.0 0.0 vaddps %ymm2, %ymm0, %ymm14
-# CHECK-NEXT: 28. 1 29.0 24.0 0.0 vaddps %ymm2, %ymm0, %ymm15
-# CHECK-NEXT: 29. 1 30.0 23.0 0.0 vaddps %ymm3, %ymm0, %ymm2
-# CHECK-NEXT: 30. 1 31.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm4
-# CHECK-NEXT: 31. 1 32.0 27.0 0.0 vaddps %ymm3, %ymm0, %ymm5
-# CHECK-NEXT: 32. 1 25.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm6
+# CHECK-NEXT: 11. 1 13.0 13.0 17.0 vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: 12. 1 14.0 14.0 16.0 vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: 13. 1 15.0 15.0 14.0 vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: 14. 1 16.0 16.0 13.0 vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: 15. 1 7.0 7.0 21.0 vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: 16. 1 16.0 7.0 12.0 vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: 17. 1 17.0 9.0 10.0 vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: 18. 1 18.0 11.0 9.0 vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: 19. 1 19.0 13.0 7.0 vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: 20. 1 20.0 15.0 6.0 vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: 21. 1 21.0 17.0 4.0 vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: 22. 1 22.0 19.0 3.0 vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: 23. 1 23.0 21.0 1.0 vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: 24. 1 24.0 23.0 0.0 vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: 25. 1 25.0 25.0 0.0 vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: 26. 1 26.0 26.0 0.0 vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: 27. 1 27.0 27.0 0.0 vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: 28. 1 28.0 28.0 0.0 vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: 29. 1 29.0 23.0 0.0 vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: 30. 1 30.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: 31. 1 31.0 27.0 0.0 vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: 32. 1 24.0 24.0 0.0 vaddps %ymm3, %ymm0, %ymm6
unsigned getRegisterID() const { return RegisterID; }
void addUser(ReadState *Use, int ReadAdvance);
+ // Returns the number of entries currently stored in Users.
+ // NOTE(review): presumably these are the reads registered via addUser();
+ // confirm against the addUser() definition.
+ unsigned getNumUsers() const { return Users.size(); }
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
// On every cycle, update CyclesLeft and notify dependent users.
public:
+ // Builds an instruction from descriptor D. The stage starts as IS_INVALID
+ // and CyclesLeft starts as UNKNOWN_CYCLES.
Instruction(const InstrDesc &D)
- : Desc(D), Stage(IS_INVALID), CyclesLeft(-1) {}
+ : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES) {}
Instruction(const Instruction &Other) = delete;
Instruction &operator=(const Instruction &Other) = delete;
const InstrDesc &getDesc() const { return Desc; }
unsigned getRCUTokenID() const { return RCUTokenID; }
+ // Returns the total number of users of this instruction, computed as the
+ // sum of getNumUsers() over every definition in Defs.
+ unsigned getNumUsers() const {
+ unsigned NumUsers = 0;
+ for (const UniqueDef &Def : Defs)
+ NumUsers += Def->getNumUsers();
+ return NumUsers;
+ }
+
// Transition to the dispatch stage, and assign a RCUToken to this
// instruction. The RCUToken is used to track the completion of every
// register write performed by this instruction.
}
InstRef Scheduler::select() {
- // Give priority to older instructions in the ReadyQueue. Since the ready
- // queue is ordered by key, this will always prioritize older instructions.
- const auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(),
- [&](const QueueEntryTy &Entry) {
- const InstrDesc &D = Entry.second->getDesc();
- return Resources->canBeIssued(D);
- });
+ // Find the oldest ready-to-issue instruction in the ReadyQueue.
+ auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(),
+ [&](const QueueEntryTy &Entry) {
+ const InstrDesc &D = Entry.second->getDesc();
+ return Resources->canBeIssued(D);
+ });
if (It == ReadyQueue.end())
return {0, nullptr};
+ // We want to prioritize older instructions over younger instructions to
+ // minimize the pressure on the reorder buffer. We also want to
+ // rank higher the instructions with more users to better expose ILP.
+
+ // Compute a rank value based on the age of an instruction (i.e. its source
+ // index) and its number of users. The lower the rank value, the better.
+ int Rank = It->first - It->second->getNumUsers();
+ for (auto I = It, E = ReadyQueue.end(); I != E; ++I) {
+ int CurrentRank = I->first - I->second->getNumUsers();
+ if (CurrentRank < Rank) {
+ const InstrDesc &D = I->second->getDesc();
+ if (Resources->canBeIssued(D))
+ It = I;
+ }
+ }
+
// We found an instruction to issue.
InstRef IR(It->first, It->second);
ReadyQueue.erase(It);