From 357f1c4ef1333064d1f7b68662d914141dd788fa Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 20 Nov 2022 12:13:02 +0000 Subject: [PATCH] [X86] Improve LOOP/LOOPE/LOOPNE schedule on SandyBridge model D138359 was reporting that this override was superfluous, but it had never been set up - I took the numbers from uops.info (I couldn't find an estimate in Intel docs). --- llvm/lib/Target/X86/X86SchedSandyBridge.td | 15 ++++++++++++++- llvm/test/tools/llvm-mca/X86/Barcelona/resources-x86_64.s | 14 +++++++------- llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s | 14 +++++++------- .../tools/llvm-mca/X86/SandyBridge/resources-x86_64.s | 14 +++++++------- 4 files changed, 35 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 9d7069a..9ceb8db 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -615,7 +615,6 @@ def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> { } def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP, LD_Frr, ST_Frr, ST_FPrr)>; -def: InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs. 
def: InstRW<[SBWriteResGroup2], (instrs RET64)>; def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> { @@ -736,6 +735,20 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; +def SBWriteResGroup30 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> { + let Latency = 3; + let NumMicroOps = 8; + let ResourceCycles = [1,3,4]; +} +def: InstRW<[SBWriteResGroup30], (instrs LOOP)>; + +def SBWriteResGroup31 : SchedWriteRes<[SBPort1,SBPort5,SBPort015,SBPort05]> { + let Latency = 4; + let NumMicroOps = 12; + let ResourceCycles = [1,3,6,2]; +} +def: InstRW<[SBWriteResGroup31], (instrs LOOPE, LOOPNE)>; + def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> { let Latency = 5; let NumMicroOps = 8; diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-x86_64.s index 358a1aa..37ccff2 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-x86_64.s @@ -1394,9 +1394,9 @@ xorq (%rax), %rdi # CHECK-NEXT: 3 7 0.67 U lodsw (%rsi), %ax # CHECK-NEXT: 2 6 0.50 U lodsl (%rsi), %eax # CHECK-NEXT: 2 6 0.50 U lodsq (%rsi), %rax -# CHECK-NEXT: 1 1 1.00 U loop 0 -# CHECK-NEXT: 1 1 1.00 U loope 0 -# CHECK-NEXT: 1 1 1.00 U loopne 0 +# CHECK-NEXT: 8 3 3.00 U loop 0 +# CHECK-NEXT: 12 4 4.00 U loope 0 +# CHECK-NEXT: 12 4 4.00 U loopne 0 # CHECK-NEXT: 5 8 1.00 U movsb (%rsi), %es:(%rdi) # CHECK-NEXT: 5 8 1.00 U movsw (%rsi), %es:(%rdi) # CHECK-NEXT: 5 8 1.00 U movsl (%rsi), %es:(%rdi) @@ -1960,7 +1960,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: 160.00 - 658.17 310.67 361.00 686.17 455.50 455.50 +# CHECK-NEXT: 160.00 - 665.50 319.00 361.00 699.50 455.50 455.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2324,9 +2324,9 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 0.67 
0.67 - 0.67 0.50 0.50 lodsw (%rsi), %ax # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsl (%rsi), %eax # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsq (%rsi), %rax -# CHECK-NEXT: - - - - - 1.00 - - loop 0 -# CHECK-NEXT: - - - - - 1.00 - - loope 0 -# CHECK-NEXT: - - - - - 1.00 - - loopne 0 +# CHECK-NEXT: - - 1.33 2.33 - 4.33 - - loop 0 +# CHECK-NEXT: - - 3.00 3.00 - 6.00 - - loope 0 +# CHECK-NEXT: - - 3.00 3.00 - 6.00 - - loopne 0 # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsb (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsw (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsl (%rsi), %es:(%rdi) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s index 358a1aa..37ccff2 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s @@ -1394,9 +1394,9 @@ xorq (%rax), %rdi # CHECK-NEXT: 3 7 0.67 U lodsw (%rsi), %ax # CHECK-NEXT: 2 6 0.50 U lodsl (%rsi), %eax # CHECK-NEXT: 2 6 0.50 U lodsq (%rsi), %rax -# CHECK-NEXT: 1 1 1.00 U loop 0 -# CHECK-NEXT: 1 1 1.00 U loope 0 -# CHECK-NEXT: 1 1 1.00 U loopne 0 +# CHECK-NEXT: 8 3 3.00 U loop 0 +# CHECK-NEXT: 12 4 4.00 U loope 0 +# CHECK-NEXT: 12 4 4.00 U loopne 0 # CHECK-NEXT: 5 8 1.00 U movsb (%rsi), %es:(%rdi) # CHECK-NEXT: 5 8 1.00 U movsw (%rsi), %es:(%rdi) # CHECK-NEXT: 5 8 1.00 U movsl (%rsi), %es:(%rdi) @@ -1960,7 +1960,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: 160.00 - 658.17 310.67 361.00 686.17 455.50 455.50 +# CHECK-NEXT: 160.00 - 665.50 319.00 361.00 699.50 455.50 455.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2324,9 +2324,9 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsw (%rsi), %ax # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsl (%rsi), 
%eax # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsq (%rsi), %rax -# CHECK-NEXT: - - - - - 1.00 - - loop 0 -# CHECK-NEXT: - - - - - 1.00 - - loope 0 -# CHECK-NEXT: - - - - - 1.00 - - loopne 0 +# CHECK-NEXT: - - 1.33 2.33 - 4.33 - - loop 0 +# CHECK-NEXT: - - 3.00 3.00 - 6.00 - - loope 0 +# CHECK-NEXT: - - 3.00 3.00 - 6.00 - - loopne 0 # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsb (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsw (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsl (%rsi), %es:(%rdi) diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s index c56257b..c046cbd 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s @@ -1394,9 +1394,9 @@ xorq (%rax), %rdi # CHECK-NEXT: 3 7 0.67 U lodsw (%rsi), %ax # CHECK-NEXT: 2 6 0.50 U lodsl (%rsi), %eax # CHECK-NEXT: 2 6 0.50 U lodsq (%rsi), %rax -# CHECK-NEXT: 1 1 1.00 U loop 0 -# CHECK-NEXT: 1 1 1.00 U loope 0 -# CHECK-NEXT: 1 1 1.00 U loopne 0 +# CHECK-NEXT: 8 3 3.00 U loop 0 +# CHECK-NEXT: 12 4 4.00 U loope 0 +# CHECK-NEXT: 12 4 4.00 U loopne 0 # CHECK-NEXT: 5 8 1.00 U movsb (%rsi), %es:(%rdi) # CHECK-NEXT: 5 8 1.00 U movsw (%rsi), %es:(%rdi) # CHECK-NEXT: 5 8 1.00 U movsl (%rsi), %es:(%rdi) @@ -1960,7 +1960,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: 160.00 - 658.17 310.67 361.00 686.17 455.50 455.50 +# CHECK-NEXT: 160.00 - 665.50 319.00 361.00 699.50 455.50 455.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2324,9 +2324,9 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsw (%rsi), %ax # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsl (%rsi), %eax # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsq (%rsi), %rax -# CHECK-NEXT: 
- - - - - 1.00 - - loop 0 -# CHECK-NEXT: - - - - - 1.00 - - loope 0 -# CHECK-NEXT: - - - - - 1.00 - - loopne 0 +# CHECK-NEXT: - - 1.33 2.33 - 4.33 - - loop 0 +# CHECK-NEXT: - - 3.00 3.00 - 6.00 - - loope 0 +# CHECK-NEXT: - - 3.00 3.00 - 6.00 - - loopne 0 # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsb (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsw (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsl (%rsi), %es:(%rdi) -- 2.7.4