From f855ef260148df0f08c73a70b9425a5215232874 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 19 Sep 2021 20:39:10 +0100 Subject: [PATCH] [X86][Atom] Fix FP uops + port usage Both ports are required in most cases. Update the uops counts + port usage based off the most recent llvm-exegesis captures (PR36895) and what Intel AoM / Agner / InstLatX64 reports as well. Noticed while trying to improve fp costs for vectorization via the D103695 helper script. --- llvm/lib/Target/X86/X86ScheduleAtom.td | 44 +++++++------- llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s | 70 +++++++++++----------- llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s | 58 +++++++++--------- llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s | 38 ++++++------ .../test/tools/llvm-mca/X86/Atom/resources-ssse3.s | 50 ++++++++-------- llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s | 70 +++++++++++----------- 6 files changed, 165 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 1087fda..1708226 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -237,15 +237,15 @@ defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; @@ -255,34 +255,34 @@ defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; @@ -478,11 +478,11 @@ defm : X86WriteResPairUnsupported; // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : AtomWriteResPair; -defm : AtomWriteResPair; -defm : AtomWriteResPair; -defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : X86WriteResPairUnsupported; //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. @@ -494,8 +494,8 @@ defm : X86WriteResPairUnsupported; // Load/store MXCSR. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes { let Latency = 5; let ResourceCycles = [5]; } -def : WriteRes { let Latency = 15; let ResourceCycles = [15]; } +defm : X86WriteRes; +defm : X86WriteRes; //////////////////////////////////////////////////////////////////////////////// // Special Cases. diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s index a75c2bb..a59deae 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s @@ -202,8 +202,8 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * andnps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andps %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * andps (%rax), %xmm2 -# CHECK-NEXT: 1 6 5.00 cmpeqps %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 cmpeqps %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * cmpeqps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 cmpeqss %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * cmpeqss (%rax), %xmm2 # CHECK-NEXT: 4 9 9.00 comiss %xmm0, %xmm1 @@ -226,18 +226,18 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 9 4.50 cvttss2si %xmm0, %rcx # CHECK-NEXT: 1 9 6.00 * cvttss2si (%rax), %ecx # CHECK-NEXT: 1 10 5.00 * cvttss2si (%rax), %rcx -# CHECK-NEXT: 1 70 35.00 divps %xmm0, %xmm2 -# CHECK-NEXT: 1 70 35.00 * divps (%rax), %xmm2 -# CHECK-NEXT: 1 34 17.00 divss %xmm0, %xmm2 -# CHECK-NEXT: 1 34 17.00 * divss (%rax), %xmm2 -# CHECK-NEXT: 1 5 2.50 * * U ldmxcsr (%rax) +# CHECK-NEXT: 6 70 70.00 divps %xmm0, %xmm2 +# CHECK-NEXT: 7 70 70.00 * divps (%rax), %xmm2 +# CHECK-NEXT: 3 34 34.00 divss %xmm0, %xmm2 +# CHECK-NEXT: 4 34 34.00 * divss (%rax), %xmm2 +# CHECK-NEXT: 4 5 5.00 * * U ldmxcsr (%rax) # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1 -# CHECK-NEXT: 1 6 5.00 maxps %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * maxps (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 maxps %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * maxps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 maxss %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * maxss (%rax), %xmm2 -# CHECK-NEXT: 1 6 5.00 minps %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * minps (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 minps %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * minps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 minss %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * minss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movaps %xmm0, %xmm2 @@ -290,22 +290,22 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * pshufw $1, (%rax), %mm2 -# CHECK-NEXT: 1 9 4.50 rcpps %xmm0, %xmm2 -# CHECK-NEXT: 1 10 5.00 * rcpps (%rax), %xmm2 +# CHECK-NEXT: 5 9 9.00 rcpps %xmm0, %xmm2 +# CHECK-NEXT: 6 10 10.00 * rcpps (%rax), %xmm2 # CHECK-NEXT: 1 4 4.00 rcpss %xmm0, %xmm2 # CHECK-NEXT: 1 4 4.00 * rcpss (%rax), %xmm2 -# CHECK-NEXT: 1 9 4.50 rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1 10 5.00 * rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 5 9 9.00 rsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6 10 10.00 * rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 4 4.00 rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 1 4 4.00 * rsqrtss (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U sfence # CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * shufps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 70 35.00 sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1 70 35.00 * sqrtps (%rax), %xmm2 -# CHECK-NEXT: 1 34 17.00 sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1 34 17.00 * sqrtss (%rax), %xmm2 -# CHECK-NEXT: 1 15 7.50 * U stmxcsr (%rax) +# CHECK-NEXT: 5 70 70.00 sqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6 70 70.00 * sqrtps (%rax), %xmm2 +# CHECK-NEXT: 3 34 34.00 sqrtss %xmm0, %xmm2 +# CHECK-NEXT: 4 34 34.00 * sqrtss (%rax), %xmm2 +# CHECK-NEXT: 4 15 15.00 * U stmxcsr (%rax) # CHECK-NEXT: 1 5 1.00 subps %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * subps (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 subss %xmm0, %xmm2 @@ -325,7 +325,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 475.00 430.00 +# CHECK-NEXT: 712.00 667.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -361,11 +361,11 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 4.50 4.50 cvttss2si %xmm0, %rcx # CHECK-NEXT: 6.00 6.00 cvttss2si (%rax), %ecx # CHECK-NEXT: 5.00 5.00 cvttss2si (%rax), %rcx -# CHECK-NEXT: 35.00 35.00 divps %xmm0, %xmm2 -# CHECK-NEXT: 35.00 35.00 divps (%rax), %xmm2 -# CHECK-NEXT: 17.00 17.00 divss %xmm0, %xmm2 -# CHECK-NEXT: 17.00 17.00 divss (%rax), %xmm2 -# CHECK-NEXT: 2.50 2.50 ldmxcsr (%rax) +# CHECK-NEXT: 70.00 70.00 divps %xmm0, %xmm2 +# CHECK-NEXT: 70.00 70.00 divps (%rax), %xmm2 +# CHECK-NEXT: 34.00 34.00 divss %xmm0, %xmm2 +# CHECK-NEXT: 34.00 34.00 divss (%rax), %xmm2 +# CHECK-NEXT: 5.00 5.00 ldmxcsr (%rax) # CHECK-NEXT: 1.00 - maskmovq %mm0, %mm1 # CHECK-NEXT: 5.00 5.00 maxps %xmm0, %xmm2 # CHECK-NEXT: 6.00 6.00 maxps (%rax), %xmm2 @@ -425,22 +425,22 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1.00 - psadbw (%rax), %mm2 # CHECK-NEXT: 1.00 - pshufw $1, %mm0, %mm2 # CHECK-NEXT: 1.00 - pshufw $1, (%rax), %mm2 -# CHECK-NEXT: 4.50 4.50 rcpps %xmm0, %xmm2 -# CHECK-NEXT: 5.00 5.00 rcpps (%rax), %xmm2 +# CHECK-NEXT: 9.00 9.00 rcpps %xmm0, %xmm2 +# CHECK-NEXT: 10.00 10.00 rcpps (%rax), %xmm2 # CHECK-NEXT: 4.00 - rcpss %xmm0, %xmm2 # CHECK-NEXT: 4.00 - rcpss (%rax), %xmm2 -# CHECK-NEXT: 4.50 4.50 rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 5.00 5.00 rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 9.00 9.00 rsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 10.00 10.00 rsqrtps (%rax), %xmm2 # CHECK-NEXT: 4.00 - rsqrtss %xmm0, %xmm2 # CHECK-NEXT: 4.00 - rsqrtss (%rax), %xmm2 # CHECK-NEXT: 1.00 - sfence # CHECK-NEXT: 1.00 - shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 1.00 - shufps $1, (%rax), %xmm2 -# CHECK-NEXT: 35.00 35.00 sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 35.00 35.00 sqrtps (%rax), %xmm2 -# CHECK-NEXT: 17.00 17.00 sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 17.00 17.00 sqrtss (%rax), %xmm2 -# CHECK-NEXT: 7.50 7.50 stmxcsr (%rax) +# CHECK-NEXT: 70.00 70.00 sqrtps %xmm0, %xmm2 +# CHECK-NEXT: 70.00 70.00 sqrtps (%rax), %xmm2 +# CHECK-NEXT: 34.00 34.00 sqrtss %xmm0, %xmm2 +# CHECK-NEXT: 34.00 34.00 sqrtss (%rax), %xmm2 +# CHECK-NEXT: 15.00 15.00 stmxcsr (%rax) # CHECK-NEXT: - 1.00 subps %xmm0, %xmm2 # CHECK-NEXT: 1.00 1.00 subps (%rax), %xmm2 # CHECK-NEXT: - 1.00 subss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s index 525ba77c..36b4c75 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s @@ -407,8 +407,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 6 5.00 addpd %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * addpd (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 addpd %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * addpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 addsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * addsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2 @@ -416,8 +416,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * andpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U clflush (%rax) -# CHECK-NEXT: 1 6 5.00 cmpeqpd %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 cmpeqpd %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 cmpeqsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * cmpeqsd (%rax), %xmm2 # CHECK-NEXT: 4 9 9.00 comisd %xmm0, %xmm1 @@ -460,19 +460,19 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 7.00 cvttsd2si %xmm0, %rcx # CHECK-NEXT: 1 9 6.00 * cvttsd2si (%rax), %ecx # CHECK-NEXT: 1 9 6.00 * cvttsd2si (%rax), %rcx -# CHECK-NEXT: 1 125 62.50 divpd %xmm0, %xmm2 -# CHECK-NEXT: 1 125 62.50 * divpd (%rax), %xmm2 -# CHECK-NEXT: 1 62 31.00 divsd %xmm0, %xmm2 -# CHECK-NEXT: 1 62 31.00 * divsd (%rax), %xmm2 +# CHECK-NEXT: 6 125 125.00 divpd %xmm0, %xmm2 +# CHECK-NEXT: 7 125 125.00 * divpd (%rax), %xmm2 +# CHECK-NEXT: 3 62 62.00 divsd %xmm0, %xmm2 +# CHECK-NEXT: 4 62 62.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 * * U lfence # CHECK-NEXT: 1 2 1.00 * * U maskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 1 6 5.00 maxpd %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * maxpd (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 maxpd %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 maxsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * maxsd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U mfence -# CHECK-NEXT: 1 6 5.00 minpd %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * minpd (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 minpd %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * minpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 minsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * minsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2 @@ -510,8 +510,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2 # CHECK-NEXT: 1 2 1.00 * movupd %xmm0, (%rax) # CHECK-NEXT: 1 3 1.50 * movupd (%rax), %xmm2 -# CHECK-NEXT: 1 9 9.00 mulpd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 10.00 * mulpd (%rax), %xmm2 +# CHECK-NEXT: 6 9 9.00 mulpd %xmm0, %xmm2 +# CHECK-NEXT: 7 10 10.00 * mulpd (%rax), %xmm2 # CHECK-NEXT: 1 5 2.00 mulsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 2.00 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2 @@ -658,12 +658,12 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * pxor (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * shufpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 125 62.50 sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1 125 62.50 * sqrtpd (%rax), %xmm2 -# CHECK-NEXT: 1 62 31.00 sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 1 62 31.00 * sqrtsd (%rax), %xmm2 -# CHECK-NEXT: 1 6 5.00 subpd %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * subpd (%rax), %xmm2 +# CHECK-NEXT: 5 125 125.00 sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 6 125 125.00 * sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 3 62 62.00 sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: 4 62 62.00 * sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 subpd %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * subpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 subsd %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * subsd (%rax), %xmm2 # CHECK-NEXT: 4 9 9.00 ucomisd %xmm0, %xmm1 @@ -681,7 +681,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 932.00 792.00 +# CHECK-NEXT: 1306.00 1166.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -738,10 +738,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 7.00 7.00 cvttsd2si %xmm0, %rcx # CHECK-NEXT: 6.00 6.00 cvttsd2si (%rax), %ecx # CHECK-NEXT: 6.00 6.00 cvttsd2si (%rax), %rcx -# CHECK-NEXT: 62.50 62.50 divpd %xmm0, %xmm2 -# CHECK-NEXT: 62.50 62.50 divpd (%rax), %xmm2 -# CHECK-NEXT: 31.00 31.00 divsd %xmm0, %xmm2 -# CHECK-NEXT: 31.00 31.00 divsd (%rax), %xmm2 +# CHECK-NEXT: 125.00 125.00 divpd %xmm0, %xmm2 +# CHECK-NEXT: 125.00 125.00 divpd (%rax), %xmm2 +# CHECK-NEXT: 62.00 62.00 divsd %xmm0, %xmm2 +# CHECK-NEXT: 62.00 62.00 divsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 lfence # CHECK-NEXT: 1.00 1.00 maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 5.00 5.00 maxpd %xmm0, %xmm2 @@ -936,10 +936,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - pxor (%rax), %xmm2 # CHECK-NEXT: 1.00 - shufpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1.00 - shufpd $1, (%rax), %xmm2 -# CHECK-NEXT: 62.50 62.50 sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 62.50 62.50 sqrtpd (%rax), %xmm2 -# CHECK-NEXT: 31.00 31.00 sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 31.00 31.00 sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 125.00 125.00 sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 125.00 125.00 sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 62.00 62.00 sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: 62.00 62.00 sqrtsd (%rax), %xmm2 # CHECK-NEXT: 5.00 5.00 subpd %xmm0, %xmm2 # CHECK-NEXT: 6.00 6.00 subpd (%rax), %xmm2 # CHECK-NEXT: - 1.00 subsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s index 65fe282..0519fe9 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse3.s @@ -43,18 +43,18 @@ mwait # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 6 5.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 7 6.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 3 6 5.00 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 4 7 6.00 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 * addsubps (%rax), %xmm2 -# CHECK-NEXT: 1 8 4.00 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 1 9 4.50 * haddpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 4.00 haddps %xmm0, %xmm2 -# CHECK-NEXT: 1 9 4.50 * haddps (%rax), %xmm2 -# CHECK-NEXT: 1 8 4.00 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 9 4.50 * hsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 4.00 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 1 9 4.50 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 5 8 8.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 6 9 9.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 5 8 8.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 6 9 9.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 5 8 8.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 6 9 9.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 5 8 8.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 6 9 9.00 * hsubps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 45 22.50 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 @@ -71,7 +71,7 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 99.00 94.00 +# CHECK-NEXT: 133.00 128.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -79,14 +79,14 @@ mwait # CHECK-NEXT: 6.00 6.00 addsubpd (%rax), %xmm2 # CHECK-NEXT: - 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1.00 1.00 addsubps (%rax), %xmm2 -# CHECK-NEXT: 4.00 4.00 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 4.50 4.50 haddpd (%rax), %xmm2 -# CHECK-NEXT: 4.00 4.00 haddps %xmm0, %xmm2 -# CHECK-NEXT: 4.50 4.50 haddps (%rax), %xmm2 -# CHECK-NEXT: 4.00 4.00 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 4.50 4.50 hsubpd (%rax), %xmm2 -# CHECK-NEXT: 4.00 4.00 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 4.50 4.50 hsubps (%rax), %xmm2 +# CHECK-NEXT: 8.00 8.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 9.00 9.00 haddpd (%rax), %xmm2 +# CHECK-NEXT: 8.00 8.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 9.00 9.00 haddps (%rax), %xmm2 +# CHECK-NEXT: 8.00 8.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 9.00 9.00 hsubpd (%rax), %xmm2 +# CHECK-NEXT: 8.00 8.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 9.00 9.00 hsubps (%rax), %xmm2 # CHECK-NEXT: 1.50 1.50 lddqu (%rax), %xmm2 # CHECK-NEXT: 22.50 22.50 monitor # CHECK-NEXT: 1.00 - movddup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s index b04be1a..905d755 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s @@ -122,30 +122,30 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * palignr $1, (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * palignr $1, (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.50 phaddd %mm0, %mm2 -# CHECK-NEXT: 1 4 2.00 * phaddd (%rax), %mm2 +# CHECK-NEXT: 3 3 3.00 phaddd %mm0, %mm2 +# CHECK-NEXT: 4 4 4.00 * phaddd (%rax), %mm2 # CHECK-NEXT: 1 3 1.50 phaddd %xmm0, %xmm2 # CHECK-NEXT: 1 4 2.00 * phaddd (%rax), %xmm2 # CHECK-NEXT: 1 5 2.50 phaddsw %mm0, %mm2 # CHECK-NEXT: 1 6 3.00 * phaddsw (%rax), %mm2 -# CHECK-NEXT: 1 7 3.50 phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 1 8 4.00 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 3 7 7.00 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 4 8 8.00 * phaddsw (%rax), %xmm2 # CHECK-NEXT: 1 5 2.50 phaddw %mm0, %mm2 # CHECK-NEXT: 1 6 3.00 * phaddw (%rax), %mm2 -# CHECK-NEXT: 1 7 3.50 phaddw %xmm0, %xmm2 -# CHECK-NEXT: 1 8 4.00 * phaddw (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.50 phsubd %mm0, %mm2 -# CHECK-NEXT: 1 4 2.00 * phsubd (%rax), %mm2 +# CHECK-NEXT: 3 7 7.00 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 4 8 8.00 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 3 3 3.00 phsubd %mm0, %mm2 +# CHECK-NEXT: 4 4 4.00 * phsubd (%rax), %mm2 # CHECK-NEXT: 1 3 1.50 phsubd %xmm0, %xmm2 # CHECK-NEXT: 1 4 2.00 * phsubd (%rax), %xmm2 # CHECK-NEXT: 1 5 2.50 phsubsw %mm0, %mm2 # CHECK-NEXT: 1 6 3.00 * phsubsw (%rax), %mm2 -# CHECK-NEXT: 1 7 3.50 phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 1 8 4.00 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 3 7 7.00 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 4 8 8.00 * phsubsw (%rax), %xmm2 # CHECK-NEXT: 1 5 2.50 phsubw %mm0, %mm2 # CHECK-NEXT: 1 6 3.00 * phsubw (%rax), %mm2 -# CHECK-NEXT: 1 7 3.50 phsubw %xmm0, %xmm2 -# CHECK-NEXT: 1 8 4.00 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 3 7 7.00 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 4 8 8.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * pmaddubsw (%rax), %mm2 # CHECK-NEXT: 1 5 2.00 pmaddubsw %xmm0, %xmm2 @@ -177,7 +177,7 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 109.00 79.00 +# CHECK-NEXT: 146.00 116.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -197,30 +197,30 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1.00 - palignr $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1.00 - palignr $1, (%rax), %xmm2 -# CHECK-NEXT: 1.50 1.50 phaddd %mm0, %mm2 -# CHECK-NEXT: 2.00 2.00 phaddd (%rax), %mm2 +# CHECK-NEXT: 3.00 3.00 phaddd %mm0, %mm2 +# CHECK-NEXT: 4.00 4.00 phaddd (%rax), %mm2 # CHECK-NEXT: 1.50 1.50 phaddd %xmm0, %xmm2 # CHECK-NEXT: 2.00 2.00 phaddd (%rax), %xmm2 # CHECK-NEXT: 2.50 2.50 phaddsw %mm0, %mm2 # CHECK-NEXT: 3.00 3.00 phaddsw (%rax), %mm2 -# CHECK-NEXT: 3.50 3.50 phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 4.00 4.00 phaddsw (%rax), %xmm2 +# CHECK-NEXT: 7.00 7.00 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 8.00 8.00 phaddsw (%rax), %xmm2 # CHECK-NEXT: 2.50 2.50 phaddw %mm0, %mm2 # CHECK-NEXT: 3.00 3.00 phaddw (%rax), %mm2 -# CHECK-NEXT: 3.50 3.50 phaddw %xmm0, %xmm2 -# CHECK-NEXT: 4.00 4.00 phaddw (%rax), %xmm2 -# CHECK-NEXT: 1.50 1.50 phsubd %mm0, %mm2 -# CHECK-NEXT: 2.00 2.00 phsubd (%rax), %mm2 +# CHECK-NEXT: 7.00 7.00 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 8.00 8.00 phaddw (%rax), %xmm2 +# CHECK-NEXT: 3.00 3.00 phsubd %mm0, %mm2 +# CHECK-NEXT: 4.00 4.00 phsubd (%rax), %mm2 # CHECK-NEXT: 1.50 1.50 phsubd %xmm0, %xmm2 # CHECK-NEXT: 2.00 2.00 phsubd (%rax), %xmm2 # CHECK-NEXT: 2.50 2.50 phsubsw %mm0, %mm2 # CHECK-NEXT: 3.00 3.00 phsubsw (%rax), %mm2 -# CHECK-NEXT: 3.50 3.50 phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 4.00 4.00 phsubsw (%rax), %xmm2 +# CHECK-NEXT: 7.00 7.00 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 8.00 8.00 phsubsw (%rax), %xmm2 # CHECK-NEXT: 2.50 2.50 phsubw %mm0, %mm2 # CHECK-NEXT: 3.00 3.00 phsubw (%rax), %mm2 -# CHECK-NEXT: 3.50 3.50 phsubw %xmm0, %xmm2 -# CHECK-NEXT: 4.00 4.00 phsubw (%rax), %xmm2 +# CHECK-NEXT: 7.00 7.00 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 8.00 8.00 phsubw (%rax), %xmm2 # CHECK-NEXT: 1.00 - pmaddubsw %mm0, %mm2 # CHECK-NEXT: 1.00 - pmaddubsw (%rax), %mm2 # CHECK-NEXT: 2.00 - pmaddubsw %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s index 9952afd..07e8531 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s @@ -241,22 +241,22 @@ fyl2xp1 # CHECK-NEXT: 1 9 4.50 U fcompi %st(3), %st # CHECK-NEXT: 1 174 87.00 U fcos # CHECK-NEXT: 1 1 0.50 U fdecstp -# CHECK-NEXT: 1 34 17.00 U fdiv %st, %st(1) -# CHECK-NEXT: 1 34 17.00 U fdiv %st(2), %st -# CHECK-NEXT: 1 34 17.00 * U fdivs (%ecx) -# CHECK-NEXT: 1 34 17.00 * U fdivl (%eax) -# CHECK-NEXT: 1 34 17.00 U fdivp %st, %st(1) -# CHECK-NEXT: 1 34 17.00 U fdivp %st, %st(2) -# CHECK-NEXT: 1 34 17.00 * U fidivs (%ecx) -# CHECK-NEXT: 1 34 17.00 * U fidivl (%eax) -# CHECK-NEXT: 1 34 17.00 U fdivr %st, %st(1) -# CHECK-NEXT: 1 34 17.00 U fdivr %st(2), %st -# CHECK-NEXT: 1 34 17.00 * U fdivrs (%ecx) -# CHECK-NEXT: 1 34 17.00 * U fdivrl (%eax) -# CHECK-NEXT: 1 34 17.00 U fdivrp %st, %st(1) -# CHECK-NEXT: 1 34 17.00 U fdivrp %st, %st(2) -# CHECK-NEXT: 1 34 17.00 * U fidivrs (%ecx) -# CHECK-NEXT: 1 34 17.00 * U fidivrl (%eax) +# CHECK-NEXT: 3 34 34.00 U fdiv %st, %st(1) +# CHECK-NEXT: 3 34 34.00 U fdiv %st(2), %st +# CHECK-NEXT: 4 34 34.00 * U fdivs (%ecx) +# CHECK-NEXT: 4 34 34.00 * U fdivl (%eax) +# CHECK-NEXT: 3 34 34.00 U fdivp %st, %st(1) +# CHECK-NEXT: 3 34 34.00 U fdivp %st, %st(2) +# CHECK-NEXT: 4 34 34.00 * U fidivs (%ecx) +# CHECK-NEXT: 4 34 34.00 * U fidivl (%eax) +# CHECK-NEXT: 3 34 34.00 U fdivr %st, %st(1) +# CHECK-NEXT: 3 34 34.00 U fdivr %st(2), %st +# CHECK-NEXT: 4 34 34.00 * U fdivrs (%ecx) +# CHECK-NEXT: 4 34 34.00 * U fdivrl (%eax) +# CHECK-NEXT: 3 34 34.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 3 34 34.00 U fdivrp %st, %st(2) +# CHECK-NEXT: 4 34 34.00 * U fidivrs (%ecx) +# CHECK-NEXT: 4 34 34.00 * U fidivrl (%eax) # CHECK-NEXT: 1 1 0.50 U ffree %st(0) # CHECK-NEXT: 1 5 5.00 * U ficoms (%ecx) # CHECK-NEXT: 1 5 5.00 * U ficoml (%eax) @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 77 38.50 U fscale # CHECK-NEXT: 1 174 87.00 U fsin # CHECK-NEXT: 1 174 87.00 U fsincos -# CHECK-NEXT: 1 71 35.50 U fsqrt +# CHECK-NEXT: 1 71 71.00 U fsqrt # CHECK-NEXT: 1 2 1.00 U fst %st(0) # CHECK-NEXT: 1 2 1.00 * U fsts (%edx) # CHECK-NEXT: 1 2 1.00 * U fstl (%ecx) @@ -361,7 +361,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 1500.00 1440.00 +# CHECK-NEXT: 1807.50 1676.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -400,22 +400,22 @@ fyl2xp1 # CHECK-NEXT: 4.50 4.50 fcompi %st(3), %st # CHECK-NEXT: 87.00 87.00 fcos # CHECK-NEXT: 0.50 0.50 fdecstp -# CHECK-NEXT: 17.00 17.00 fdiv %st, %st(1) -# CHECK-NEXT: 17.00 17.00 fdiv %st(2), %st -# CHECK-NEXT: 17.00 17.00 fdivs (%ecx) -# CHECK-NEXT: 17.00 17.00 fdivl (%eax) -# CHECK-NEXT: 17.00 17.00 fdivp %st, %st(1) -# CHECK-NEXT: 17.00 17.00 fdivp %st, %st(2) -# CHECK-NEXT: 17.00 17.00 fidivs (%ecx) -# CHECK-NEXT: 17.00 17.00 fidivl (%eax) -# CHECK-NEXT: 17.00 17.00 fdivr %st, %st(1) -# CHECK-NEXT: 17.00 17.00 fdivr %st(2), %st -# CHECK-NEXT: 17.00 17.00 fdivrs (%ecx) -# CHECK-NEXT: 17.00 17.00 fdivrl (%eax) -# CHECK-NEXT: 17.00 17.00 fdivrp %st, %st(1) -# CHECK-NEXT: 17.00 17.00 fdivrp %st, %st(2) -# CHECK-NEXT: 17.00 17.00 fidivrs (%ecx) -# CHECK-NEXT: 17.00 17.00 fidivrl (%eax) +# CHECK-NEXT: 34.00 34.00 fdiv %st, %st(1) +# CHECK-NEXT: 34.00 34.00 fdiv %st(2), %st +# CHECK-NEXT: 34.00 34.00 fdivs (%ecx) +# CHECK-NEXT: 34.00 34.00 fdivl (%eax) +# CHECK-NEXT: 34.00 34.00 fdivp %st, %st(1) +# CHECK-NEXT: 34.00 34.00 fdivp %st, %st(2) +# CHECK-NEXT: 34.00 34.00 fidivs (%ecx) +# CHECK-NEXT: 34.00 34.00 fidivl (%eax) +# CHECK-NEXT: 34.00 34.00 fdivr %st, %st(1) +# CHECK-NEXT: 34.00 34.00 fdivr %st(2), %st +# CHECK-NEXT: 34.00 34.00 fdivrs (%ecx) +# CHECK-NEXT: 34.00 34.00 fdivrl (%eax) +# CHECK-NEXT: 34.00 34.00 fdivrp %st, %st(1) +# CHECK-NEXT: 34.00 34.00 fdivrp %st, %st(2) +# CHECK-NEXT: 34.00 34.00 fidivrs (%ecx) +# CHECK-NEXT: 34.00 34.00 fidivrl (%eax) # CHECK-NEXT: 0.50 0.50 ffree %st(0) # CHECK-NEXT: 5.00 - ficoms (%ecx) # CHECK-NEXT: 5.00 - ficoml (%eax) @@ -466,7 +466,7 @@ fyl2xp1 # CHECK-NEXT: 38.50 38.50 fscale # CHECK-NEXT: 87.00 87.00 fsin # CHECK-NEXT: 87.00 87.00 fsincos -# CHECK-NEXT: 35.50 35.50 fsqrt +# CHECK-NEXT: 71.00 - fsqrt # CHECK-NEXT: 1.00 1.00 fst %st(0) # CHECK-NEXT: 1.00 1.00 fsts (%edx) # CHECK-NEXT: 1.00 1.00 fstl (%ecx) -- 2.7.4