From: Simon Pilgrim Date: Fri, 17 Sep 2021 10:51:46 +0000 (+0100) Subject: [X86][Atom] Fix integer shuffles uops, latency and throughput X-Git-Tag: upstream/15.0.7~31248 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5ebe95e256737506f152395b5a3eb66cf448e969;p=platform%2Fupstream%2Fllvm.git [X86][Atom] Fix integer shuffles uops, latency and throughput The MMX pack/unpack shuffles don't need an override - they have the same behaviour as other shuffles (Port0 only). The SSE pslldq/psrldq shuffles don't need an override - they have the same behaviour as other shuffles (Port0 only). The SSE pshufb shuffles use 4 uops (+1 load). Noticed the pslldq/psrldq issue while trying to improve reduction costs via the D103695 helper script, and fixed the others while reviewing. Confirmed with Intel AoM / Agner / InstLatX64. --- diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 796baa2..e9020f5 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -424,7 +424,7 @@ defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; @@ -558,10 +558,7 @@ def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> { def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT, LFENCE, STOSB, STOSL, STOSQ, STOSW, - MOVSSrr, MOVSSrr_REV, - PSLLDQri, PSRLDQri)>; -def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr", - "MMX_PUNPCKH(BW|DQ|WD)irr")>; + MOVSSrr, MOVSSrr_REV)>; def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> { let Latency = 2; diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s index 1bc1ab0..bc929e1 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s +++ 
b/llvm/test/tools/llvm-mca/X86/Atom/resources-mmx.s @@ -173,11 +173,11 @@ pxor (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 * movq (%rax), %mm2 # CHECK-NEXT: 1 3 3.00 movq %mm0, %rcx # CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax) -# CHECK-NEXT: 1 1 0.50 packsswb %mm0, %mm2 +# CHECK-NEXT: 1 1 1.00 packsswb %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * packsswb (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 packssdw %mm0, %mm2 +# CHECK-NEXT: 1 1 1.00 packssdw %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * packssdw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 packuswb %mm0, %mm2 +# CHECK-NEXT: 1 1 1.00 packuswb %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * packuswb (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 paddb %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * paddb (%rax), %mm2 @@ -255,11 +255,11 @@ pxor (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 * psubusw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 psubw %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * psubw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 punpckhbw %mm0, %mm2 +# CHECK-NEXT: 1 1 1.00 punpckhbw %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * punpckhbw (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 punpckhdq %mm0, %mm2 +# CHECK-NEXT: 1 1 1.00 punpckhdq %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * punpckhdq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 punpckhwd %mm0, %mm2 +# CHECK-NEXT: 1 1 1.00 punpckhwd %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * punpckhwd (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 punpcklbw %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * punpcklbw (%rax), %mm2 @@ -276,7 +276,7 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] -# CHECK-NEXT: 103.50 41.50 +# CHECK-NEXT: 106.50 38.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -289,11 +289,11 @@ pxor (%rax), %mm2 # CHECK-NEXT: 1.00 - movq (%rax), %mm2 # CHECK-NEXT: 3.00 - movq %mm0, %rcx # CHECK-NEXT: 1.00 - movq %mm0, (%rax) -# CHECK-NEXT: 0.50 0.50 packsswb %mm0, %mm2 +# CHECK-NEXT: 1.00 - packsswb %mm0, %mm2 # CHECK-NEXT: 1.00 - packsswb (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 packssdw %mm0, %mm2 +# CHECK-NEXT: 1.00 - packssdw %mm0, %mm2 # 
CHECK-NEXT: 1.00 - packssdw (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 packuswb %mm0, %mm2 +# CHECK-NEXT: 1.00 - packuswb %mm0, %mm2 # CHECK-NEXT: 1.00 - packuswb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 paddb %mm0, %mm2 # CHECK-NEXT: 1.00 - paddb (%rax), %mm2 @@ -371,11 +371,11 @@ pxor (%rax), %mm2 # CHECK-NEXT: 1.00 - psubusw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 psubw %mm0, %mm2 # CHECK-NEXT: 1.00 - psubw (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 punpckhbw %mm0, %mm2 +# CHECK-NEXT: 1.00 - punpckhbw %mm0, %mm2 # CHECK-NEXT: 1.00 - punpckhbw (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 punpckhdq %mm0, %mm2 +# CHECK-NEXT: 1.00 - punpckhdq %mm0, %mm2 # CHECK-NEXT: 1.00 - punpckhdq (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 punpckhwd %mm0, %mm2 +# CHECK-NEXT: 1.00 - punpckhwd %mm0, %mm2 # CHECK-NEXT: 1.00 - punpckhwd (%rax), %mm2 # CHECK-NEXT: 1.00 - punpcklbw %mm0, %mm2 # CHECK-NEXT: 1.00 - punpcklbw (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s index 4536b7f..2cd1d8d 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s @@ -597,7 +597,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2 # CHECK-NEXT: 1 2 1.00 pslld %xmm0, %xmm2 # CHECK-NEXT: 1 3 2.00 * pslld (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2 # CHECK-NEXT: 1 2 1.00 psllq %xmm0, %xmm2 # CHECK-NEXT: 1 3 2.00 * psllq (%rax), %xmm2 @@ -613,7 +613,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2 # CHECK-NEXT: 1 2 1.00 psrld %xmm0, %xmm2 # CHECK-NEXT: 1 3 2.00 * psrld (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2 +# CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2 # CHECK-NEXT: 1 2 1.00 psrlq %xmm0, %xmm2 # CHECK-NEXT: 1 3 2.00 * psrlq (%rax), %xmm2 @@ -681,7 +681,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # 
CHECK-NEXT: [0] [1] -# CHECK-NEXT: 912.00 774.00 +# CHECK-NEXT: 913.00 773.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] Instructions: @@ -875,7 +875,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - pslld $1, %xmm2 # CHECK-NEXT: 1.00 1.00 pslld %xmm0, %xmm2 # CHECK-NEXT: 2.00 2.00 pslld (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 pslldq $1, %xmm2 +# CHECK-NEXT: 1.00 - pslldq $1, %xmm2 # CHECK-NEXT: 1.00 - psllq $1, %xmm2 # CHECK-NEXT: 1.00 1.00 psllq %xmm0, %xmm2 # CHECK-NEXT: 2.00 2.00 psllq (%rax), %xmm2 @@ -891,7 +891,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - psrld $1, %xmm2 # CHECK-NEXT: 1.00 1.00 psrld %xmm0, %xmm2 # CHECK-NEXT: 2.00 2.00 psrld (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 psrldq $1, %xmm2 +# CHECK-NEXT: 1.00 - psrldq $1, %xmm2 # CHECK-NEXT: 1.00 - psrlq $1, %xmm2 # CHECK-NEXT: 1.00 1.00 psrlq %xmm0, %xmm2 # CHECK-NEXT: 2.00 2.00 psrlq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s index dc2e41a..b04be1a 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-ssse3.s @@ -156,8 +156,8 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 5 2.00 * pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * pshufb (%rax), %mm2 -# CHECK-NEXT: 1 4 3.00 pshufb %xmm0, %xmm2 -# CHECK-NEXT: 1 5 4.00 * pshufb (%rax), %xmm2 +# CHECK-NEXT: 4 4 3.00 pshufb %xmm0, %xmm2 +# CHECK-NEXT: 5 5 4.00 * pshufb (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 psignb %mm0, %mm2 # CHECK-NEXT: 1 1 1.00 * psignb (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2