[X86] Set some more plausible latencies for horizontal add/subs on znver1
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 8 May 2022 14:48:42 +0000 (15:48 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 8 May 2022 14:48:42 +0000 (15:48 +0100)
These are all microcoded/multi-pipe nightmares on Ryzen, but we shouldn't just be using the WriteMicrocoded class, which is meant for REALLY bad microcoded nightmares. Instead, use approximately the same latencies as znver2 (Agner and uops.info both suggest similar values), and make sure both the znver1 and znver2 models use the FPU defs.

Fixes #53242

llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/lib/Target/X86/X86ScheduleZnver2.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-sse3.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-ssse3.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-sse3.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-ssse3.s

index a7e0a33..805ba11 100644 (file)
@@ -1088,12 +1088,11 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
 
 // HADD, HSUB PS/PD
 // PHADD|PHSUB (S) W/D.
-def : SchedAlias<WritePHAdd,    ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddLd,  ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddX,   ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddY,   ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;
+defm : ZnWriteResFpuPair<WriteFHAdd, [], 7>;
+defm : ZnWriteResFpuPair<WriteFHAddY, [], 7>;
+defm : ZnWriteResFpuPair<WritePHAdd, [], 3>;
+defm : ZnWriteResFpuPair<WritePHAddX, [], 3>;
+defm : ZnWriteResFpuPair<WritePHAddY, [], 3>;
 
 // PCMPGTQ.
 def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
@@ -1433,12 +1432,6 @@ def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
 
 //-- Arithmetic instructions --//
 
-// HADD, HSUB PS/PD
-def : SchedAlias<WriteFHAdd,    ZnWriteMicrocoded>;
-def : SchedAlias<WriteFHAddLd,  ZnWriteMicrocoded>;
-def : SchedAlias<WriteFHAddY,   ZnWriteMicrocoded>;
-def : SchedAlias<WriteFHAddYLd, ZnWriteMicrocoded>;
-
 // VDIVPS.
 // TODO - convert to ZnWriteResFpuPair
 // y,y,y.
index 5051d4c..4cf4d0f 100644 (file)
@@ -1104,11 +1104,11 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
 
 // HADD, HSUB PS/PD
 // PHADD|PHSUB (S) W/D.
-defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
-defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
-defm : Zn2WriteResPair<WritePHAdd, [], 3>;
-defm : Zn2WriteResPair<WritePHAddX, [], 3>;
-defm : Zn2WriteResPair<WritePHAddY, [], 3>;
+defm : Zn2WriteResFpuPair<WriteFHAdd, [], 7>;
+defm : Zn2WriteResFpuPair<WriteFHAddY, [], 7>;
+defm : Zn2WriteResFpuPair<WritePHAdd, [], 3>;
+defm : Zn2WriteResFpuPair<WritePHAddX, [], 3>;
+defm : Zn2WriteResFpuPair<WritePHAddY, [], 3>;
 
 // PCMPGTQ.
 def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
index 7173135..1f10df4 100644 (file)
@@ -1196,22 +1196,22 @@ vzeroupper
 # CHECK-NEXT:  2      8     0.50           *            vextractf128   $1, %ymm0, (%rax)
 # CHECK-NEXT:  2      2     2.00                        vextractps     $1, %xmm0, %ecx
 # CHECK-NEXT:  2      5     2.50           *            vextractps     $1, %xmm0, (%rax)
-# CHECK-NEXT:  1      100   0.25                        vhaddpd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vhaddpd        (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vhaddpd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vhaddpd        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vhaddps        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vhaddps        (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vhaddps        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vhaddps        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vhsubpd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vhsubpd        (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vhsubpd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vhsubpd        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vhsubps        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vhsubps        (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vhsubps        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vhsubps        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      7     0.25                        vhaddpd        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   vhaddpd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      7     0.25                        vhaddpd        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.50    *                   vhaddpd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      7     0.25                        vhaddps        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   vhaddps        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      7     0.25                        vhaddps        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.50    *                   vhaddps        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      7     0.25                        vhsubpd        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   vhsubpd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      7     0.25                        vhsubpd        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.50    *                   vhsubpd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      7     0.25                        vhsubps        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   vhsubps        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      7     0.25                        vhsubps        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.50    *                   vhsubps        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      2     0.67                        vinsertf128    $1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      9     0.67    *                   vinsertf128    $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vinsertps      $1, %xmm0, %xmm1, %xmm2
@@ -1455,20 +1455,20 @@ vzeroupper
 # CHECK-NEXT:  2      5     3.00           *            vpextrq        $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      2     2.00                        vpextrw        $1, %xmm0, %ecx
 # CHECK-NEXT:  2      5     3.00           *            vpextrw        $1, %xmm0, (%rax)
-# CHECK-NEXT:  1      100   0.25                        vphaddd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vphaddd        (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vphaddsw       %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vphaddsw       (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vphaddw        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vphaddw        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     0.25                        vphaddd        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   vphaddd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     0.25                        vphaddsw       %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   vphaddsw       (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     0.25                        vphaddw        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   vphaddw        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      4     1.00                        vphminposuw    %xmm0, %xmm2
 # CHECK-NEXT:  1      11    1.00    *                   vphminposuw    (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        vphsubd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vphsubd        (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vphsubsw       %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vphsubsw       (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25                        vphsubw        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   vphsubw        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     0.25                        vphsubd        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   vphsubd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     0.25                        vphsubsw       %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   vphsubsw       (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     0.25                        vphsubw        %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   vphsubw        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpinsrb        $1, %eax, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpinsrb        $1, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpinsrd        $1, %eax, %xmm1, %xmm2
@@ -1738,7 +1738,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 168.00 168.00  -      -      -      -      -     178.58 171.08 176.25 505.08  -
+# CHECK-NEXT: 175.00 175.00  -      -      -      -      -     178.58 171.08 176.25 505.08  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -1909,21 +1909,21 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   2.50    -      -     vextractps   $1, %xmm0, %ecx
 # CHECK-NEXT: 2.50   2.50    -      -      -      -      -      -     0.50   2.50    -      -     vextractps   $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddpd      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddpd      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhaddpd      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddpd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddpd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhaddpd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddps      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddps      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhaddps      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddps      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhaddps      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhaddps      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubpd      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubpd      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhsubpd      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubpd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubpd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhsubpd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubps      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubps      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhsubps      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubps      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vhsubps      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vhsubps      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.67   0.67    -     0.67    -     vinsertf128  $1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.67   0.67    -     0.67    -     vinsertf128  $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vinsertps    $1, %xmm0, %xmm1, %xmm2
@@ -2168,19 +2168,19 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   2.50    -      -     vpextrw      $1, %xmm0, %ecx
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   4.00    -      -     vpextrw      $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddd      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddd      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphaddd      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddsw     %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddsw     (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphaddsw     (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddw      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddw      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphaddw      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vphminposuw  %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     vphminposuw  (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubd      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubd      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphsubd      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubsw     %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubsw     (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphsubsw     (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubw      %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubw      (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphsubw      (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpinsrb      $1, %eax, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpinsrb      $1, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpinsrd      $1, %eax, %xmm1, %xmm2
index 3d7f92f..6bd2baa 100644 (file)
@@ -576,18 +576,18 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherqd     %xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherqq     %xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherqq     %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  1      100   0.25                        vphaddd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vphaddd        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vphaddsw       %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vphaddsw       (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vphaddw        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vphaddw        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vphsubd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vphsubd        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vphsubsw       %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vphsubsw       (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25                        vphsubw        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      100   0.25    *                   vphsubw        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     0.25                        vphaddd        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.50    *                   vphaddd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     0.25                        vphaddsw       %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.50    *                   vphaddsw       (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     0.25                        vphaddw        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.50    *                   vphaddw        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     0.25                        vphsubd        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.50    *                   vphsubd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     0.25                        vphsubsw       %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.50    *                   vphsubsw       (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     0.25                        vphsubw        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.50    *                   vphsubw        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      4     1.00                        vpmaddubsw     %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      11    1.00    *                   vpmaddubsw     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      4     1.00                        vpmaddwd       %ymm0, %ymm1, %ymm2
@@ -778,7 +778,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 64.00  64.00   -      -      -      -      -     73.17  75.67  85.00  43.17   -
+# CHECK-NEXT: 67.00  67.00   -      -      -      -      -     73.17  75.67  85.00  43.17   -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -899,17 +899,17 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpgatherqq   %xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpgatherqq   %ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphaddd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddsw     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddsw     (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphaddsw     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddw      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphaddw      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphaddw      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphsubd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubsw     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubsw     (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphsubsw     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubw      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vphsubw      (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vphsubw      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpmaddubsw   %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     vpmaddubsw   (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpmaddwd     %ymm0, %ymm1, %ymm2
index 8de91b6..8e05fcf 100644 (file)
@@ -47,14 +47,14 @@ mwait
 # CHECK-NEXT:  1      10    1.00    *                   addsubpd       (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        addsubps       %xmm0, %xmm2
 # CHECK-NEXT:  1      10    1.00    *                   addsubps       (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        haddpd %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   haddpd (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        haddps %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   haddps (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        hsubpd %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   hsubpd (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        hsubps %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   hsubps (%rax), %xmm2
+# CHECK-NEXT:  1      7     0.25                        haddpd %xmm0, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   haddpd (%rax), %xmm2
+# CHECK-NEXT:  1      7     0.25                        haddps %xmm0, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   haddps (%rax), %xmm2
+# CHECK-NEXT:  1      7     0.25                        hsubpd %xmm0, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   hsubpd (%rax), %xmm2
+# CHECK-NEXT:  1      7     0.25                        hsubps %xmm0, %xmm2
+# CHECK-NEXT:  1      14    0.50    *                   hsubps (%rax), %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   lddqu  (%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     0.50                        movddup        %xmm0, %xmm2
@@ -81,7 +81,7 @@ mwait
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 2.50   2.50    -      -      -      -      -     4.00   2.00   2.00    -      -
+# CHECK-NEXT: 4.50   4.50    -      -      -      -      -     4.00   2.00   2.00    -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -90,13 +90,13 @@ mwait
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     addsubps     %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     addsubps     (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     haddpd       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     haddpd       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     haddpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     haddps       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     haddps       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     haddps       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     hsubpd       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     hsubpd       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     hsubpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     hsubps       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     hsubps       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     hsubps       (%rax), %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     lddqu        (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     monitor
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     movddup      %xmm0, %xmm2
index 8febb07..2c76369 100644 (file)
@@ -122,30 +122,30 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   palignr        $1, (%rax), %mm2
 # CHECK-NEXT:  1      1     0.25                        palignr        $1, %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   palignr        $1, (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        phaddd %mm0, %mm2
-# CHECK-NEXT:  1      100   0.25    *                   phaddd (%rax), %mm2
-# CHECK-NEXT:  1      100   0.25                        phaddd %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   phaddd (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        phaddsw        %mm0, %mm2
-# CHECK-NEXT:  1      100   0.25    *                   phaddsw        (%rax), %mm2
-# CHECK-NEXT:  1      100   0.25                        phaddsw        %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   phaddsw        (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        phaddw %mm0, %mm2
-# CHECK-NEXT:  1      100   0.25    *                   phaddw (%rax), %mm2
-# CHECK-NEXT:  1      100   0.25                        phaddw %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   phaddw (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        phsubd %mm0, %mm2
-# CHECK-NEXT:  1      100   0.25    *                   phsubd (%rax), %mm2
-# CHECK-NEXT:  1      100   0.25                        phsubd %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   phsubd (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        phsubsw        %mm0, %mm2
-# CHECK-NEXT:  1      100   0.25    *                   phsubsw        (%rax), %mm2
-# CHECK-NEXT:  1      100   0.25                        phsubsw        %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   phsubsw        (%rax), %xmm2
-# CHECK-NEXT:  1      100   0.25                        phsubw %mm0, %mm2
-# CHECK-NEXT:  1      100   0.25    *                   phsubw (%rax), %mm2
-# CHECK-NEXT:  1      100   0.25                        phsubw %xmm0, %xmm2
-# CHECK-NEXT:  1      100   0.25    *                   phsubw (%rax), %xmm2
+# CHECK-NEXT:  1      3     0.25                        phaddd %mm0, %mm2
+# CHECK-NEXT:  1      10    0.50    *                   phaddd (%rax), %mm2
+# CHECK-NEXT:  1      3     0.25                        phaddd %xmm0, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   phaddd (%rax), %xmm2
+# CHECK-NEXT:  1      3     0.25                        phaddsw        %mm0, %mm2
+# CHECK-NEXT:  1      10    0.50    *                   phaddsw        (%rax), %mm2
+# CHECK-NEXT:  1      3     0.25                        phaddsw        %xmm0, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   phaddsw        (%rax), %xmm2
+# CHECK-NEXT:  1      3     0.25                        phaddw %mm0, %mm2
+# CHECK-NEXT:  1      10    0.50    *                   phaddw (%rax), %mm2
+# CHECK-NEXT:  1      3     0.25                        phaddw %xmm0, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   phaddw (%rax), %xmm2
+# CHECK-NEXT:  1      3     0.25                        phsubd %mm0, %mm2
+# CHECK-NEXT:  1      10    0.50    *                   phsubd (%rax), %mm2
+# CHECK-NEXT:  1      3     0.25                        phsubd %xmm0, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   phsubd (%rax), %xmm2
+# CHECK-NEXT:  1      3     0.25                        phsubsw        %mm0, %mm2
+# CHECK-NEXT:  1      10    0.50    *                   phsubsw        (%rax), %mm2
+# CHECK-NEXT:  1      3     0.25                        phsubsw        %xmm0, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   phsubsw        (%rax), %xmm2
+# CHECK-NEXT:  1      3     0.25                        phsubw %mm0, %mm2
+# CHECK-NEXT:  1      10    0.50    *                   phsubw (%rax), %mm2
+# CHECK-NEXT:  1      3     0.25                        phsubw %xmm0, %xmm2
+# CHECK-NEXT:  1      10    0.50    *                   phsubw (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw      %mm0, %mm2
 # CHECK-NEXT:  1      11    1.00    *                   pmaddubsw      (%rax), %mm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw      %xmm0, %xmm2
@@ -187,7 +187,7 @@ psignw      (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 10.00  10.00   -      -      -      -      -     16.00  8.00   8.00   8.00    -
+# CHECK-NEXT: 16.00  16.00   -      -      -      -      -     16.00  8.00   8.00   8.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -208,29 +208,29 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     palignr      $1, %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     palignr      $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddd       %mm0, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddd       (%rax), %mm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phaddd       (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddd       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddd       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phaddd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddsw      %mm0, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddsw      (%rax), %mm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phaddsw      (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddsw      %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddsw      (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phaddsw      (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddw       %mm0, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddw       (%rax), %mm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phaddw       (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddw       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phaddw       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phaddw       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubd       %mm0, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubd       (%rax), %mm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phsubd       (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubd       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubd       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phsubd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubsw      %mm0, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubsw      (%rax), %mm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phsubsw      (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubsw      %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubsw      (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phsubsw      (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubw       %mm0, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubw       (%rax), %mm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phsubw       (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubw       %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     phsubw       (%rax), %xmm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     phsubw       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     pmaddubsw    %mm0, %mm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     pmaddubsw    (%rax), %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     pmaddubsw    %xmm0, %xmm2
index 14cd7ed..16003db 100644 (file)
@@ -1197,21 +1197,21 @@ vzeroupper
 # CHECK-NEXT:  1      2     2.00                        vextractps     $1, %xmm0, %ecx
 # CHECK-NEXT:  2      5     2.00           *            vextractps     $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      7     0.25                        vhaddpd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   vhaddpd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   vhaddpd        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      7     0.25                        vhaddpd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      11    0.33    *                   vhaddpd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.33    *                   vhaddpd        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      7     0.25                        vhaddps        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   vhaddps        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   vhaddps        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      7     0.25                        vhaddps        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      11    0.33    *                   vhaddps        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.33    *                   vhaddps        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      7     0.25                        vhsubpd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   vhsubpd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   vhsubpd        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      7     0.25                        vhsubpd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      11    0.33    *                   vhsubpd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.33    *                   vhsubpd        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      7     0.25                        vhsubps        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   vhsubps        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   vhsubps        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      7     0.25                        vhsubps        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      11    0.33    *                   vhsubps        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      14    0.33    *                   vhsubps        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      2     0.33                        vinsertf128    $1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      9     0.33    *                   vinsertf128    $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vinsertps      $1, %xmm0, %xmm1, %xmm2
@@ -1456,19 +1456,19 @@ vzeroupper
 # CHECK-NEXT:  1      2     2.00                        vpextrw        $1, %xmm0, %ecx
 # CHECK-NEXT:  2      5     3.00           *            vpextrw        $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      3     0.25                        vphaddd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   vphaddd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   vphaddd        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     0.25                        vphaddsw       %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   vphaddsw       (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   vphaddsw       (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     0.25                        vphaddw        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   vphaddw        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   vphaddw        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      4     1.00                        vphminposuw    %xmm0, %xmm2
 # CHECK-NEXT:  1      11    1.00    *                   vphminposuw    (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        vphsubd        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   vphsubd        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   vphsubd        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     0.25                        vphsubsw       %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   vphsubsw       (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   vphsubsw       (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     0.25                        vphsubw        %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   vphsubw        (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   vphsubw        (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpinsrb        $1, %eax, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.33    *                   vpinsrb        $1, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpinsrd        $1, %eax, %xmm1, %xmm2
index f958c1f..01831f5 100644 (file)
@@ -577,17 +577,17 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherqq     %xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherqq     %ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  1      3     0.25                        vphaddd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      7     0.33    *                   vphaddd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.33    *                   vphaddd        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     0.25                        vphaddsw       %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      7     0.33    *                   vphaddsw       (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.33    *                   vphaddsw       (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     0.25                        vphaddw        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      7     0.33    *                   vphaddw        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.33    *                   vphaddw        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     0.25                        vphsubd        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      7     0.33    *                   vphsubd        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.33    *                   vphsubd        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     0.25                        vphsubsw       %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      7     0.33    *                   vphsubsw       (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.33    *                   vphsubsw       (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     0.25                        vphsubw        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  2      7     0.33    *                   vphsubw        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      10    0.33    *                   vphsubw        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      4     1.00                        vpmaddubsw     %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      11    1.00    *                   vpmaddubsw     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      4     1.00                        vpmaddwd       %ymm0, %ymm1, %ymm2
index 5419f06..4c250d4 100644 (file)
@@ -48,13 +48,13 @@ mwait
 # CHECK-NEXT:  1      3     1.00                        addsubps       %xmm0, %xmm2
 # CHECK-NEXT:  1      10    1.00    *                   addsubps       (%rax), %xmm2
 # CHECK-NEXT:  1      7     0.25                        haddpd %xmm0, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   haddpd (%rax), %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   haddpd (%rax), %xmm2
 # CHECK-NEXT:  1      7     0.25                        haddps %xmm0, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   haddps (%rax), %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   haddps (%rax), %xmm2
 # CHECK-NEXT:  1      7     0.25                        hsubpd %xmm0, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   hsubpd (%rax), %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   hsubpd (%rax), %xmm2
 # CHECK-NEXT:  1      7     0.25                        hsubps %xmm0, %xmm2
-# CHECK-NEXT:  2      11    0.33    *                   hsubps (%rax), %xmm2
+# CHECK-NEXT:  1      14    0.33    *                   hsubps (%rax), %xmm2
 # CHECK-NEXT:  1      8     0.33    *                   lddqu  (%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     0.50                        movddup        %xmm0, %xmm2
index cdcc493..9af48dc 100644 (file)
@@ -123,29 +123,29 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.25                        palignr        $1, %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.33    *                   palignr        $1, (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        phaddd %mm0, %mm2
-# CHECK-NEXT:  2      7     0.33    *                   phaddd (%rax), %mm2
+# CHECK-NEXT:  1      10    0.33    *                   phaddd (%rax), %mm2
 # CHECK-NEXT:  1      3     0.25                        phaddd %xmm0, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   phaddd (%rax), %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   phaddd (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        phaddsw        %mm0, %mm2
-# CHECK-NEXT:  2      7     0.33    *                   phaddsw        (%rax), %mm2
+# CHECK-NEXT:  1      10    0.33    *                   phaddsw        (%rax), %mm2
 # CHECK-NEXT:  1      3     0.25                        phaddsw        %xmm0, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   phaddsw        (%rax), %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   phaddsw        (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        phaddw %mm0, %mm2
-# CHECK-NEXT:  2      7     0.33    *                   phaddw (%rax), %mm2
+# CHECK-NEXT:  1      10    0.33    *                   phaddw (%rax), %mm2
 # CHECK-NEXT:  1      3     0.25                        phaddw %xmm0, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   phaddw (%rax), %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   phaddw (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        phsubd %mm0, %mm2
-# CHECK-NEXT:  2      7     0.33    *                   phsubd (%rax), %mm2
+# CHECK-NEXT:  1      10    0.33    *                   phsubd (%rax), %mm2
 # CHECK-NEXT:  1      3     0.25                        phsubd %xmm0, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   phsubd (%rax), %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   phsubd (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        phsubsw        %mm0, %mm2
-# CHECK-NEXT:  2      7     0.33    *                   phsubsw        (%rax), %mm2
+# CHECK-NEXT:  1      10    0.33    *                   phsubsw        (%rax), %mm2
 # CHECK-NEXT:  1      3     0.25                        phsubsw        %xmm0, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   phsubsw        (%rax), %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   phsubsw        (%rax), %xmm2
 # CHECK-NEXT:  1      3     0.25                        phsubw %mm0, %mm2
-# CHECK-NEXT:  2      7     0.33    *                   phsubw (%rax), %mm2
+# CHECK-NEXT:  1      10    0.33    *                   phsubw (%rax), %mm2
 # CHECK-NEXT:  1      3     0.25                        phsubw %xmm0, %xmm2
-# CHECK-NEXT:  2      7     0.33    *                   phsubw (%rax), %xmm2
+# CHECK-NEXT:  1      10    0.33    *                   phsubw (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw      %mm0, %mm2
 # CHECK-NEXT:  1      11    1.00    *                   pmaddubsw      (%rax), %mm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw      %xmm0, %xmm2