[X86] Ensure 256-bit inlane shuffles are set to 2 uops + half rate
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 29 Oct 2022 10:52:12 +0000 (11:52 +0100)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 29 Oct 2022 11:03:43 +0000 (12:03 +0100)
znver1 double-pumps regular (in-lane) 256-bit shuffles, so they are modelled as 2 uops at half the throughput. Cross-lane shuffles are messier and are left unchanged here.

Fixes yet another mismatch between the numbers produced by the script from D103695 and the znver1 scheduler model.

Confirmed against the AMD Software Optimization Guide (SoG), Agner Fog's instruction tables, and instlatx64.

llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s

index 43f9377..79beb3e 100644 (file)
@@ -338,10 +338,10 @@ defm : ZnWriteResFpuPair<WriteFTest,     [ZnFPU12], 2, [2], 1, 7, 1>;
 defm : ZnWriteResFpuPair<WriteFTestY,    [ZnFPU12], 4, [4], 3, 7, 2>;
 defm : X86WriteResPairUnsupported<WriteFTestZ>;
 defm : ZnWriteResFpuPair<WriteFShuffle,  [ZnFPU12], 1>;
-defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
 defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
-defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
 defm : ZnWriteResFpuPair<WriteFMul,      [ZnFPU01], 3>;
 defm : ZnWriteResFpuPair<WriteFMulX,     [ZnFPU01], 3>;
@@ -429,21 +429,21 @@ defm : ZnWriteResFpuPair<WritePMULLDY,    [ZnFPU0],  4, [4], 2>;
 defm : X86WriteResPairUnsupported<WritePMULLDZ>;
 defm : ZnWriteResFpuPair<WriteShuffle,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteShuffleX,   [ZnFPU],   1>;
-defm : ZnWriteResFpuPair<WriteShuffleY,   [ZnFPU],   1>;
+defm : ZnWriteResFpuPair<WriteShuffleY,   [ZnFPU],   1, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteShuffleZ>;
 defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU],   1>;
-defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU],   1>;
+defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU],   1, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
 defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU01], 1>;
-defm : ZnWriteResFpuPair<WriteBlendY,     [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteBlendY,     [ZnFPU01], 1, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteBlendZ>;
 defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;
 defm : ZnWriteResFpuPair<WriteVPMOV256,   [ZnFPU12],  1, [4], 3>;
 defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2>;
 defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WritePSADBWX,    [ZnFPU0],  3>;
-defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3>;
+defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3, [2], 2>;
 defm : X86WriteResPairUnsupported<WritePSADBWZ>;
 defm : ZnWriteResFpuPair<WritePHMINPOS,   [ZnFPU0],  4>;
 
index 47f86d0..421cbbf 100644 (file)
@@ -1270,8 +1270,8 @@ vzeroupper
 # CHECK-NEXT:  1      1     0.50           *            vmovd  %xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        vmovddup       %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vmovddup       (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        vmovddup       %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vmovddup       (%rax), %ymm2
+# CHECK-NEXT:  2      1     1.00                        vmovddup       %ymm0, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %ymm2
 # CHECK-NEXT:  1      1     0.25                        vmovdqa        %xmm0, %xmm2
 # CHECK-NEXT:  1      1     0.50           *            vmovdqa        %xmm0, (%rax)
 # CHECK-NEXT:  1      8     0.50    *                   vmovdqa        (%rax), %xmm2
@@ -1316,12 +1316,12 @@ vzeroupper
 # CHECK-NEXT:  1      8     0.50    *                   vmovsd (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        vmovshdup      %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vmovshdup      (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        vmovshdup      %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vmovshdup      (%rax), %ymm2
+# CHECK-NEXT:  2      1     1.00                        vmovshdup      %ymm0, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %ymm2
 # CHECK-NEXT:  1      1     0.50                        vmovsldup      %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vmovsldup      (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        vmovsldup      %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vmovsldup      (%rax), %ymm2
+# CHECK-NEXT:  2      1     1.00                        vmovsldup      %ymm0, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %ymm2
 # CHECK-NEXT:  1      1     0.50                        vmovss %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.50           *            vmovss %xmm0, (%rax)
 # CHECK-NEXT:  1      8     0.50    *                   vmovss (%rax), %xmm2
@@ -1435,18 +1435,18 @@ vzeroupper
 # CHECK-NEXT:  1      8     0.50    *                   vpermilpd      $1, (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        vpermilpd      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpermilpd      (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpermilpd      $1, %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpermilpd      $1, (%rax), %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpermilpd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpermilpd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vpermilpd      $1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vpermilpd      $1, (%rax), %ymm2
+# CHECK-NEXT:  2      1     1.00                        vpermilpd      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vpermilpd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpermilps      $1, %xmm0, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpermilps      $1, (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        vpermilps      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpermilps      (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpermilps      $1, %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpermilps      $1, (%rax), %ymm2
-# CHECK-NEXT:  1      1     0.50                        vpermilps      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpermilps      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vpermilps      $1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vpermilps      $1, (%rax), %ymm2
+# CHECK-NEXT:  2      1     1.00                        vpermilps      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vpermilps      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      2     2.00                        vpextrb        $1, %xmm0, %ecx
 # CHECK-NEXT:  2      5     3.00           *            vpextrb        $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      2     2.00                        vpextrd        $1, %xmm0, %ecx
@@ -1652,12 +1652,12 @@ vzeroupper
 # CHECK-NEXT:  1      12    0.50    *                   vrsqrtss       (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.50                        vshufpd        $1, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vshufpd        $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vshufpd        $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vshufpd        $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vshufpd        $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vshufpd        $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vshufps        $1, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vshufps        $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vshufps        $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vshufps        $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vshufps        $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vshufps        $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      20    8.00                        vsqrtpd        %xmm0, %xmm2
 # CHECK-NEXT:  1      27    8.00    *                   vsqrtpd        (%rax), %xmm2
 # CHECK-NEXT:  1      20    16.00                       vsqrtpd        %ymm0, %ymm2
@@ -1697,20 +1697,20 @@ vzeroupper
 # CHECK-NEXT:  2      10    1.00    *                   vucomiss       (%rax), %xmm1
 # CHECK-NEXT:  1      1     0.50                        vunpckhpd      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vunpckhpd      (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vunpckhpd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vunpckhpd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vunpckhpd      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vunpckhpd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vunpckhps      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vunpckhps      (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vunpckhps      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vunpckhps      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vunpckhps      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vunpckhps      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vunpcklpd      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vunpcklpd      (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vunpcklpd      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vunpcklpd      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vunpcklpd      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vunpcklpd      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vunpcklps      %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vunpcklps      (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vunpcklps      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vunpcklps      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     1.00                        vunpcklps      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     1.00    *                   vunpcklps      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.25                        vxorpd %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vxorpd (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  2      1     0.50                        vxorpd %ymm0, %ymm1, %ymm2
@@ -1738,7 +1738,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 175.00 175.00  -      -      -      -      -     154.08 192.08 217.25 321.58  -
+# CHECK-NEXT: 175.00 175.00  -      -      -      -      -     154.08 205.08 230.25 321.58  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -1982,8 +1982,8 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovd        %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovddup     %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vmovddup     (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovddup     %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vmovddup     (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vmovddup     %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vmovddup     (%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vmovdqa      %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovdqa      %xmm0, (%rax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovdqa      (%rax), %xmm2
@@ -2028,12 +2028,12 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovsd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovshdup    %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vmovshdup    (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovshdup    %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vmovshdup    (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vmovshdup    %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vmovshdup    (%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovsldup    %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vmovsldup    (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovsldup    %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vmovsldup    (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vmovsldup    %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vmovsldup    (%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vmovss       %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovss       %xmm0, (%rax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovss       (%rax), %xmm2
@@ -2147,18 +2147,18 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    $1, (%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilpd    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vpermilpd    $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpermilpd    $1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vpermilpd    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpermilpd    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    $1, %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    $1, (%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vpermilps    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vpermilps    $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpermilps    $1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vpermilps    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpermilps    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   2.50    -      -     vpextrb      $1, %xmm0, %ecx
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   4.00    -      -     vpextrb      $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   2.50    -      -     vpextrd      $1, %xmm0, %ecx
@@ -2364,12 +2364,12 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50    -      -      -     vrsqrtss     (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vshufpd      $1, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vshufpd      $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vshufpd      $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vshufpd      $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vshufpd      $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vshufpd      $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vshufps      $1, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vshufps      $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vshufps      $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vshufps      $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vshufps      $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vshufps      $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     8.00    -     vsqrtpd      %xmm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     8.00    -     vsqrtpd      (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     16.00   -     vsqrtpd      %ymm0, %ymm2
@@ -2409,20 +2409,20 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   1.00    -      -     vucomiss     (%rax), %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpckhpd    %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpckhpd    (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpckhpd    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpckhpd    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vunpckhpd    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vunpckhpd    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpckhps    %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpckhps    (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpckhps    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpckhps    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vunpckhps    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vunpckhps    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpcklpd    %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpcklpd    (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpcklpd    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpcklpd    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vunpcklpd    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vunpcklpd    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpcklps    %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpcklps    (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     vunpcklps    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     vunpcklps    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vunpcklps    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vunpcklps    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vxorpd       %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vxorpd       (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vxorpd       %ymm0, %ymm1, %ymm2
index ffd0102..0648c31 100644 (file)
@@ -484,14 +484,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpabsd (%rax), %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpabsw %ymm0, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpabsw (%rax), %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpackssdw      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpackssdw      (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpacksswb      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpacksswb      (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpackusdw      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpackusdw      (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpackuswb      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpackuswb      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpackssdw      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpackssdw      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpacksswb      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpacksswb      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpackusdw      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpackusdw      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpackuswb      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpackuswb      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpaddb %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpaddb (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpaddd %ymm0, %ymm1, %ymm2
@@ -508,8 +508,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpaddusw       (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpaddw %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpaddw (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpalignr       $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpalignr       $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpalignr       $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpalignr       $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpand  %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpand  (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpandn %ymm0, %ymm1, %ymm2
@@ -665,16 +665,16 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      11    2.00    *                   vpmuludq       (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpor   %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpor   (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      3     1.00                        vpsadbw        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      10    1.00    *                   vpsadbw        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpshufb        %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpshufb        (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpshufd        $1, %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpshufd        $1, (%rax), %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpshufhw       $1, %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpshufhw       $1, (%rax), %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpshuflw       $1, %ymm0, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpshuflw       $1, (%rax), %ymm2
+# CHECK-NEXT:  2      3     2.00                        vpsadbw        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      10    2.00    *                   vpsadbw        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpshufb        %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpshufb        (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpshufd        $1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpshufd        $1, (%rax), %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpshufhw       $1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpshufhw       $1, (%rax), %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpshuflw       $1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpshuflw       $1, (%rax), %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpsignb        %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpsignb        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpsignd        %ymm0, %ymm1, %ymm2
@@ -684,7 +684,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpslld $1, %ymm0, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpslld %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     2.00    *                   vpslld (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpslldq        $1, %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpslldq        $1, %ymm1, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpsllq $1, %ymm0, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpsllq %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     2.00    *                   vpsllq (%rax), %ymm1, %ymm2
@@ -712,7 +712,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpsrld $1, %ymm0, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpsrld %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     2.00    *                   vpsrld (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpsrldq        $1, %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpsrldq        $1, %ymm1, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpsrlq $1, %ymm0, %ymm2
 # CHECK-NEXT:  2      1     2.00                        vpsrlq %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     2.00    *                   vpsrlq (%rax), %ymm1, %ymm2
@@ -743,22 +743,22 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpsubusw       (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpsubw %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpsubw (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpckhbw     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpckhbw     (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpckhdq     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpckhdq     (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpckhqdq    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpckhqdq    (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpckhwd     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpckhwd     (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpcklbw     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpcklbw     (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpckldq     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpckldq     (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpcklqdq    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpcklqdq    (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      1     0.25                        vpunpcklwd     %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      8     0.50    *                   vpunpcklwd     (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpckhbw     %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpckhbw     (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpckhdq     %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpckhdq     (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpckhqdq    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpckhqdq    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpckhwd     %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpckhwd     (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpcklbw     %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpcklbw     (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpckldq     %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpckldq     (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpcklqdq    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpcklqdq    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      1     0.50                        vpunpcklwd     %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     0.50    *                   vpunpcklwd     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpxor  %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpxor  (%rax), %ymm1, %ymm2
 
@@ -778,7 +778,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 67.00  67.00   -      -      -      -      -     117.17 183.67 163.00 65.17   -
+# CHECK-NEXT: 67.00  67.00   -      -      -      -      -     128.17 192.67 172.00 74.17   -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -806,14 +806,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpabsd       (%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpabsw       %ymm0, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpabsw       (%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpackssdw    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpackssdw    (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpacksswb    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpacksswb    (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpackusdw    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpackusdw    (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpackuswb    %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpackuswb    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpackssdw    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpackssdw    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpacksswb    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpacksswb    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpackusdw    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpackusdw    (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpackuswb    %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpackuswb    (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpaddb       %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpaddb       (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpaddd       %ymm0, %ymm1, %ymm2
@@ -830,8 +830,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpaddusw     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpaddw       %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpaddw       (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpalignr     $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpalignr     $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpalignr     $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpalignr     $1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpand        %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpand        (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpandn       %ymm0, %ymm1, %ymm2
@@ -987,16 +987,16 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     2.00    -      -      -      -     vpmuludq     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpor %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpor (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpsadbw      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     vpsadbw      (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshufb      %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshufb      (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshufd      $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshufd      $1, (%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshufhw     $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshufhw     $1, (%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshuflw     $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpshuflw     $1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     2.00    -      -      -      -     vpsadbw      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     2.00    -      -      -      -     vpsadbw      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshufb      %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshufb      (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshufd      $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshufd      $1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshufhw     $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshufhw     $1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshuflw     $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpshuflw     $1, (%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsignb      %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsignb      (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsignd      %ymm0, %ymm1, %ymm2
@@ -1006,7 +1006,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpslld       $1, %ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpslld       %xmm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -     2.00    -      -     vpslld       (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpslldq      $1, %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpslldq      $1, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpsllq       $1, %ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpsllq       %xmm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -     2.00    -      -     vpsllq       (%rax), %ymm1, %ymm2
@@ -1034,7 +1034,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpsrld       $1, %ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpsrld       %xmm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -     2.00    -      -     vpsrld       (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpsrldq      $1, %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsrldq      $1, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpsrlq       $1, %ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     2.00    -      -     vpsrlq       %xmm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -     2.00    -      -     vpsrlq       (%rax), %ymm1, %ymm2
@@ -1065,21 +1065,21 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsubusw     (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsubw       %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpsubw       (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhbw   %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhbw   (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhdq   %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhdq   (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhqdq  %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhqdq  (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhwd   %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhwd   (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpcklbw   %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpcklbw   (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckldq   %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckldq   (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpcklqdq  %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpcklqdq  (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpcklwd   %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpcklwd   (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhbw   %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhbw   (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhdq   %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhdq   (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhqdq  %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhqdq  (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhwd   %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckhwd   (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpcklbw   %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpcklbw   (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckldq   %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpckldq   (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpcklqdq  %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpcklqdq  (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpcklwd   %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpunpcklwd   (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpxor        %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpxor        (%rax), %ymm1, %ymm2