[MCA][X86] Add AVX512 movddup/movshdup/movsldup instruction test coverage
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 13 Dec 2021 18:01:19 +0000 (18:01 +0000)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 13 Dec 2021 18:04:56 +0000 (18:04 +0000)
As noted on D115547

llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s

index e91da22..992fd9e 100644 (file)
@@ -196,6 +196,27 @@ vminps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vminps            (%rax), %zmm17, %zmm19 {z}{k1}
 vminps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vmovddup          %zmm16, %zmm19
+vmovddup          (%rax), %zmm19
+vmovddup          %zmm16, %zmm19 {k1}
+vmovddup          (%rax), %zmm19 {k1}
+vmovddup          %zmm16, %zmm19 {z}{k1}
+vmovddup          (%rax), %zmm19 {z}{k1}
+
+vmovshdup         %zmm16, %zmm19
+vmovshdup         (%rax), %zmm19
+vmovshdup         %zmm16, %zmm19 {k1}
+vmovshdup         (%rax), %zmm19 {k1}
+vmovshdup         %zmm16, %zmm19 {z}{k1}
+vmovshdup         (%rax), %zmm19 {z}{k1}
+
+vmovsldup         %zmm16, %zmm19
+vmovsldup         (%rax), %zmm19
+vmovsldup         %zmm16, %zmm19 {k1}
+vmovsldup         (%rax), %zmm19 {k1}
+vmovsldup         %zmm16, %zmm19 {z}{k1}
+vmovsldup         (%rax), %zmm19 {z}{k1}
+
 vmulpd            %zmm16, %zmm17, %zmm19
 vmulpd            (%rax), %zmm17, %zmm19
 vmulpd            (%rax){1to8}, %zmm17, %zmm19
@@ -947,6 +968,24 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vminps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      5     1.00                        vmulpd %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vmulpd (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vmulpd (%rax){1to8}, %zmm17, %zmm19
@@ -1464,7 +1503,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1506.00 138.67 201.67  -    408.67 215.00 215.00
+# CHECK-NEXT:  -     1506.00 138.67 201.67  -    426.67 219.50 219.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1640,6 +1679,24 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vminps   %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vminps   (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vminps   (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %zmm16, %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %zmm16, %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %zmm16, %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vmulpd   %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vmulpd   (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vmulpd   (%rax){1to8}, %zmm17, %zmm19
index 65b6d41..f6af2f6 100644 (file)
@@ -300,6 +300,48 @@ vminps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vminps            (%rax), %ymm17, %ymm19 {z}{k1}
 vminps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vmovddup          %xmm16, %xmm19
+vmovddup          (%rax), %xmm19
+vmovddup          %xmm16, %xmm19 {k1}
+vmovddup          (%rax), %xmm19 {k1}
+vmovddup          %xmm16, %xmm19 {z}{k1}
+vmovddup          (%rax), %xmm19 {z}{k1}
+
+vmovddup          %ymm16, %ymm19
+vmovddup          (%rax), %ymm19
+vmovddup          %ymm16, %ymm19 {k1}
+vmovddup          (%rax), %ymm19 {k1}
+vmovddup          %ymm16, %ymm19 {z}{k1}
+vmovddup          (%rax), %ymm19 {z}{k1}
+
+vmovshdup         %xmm16, %xmm19
+vmovshdup         (%rax), %xmm19
+vmovshdup         %xmm16, %xmm19 {k1}
+vmovshdup         (%rax), %xmm19 {k1}
+vmovshdup         %xmm16, %xmm19 {z}{k1}
+vmovshdup         (%rax), %xmm19 {z}{k1}
+
+vmovshdup         %ymm16, %ymm19
+vmovshdup         (%rax), %ymm19
+vmovshdup         %ymm16, %ymm19 {k1}
+vmovshdup         (%rax), %ymm19 {k1}
+vmovshdup         %ymm16, %ymm19 {z}{k1}
+vmovshdup         (%rax), %ymm19 {z}{k1}
+
+vmovsldup         %xmm16, %xmm19
+vmovsldup         (%rax), %xmm19
+vmovsldup         %xmm16, %xmm19 {k1}
+vmovsldup         (%rax), %xmm19 {k1}
+vmovsldup         %xmm16, %xmm19 {z}{k1}
+vmovsldup         (%rax), %xmm19 {z}{k1}
+
+vmovsldup         %ymm16, %ymm19
+vmovsldup         (%rax), %ymm19
+vmovsldup         %ymm16, %ymm19 {k1}
+vmovsldup         (%rax), %ymm19 {k1}
+vmovsldup         %ymm16, %ymm19 {z}{k1}
+vmovsldup         (%rax), %ymm19 {z}{k1}
+
 vmulpd            %xmm16, %xmm17, %xmm19
 vmulpd            (%rax), %xmm17, %xmm19
 vmulpd            (%rax){1to2}, %xmm17, %xmm19
@@ -1461,6 +1503,42 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vminps %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vminps (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19
+# CHECK-NEXT:  2      7     1.00    *                   vmovddup       (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     1.00    *                   vmovddup       (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     1.00    *                   vmovddup       (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vmovddup       (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19
+# CHECK-NEXT:  2      7     1.00    *                   vmovshdup      (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     1.00    *                   vmovshdup      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     1.00    *                   vmovshdup      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vmovshdup      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19
+# CHECK-NEXT:  2      7     1.00    *                   vmovsldup      (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     1.00    *                   vmovsldup      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     1.00    *                   vmovsldup      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vmovsldup      (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      5     1.00                        vmulpd %xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      11    1.00    *                   vmulpd (%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      11    1.00    *                   vmulpd (%rax){1to2}, %xmm17, %xmm19
@@ -2256,7 +2334,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1935.00 196.00 359.50  -    566.50 338.50 338.50
+# CHECK-NEXT:  -     1935.00 196.00 359.50  -    602.50 347.50 347.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -2526,6 +2604,42 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vminps   %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vminps   (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vminps   (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %xmm16, %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %ymm16, %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovddup %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovddup (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %xmm16, %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %ymm16, %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovshdup        %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovshdup        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %xmm16, %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %ymm16, %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vmovsldup        %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vmovsldup        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vmulpd   %xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vmulpd   (%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vmulpd   (%rax){1to2}, %xmm17, %xmm19
index c9a1bef..51ecdc0 100644 (file)
@@ -196,6 +196,27 @@ vminps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vminps            (%rax), %zmm17, %zmm19 {z}{k1}
 vminps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vmovddup          %zmm16, %zmm19
+vmovddup          (%rax), %zmm19
+vmovddup          %zmm16, %zmm19 {k1}
+vmovddup          (%rax), %zmm19 {k1}
+vmovddup          %zmm16, %zmm19 {z}{k1}
+vmovddup          (%rax), %zmm19 {z}{k1}
+
+vmovshdup         %zmm16, %zmm19
+vmovshdup         (%rax), %zmm19
+vmovshdup         %zmm16, %zmm19 {k1}
+vmovshdup         (%rax), %zmm19 {k1}
+vmovshdup         %zmm16, %zmm19 {z}{k1}
+vmovshdup         (%rax), %zmm19 {z}{k1}
+
+vmovsldup         %zmm16, %zmm19
+vmovsldup         (%rax), %zmm19
+vmovsldup         %zmm16, %zmm19 {k1}
+vmovsldup         (%rax), %zmm19 {k1}
+vmovsldup         %zmm16, %zmm19 {z}{k1}
+vmovsldup         (%rax), %zmm19 {z}{k1}
+
 vmulpd            %zmm16, %zmm17, %zmm19
 vmulpd            (%rax), %zmm17, %zmm19
 vmulpd            (%rax){1to8}, %zmm17, %zmm19
@@ -947,6 +968,24 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      4     0.50                        vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vmulpd %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vmulpd (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vmulpd (%rax){1to8}, %zmm17, %zmm19
@@ -1468,7 +1507,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     612.00 213.67 34.67  251.00 251.00  -     523.67 2.00    -      -      -
+# CHECK-NEXT:  -     612.00 216.67 37.67  255.50 255.50  -     535.67 2.00    -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -1644,6 +1683,24 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -      -      -     vminps       %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -      -      -     vminps       (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -      -      -     vminps       (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %zmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %zmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %zmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -      -      -     vmulpd       %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -      -      -     vmulpd       (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -      -      -     vmulpd       (%rax){1to8}, %zmm17, %zmm19
index e00b1d9..eb35bb3 100644 (file)
@@ -300,6 +300,48 @@ vminps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vminps            (%rax), %ymm17, %ymm19 {z}{k1}
 vminps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vmovddup          %xmm16, %xmm19
+vmovddup          (%rax), %xmm19
+vmovddup          %xmm16, %xmm19 {k1}
+vmovddup          (%rax), %xmm19 {k1}
+vmovddup          %xmm16, %xmm19 {z}{k1}
+vmovddup          (%rax), %xmm19 {z}{k1}
+
+vmovddup          %ymm16, %ymm19
+vmovddup          (%rax), %ymm19
+vmovddup          %ymm16, %ymm19 {k1}
+vmovddup          (%rax), %ymm19 {k1}
+vmovddup          %ymm16, %ymm19 {z}{k1}
+vmovddup          (%rax), %ymm19 {z}{k1}
+
+vmovshdup         %xmm16, %xmm19
+vmovshdup         (%rax), %xmm19
+vmovshdup         %xmm16, %xmm19 {k1}
+vmovshdup         (%rax), %xmm19 {k1}
+vmovshdup         %xmm16, %xmm19 {z}{k1}
+vmovshdup         (%rax), %xmm19 {z}{k1}
+
+vmovshdup         %ymm16, %ymm19
+vmovshdup         (%rax), %ymm19
+vmovshdup         %ymm16, %ymm19 {k1}
+vmovshdup         (%rax), %ymm19 {k1}
+vmovshdup         %ymm16, %ymm19 {z}{k1}
+vmovshdup         (%rax), %ymm19 {z}{k1}
+
+vmovsldup         %xmm16, %xmm19
+vmovsldup         (%rax), %xmm19
+vmovsldup         %xmm16, %xmm19 {k1}
+vmovsldup         (%rax), %xmm19 {k1}
+vmovsldup         %xmm16, %xmm19 {z}{k1}
+vmovsldup         (%rax), %xmm19 {z}{k1}
+
+vmovsldup         %ymm16, %ymm19
+vmovsldup         (%rax), %ymm19
+vmovsldup         %ymm16, %ymm19 {k1}
+vmovsldup         (%rax), %ymm19 {k1}
+vmovsldup         %ymm16, %ymm19 {z}{k1}
+vmovsldup         (%rax), %ymm19 {z}{k1}
+
 vmulpd            %xmm16, %xmm17, %xmm19
 vmulpd            (%rax), %xmm17, %xmm19
 vmulpd            (%rax){1to2}, %xmm17, %xmm19
@@ -1461,6 +1503,42 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      4     0.50                        vminps %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vmovddup       (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vmovddup       (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vmovddup       (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vmovshdup      (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vmovshdup      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vmovshdup      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vmovsldup      (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vmovsldup      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vmovsldup      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vmulpd %xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vmulpd (%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vmulpd (%rax){1to2}, %xmm17, %xmm19
@@ -2260,7 +2338,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     423.00 249.33 161.33 360.50 360.50  -     689.33 4.00    -      -      -
+# CHECK-NEXT:  -     423.00 255.33 167.33 369.50 369.50  -     713.33 4.00    -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2530,6 +2608,42 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -      -      -     vminps       %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -     vminps       (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -     vminps       (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %ymm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovddup     %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovddup     (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %ymm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup    %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup    (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %ymm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovsldup    %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovsldup    (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -      -      -     vmulpd       %xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -     vmulpd       (%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -     vmulpd       (%rax){1to2}, %xmm17, %xmm19
index bf2b6a3..4db1d95 100644 (file)
@@ -196,6 +196,27 @@ vminps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vminps            (%rax), %zmm17, %zmm19 {z}{k1}
 vminps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vmovddup          %zmm16, %zmm19
+vmovddup          (%rax), %zmm19
+vmovddup          %zmm16, %zmm19 {k1}
+vmovddup          (%rax), %zmm19 {k1}
+vmovddup          %zmm16, %zmm19 {z}{k1}
+vmovddup          (%rax), %zmm19 {z}{k1}
+
+vmovshdup         %zmm16, %zmm19
+vmovshdup         (%rax), %zmm19
+vmovshdup         %zmm16, %zmm19 {k1}
+vmovshdup         (%rax), %zmm19 {k1}
+vmovshdup         %zmm16, %zmm19 {z}{k1}
+vmovshdup         (%rax), %zmm19 {z}{k1}
+
+vmovsldup         %zmm16, %zmm19
+vmovsldup         (%rax), %zmm19
+vmovsldup         %zmm16, %zmm19 {k1}
+vmovsldup         (%rax), %zmm19 {k1}
+vmovsldup         %zmm16, %zmm19 {z}{k1}
+vmovsldup         (%rax), %zmm19 {z}{k1}
+
 vmulpd            %zmm16, %zmm17, %zmm19
 vmulpd            (%rax), %zmm17, %zmm19
 vmulpd            (%rax){1to8}, %zmm17, %zmm19
@@ -947,6 +968,24 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      4     0.50                        vminps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vmulpd %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vmulpd (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vmulpd (%rax){1to8}, %zmm17, %zmm19
@@ -1466,7 +1505,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 213.67 34.67  251.00 251.00  -     523.67 2.00    -
+# CHECK-NEXT:  -     612.00 216.67 37.67  255.50 255.50  -     535.67 2.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1642,6 +1681,24 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vminps     %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vminps     (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vminps     (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %zmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %zmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %zmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %zmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vmulpd     %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vmulpd     (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vmulpd     (%rax){1to8}, %zmm17, %zmm19
index 9fdf265..b9d621e 100644 (file)
@@ -300,6 +300,48 @@ vminps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vminps            (%rax), %ymm17, %ymm19 {z}{k1}
 vminps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vmovddup          %xmm16, %xmm19
+vmovddup          (%rax), %xmm19
+vmovddup          %xmm16, %xmm19 {k1}
+vmovddup          (%rax), %xmm19 {k1}
+vmovddup          %xmm16, %xmm19 {z}{k1}
+vmovddup          (%rax), %xmm19 {z}{k1}
+
+vmovddup          %ymm16, %ymm19
+vmovddup          (%rax), %ymm19
+vmovddup          %ymm16, %ymm19 {k1}
+vmovddup          (%rax), %ymm19 {k1}
+vmovddup          %ymm16, %ymm19 {z}{k1}
+vmovddup          (%rax), %ymm19 {z}{k1}
+
+vmovshdup         %xmm16, %xmm19
+vmovshdup         (%rax), %xmm19
+vmovshdup         %xmm16, %xmm19 {k1}
+vmovshdup         (%rax), %xmm19 {k1}
+vmovshdup         %xmm16, %xmm19 {z}{k1}
+vmovshdup         (%rax), %xmm19 {z}{k1}
+
+vmovshdup         %ymm16, %ymm19
+vmovshdup         (%rax), %ymm19
+vmovshdup         %ymm16, %ymm19 {k1}
+vmovshdup         (%rax), %ymm19 {k1}
+vmovshdup         %ymm16, %ymm19 {z}{k1}
+vmovshdup         (%rax), %ymm19 {z}{k1}
+
+vmovsldup         %xmm16, %xmm19
+vmovsldup         (%rax), %xmm19
+vmovsldup         %xmm16, %xmm19 {k1}
+vmovsldup         (%rax), %xmm19 {k1}
+vmovsldup         %xmm16, %xmm19 {z}{k1}
+vmovsldup         (%rax), %xmm19 {z}{k1}
+
+vmovsldup         %ymm16, %ymm19
+vmovsldup         (%rax), %ymm19
+vmovsldup         %ymm16, %ymm19 {k1}
+vmovsldup         (%rax), %ymm19 {k1}
+vmovsldup         %ymm16, %ymm19 {z}{k1}
+vmovsldup         (%rax), %ymm19 {z}{k1}
+
 vmulpd            %xmm16, %xmm17, %xmm19
 vmulpd            (%rax), %xmm17, %xmm19
 vmulpd            (%rax){1to2}, %xmm17, %xmm19
@@ -1461,6 +1503,42 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      4     0.50                        vminps %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      11    0.50    *                   vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vmovddup       (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vmovddup       (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vmovddup       (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovddup       (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vmovshdup      (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vmovshdup      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vmovshdup      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovshdup      %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovshdup      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vmovsldup      (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vmovsldup      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vmovsldup      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vmovsldup      %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vmovsldup      (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vmulpd %xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vmulpd (%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vmulpd (%rax){1to2}, %xmm17, %xmm19
@@ -2258,7 +2336,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     423.00 249.33 161.33 360.50 360.50  -     689.33 4.00    -
+# CHECK-NEXT:  -     423.00 255.33 167.33 369.50 369.50  -     713.33 4.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2528,6 +2606,42 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vminps     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vminps     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vminps     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %ymm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovddup   %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovddup   (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %ymm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup  %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup  (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %ymm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %ymm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovsldup  %ymm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovsldup  (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vmulpd     %xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vmulpd     (%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vmulpd     (%rax){1to2}, %xmm17, %xmm19