[MCA][X86] Add AVX512 subvector broadcast instruction test coverage
authorSimon Pilgrim <llvm-dev@redking.me.uk>
Mon, 13 Dec 2021 18:39:31 +0000 (18:39 +0000)
committerSimon Pilgrim <llvm-dev@redking.me.uk>
Mon, 13 Dec 2021 18:48:25 +0000 (18:48 +0000)
12 files changed:
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dq.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dqvl.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s

index 992fd9e..0eeee89 100644 (file)
@@ -50,6 +50,22 @@ valignq           $1, %zmm16, %zmm17, %zmm19 {z}{k1}
 valignq           $1, (%rax), %zmm17, %zmm19 {z}{k1}
 valignq           $1, (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vbroadcastf32x4   (%rax), %zmm19
+vbroadcastf32x4   (%rax), %zmm19 {k1}
+vbroadcastf32x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf64x4   (%rax), %zmm19
+vbroadcastf64x4   (%rax), %zmm19 {k1}
+vbroadcastf64x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x4   (%rax), %zmm19
+vbroadcasti32x4   (%rax), %zmm19 {k1}
+vbroadcasti32x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti64x4   (%rax), %zmm19
+vbroadcasti64x4   (%rax), %zmm19 {k1}
+vbroadcasti64x4   (%rax), %zmm19 {z}{k1}
+
 vbroadcastsd      %xmm16, %zmm19
 vbroadcastsd      (%rax), %zmm19
 vbroadcastsd      %xmm16, %zmm19 {k1}
@@ -840,6 +856,18 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        valignq        $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   valignq        $1, (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   valignq        $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x4        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      1     1.00                        vbroadcastsd   %xmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vbroadcastsd   (%rax), %zmm19
 # CHECK-NEXT:  1      1     1.00                        vbroadcastsd   %xmm16, %zmm19 {%k1}
@@ -1503,7 +1531,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1506.00 138.67 201.67  -    426.67 219.50 219.50
+# CHECK-NEXT:  -     1506.00 138.67 201.67  -    438.67 225.50 225.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1551,6 +1579,18 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     valignq  $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   valignq  $1, (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   valignq  $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x4  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x4  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x4  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x4  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x4  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x4  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x4  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x4  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x4  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x4  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x4  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x4  (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastsd     %xmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastsd     (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastsd     %xmm16, %zmm19 {%k1}
index f960d77..9d08164 100644 (file)
@@ -51,6 +51,36 @@ vandps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vandps            (%rax), %zmm17, %zmm19 {z}{k1}
 vandps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vbroadcastf32x2   %xmm16, %zmm19
+vbroadcastf32x2   (%rax), %zmm19
+vbroadcastf32x2   %xmm16, %zmm19 {k1}
+vbroadcastf32x2   (%rax), %zmm19 {k1}
+vbroadcastf32x2   %xmm16, %zmm19 {z}{k1}
+vbroadcastf32x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf32x8   (%rax), %zmm19
+vbroadcastf32x8   (%rax), %zmm19 {k1}
+vbroadcastf32x8   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf64x2   (%rax), %zmm19
+vbroadcastf64x2   (%rax), %zmm19 {k1}
+vbroadcastf64x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %zmm19
+vbroadcasti32x2   (%rax), %zmm19
+vbroadcasti32x2   %xmm16, %zmm19 {k1}
+vbroadcasti32x2   (%rax), %zmm19 {k1}
+vbroadcasti32x2   %xmm16, %zmm19 {z}{k1}
+vbroadcasti32x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x8   (%rax), %zmm19
+vbroadcasti32x8   (%rax), %zmm19 {k1}
+vbroadcasti32x8   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti64x2   (%rax), %zmm19
+vbroadcasti64x2   (%rax), %zmm19 {k1}
+vbroadcasti64x2   (%rax), %zmm19 {z}{k1}
+
 vcvtqq2pd         %zmm16, %zmm19
 vcvtqq2pd         (%rax), %zmm19
 vcvtqq2pd         (%rax){1to8}, %zmm19
@@ -202,6 +232,30 @@ vpmovq2m          %zmm0, %k0
 # CHECK-NEXT:  1      1     1.00                        vandps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vandps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcastf32x2        %xmm16, %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x2        (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcastf32x2        %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcastf32x2        %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x8        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x8        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x8        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x2        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x2        (%rax), %zmm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x8        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x8        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x8        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x2        (%rax), %zmm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x2        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      4     1.00                        vcvtqq2pd      %zmm16, %zmm19
 # CHECK-NEXT:  3      10    1.00    *                   vcvtqq2pd      (%rax), %zmm19
 # CHECK-NEXT:  3      10    1.00    *                   vcvtqq2pd      (%rax){1to8}, %zmm19
@@ -299,7 +353,7 @@ vpmovq2m          %zmm0, %k0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     9.67   41.67   -     86.67  37.50  37.50
+# CHECK-NEXT:  -      -     9.67   41.67   -     110.67 46.50  46.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -348,6 +402,30 @@ vpmovq2m          %zmm0, %k0
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandps   %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vandps   (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vandps   (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastf32x2  %xmm16, %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x2  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastf32x2  %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x2  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastf32x2  %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x2  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x8  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x8  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x8  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x2  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x2  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x2  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x2  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x2  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x2  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x8  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x8  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x8  (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x2  (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x2  (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x2  (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     vcvtqq2pd        %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -     1.00    -     1.00   0.50   0.50   vcvtqq2pd        (%rax), %zmm19
 # CHECK-NEXT:  -      -      -     1.00    -     1.00   0.50   0.50   vcvtqq2pd        (%rax){1to8}, %zmm19
index f769c35..7ab33c3 100644 (file)
@@ -61,6 +61,35 @@ vandps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vandps            (%rax), %ymm17, %ymm19 {z}{k1}
 vandps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vbroadcastf32x2   %xmm16, %ymm19
+vbroadcastf32x2   (%rax), %ymm19
+vbroadcastf32x2   %xmm16, %ymm19 {k1}
+vbroadcastf32x2   (%rax), %ymm19 {k1}
+vbroadcastf32x2   %xmm16, %ymm19 {z}{k1}
+vbroadcastf32x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcastf64x2   (%rax), %ymm19
+vbroadcastf64x2   (%rax), %ymm19 {k1}
+vbroadcastf64x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %xmm19
+vbroadcasti32x2   (%rax), %xmm19
+vbroadcasti32x2   %xmm16, %xmm19 {k1}
+vbroadcasti32x2   (%rax), %xmm19 {k1}
+vbroadcasti32x2   %xmm16, %xmm19 {z}{k1}
+vbroadcasti32x2   (%rax), %xmm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %ymm19
+vbroadcasti32x2   (%rax), %ymm19
+vbroadcasti32x2   %xmm16, %ymm19 {k1}
+vbroadcasti32x2   (%rax), %ymm19 {k1}
+vbroadcasti32x2   %xmm16, %ymm19 {z}{k1}
+vbroadcasti32x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti64x2   (%rax), %ymm19
+vbroadcasti64x2   (%rax), %ymm19 {k1}
+vbroadcasti64x2   (%rax), %ymm19 {z}{k1}
+
 vcvtqq2pd         %xmm16, %xmm19
 vcvtqq2pd         (%rax), %xmm19
 vcvtqq2pd         (%rax){1to2}, %xmm19
@@ -278,6 +307,30 @@ vpmovq2m          %ymm0, %k0
 # CHECK-NEXT:  1      1     1.00                        vandps %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vandps (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcastf32x2        %xmm16, %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x2        (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcastf32x2        %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcastf32x2        %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x2        (%rax), %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf64x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x2        (%rax), %ymm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x2        (%rax), %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti64x2        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      4     1.00                        vcvtqq2pd      %xmm16, %xmm19
 # CHECK-NEXT:  3      10    1.00    *                   vcvtqq2pd      (%rax), %xmm19
 # CHECK-NEXT:  3      10    1.00    *                   vcvtqq2pd      (%rax){1to2}, %xmm19
@@ -425,7 +478,7 @@ vpmovq2m          %ymm0, %k0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     14.67  44.67   -     146.67 59.00  59.00
+# CHECK-NEXT:  -      -     14.67  46.17   -     169.17 66.50  66.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -483,6 +536,30 @@ vpmovq2m          %ymm0, %k0
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandps   %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vandps   (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vandps   (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastf32x2  %xmm16, %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x2  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastf32x2  %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x2  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastf32x2  %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x2  (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x2  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x2  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf64x2  (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %xmm19
+# CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vbroadcasti32x2  (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vbroadcasti32x2  (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vbroadcasti32x2  (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x2  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x2  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcasti32x2  %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x2  (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x2  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x2  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti64x2  (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     vcvtqq2pd        %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -     1.00    -     1.00   0.50   0.50   vcvtqq2pd        (%rax), %xmm19
 # CHECK-NEXT:  -      -      -     1.00    -     1.00   0.50   0.50   vcvtqq2pd        (%rax){1to2}, %xmm19
index f6af2f6..b78ce71 100644 (file)
@@ -81,6 +81,14 @@ valignq           $1, %ymm16, %ymm17, %ymm19 {z}{k1}
 valignq           $1, (%rax), %ymm17, %ymm19 {z}{k1}
 valignq           $1, (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
 
+vbroadcastf32x4   (%rax), %ymm19
+vbroadcastf32x4   (%rax), %ymm19 {k1}
+vbroadcastf32x4   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti32x4   (%rax), %ymm19
+vbroadcasti32x4   (%rax), %ymm19 {k1}
+vbroadcasti32x4   (%rax), %ymm19 {z}{k1}
+
 vbroadcastsd      %xmm16, %ymm19
 vbroadcastsd      (%rax), %ymm19
 vbroadcastsd      %xmm16, %ymm19 {k1}
@@ -1309,6 +1317,12 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        valignq        $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   valignq        $1, (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   valignq        $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x4        (%rax), %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x4        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcastf32x4        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x4        (%rax), %ymm19
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x4        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     1.00    *                   vbroadcasti32x4        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      1     1.00                        vbroadcastsd   %xmm16, %ymm19
 # CHECK-NEXT:  2      8     1.00    *                   vbroadcastsd   (%rax), %ymm19
 # CHECK-NEXT:  1      1     1.00                        vbroadcastsd   %xmm16, %ymm19 {%k1}
@@ -2334,7 +2348,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1935.00 196.00 359.50  -    602.50 347.50 347.50
+# CHECK-NEXT:  -     1935.00 196.00 359.50  -    608.50 350.50 350.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -2410,6 +2424,12 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     valignq  $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   valignq  $1, (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   valignq  $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x4  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x4  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastf32x4  (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x4  (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x4  (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcasti32x4  (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastsd     %xmm16, %ymm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vbroadcastsd     (%rax), %ymm19
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vbroadcastsd     %xmm16, %ymm19 {%k1}
index 51ecdc0..bbdfba5 100644 (file)
@@ -50,6 +50,22 @@ valignq           $1, %zmm16, %zmm17, %zmm19 {z}{k1}
 valignq           $1, (%rax), %zmm17, %zmm19 {z}{k1}
 valignq           $1, (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vbroadcastf32x4   (%rax), %zmm19
+vbroadcastf32x4   (%rax), %zmm19 {k1}
+vbroadcastf32x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf64x4   (%rax), %zmm19
+vbroadcastf64x4   (%rax), %zmm19 {k1}
+vbroadcastf64x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x4   (%rax), %zmm19
+vbroadcasti32x4   (%rax), %zmm19 {k1}
+vbroadcasti32x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti64x4   (%rax), %zmm19
+vbroadcasti64x4   (%rax), %zmm19 {k1}
+vbroadcasti64x4   (%rax), %zmm19 {z}{k1}
+
 vbroadcastsd      %xmm16, %zmm19
 vbroadcastsd      (%rax), %zmm19
 vbroadcastsd      %xmm16, %zmm19 {k1}
@@ -840,6 +856,18 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        valignq        $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x4        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %zmm19
 # CHECK-NEXT:  2      8     0.50    *                   vbroadcastsd   (%rax), %zmm19
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %zmm19 {%k1}
@@ -1507,7 +1535,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     612.00 216.67 37.67  255.50 255.50  -     535.67 2.00    -      -      -
+# CHECK-NEXT:  -     612.00 220.67 41.67  261.50 261.50  -     539.67 2.00    -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -1555,6 +1583,18 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     valignq      $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     valignq      $1, (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     valignq      $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x4      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x4      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x4      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x4      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x4      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x4      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x4      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x4      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x4      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x4      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x4      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x4      (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastsd %xmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastsd (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastsd %xmm16, %zmm19 {%k1}
index f69df2a..102602b 100644 (file)
@@ -51,6 +51,36 @@ vandps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vandps            (%rax), %zmm17, %zmm19 {z}{k1}
 vandps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vbroadcastf32x2   %xmm16, %zmm19
+vbroadcastf32x2   (%rax), %zmm19
+vbroadcastf32x2   %xmm16, %zmm19 {k1}
+vbroadcastf32x2   (%rax), %zmm19 {k1}
+vbroadcastf32x2   %xmm16, %zmm19 {z}{k1}
+vbroadcastf32x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf32x8   (%rax), %zmm19
+vbroadcastf32x8   (%rax), %zmm19 {k1}
+vbroadcastf32x8   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf64x2   (%rax), %zmm19
+vbroadcastf64x2   (%rax), %zmm19 {k1}
+vbroadcastf64x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %zmm19
+vbroadcasti32x2   (%rax), %zmm19
+vbroadcasti32x2   %xmm16, %zmm19 {k1}
+vbroadcasti32x2   (%rax), %zmm19 {k1}
+vbroadcasti32x2   %xmm16, %zmm19 {z}{k1}
+vbroadcasti32x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x8   (%rax), %zmm19
+vbroadcasti32x8   (%rax), %zmm19 {k1}
+vbroadcasti32x8   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti64x2   (%rax), %zmm19
+vbroadcasti64x2   (%rax), %zmm19 {k1}
+vbroadcasti64x2   (%rax), %zmm19 {z}{k1}
+
 vcvtqq2pd         %zmm16, %zmm19
 vcvtqq2pd         (%rax), %zmm19
 vcvtqq2pd         (%rax){1to8}, %zmm19
@@ -202,6 +232,30 @@ vpmovq2m          %zmm0, %k0
 # CHECK-NEXT:  1      1     0.50                        vandps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %zmm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x8        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x8        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x8        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %zmm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x8        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x8        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x8        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vcvtqq2pd      %zmm16, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vcvtqq2pd      (%rax), %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vcvtqq2pd      (%rax){1to8}, %zmm19
@@ -303,7 +357,7 @@ vpmovq2m          %zmm0, %k0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -      -     59.50  4.50   37.50  37.50   -     83.50  0.50    -      -      -
+# CHECK-NEXT:  -      -     65.50  10.50  46.50  46.50   -     95.50  0.50    -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -352,6 +406,30 @@ vpmovq2m          %zmm0, %k0
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -      -      -     vandps       %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -      -      -     vandps       (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -      -      -     vandps       (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastf32x2      %xmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x2      (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastf32x2      %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x2      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastf32x2      %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x2      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x8      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x8      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x8      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x2      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x2      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x2      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x8      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x8      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x8      (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x2      (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x2      (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x2      (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -      -      -     vcvtqq2pd    %zmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vcvtqq2pd    (%rax), %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vcvtqq2pd    (%rax){1to8}, %zmm19
index 90d3ad2..647a5cf 100644 (file)
@@ -61,6 +61,35 @@ vandps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vandps            (%rax), %ymm17, %ymm19 {z}{k1}
 vandps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vbroadcastf32x2   %xmm16, %ymm19
+vbroadcastf32x2   (%rax), %ymm19
+vbroadcastf32x2   %xmm16, %ymm19 {k1}
+vbroadcastf32x2   (%rax), %ymm19 {k1}
+vbroadcastf32x2   %xmm16, %ymm19 {z}{k1}
+vbroadcastf32x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcastf64x2   (%rax), %ymm19
+vbroadcastf64x2   (%rax), %ymm19 {k1}
+vbroadcastf64x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %xmm19
+vbroadcasti32x2   (%rax), %xmm19
+vbroadcasti32x2   %xmm16, %xmm19 {k1}
+vbroadcasti32x2   (%rax), %xmm19 {k1}
+vbroadcasti32x2   %xmm16, %xmm19 {z}{k1}
+vbroadcasti32x2   (%rax), %xmm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %ymm19
+vbroadcasti32x2   (%rax), %ymm19
+vbroadcasti32x2   %xmm16, %ymm19 {k1}
+vbroadcasti32x2   (%rax), %ymm19 {k1}
+vbroadcasti32x2   %xmm16, %ymm19 {z}{k1}
+vbroadcasti32x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti64x2   (%rax), %ymm19
+vbroadcasti64x2   (%rax), %ymm19 {k1}
+vbroadcasti64x2   (%rax), %ymm19 {z}{k1}
+
 vcvtqq2pd         %xmm16, %xmm19
 vcvtqq2pd         (%rax), %xmm19
 vcvtqq2pd         (%rax){1to2}, %xmm19
@@ -278,6 +307,30 @@ vpmovq2m          %ymm0, %k0
 # CHECK-NEXT:  1      1     0.33                        vandps %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %ymm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %ymm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vcvtqq2pd      %xmm16, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vcvtqq2pd      (%rax), %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vcvtqq2pd      (%rax){1to2}, %xmm19
@@ -429,7 +482,7 @@ vpmovq2m          %ymm0, %k0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -      -     72.00  68.00  59.00  59.00   -     71.00  1.00    -      -      -
+# CHECK-NEXT:  -      -     77.00  73.00  66.50  66.50   -     85.00  1.00    -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -487,6 +540,30 @@ vpmovq2m          %ymm0, %k0
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     0.33    -      -      -      -     vandps       %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vandps       (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vandps       (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastf32x2      %xmm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x2      (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastf32x2      %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x2      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastf32x2      %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x2      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x2      (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x2      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf64x2      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcasti32x2      %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x2      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x2      (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x2      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti64x2      (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -      -      -     vcvtqq2pd    %xmm16, %xmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vcvtqq2pd    (%rax), %xmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vcvtqq2pd    (%rax){1to2}, %xmm19
index eb35bb3..31b934c 100644 (file)
@@ -81,6 +81,14 @@ valignq           $1, %ymm16, %ymm17, %ymm19 {z}{k1}
 valignq           $1, (%rax), %ymm17, %ymm19 {z}{k1}
 valignq           $1, (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
 
+vbroadcastf32x4   (%rax), %ymm19
+vbroadcastf32x4   (%rax), %ymm19 {k1}
+vbroadcastf32x4   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti32x4   (%rax), %ymm19
+vbroadcasti32x4   (%rax), %ymm19 {k1}
+vbroadcasti32x4   (%rax), %ymm19 {z}{k1}
+
 vbroadcastsd      %xmm16, %ymm19
 vbroadcastsd      (%rax), %ymm19
 vbroadcastsd      %xmm16, %ymm19 {k1}
@@ -1309,6 +1317,12 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        valignq        $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %ymm19
 # CHECK-NEXT:  2      8     0.50    *                   vbroadcastsd   (%rax), %ymm19
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %ymm19 {%k1}
@@ -2338,7 +2352,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     423.00 255.33 167.33 369.50 369.50  -     713.33 4.00    -      -      -
+# CHECK-NEXT:  -     423.00 257.33 169.33 372.50 372.50  -     715.33 4.00    -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2414,6 +2428,12 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     valignq      $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     valignq      $1, (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     valignq      $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x4      (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x4      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastf32x4      (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x4      (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x4      (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcasti32x4      (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastsd %xmm16, %ymm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vbroadcastsd (%rax), %ymm19
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vbroadcastsd %xmm16, %ymm19 {%k1}
index 4db1d95..b664dda 100644 (file)
@@ -50,6 +50,22 @@ valignq           $1, %zmm16, %zmm17, %zmm19 {z}{k1}
 valignq           $1, (%rax), %zmm17, %zmm19 {z}{k1}
 valignq           $1, (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vbroadcastf32x4   (%rax), %zmm19
+vbroadcastf32x4   (%rax), %zmm19 {k1}
+vbroadcastf32x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf64x4   (%rax), %zmm19
+vbroadcastf64x4   (%rax), %zmm19 {k1}
+vbroadcastf64x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x4   (%rax), %zmm19
+vbroadcasti32x4   (%rax), %zmm19 {k1}
+vbroadcasti32x4   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti64x4   (%rax), %zmm19
+vbroadcasti64x4   (%rax), %zmm19 {k1}
+vbroadcasti64x4   (%rax), %zmm19 {z}{k1}
+
 vbroadcastsd      %xmm16, %zmm19
 vbroadcastsd      (%rax), %zmm19
 vbroadcastsd      %xmm16, %zmm19 {k1}
@@ -840,6 +856,18 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        valignq        $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x4        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x4        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x4        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %zmm19
 # CHECK-NEXT:  2      8     0.50    *                   vbroadcastsd   (%rax), %zmm19
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %zmm19 {%k1}
@@ -1505,7 +1533,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 216.67 37.67  255.50 255.50  -     535.67 2.00    -
+# CHECK-NEXT:  -     612.00 220.67 41.67  261.50 261.50  -     539.67 2.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1553,6 +1581,18 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     valignq    $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     valignq    $1, (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     valignq    $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x4    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x4    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x4    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x4    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x4    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x4    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x4    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x4    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x4    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x4    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x4    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x4    (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastsd       %xmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastsd       (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastsd       %xmm16, %zmm19 {%k1}
index 6f64568..18750b0 100644 (file)
@@ -51,6 +51,36 @@ vandps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vandps            (%rax), %zmm17, %zmm19 {z}{k1}
 vandps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vbroadcastf32x2   %xmm16, %zmm19
+vbroadcastf32x2   (%rax), %zmm19
+vbroadcastf32x2   %xmm16, %zmm19 {k1}
+vbroadcastf32x2   (%rax), %zmm19 {k1}
+vbroadcastf32x2   %xmm16, %zmm19 {z}{k1}
+vbroadcastf32x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf32x8   (%rax), %zmm19
+vbroadcastf32x8   (%rax), %zmm19 {k1}
+vbroadcastf32x8   (%rax), %zmm19 {z}{k1}
+
+vbroadcastf64x2   (%rax), %zmm19
+vbroadcastf64x2   (%rax), %zmm19 {k1}
+vbroadcastf64x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %zmm19
+vbroadcasti32x2   (%rax), %zmm19
+vbroadcasti32x2   %xmm16, %zmm19 {k1}
+vbroadcasti32x2   (%rax), %zmm19 {k1}
+vbroadcasti32x2   %xmm16, %zmm19 {z}{k1}
+vbroadcasti32x2   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti32x8   (%rax), %zmm19
+vbroadcasti32x8   (%rax), %zmm19 {k1}
+vbroadcasti32x8   (%rax), %zmm19 {z}{k1}
+
+vbroadcasti64x2   (%rax), %zmm19
+vbroadcasti64x2   (%rax), %zmm19 {k1}
+vbroadcasti64x2   (%rax), %zmm19 {z}{k1}
+
 vcvtqq2pd         %zmm16, %zmm19
 vcvtqq2pd         (%rax), %zmm19
 vcvtqq2pd         (%rax){1to8}, %zmm19
@@ -202,6 +232,30 @@ vpmovq2m          %zmm0, %k0
 # CHECK-NEXT:  1      1     0.50                        vandps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %zmm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x8        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x8        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x8        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %zmm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x8        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x8        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x8        (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %zmm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vcvtqq2pd      %zmm16, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vcvtqq2pd      (%rax), %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vcvtqq2pd      (%rax){1to8}, %zmm19
@@ -301,7 +355,7 @@ vpmovq2m          %zmm0, %k0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     59.50  4.50   37.50  37.50   -     83.50  0.50    -
+# CHECK-NEXT:  -      -     65.50  10.50  46.50  46.50   -     95.50  0.50    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -350,6 +404,30 @@ vpmovq2m          %zmm0, %k0
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vandps     %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vandps     (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vandps     (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastf32x2    %xmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x2    (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastf32x2    %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x2    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastf32x2    %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x2    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x8    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x8    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x8    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x2    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x2    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x2    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x8    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x8    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x8    (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x2    (%rax), %zmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x2    (%rax), %zmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x2    (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vcvtqq2pd  %zmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vcvtqq2pd  (%rax), %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vcvtqq2pd  (%rax){1to8}, %zmm19
index 3e33eb8..4f2b0d9 100644 (file)
@@ -61,6 +61,35 @@ vandps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vandps            (%rax), %ymm17, %ymm19 {z}{k1}
 vandps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vbroadcastf32x2   %xmm16, %ymm19
+vbroadcastf32x2   (%rax), %ymm19
+vbroadcastf32x2   %xmm16, %ymm19 {k1}
+vbroadcastf32x2   (%rax), %ymm19 {k1}
+vbroadcastf32x2   %xmm16, %ymm19 {z}{k1}
+vbroadcastf32x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcastf64x2   (%rax), %ymm19
+vbroadcastf64x2   (%rax), %ymm19 {k1}
+vbroadcastf64x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %xmm19
+vbroadcasti32x2   (%rax), %xmm19
+vbroadcasti32x2   %xmm16, %xmm19 {k1}
+vbroadcasti32x2   (%rax), %xmm19 {k1}
+vbroadcasti32x2   %xmm16, %xmm19 {z}{k1}
+vbroadcasti32x2   (%rax), %xmm19 {z}{k1}
+
+vbroadcasti32x2   %xmm16, %ymm19
+vbroadcasti32x2   (%rax), %ymm19
+vbroadcasti32x2   %xmm16, %ymm19 {k1}
+vbroadcasti32x2   (%rax), %ymm19 {k1}
+vbroadcasti32x2   %xmm16, %ymm19 {z}{k1}
+vbroadcasti32x2   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti64x2   (%rax), %ymm19
+vbroadcasti64x2   (%rax), %ymm19 {k1}
+vbroadcasti64x2   (%rax), %ymm19 {z}{k1}
+
 vcvtqq2pd         %xmm16, %xmm19
 vcvtqq2pd         (%rax), %xmm19
 vcvtqq2pd         (%rax){1to2}, %xmm19
@@ -278,6 +307,30 @@ vpmovq2m          %ymm0, %k0
 # CHECK-NEXT:  1      1     0.33                        vandps %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %ymm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcastf32x2        %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf64x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  1      1     1.00                        vbroadcasti32x2        %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  2      7     0.50    *                   vbroadcasti32x2        (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %ymm19
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  1      3     1.00                        vbroadcasti32x2        %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x2        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti64x2        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      4     0.50                        vcvtqq2pd      %xmm16, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vcvtqq2pd      (%rax), %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vcvtqq2pd      (%rax){1to2}, %xmm19
@@ -427,7 +480,7 @@ vpmovq2m          %ymm0, %k0
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     72.00  68.00  59.00  59.00   -     71.00  1.00    -
+# CHECK-NEXT:  -      -     77.00  73.00  66.50  66.50   -     85.00  1.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -485,6 +538,30 @@ vpmovq2m          %ymm0, %k0
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     0.33    -      -     vandps     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vandps     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vandps     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastf32x2    %xmm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x2    (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastf32x2    %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x2    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastf32x2    %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x2    (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x2    (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x2    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf64x2    (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %xmm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %xmm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcasti32x2    %xmm16, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x2    (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x2    (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x2    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti64x2    (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vcvtqq2pd  %xmm16, %xmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vcvtqq2pd  (%rax), %xmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vcvtqq2pd  (%rax){1to2}, %xmm19
index b9d621e..36878e8 100644 (file)
@@ -81,6 +81,14 @@ valignq           $1, %ymm16, %ymm17, %ymm19 {z}{k1}
 valignq           $1, (%rax), %ymm17, %ymm19 {z}{k1}
 valignq           $1, (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
 
+vbroadcastf32x4   (%rax), %ymm19
+vbroadcastf32x4   (%rax), %ymm19 {k1}
+vbroadcastf32x4   (%rax), %ymm19 {z}{k1}
+
+vbroadcasti32x4   (%rax), %ymm19
+vbroadcasti32x4   (%rax), %ymm19 {k1}
+vbroadcasti32x4   (%rax), %ymm19 {z}{k1}
+
 vbroadcastsd      %xmm16, %ymm19
 vbroadcastsd      (%rax), %ymm19
 vbroadcastsd      %xmm16, %ymm19 {k1}
@@ -1309,6 +1317,12 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        valignq        $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   valignq        $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcastf32x4        (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %ymm19
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  2      8     0.50    *                   vbroadcasti32x4        (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %ymm19
 # CHECK-NEXT:  2      8     0.50    *                   vbroadcastsd   (%rax), %ymm19
 # CHECK-NEXT:  1      3     1.00                        vbroadcastsd   %xmm16, %ymm19 {%k1}
@@ -2336,7 +2350,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     423.00 255.33 167.33 369.50 369.50  -     713.33 4.00    -
+# CHECK-NEXT:  -     423.00 257.33 169.33 372.50 372.50  -     715.33 4.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2412,6 +2426,12 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     valignq    $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     valignq    $1, (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     valignq    $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x4    (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x4    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastf32x4    (%rax), %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x4    (%rax), %ymm19
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x4    (%rax), %ymm19 {%k1}
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcasti32x4    (%rax), %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastsd       %xmm16, %ymm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vbroadcastsd       (%rax), %ymm19
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastsd       %xmm16, %ymm19 {%k1}