[MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED)
authorSimon Pilgrim <llvm-dev@redking.me.uk>
Thu, 1 Aug 2024 15:08:33 +0000 (16:08 +0100)
committerTobias Hieta <tobias@hieta.se>
Mon, 26 Aug 2024 07:20:20 +0000 (09:20 +0200)
This doesn't match uops.info yet - but it matches the existing vpscatterdq/vscatterqpd entries like uops.info says it should

Reapplied with codegen fix for scatter-schedule.ll

Fixes #105675

(cherry picked from commit cf6cd1fd67356ca0c2972992928592d2430043d2)

13 files changed:
llvm/lib/Target/X86/X86SchedIceLake.td
llvm/lib/Target/X86/X86SchedSkylakeServer.td
llvm/test/CodeGen/X86/scatter-schedule.ll
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s

index 186d4d84c25104c8e482ce9e6601729620222976..b68be9be6d4731dd7fa61d51bd832784b2933db0 100644 (file)
@@ -1510,8 +1510,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015
   let ReleaseAtCycles = [1,8,8,2];
 }
 def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+                                           VPSCATTERQDZmr,
                                            VPSCATTERQQZmr,
                                            VSCATTERDPDZmr,
+                                           VSCATTERQPSZmr,
                                            VSCATTERQPDZmr)>;
 
 def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
index 4fded44085e89737e3f89be0fa7032a16cbaaf23..2423602d06c470211571f3d36179452883dcb726 100644 (file)
@@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015
   let ReleaseAtCycles = [1,8,8,2];
 }
 def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
+                                           VPSCATTERQDZmr,
                                            VPSCATTERQQZmr,
                                            VSCATTERDPDZmr,
+                                           VSCATTERQPSZmr,
                                            VSCATTERQPDZmr)>;
 
 def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
index c841e23eab76b27abe7f477cd8c2d5f27e0fdd42..762a050247a87e929264bc84f5aca723284b2763 100644 (file)
@@ -10,8 +10,8 @@ define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    kxnorw %k0, %k0, %k2
-; CHECK-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k2}
+; CHECK-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k1}
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
 ; CHECK-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; CHECK-NEXT:    vzeroupper
index a8937f7dcfd117af0be3cc6763386775d11b321d..c3453d890d76d51459fa5435780ed788be2b8ef5 100644 (file)
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      29    28.00                       vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      36    28.00   *                   vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      36    28.00   *                   vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      3     1.00                        {evex} vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  3      5     1.00           *            {evex} vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      5     0.50                        vfmadd132pd    %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    0.50    *                   vfmadd132pd    (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      10    0.50    *                   vfmadd132pd    (%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd        $0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd        $0, (%rax), %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd        $0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq    %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq    (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq    (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vshuff32x4     $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4     $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4     $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2027,7 +2050,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1506.00 197.00 334.00 16.00 522.00 299.50 299.50
+# CHECK-NEXT:  -     1506.00 198.00 335.00 25.00 523.00 304.00 304.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -2290,6 +2313,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50    -      -     vdivps   %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50   0.50   0.50   vdivps   (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50   0.50   0.50   vdivps   (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.00   0.50    -     0.50    -      -     {evex}   vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -     0.50   1.00   0.50   0.50   0.50   {evex}   vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -     vfmadd132pd      %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50   0.50    -      -     0.50   0.50   vfmadd132pd      (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50   0.50    -      -     0.50   0.50   vfmadd132pd      (%rax){1to8}, %zmm17, %zmm19
@@ -2743,6 +2768,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpermq   %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpermq   (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpermq   (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdd      %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdq      %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqd      %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqq      %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpshufd  $0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpshufd  $0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpshufd  $0, (%rax){1to16}, %zmm19
@@ -2806,6 +2835,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpunpcklqdq      %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpcklqdq      (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpcklqdq      (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdps      %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdpd      %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqps      %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqpd      %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vshuff32x4       $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4       $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4       $0, (%rax){1to16}, %zmm17, %zmm19
index e8e7a80f690bfa34828adcf6b20bae8385a83018..4a4f77826437bdb67cb69bea7ec380cbc5d47151 100644 (file)
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      5     1.00                        vpmulld        %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      12    1.00    *                   vpmulld        (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      12    1.00    *                   vpmulld        (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd        $0, %xmm16, %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd        $0, (%rax), %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd        $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpckldq     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vshuff32x4     $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4     $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4     $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3228,7 +3264,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1935.00 278.00 579.50 32.00 738.50 486.50 486.50
+# CHECK-NEXT:  -     1935.00 278.00 579.50 48.00 738.50 494.50 494.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -4420,6 +4456,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vpmulld  %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld  (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld  (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdd      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdq      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqd      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqq      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdd      %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdq      %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqd      %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqq      %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -     vpshufd  $0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpshufd  $0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpshufd  $0, (%rax){1to4}, %xmm19
@@ -4558,6 +4602,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpunpckldq       %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpckldq       (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpckldq       (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdps      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdpd      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqps      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqpd      %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdps      %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdpd      %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqps      %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqpd      %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vshuff32x4       $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4       $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4       $0, (%rax){1to8}, %ymm17, %ymm19
index 5c12c520b04af206e5725e34651acd469a4e0321..c509e766540b15917de02b961d1d87fdaf4e180e 100644 (file)
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    10.00                       vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      3     1.00                        {evex} vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  3      2     1.00           *            {evex} vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     1.00                        vfmadd132pd    %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vfmadd132pd    (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vfmadd132pd    (%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     8     8.00           *            vpscatterdd    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterdq    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterqd    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterqq    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd        $0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd        $0, (%rax), %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd        $0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq    %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq    (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq    (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     7     8.00           *            vscatterdps    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterdpd    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterqps    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterqpd    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4     $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2031,7 +2054,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     612.00 398.17 99.67  327.50 327.50 8.00   585.17 2.00   8.00   8.00   8.00
+# CHECK-NEXT:  -     612.00 411.17 103.67 327.50 327.50 48.50  593.17 6.00   48.50  48.50  48.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2294,6 +2317,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     10.00  2.00    -      -      -      -     1.00    -      -      -      -     vdivps       %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -      -      -     vdivps       (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -      -      -     vdivps       (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -      -      -     {evex}       vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   1.00    -     0.50   0.50   0.50   {evex}       vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -     vfmadd132pd  %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -      -      -     vfmadd132pd  (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -      -      -     vfmadd132pd  (%rax){1to8}, %zmm17, %zmm19
@@ -2747,6 +2772,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpermq       %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpermq       (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpermq       (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     8.00   1.50   0.50   8.00   8.00   8.00   vpscatterdd  %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterdq  %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterqd  %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterqq  %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpshufd      $0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpshufd      $0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpshufd      $0, (%rax){1to16}, %zmm19
@@ -2810,6 +2839,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpunpcklqdq  %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpunpcklqdq  (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpunpcklqdq  (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     8.00   1.50   0.50   8.00   8.00   8.00   vscatterdps  %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterdpd  %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterqps  %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterqpd  %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vshuff32x4   $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4   $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4   $0, (%rax){1to16}, %zmm17, %zmm19
index 375087ae0cfe4e70c7e2b20e9c436f8d3eb0024e..00e5c3b03f6f5294a69ed56ca7de8c49866c2b0c 100644 (file)
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld        %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld        (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld        (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     2.00           *            vpscatterdd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vpscatterdq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vpscatterqd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vpscatterqq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     4.00           *            vpscatterdd    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vpscatterdq    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vpscatterqd    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vpscatterqq    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd        $0, %xmm16, %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd        $0, (%rax), %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd        $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpunpckldq     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpunpckldq     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpunpckldq     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     2.00           *            vscatterdps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vscatterdpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vscatterqps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vscatterqpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     4.00           *            vscatterdps    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vscatterdpd    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vscatterqps    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vscatterqpd    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4     $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3232,7 +3268,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     423.00 438.33 413.33 492.50 492.50 16.00  722.33 4.00   16.00  16.00  16.00
+# CHECK-NEXT:  -     423.00 462.33 421.33 492.50 492.50 44.00  738.33 12.00  44.00  44.00  44.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -4424,6 +4460,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -      -      -     vpmulld      %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -      -      -     vpmulld      (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -      -      -     vpmulld      (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   1.50   0.50   2.00   2.00   2.00   vpscatterdd  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vpscatterdq  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vpscatterqd  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vpscatterqq  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   1.50   0.50   4.00   4.00   4.00   vpscatterdd  %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vpscatterdq  %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vpscatterqd  %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vpscatterqq  %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -     0.50    -      -      -     0.50    -      -      -      -     vpshufd      $0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpshufd      $0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpshufd      $0, (%rax){1to4}, %xmm19
@@ -4562,6 +4606,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -     0.50    -      -      -     0.50    -      -      -      -     vpunpckldq   %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpunpckldq   (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpunpckldq   (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   1.50   0.50   2.00   2.00   2.00   vscatterdps  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vscatterdpd  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vscatterqps  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vscatterqpd  %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   1.50   0.50   4.00   4.00   4.00   vscatterdps  %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vscatterdpd  %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vscatterqps  %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vscatterqpd  %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vshuff32x4   $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4   $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4   $0, (%rax){1to8}, %ymm17, %ymm19
index b34ccaacc11a32f79d51f8c849d094fab549e382..b2fde3929106a533075d3f54075a4155f4092e01 100644 (file)
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    2.00                        vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    2.00    *                   vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    2.00    *                   vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      4     1.00                        {evex} vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  3      12    1.00           *            {evex} vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     1.00                        vfmadd132pd    %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vfmadd132pd    (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vfmadd132pd    (%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    1.00    *                   vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    1.00    *                   vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  35     19    8.00           *            vpscatterdd    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterdq    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterqd    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterqq    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd        $0, %zmm16, %zmm19
 # CHECK-NEXT:  2      9     1.00    *                   vpshufd        $0, (%rax), %zmm19
 # CHECK-NEXT:  2      9     1.00    *                   vpshufd        $0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq    %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      9     1.00    *                   vpunpcklqdq    (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      9     1.00    *                   vpunpcklqdq    (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  35     19    8.00           *            vscatterdps    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterdpd    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterqps    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterqpd    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4     $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4     $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4     $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2032,7 +2055,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 490.00 12.00  218.33 218.33 8.00   575.00  -     8.00   8.00   8.00    -     218.33  -
+# CHECK-NEXT: 508.60 13.60  218.33 218.33 48.50  578.60 1.60   48.50  48.50  48.50  1.60   218.33  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -2295,6 +2318,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT: 2.50    -      -      -      -     0.50    -      -      -      -      -      -      -     vdivps        %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT: 2.50    -     0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vdivps        (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT: 2.50    -     0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vdivps        (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1.00    -      -      -      -     1.00    -      -      -      -      -      -      -     {evex}        vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -     0.50   1.00    -     0.50   0.50   0.50    -      -      -     {evex}        vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -     vfmadd132pd   %zmm16, %zmm17, %zmm19
 # CHECK-NEXT: 1.00    -     0.33   0.33    -      -      -      -      -      -      -     0.33    -     vfmadd132pd   (%rax), %zmm17, %zmm19
 # CHECK-NEXT: 1.00    -     0.33   0.33    -      -      -      -      -      -      -     0.33    -     vfmadd132pd   (%rax){1to8}, %zmm17, %zmm19
@@ -2748,6 +2773,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vpermq        %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpermq        (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpermq        (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2.20   0.20    -      -     8.00   0.20   0.20   8.00   8.00   8.00   0.20    -      -     vpscatterdd   %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterdq   %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterqd   %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterqq   %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vpshufd       $0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpshufd       $0, (%rax), %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpshufd       $0, (%rax){1to16}, %zmm19
@@ -2811,6 +2840,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vpunpcklqdq   %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpunpcklqdq   (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpunpcklqdq   (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2.20   0.20    -      -     8.00   0.20   0.20   8.00   8.00   8.00   0.20    -      -     vscatterdps   %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterdpd   %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterqps   %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterqpd   %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vshuff32x4    $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4    $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4    $0, (%rax){1to16}, %zmm17, %zmm19
index 3ad66f1c3d7128fc658a8808d3ee2cde753bd616..d8c76832d38d3ec189d99a0613135e0adac3ebf4 100644 (file)
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld        %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      18    1.00    *                   vpmulld        (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      18    1.00    *                   vpmulld        (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterdd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vpscatterdq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vpscatterqd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vpscatterqq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterdd    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterdq    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterqd    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterqq    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd        $0, %xmm16, %xmm19
 # CHECK-NEXT:  2      8     0.50    *                   vpshufd        $0, (%rax), %xmm19
 # CHECK-NEXT:  2      8     0.50    *                   vpshufd        $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpunpckldq     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      9     0.50    *                   vpunpckldq     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      9     0.50    *                   vpunpckldq     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  11     12    2.00           *            vscatterdps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vscatterdpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vscatterqps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vscatterqpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterdps    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vscatterdpd    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vscatterqps    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vscatterqpd    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4     $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4     $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4     $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3233,7 +3269,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 377.33 401.33 328.33 328.33 16.00  794.33  -     16.00  16.00  16.00   -     328.33  -
+# CHECK-NEXT: 404.53 412.53 328.33 328.33 46.00  797.53 3.20   46.00  46.00  46.00  3.20   328.33  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -4425,6 +4461,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT: 1.00   1.00    -      -      -      -      -      -      -      -      -      -      -     vpmulld       %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT: 1.00   1.00   0.33   0.33    -      -      -      -      -      -      -     0.33    -     vpmulld       (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT: 1.00   1.00   0.33   0.33    -      -      -      -      -      -      -     0.33    -     vpmulld       (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterdd   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vpscatterdq   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vpscatterqd   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vpscatterqq   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterdd   %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterdq   %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterqd   %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterqq   %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -     0.50    -      -      -     0.50    -      -      -      -      -      -      -     vpshufd       $0, %xmm16, %xmm19
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpshufd       $0, (%rax), %xmm19
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpshufd       $0, (%rax){1to4}, %xmm19
@@ -4563,6 +4607,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -     0.50    -      -      -     0.50    -      -      -      -      -      -      -     vpunpckldq    %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpunpckldq    (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpunpckldq    (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterdps   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vscatterdpd   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vscatterqps   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vscatterqpd   %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterdps   %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterdpd   %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterqps   %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterqpd   %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vshuff32x4    $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4    $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4    $0, (%rax){1to8}, %ymm17, %ymm19
index b1bfd7a9ec448a9e4c885e0fcdf63fdf34538d19..9c006d4ebb077def2488093a8702d9b35df3f533 100644 (file)
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    10.00                       vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      3     1.00                        {evex} vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  3      2     1.00           *            {evex} vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     0.50                        vfmadd132pd    %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vfmadd132pd    (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vfmadd132pd    (%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     8     16.00          *            vpscatterdd    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterdq    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterqd    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterqq    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd        $0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd        $0, (%rax), %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd        $0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq    %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq    (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq    (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     7     16.00          *            vscatterdps    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterdpd    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterqps    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterqpd    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4     $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2029,7 +2052,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 339.67 99.67  332.83 332.83 16.00  643.67 2.00   5.33
+# CHECK-NEXT:  -     612.00 352.67 103.67 359.83 359.83 97.00  651.67 6.00   32.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2292,6 +2315,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     10.00  2.00    -      -      -      -     1.00    -      -     vdivps     %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -     vdivps     (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -     vdivps     (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     {evex}     vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   {evex}     vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vfmadd132pd        %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vfmadd132pd        (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vfmadd132pd        (%rax){1to8}, %zmm17, %zmm19
@@ -2745,6 +2770,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermq     %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq     (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq     (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   5.33   5.33   16.00  1.50   0.50   5.33   vpscatterdd        %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterdq        %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterqd        %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterqq        %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpshufd    $0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd    $0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd    $0, (%rax){1to16}, %zmm19
@@ -2808,6 +2837,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpunpcklqdq        %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpcklqdq        (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpcklqdq        (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   5.33   5.33   16.00  1.50   0.50   5.33   vscatterdps        %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterdpd        %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterqps        %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterqpd        %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vshuff32x4 $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4 $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19
index 2ad91ea514aa201c15661a2f61d5090b73981bf6..b4b18101a67b801d238e0877fcbbbaa4cf801b61 100644 (file)
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld        %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld        (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld        (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     4.00           *            vpscatterdd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vpscatterdq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vpscatterqd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vpscatterqq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     8.00           *            vpscatterdd    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vpscatterdq    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vpscatterqd    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vpscatterqq    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd        $0, %xmm16, %xmm19
 # CHECK-NEXT:  2      7     1.00    *                   vpshufd        $0, (%rax), %xmm19
 # CHECK-NEXT:  2      7     1.00    *                   vpshufd        $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpckldq     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     4.00           *            vscatterdps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vscatterdpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vscatterqps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vscatterqpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     8.00           *            vscatterdps    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vscatterdpd    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vscatterqps    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vscatterqpd    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4     $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4     $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3230,7 +3266,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     423.00 438.33 350.33 503.17 503.17 32.00  785.33 4.00   10.67
+# CHECK-NEXT:  -     423.00 462.33 358.33 521.83 521.83 88.00  801.33 12.00  29.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -4422,6 +4458,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vpmulld    %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vpmulld    (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vpmulld    (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   1.50   0.50   1.33   vpscatterdd        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vpscatterdq        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vpscatterqd        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vpscatterqq        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   1.50   0.50   2.67   vpscatterdd        %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vpscatterdq        %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vpscatterqd        %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vpscatterqq        %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpshufd    $0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd    $0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd    $0, (%rax){1to4}, %xmm19
@@ -4560,6 +4604,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   1.50   0.50   1.33   vscatterdps        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vscatterdpd        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vscatterqps        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vscatterqpd        %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   1.50   0.50   2.67   vscatterdps        %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vscatterdpd        %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vscatterqps        %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vscatterqpd        %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vshuff32x4 $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4 $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19
index 6742cfccb2d00108d8e6c14e8934805cf417d340..6e52eddd9a8f5ef1f7fee1b7c712a332a43858c6 100644 (file)
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      11    6.00                        vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      18    6.00    *                   vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      18    6.00    *                   vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      1     1.00                        {evex} vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  2      2     1.00           *            {evex} vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     1.00                        vfmadd132pd    %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  1      11    1.00    *                   vfmadd132pd    (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  1      11    1.00    *                   vfmadd132pd    (%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      1     0.50                        vpermq %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpermq (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd        $0, %zmm16, %zmm19
 # CHECK-NEXT:  1      8     1.00    *                   vpshufd        $0, (%rax), %zmm19
 # CHECK-NEXT:  1      8     1.00    *                   vpshufd        $0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq    %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     1.00    *                   vpunpcklqdq    (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     1.00    *                   vpunpcklqdq    (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps    %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd    %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps    %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd    %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      2     1.00                        vshuff32x4     $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4     $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4     $0, (%rax){1to16}, %zmm17, %zmm19
@@ -2042,7 +2065,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 2.67   2.67   2.67    -      -      -      -      -     221.00 1060.50 618.00 352.50 295.50 295.50 16.00 199.67 199.67 199.67 194.33 194.33 194.33 8.00   8.00
+# CHECK-NEXT: 5.33   5.33   5.33    -      -      -      -      -     221.00 1060.50 618.00 352.50 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50  16.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2305,6 +2328,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     6.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vdivps  %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     6.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vdivps  (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     6.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vdivps  (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -     {evex}  vextractps      $1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   1.00   0.33   0.33   0.33    -      -      -     0.50   0.50   {evex}  vextractps      $1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vfmadd132pd     %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vfmadd132pd     (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vfmadd132pd     (%rax){1to8}, %zmm17, %zmm19
@@ -2758,6 +2783,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpermq  %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpermq  (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpermq  (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdd     %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdq     %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqd     %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqq     %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpshufd $0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd $0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd $0, (%rax){1to16}, %zmm19
@@ -2821,6 +2850,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpunpcklqdq     %zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpcklqdq     (%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpcklqdq     (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdps     %zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdpd     %zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqps     %ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqpd     %zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vshuff32x4      $0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4      $0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4      $0, (%rax){1to16}, %zmm17, %zmm19
index 2d26eb50351a087d341984aaa385d37e109b38e6..4636e23d9df3e2e14602dca190e2e3a809a0d442 100644 (file)
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      3     0.50                        vpmulld        %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      10    0.50    *                   vpmulld        (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      10    0.50    *                   vpmulld        (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd        $0, %xmm16, %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpshufd        $0, (%rax), %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpshufd        $0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpunpckldq     %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpunpckldq     (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpunpckldq     (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd    %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps    %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd    %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps    %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd    %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      2     1.00                        vshuff32x4     $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4     $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4     $0, (%rax){1to8}, %ymm17, %ymm19
@@ -3243,7 +3279,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 5.33   5.33   5.33    -      -      -      -      -     208.00 948.00 501.50 261.50 478.50 478.50 32.00  324.33 324.33 324.33 313.67 313.67 313.67 16.00  16.00
+# CHECK-NEXT: 10.67  10.67  10.67   -      -      -      -      -     208.00 948.00 501.50 261.50 478.50 478.50 32.00  335.00 335.00 335.00 313.67 313.67 313.67 32.00  32.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -4435,6 +4471,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50    -      -     0.50    -      -      -      -      -      -      -      -      -      -      -     vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpmulld (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdd     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdq     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqd     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqq     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdd     %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdq     %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqd     %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqq     %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpshufd $0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd $0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd $0, (%rax){1to4}, %xmm19
@@ -4573,6 +4617,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpunpckldq      %ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpckldq      (%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpckldq      (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdps     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdpd     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqps     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqpd     %xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdps     %ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdpd     %ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqps     %xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqpd     %ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vshuff32x4      $0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4      $0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4      $0, (%rax){1to8}, %ymm17, %ymm19