AVX-512: Implemented SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2 instructions for SKX...
author     Elena Demikhovsky <elena.demikhovsky@intel.com>
           Wed, 3 Jun 2015 10:56:40 +0000 (10:56 +0000)
committer  Elena Demikhovsky <elena.demikhovsky@intel.com>
           Wed, 3 Jun 2015 10:56:40 +0000 (10:56 +0000)
Added tests for encoding.

By Igor Breger (igor.breger@intel.com)

llvm-svn: 238917
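
For readers unfamiliar with these instructions: per the Intel SDM, each 2-bit field of the imm8 selects one 128-bit lane of the result, with the two low fields reading the first source and the two high fields the second (the 256-bit VL forms select between just two lanes per source). The plain-C sketch below is illustrative only and not part of this patch; the v512 type and shuf64x2 helper are made up for the example and model the 512-bit VSHUFF64X2/VSHUFI64X2 lane selection.

    /* Illustrative sketch only -- not part of this patch.  Models the
     * 512-bit VSHUFF64X2/VSHUFI64X2 lane selection: each 2-bit imm8 field
     * picks one 128-bit lane; the two low fields read src1, the two high
     * fields read src2. */
    #include <stdint.h>

    typedef struct { uint64_t q[8]; } v512;   /* eight 64-bit elements */

    static v512 shuf64x2(v512 src1, v512 src2, uint8_t imm8) {
        v512 dst;
        for (int i = 0; i < 4; ++i) {                   /* destination lane i  */
            int sel = (imm8 >> (2 * i)) & 3;            /* source lane index   */
            const v512 *src = (i < 2) ? &src1 : &src2;  /* low lanes from src1 */
            dst.q[2 * i]     = src->q[2 * sel];
            dst.q[2 * i + 1] = src->q[2 * sel + 1];
        }
        return dst;
    }

With imm8 = 0xab (0b10101011), as in the first tests added below, the destination lanes come out as src1 lane 3, src1 lane 2, src2 lane 2, src2 lane 2. VSHUFF32X4/VSHUFI32X4 perform the same lane selection and differ only in element size (masking and broadcast granularity). Note that the AT&T-syntax tests list operands in reverse order, so in "vshuff32x4 $171, %zmm3, %zmm24, %zmm6" the first source is %zmm24 and the second is %zmm3.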

llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/test/MC/X86/avx512-encodings.s
llvm/test/MC/X86/x86-64-avx512f_vl.s

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index efc2f49..5e1441f 100644
@@ -18301,6 +18301,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PSHUFHW:            return "X86ISD::PSHUFHW";
   case X86ISD::PSHUFLW:            return "X86ISD::PSHUFLW";
   case X86ISD::SHUFP:              return "X86ISD::SHUFP";
+  case X86ISD::SHUF128:            return "X86ISD::SHUF128";
   case X86ISD::MOVLHPS:            return "X86ISD::MOVLHPS";
   case X86ISD::MOVLHPD:            return "X86ISD::MOVLHPD";
   case X86ISD::MOVHLPS:            return "X86ISD::MOVHLPS";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a403ee8..fc412cd 100644
@@ -354,6 +354,8 @@ namespace llvm {
       PSHUFHW,
       PSHUFLW,
       SHUFP,
+      // Shuffle Packed Values at 128-bit granularity
+      SHUF128,
       MOVDDUP,
       MOVSHDUP,
       MOVSLDUP,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 15fd060..24c7200 100644
@@ -5996,6 +5996,34 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }
 }
 
+//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
+//                               op(reg_vec2,mem_vec,imm)
+//                               op(reg_vec2,broadcast(eltVt),imm)
+multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _>{
+  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                      (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT _.RC:$src2),
+                              (i8 imm:$src3))>;
+  let mayLoad = 1 in {
+    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                      (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
+                              (i8 imm:$src3))>;
+    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+                      OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
+                      "$src1, ${src2}"##_.BroadcastStr##", $src3",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+                              (i8 imm:$src3))>, EVEX_B;
+  }
+}
+
 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
 //                                      op(reg_vec2,mem_scalar,imm)
 //all instruction created with FROUND_CURRENT
@@ -6048,18 +6076,18 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
 
 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
-    let Predicates = [prd] in {
-      defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+  let Predicates = [prd] in {
+    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
                   avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
                                   EVEX_V512;
 
-    }
-    let Predicates = [prd, HasVLX] in {
-     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
+  }
+  let Predicates = [prd, HasVLX] in {
+    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
                                   EVEX_V128;
-     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
+    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
                                   EVEX_V256;
-    }
+  }
 }
 
 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
@@ -6067,7 +6095,7 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
   let Predicates = [prd] in {
      defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
                  avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
-    }
+  }
 }
 
 defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
@@ -6098,3 +6126,23 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                                                  0x51, X86VRange, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
+
+multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
+                                       bits<8> opc, SDNode OpNode = X86Shuf128>{
+  let Predicates = [HasAVX512] in {
+    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+
+  }
+  let Predicates = [HasAVX512, HasVLX] in {
+     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+  }
+}
+
+defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
+      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
+      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
+      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
+      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 488dfc7..bb894bb 100644
@@ -249,7 +249,8 @@ def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
 def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
 
-def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+def X86Shufp   : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
+def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>;
 
 def X86Movddup  : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
 def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s
index d2ec6bd..4232fff 100644
@@ -8016,3 +8016,243 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0xe1,0x7d,0x58,0x70,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
           vpshufd $123, -516(%rdx){1to16}, %zmm19
 
+// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0xab]
+          vshuff32x4 $171, %zmm3, %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2}
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x42,0x23,0xf3,0xab]
+          vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2}
+
+// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} {z}
+// CHECK:  encoding: [0x62,0xf3,0x3d,0xc2,0x23,0xf3,0xab]
+          vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} {z}
+
+// CHECK: vshuff32x4 $123, %zmm3, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0x7b]
+          vshuff32x4 $123, %zmm3, %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, (%rcx), %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0x31,0x7b]
+          vshuff32x4 $123, (%rcx), %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, 291(%rax,%r14,8), %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xb3,0x3d,0x40,0x23,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshuff32x4 $123, 291(%rax,%r14,8), %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, (%rcx){1to16}, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x50,0x23,0x31,0x7b]
+          vshuff32x4 $123, (%rcx){1to16}, %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, 8128(%rdx), %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0x72,0x7f,0x7b]
+          vshuff32x4 $123, 8128(%rdx), %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, 8192(%rdx), %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0xb2,0x00,0x20,0x00,0x00,0x7b]
+          vshuff32x4 $123, 8192(%rdx), %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, -8192(%rdx), %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0x72,0x80,0x7b]
+          vshuff32x4 $123, -8192(%rdx), %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, -8256(%rdx), %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x40,0x23,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+          vshuff32x4 $123, -8256(%rdx), %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, 508(%rdx){1to16}, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x50,0x23,0x72,0x7f,0x7b]
+          vshuff32x4 $123, 508(%rdx){1to16}, %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, 512(%rdx){1to16}, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x50,0x23,0xb2,0x00,0x02,0x00,0x00,0x7b]
+          vshuff32x4 $123, 512(%rdx){1to16}, %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, -512(%rdx){1to16}, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x50,0x23,0x72,0x80,0x7b]
+          vshuff32x4 $123, -512(%rdx){1to16}, %zmm24, %zmm6
+
+// CHECK: vshuff32x4 $123, -516(%rdx){1to16}, %zmm24, %zmm6
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x50,0x23,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+          vshuff32x4 $123, -516(%rdx){1to16}, %zmm24, %zmm6
+
+// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x53,0xb5,0x40,0x23,0xfb,0xab]
+          vshuff64x2 $171, %zmm11, %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2}
+// CHECK:  encoding: [0x62,0x53,0xb5,0x42,0x23,0xfb,0xab]
+          vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2}
+
+// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} {z}
+// CHECK:  encoding: [0x62,0x53,0xb5,0xc2,0x23,0xfb,0xab]
+          vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} {z}
+
+// CHECK: vshuff64x2 $123, %zmm11, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x53,0xb5,0x40,0x23,0xfb,0x7b]
+          vshuff64x2 $123, %zmm11, %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, (%rcx), %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x40,0x23,0x39,0x7b]
+          vshuff64x2 $123, (%rcx), %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, 291(%rax,%r14,8), %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x33,0xb5,0x40,0x23,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshuff64x2 $123, 291(%rax,%r14,8), %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, (%rcx){1to8}, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x50,0x23,0x39,0x7b]
+          vshuff64x2 $123, (%rcx){1to8}, %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, 8128(%rdx), %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x40,0x23,0x7a,0x7f,0x7b]
+          vshuff64x2 $123, 8128(%rdx), %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, 8192(%rdx), %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x40,0x23,0xba,0x00,0x20,0x00,0x00,0x7b]
+          vshuff64x2 $123, 8192(%rdx), %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, -8192(%rdx), %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x40,0x23,0x7a,0x80,0x7b]
+          vshuff64x2 $123, -8192(%rdx), %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, -8256(%rdx), %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x40,0x23,0xba,0xc0,0xdf,0xff,0xff,0x7b]
+          vshuff64x2 $123, -8256(%rdx), %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, 1016(%rdx){1to8}, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x50,0x23,0x7a,0x7f,0x7b]
+          vshuff64x2 $123, 1016(%rdx){1to8}, %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, 1024(%rdx){1to8}, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x50,0x23,0xba,0x00,0x04,0x00,0x00,0x7b]
+          vshuff64x2 $123, 1024(%rdx){1to8}, %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, -1024(%rdx){1to8}, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x50,0x23,0x7a,0x80,0x7b]
+          vshuff64x2 $123, -1024(%rdx){1to8}, %zmm25, %zmm15
+
+// CHECK: vshuff64x2 $123, -1032(%rdx){1to8}, %zmm25, %zmm15
+// CHECK:  encoding: [0x62,0x73,0xb5,0x50,0x23,0xba,0xf8,0xfb,0xff,0xff,0x7b]
+          vshuff64x2 $123, -1032(%rdx){1to8}, %zmm25, %zmm15
+
+// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0x93,0x1d,0x40,0x43,0xc9,0xab]
+          vshufi32x4 $171, %zmm25, %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4}
+// CHECK:  encoding: [0x62,0x93,0x1d,0x44,0x43,0xc9,0xab]
+          vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4}
+
+// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} {z}
+// CHECK:  encoding: [0x62,0x93,0x1d,0xc4,0x43,0xc9,0xab]
+          vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} {z}
+
+// CHECK: vshufi32x4 $123, %zmm25, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0x93,0x1d,0x40,0x43,0xc9,0x7b]
+          vshufi32x4 $123, %zmm25, %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, (%rcx), %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x40,0x43,0x09,0x7b]
+          vshufi32x4 $123, (%rcx), %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, 291(%rax,%r14,8), %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xb3,0x1d,0x40,0x43,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufi32x4 $123, 291(%rax,%r14,8), %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, (%rcx){1to16}, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x50,0x43,0x09,0x7b]
+          vshufi32x4 $123, (%rcx){1to16}, %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, 8128(%rdx), %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x40,0x43,0x4a,0x7f,0x7b]
+          vshufi32x4 $123, 8128(%rdx), %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, 8192(%rdx), %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x40,0x43,0x8a,0x00,0x20,0x00,0x00,0x7b]
+          vshufi32x4 $123, 8192(%rdx), %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, -8192(%rdx), %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x40,0x43,0x4a,0x80,0x7b]
+          vshufi32x4 $123, -8192(%rdx), %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, -8256(%rdx), %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x40,0x43,0x8a,0xc0,0xdf,0xff,0xff,0x7b]
+          vshufi32x4 $123, -8256(%rdx), %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, 508(%rdx){1to16}, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x50,0x43,0x4a,0x7f,0x7b]
+          vshufi32x4 $123, 508(%rdx){1to16}, %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, 512(%rdx){1to16}, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x50,0x43,0x8a,0x00,0x02,0x00,0x00,0x7b]
+          vshufi32x4 $123, 512(%rdx){1to16}, %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, -512(%rdx){1to16}, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x50,0x43,0x4a,0x80,0x7b]
+          vshufi32x4 $123, -512(%rdx){1to16}, %zmm28, %zmm1
+
+// CHECK: vshufi32x4 $123, -516(%rdx){1to16}, %zmm28, %zmm1
+// CHECK:  encoding: [0x62,0xf3,0x1d,0x50,0x43,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+          vshufi32x4 $123, -516(%rdx){1to16}, %zmm28, %zmm1
+
+// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x40,0x43,0xdb,0xab]
+          vshufi64x2 $171, %zmm19, %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7}
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x47,0x43,0xdb,0xab]
+          vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7}
+
+// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} {z}
+// CHECK:  encoding: [0x62,0xb3,0xfd,0xc7,0x43,0xdb,0xab]
+          vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} {z}
+
+// CHECK: vshufi64x2 $123, %zmm19, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x40,0x43,0xdb,0x7b]
+          vshufi64x2 $123, %zmm19, %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, (%rcx), %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x40,0x43,0x19,0x7b]
+          vshufi64x2 $123, (%rcx), %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, 291(%rax,%r14,8), %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x40,0x43,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufi64x2 $123, 291(%rax,%r14,8), %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, (%rcx){1to8}, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x50,0x43,0x19,0x7b]
+          vshufi64x2 $123, (%rcx){1to8}, %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, 8128(%rdx), %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x40,0x43,0x5a,0x7f,0x7b]
+          vshufi64x2 $123, 8128(%rdx), %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, 8192(%rdx), %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x40,0x43,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vshufi64x2 $123, 8192(%rdx), %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, -8192(%rdx), %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x40,0x43,0x5a,0x80,0x7b]
+          vshufi64x2 $123, -8192(%rdx), %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, -8256(%rdx), %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x40,0x43,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vshufi64x2 $123, -8256(%rdx), %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, 1016(%rdx){1to8}, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x50,0x43,0x5a,0x7f,0x7b]
+          vshufi64x2 $123, 1016(%rdx){1to8}, %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, 1024(%rdx){1to8}, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0x00,0x04,0x00,0x00,0x7b]
+          vshufi64x2 $123, 1024(%rdx){1to8}, %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, -1024(%rdx){1to8}, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x50,0x43,0x5a,0x80,0x7b]
+          vshufi64x2 $123, -1024(%rdx){1to8}, %zmm16, %zmm3
+
+// CHECK: vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+          vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3
+
diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s
index 24caa45..dd2a49d 100644
@@ -10549,3 +10549,243 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK:  encoding: [0x62,0xe1,0x7d,0x38,0x70,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
           vpshufd $123, -516(%rdx){1to8}, %ymm20
 
+// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0xab]
+          vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 {%k7}
+// CHECK:  encoding: [0x62,0x23,0x25,0x27,0x23,0xea,0xab]
+          vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 {%k7}
+
+// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x23,0x25,0xa7,0x23,0xea,0xab]
+          vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 {%k7} {z}
+
+// CHECK: vshuff32x4 $123, %ymm18, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0x7b]
+          vshuff32x4 $0x7b, %ymm18, %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, (%rcx), %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x20,0x23,0x29,0x7b]
+          vshuff32x4 $0x7b, (%rcx), %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, 291(%rax,%r14,8), %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x23,0x25,0x20,0x23,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshuff32x4 $0x7b, 291(%rax,%r14,8), %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, (%rcx){1to8}, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x30,0x23,0x29,0x7b]
+          vshuff32x4 $0x7b, (%rcx){1to8}, %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, 4064(%rdx), %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x20,0x23,0x6a,0x7f,0x7b]
+          vshuff32x4 $0x7b, 4064(%rdx), %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, 4096(%rdx), %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x20,0x23,0xaa,0x00,0x10,0x00,0x00,0x7b]
+          vshuff32x4 $0x7b, 4096(%rdx), %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, -4096(%rdx), %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x20,0x23,0x6a,0x80,0x7b]
+          vshuff32x4 $0x7b, -4096(%rdx), %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, -4128(%rdx), %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x20,0x23,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+          vshuff32x4 $0x7b, -4128(%rdx), %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, 508(%rdx){1to8}, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x30,0x23,0x6a,0x7f,0x7b]
+          vshuff32x4 $0x7b, 508(%rdx){1to8}, %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, 512(%rdx){1to8}, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x30,0x23,0xaa,0x00,0x02,0x00,0x00,0x7b]
+          vshuff32x4 $0x7b, 512(%rdx){1to8}, %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, -512(%rdx){1to8}, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x30,0x23,0x6a,0x80,0x7b]
+          vshuff32x4 $0x7b, -512(%rdx){1to8}, %ymm27, %ymm29
+
+// CHECK: vshuff32x4 $123, -516(%rdx){1to8}, %ymm27, %ymm29
+// CHECK:  encoding: [0x62,0x63,0x25,0x30,0x23,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+          vshuff32x4 $0x7b, -516(%rdx){1to8}, %ymm27, %ymm29
+
+// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xa3,0xed,0x20,0x23,0xd4,0xab]
+          vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 {%k5}
+// CHECK:  encoding: [0x62,0xa3,0xed,0x25,0x23,0xd4,0xab]
+          vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 {%k5}
+
+// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa3,0xed,0xa5,0x23,0xd4,0xab]
+          vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 {%k5} {z}
+
+// CHECK: vshuff64x2 $123, %ymm20, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xa3,0xed,0x20,0x23,0xd4,0x7b]
+          vshuff64x2 $0x7b, %ymm20, %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, (%rcx), %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x20,0x23,0x11,0x7b]
+          vshuff64x2 $0x7b, (%rcx), %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, 291(%rax,%r14,8), %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xa3,0xed,0x20,0x23,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshuff64x2 $0x7b, 291(%rax,%r14,8), %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, (%rcx){1to4}, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x30,0x23,0x11,0x7b]
+          vshuff64x2 $0x7b, (%rcx){1to4}, %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, 4064(%rdx), %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x20,0x23,0x52,0x7f,0x7b]
+          vshuff64x2 $0x7b, 4064(%rdx), %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, 4096(%rdx), %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x20,0x23,0x92,0x00,0x10,0x00,0x00,0x7b]
+          vshuff64x2 $0x7b, 4096(%rdx), %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, -4096(%rdx), %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x20,0x23,0x52,0x80,0x7b]
+          vshuff64x2 $0x7b, -4096(%rdx), %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, -4128(%rdx), %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x20,0x23,0x92,0xe0,0xef,0xff,0xff,0x7b]
+          vshuff64x2 $0x7b, -4128(%rdx), %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, 1016(%rdx){1to4}, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x30,0x23,0x52,0x7f,0x7b]
+          vshuff64x2 $0x7b, 1016(%rdx){1to4}, %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, 1024(%rdx){1to4}, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x30,0x23,0x92,0x00,0x04,0x00,0x00,0x7b]
+          vshuff64x2 $0x7b, 1024(%rdx){1to4}, %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, -1024(%rdx){1to4}, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x30,0x23,0x52,0x80,0x7b]
+          vshuff64x2 $0x7b, -1024(%rdx){1to4}, %ymm18, %ymm18
+
+// CHECK: vshuff64x2 $123, -1032(%rdx){1to4}, %ymm18, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0xed,0x30,0x23,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+          vshuff64x2 $0x7b, -1032(%rdx){1to4}, %ymm18, %ymm18
+
+// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xa3,0x25,0x20,0x43,0xd1,0xab]
+          vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 {%k7}
+// CHECK:  encoding: [0x62,0xa3,0x25,0x27,0x43,0xd1,0xab]
+          vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 {%k7}
+
+// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa3,0x25,0xa7,0x43,0xd1,0xab]
+          vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 {%k7} {z}
+
+// CHECK: vshufi32x4 $123, %ymm17, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xa3,0x25,0x20,0x43,0xd1,0x7b]
+          vshufi32x4 $0x7b, %ymm17, %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, (%rcx), %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x20,0x43,0x11,0x7b]
+          vshufi32x4 $0x7b, (%rcx), %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, 291(%rax,%r14,8), %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xa3,0x25,0x20,0x43,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufi32x4 $0x7b, 291(%rax,%r14,8), %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, (%rcx){1to8}, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x30,0x43,0x11,0x7b]
+          vshufi32x4 $0x7b, (%rcx){1to8}, %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, 4064(%rdx), %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x20,0x43,0x52,0x7f,0x7b]
+          vshufi32x4 $0x7b, 4064(%rdx), %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, 4096(%rdx), %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x20,0x43,0x92,0x00,0x10,0x00,0x00,0x7b]
+          vshufi32x4 $0x7b, 4096(%rdx), %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, -4096(%rdx), %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x20,0x43,0x52,0x80,0x7b]
+          vshufi32x4 $0x7b, -4096(%rdx), %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, -4128(%rdx), %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x20,0x43,0x92,0xe0,0xef,0xff,0xff,0x7b]
+          vshufi32x4 $0x7b, -4128(%rdx), %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, 508(%rdx){1to8}, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x30,0x43,0x52,0x7f,0x7b]
+          vshufi32x4 $0x7b, 508(%rdx){1to8}, %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, 512(%rdx){1to8}, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x30,0x43,0x92,0x00,0x02,0x00,0x00,0x7b]
+          vshufi32x4 $0x7b, 512(%rdx){1to8}, %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, -512(%rdx){1to8}, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x30,0x43,0x52,0x80,0x7b]
+          vshufi32x4 $0x7b, -512(%rdx){1to8}, %ymm27, %ymm18
+
+// CHECK: vshufi32x4 $123, -516(%rdx){1to8}, %ymm27, %ymm18
+// CHECK:  encoding: [0x62,0xe3,0x25,0x30,0x43,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+          vshufi32x4 $0x7b, -516(%rdx){1to8}, %ymm27, %ymm18
+
+// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x23,0xad,0x20,0x43,0xcd,0xab]
+          vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 {%k3}
+// CHECK:  encoding: [0x62,0x23,0xad,0x23,0x43,0xcd,0xab]
+          vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 {%k3}
+
+// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x23,0xad,0xa3,0x43,0xcd,0xab]
+          vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 {%k3} {z}
+
+// CHECK: vshufi64x2 $123, %ymm21, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x23,0xad,0x20,0x43,0xcd,0x7b]
+          vshufi64x2 $0x7b, %ymm21, %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, (%rcx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x20,0x43,0x09,0x7b]
+          vshufi64x2 $0x7b, (%rcx), %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, 291(%rax,%r14,8), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x23,0xad,0x20,0x43,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufi64x2 $0x7b, 291(%rax,%r14,8), %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, (%rcx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x30,0x43,0x09,0x7b]
+          vshufi64x2 $0x7b, (%rcx){1to4}, %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, 4064(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x20,0x43,0x4a,0x7f,0x7b]
+          vshufi64x2 $0x7b, 4064(%rdx), %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, 4096(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x20,0x43,0x8a,0x00,0x10,0x00,0x00,0x7b]
+          vshufi64x2 $0x7b, 4096(%rdx), %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, -4096(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x20,0x43,0x4a,0x80,0x7b]
+          vshufi64x2 $0x7b, -4096(%rdx), %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, -4128(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x20,0x43,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+          vshufi64x2 $0x7b, -4128(%rdx), %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, 1016(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x30,0x43,0x4a,0x7f,0x7b]
+          vshufi64x2 $0x7b, 1016(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, 1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vshufi64x2 $0x7b, 1024(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, -1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x30,0x43,0x4a,0x80,0x7b]
+          vshufi64x2 $0x7b, -1024(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vshufi64x2 $123, -1032(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vshufi64x2 $0x7b, -1032(%rdx){1to4}, %ymm26, %ymm25
+