AVX-512: Added all forms of VPABS instruction
author Elena Demikhovsky <elena.demikhovsky@intel.com>
Tue, 23 Jun 2015 08:19:46 +0000 (08:19 +0000)
committer Elena Demikhovsky <elena.demikhovsky@intel.com>
Tue, 23 Jun 2015 08:19:46 +0000 (08:19 +0000)
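Added intrinsics for all masked VPABS forms (byte, word, dword and qword elements at 128, 256 and 512 bits), together with MC encoding tests and codegen tests for the intrinsics.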

llvm-svn: 240386

13 files changed:
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/lib/Target/X86/X86IntrinsicsInfo.h
llvm/test/CodeGen/X86/avx512-intrinsics.ll
llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/test/MC/X86/avx512-encodings.s
llvm/test/MC/X86/avx512vl-encoding.s
llvm/test/MC/X86/x86-64-avx512bw.s

index 8e1d668..8caedfa 100644 (file)
@@ -1417,6 +1417,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [IntrNoMem]>;
 }
 
+
 // Vector blend
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
@@ -1981,12 +1982,78 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
   def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pabs_d_512 : GCCBuiltin<"__builtin_ia32_pabsd512_mask">,
-              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                                           llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pabs_q_512 : GCCBuiltin<"__builtin_ia32_pabsq512_mask">,
-              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                                          llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_b_128 : 
+       GCCBuiltin<"__builtin_ia32_pabsb128_mask">,
+        Intrinsic<[llvm_v16i8_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty,  llvm_i16_ty],
+        [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_b_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsb256_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+          [llvm_v32i8_ty, llvm_v32i8_ty,  llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_b_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsb512_mask">,
+          Intrinsic<[llvm_v64i8_ty],
+          [llvm_v64i8_ty, llvm_v64i8_ty,  llvm_i64_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_d_128 : 
+        GCCBuiltin<"__builtin_ia32_pabsd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_v4i32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_d_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsd256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v8i32_ty, llvm_v8i32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_d_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsd512_mask">,
+          Intrinsic<[llvm_v16i32_ty],
+          [llvm_v16i32_ty, llvm_v16i32_ty,  llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_q_128 : 
+        GCCBuiltin<"__builtin_ia32_pabsq128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+          [llvm_v2i64_ty, llvm_v2i64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_q_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsq256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+          [llvm_v4i64_ty, llvm_v4i64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_q_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsq512_mask">,
+          Intrinsic<[llvm_v8i64_ty],
+          [llvm_v8i64_ty, llvm_v8i64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_w_128 : 
+        GCCBuiltin<"__builtin_ia32_pabsw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+          [llvm_v8i16_ty, llvm_v8i16_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_w_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsw256_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+          [llvm_v16i16_ty, llvm_v16i16_ty,  llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_w_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsw512_mask">,
+          Intrinsic<[llvm_v32i16_ty],
+          [llvm_v32i16_ty, llvm_v32i16_ty,  llvm_i32_ty],
+          [IntrNoMem]>;
 }
 
 // Horizontal arithmetic ops
index d7d8e1e..ccf75f3 100644 (file)
@@ -18334,6 +18334,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UMIN:               return "X86ISD::UMIN";
   case X86ISD::SMAX:               return "X86ISD::SMAX";
   case X86ISD::SMIN:               return "X86ISD::SMIN";
+  case X86ISD::ABS:                return "X86ISD::ABS";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
index 9c98333..fdf0e59 100644 (file)
@@ -238,6 +238,9 @@ namespace llvm {
       /// Signed integer max and min.
       SMAX, SMIN,
 
+      /// Integer absolute value.
+      ABS,
+
       /// Floating point max and min.
       FMAX, FMIN,
 
index 56c5a63..b4ccbfb 100644 (file)
@@ -5612,77 +5612,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
 def v16i1sextv16i32  : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
 def v8i1sextv8i64  : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
 
-multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
-                        RegisterClass KRC, RegisterClass RC,
-                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
-                        string BrdcstStr> {
-  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-            []>, EVEX;
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
-             []>, EVEX, EVEX_K;
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
-              !strconcat(OpcodeStr,
-                         "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
-              []>, EVEX, EVEX_KZ;
-  let mayLoad = 1 in {
-    def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-              (ins x86memop:$src),
-              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-              []>, EVEX;
-    def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-               (ins KRC:$mask, x86memop:$src),
-               !strconcat(OpcodeStr,
-                          "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
-               []>, EVEX, EVEX_K;
-    def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-                (ins KRC:$mask, x86memop:$src),
-                !strconcat(OpcodeStr,
-                           "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
-                []>, EVEX, EVEX_KZ;
-    def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-               (ins x86scalar_mop:$src),
-               !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
-               []>, EVEX, EVEX_B;
-    def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-                (ins KRC:$mask, x86scalar_mop:$src),
-                !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
-                []>, EVEX, EVEX_B, EVEX_K;
-    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-                 (ins KRC:$mask, x86scalar_mop:$src),
-                 !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
-                            BrdcstStr, "}"),
-                 []>, EVEX, EVEX_B, EVEX_KZ;
-  }
-}
-
-defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
-                           i512mem, i32mem, "{1to16}">, EVEX_V512,
-                           EVEX_CD8<32, CD8VF>;
-defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
-                           i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
-                           EVEX_CD8<64, CD8VF>;
-
-def : Pat<(xor
-          (bc_v16i32 (v16i1sextv16i32)),
-          (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
-          (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
-          (bc_v8i64 (v8i1sextv8i64)),
-          (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
-          (VPABSQZrr VR512:$src)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
-                   (v16i32 immAllZerosV), (i16 -1))),
-          (VPABSDZrr VR512:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
-                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
-          (VPABSQZrr VR512:$src)>;
-
 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, RegisterClass KRC,
                         X86MemOperand x86memop,
@@ -6165,3 +6094,91 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
                                                   EVEX_CD8<32, CD8VF>;
 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
                                                   EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           X86VectorVTInfo _> {
+  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                    (ins _.RC:$src1), OpcodeStr##_.Suffix,
+                    "$src1", "$src1",
+                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
+
+  let mayLoad = 1 in
+    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                    (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
+                    "$src1", "$src1",
+                    (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            X86VectorVTInfo _> :
+           avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
+  let mayLoad = 1 in
+    defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                    (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
+                    "${src1}"##_.BroadcastStr,
+                    "${src1}"##_.BroadcastStr,
+                    (_.VT (OpNode (X86VBroadcast
+                                      (_.ScalarLdFrag addr:$src1))))>,
+               EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+  let Predicates = [prd] in
+    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+                              EVEX_V256;
+    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
+                              EVEX_V128;
+  }
+}
+
+multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+  let Predicates = [prd] in
+    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+                              EVEX_V512;
+
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+                                 EVEX_V256;
+    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+                                 EVEX_V128;
+  }
+}
+
+multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+                                 SDNode OpNode, Predicate prd> {
+  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
+                               prd>, VEX_W;
+  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+                                 SDNode OpNode, Predicate prd> {
+  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
+  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+                                  bits<8> opc_d, bits<8> opc_q,
+                                  string OpcodeStr, SDNode OpNode> {
+  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+                                    HasAVX512>,
+              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+                                    HasBWI>;
+}
+
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
+
+def : Pat<(xor
+          (bc_v16i32 (v16i1sextv16i32)),
+          (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
+          (VPABSDZrr VR512:$src)>;
+def : Pat<(xor
+          (bc_v8i64 (v8i1sextv8i64)),
+          (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
+          (VPABSQZrr VR512:$src)>;
index de3b3b6..6bf589f 100644 (file)
@@ -251,6 +251,7 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
 
 def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
 def X86VAlign  : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
+def X86Abs     : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
 
 def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
index b03f2b3..a15404c 100644 (file)
@@ -429,6 +429,18 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
   X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
index 3495d50..dc3dc0f 100644 (file)
@@ -489,19 +489,31 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>
  }
  declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
 
- define <16 x i32> @test_pabsd(<16 x i32> %a) {
- ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
- %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
- ret < 16 x i32> %res
- }
  declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
 
- define <8 x i64> @test_pabsq(<8 x i64> %a) {
- ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
- %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
- ret <8 x i64> %res
- }
- declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsd{{.*}}{%k1} 
+define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsq{{.*}}{%k1} 
+define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
 
 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
   ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
index 7c82d5f..9574c01 100644 (file)
@@ -969,4 +969,31 @@ define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %
   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
   %res2 = add <64 x i8> %res, %res1
   ret <64 x i8> %res2
-}
\ No newline at end of file
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsw{{.*}}{%k1} 
+define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
+  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsb{{.*}}{%k1} 
+define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
+  %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
+  %res2 = add <64 x i8> %res, %res1
+  ret <64 x i8> %res2
+}
+
index 6272cc8..8a66204 100644 (file)
@@ -3033,3 +3033,56 @@ define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %
   %res2 = add <32 x i8> %res, %res1
   ret <32 x i8> %res2
 }
+
+declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsb{{.*}}{%k1} 
+define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
+  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
+  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
+  %res2 = add <16 x i8> %res, %res1
+  ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsb{{.*}}{%k1} 
+define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
+  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
+  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
+  %res2 = add <32 x i8> %res, %res1
+  ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsw{{.*}}{%k1} 
+define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
+  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsw{{.*}}{%k1} 
+define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
+  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+
index b9f0338..b2da994 100644 (file)
@@ -2902,4 +2902,55 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <
   ret <8 x float> %res2
 }
 
+declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsq{{.*}}{%k1} 
+define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsq{{.*}}{%k1} 
+define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+  %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsd{{.*}}{%k1} 
+define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+  %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsd{{.*}}{%k1} 
+define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
 
index 05a7b1b..9bd3081 100644 (file)
@@ -9681,3 +9681,115 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK: vexpandps %zmm9, %zmm14 {%k2} {z}
 // CHECK:  encoding: [0x62,0x52,0x7d,0xca,0x88,0xf1]
           vexpandps %zmm9, %zmm14 {%k2} {z}
+
+// CHECK: vpabsd %zmm14, %zmm15
+// CHECK:  encoding: [0x62,0x52,0x7d,0x48,0x1e,0xfe]
+          vpabsd %zmm14, %zmm15
+
+// CHECK: vpabsd %zmm14, %zmm15 {%k6}
+// CHECK:  encoding: [0x62,0x52,0x7d,0x4e,0x1e,0xfe]
+          vpabsd %zmm14, %zmm15 {%k6}
+
+// CHECK: vpabsd %zmm14, %zmm15 {%k6} {z}
+// CHECK:  encoding: [0x62,0x52,0x7d,0xce,0x1e,0xfe]
+          vpabsd %zmm14, %zmm15 {%k6} {z}
+
+// CHECK: vpabsd (%rcx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0x39]
+          vpabsd (%rcx), %zmm15
+
+// CHECK: vpabsd 291(%rax,%r14,8), %zmm15
+// CHECK:  encoding: [0x62,0x32,0x7d,0x48,0x1e,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpabsd 291(%rax,%r14,8), %zmm15
+
+// CHECK: vpabsd (%rcx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0x39]
+          vpabsd (%rcx){1to16}, %zmm15
+
+// CHECK: vpabsd 8128(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0x7a,0x7f]
+          vpabsd 8128(%rdx), %zmm15
+
+// CHECK: vpabsd 8192(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0xba,0x00,0x20,0x00,0x00]
+          vpabsd 8192(%rdx), %zmm15
+
+// CHECK: vpabsd -8192(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0x7a,0x80]
+          vpabsd -8192(%rdx), %zmm15
+
+// CHECK: vpabsd -8256(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0xba,0xc0,0xdf,0xff,0xff]
+          vpabsd -8256(%rdx), %zmm15
+
+// CHECK: vpabsd 508(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0x7a,0x7f]
+          vpabsd 508(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsd 512(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0xba,0x00,0x02,0x00,0x00]
+          vpabsd 512(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsd -512(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0x7a,0x80]
+          vpabsd -512(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsd -516(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0xba,0xfc,0xfd,0xff,0xff]
+          vpabsd -516(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsq %zmm24, %zmm5
+// CHECK:  encoding: [0x62,0x92,0xfd,0x48,0x1f,0xe8]
+          vpabsq %zmm24, %zmm5
+
+// CHECK: vpabsq %zmm24, %zmm5 {%k6}
+// CHECK:  encoding: [0x62,0x92,0xfd,0x4e,0x1f,0xe8]
+          vpabsq %zmm24, %zmm5 {%k6}
+
+// CHECK: vpabsq %zmm24, %zmm5 {%k6} {z}
+// CHECK:  encoding: [0x62,0x92,0xfd,0xce,0x1f,0xe8]
+          vpabsq %zmm24, %zmm5 {%k6} {z}
+
+// CHECK: vpabsq (%rcx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x29]
+          vpabsq (%rcx), %zmm5
+
+// CHECK: vpabsq 291(%rax,%r14,8), %zmm5
+// CHECK:  encoding: [0x62,0xb2,0xfd,0x48,0x1f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpabsq 291(%rax,%r14,8), %zmm5
+
+// CHECK: vpabsq (%rcx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x29]
+          vpabsq (%rcx){1to8}, %zmm5
+
+// CHECK: vpabsq 8128(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x6a,0x7f]
+          vpabsq 8128(%rdx), %zmm5
+
+// CHECK: vpabsq 8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xaa,0x00,0x20,0x00,0x00]
+          vpabsq 8192(%rdx), %zmm5
+
+// CHECK: vpabsq -8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x6a,0x80]
+          vpabsq -8192(%rdx), %zmm5
+
+// CHECK: vpabsq -8256(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xaa,0xc0,0xdf,0xff,0xff]
+          vpabsq -8256(%rdx), %zmm5
+
+// CHECK: vpabsq 1016(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x6a,0x7f]
+          vpabsq 1016(%rdx){1to8}, %zmm5
+
+// CHECK: vpabsq 1024(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0x00,0x04,0x00,0x00]
+          vpabsq 1024(%rdx){1to8}, %zmm5
+
+// CHECK: vpabsq -1024(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x6a,0x80]
+          vpabsq -1024(%rdx){1to8}, %zmm5
+
+// CHECK: vpabsq -1032(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0xf8,0xfb,0xff,0xff]
+          vpabsq -1032(%rdx){1to8}, %zmm5
index deae35f..e3ec448 100644 (file)
 // CHECK: vexpandps %ymm29, %ymm29 {%k5} {z}
 // CHECK:  encoding: [0x62,0x02,0x7d,0xad,0x88,0xed]
           vexpandps %ymm29, %ymm29 {%k5} {z}
+
+// CHECK: vpabsd %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x1e,0xe3]
+          vpabsd %xmm19, %xmm28
+
+// CHECK: vpabsd %xmm19, %xmm28 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0e,0x1e,0xe3]
+          vpabsd %xmm19, %xmm28 {%k6}
+
+// CHECK: vpabsd %xmm19, %xmm28 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8e,0x1e,0xe3]
+          vpabsd %xmm19, %xmm28 {%k6} {z}
+
+// CHECK: vpabsd (%rcx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0x21]
+          vpabsd (%rcx), %xmm28
+
+// CHECK: vpabsd 291(%rax,%r14,8), %xmm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x1e,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpabsd 291(%rax,%r14,8), %xmm28
+
+// CHECK: vpabsd (%rcx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0x21]
+          vpabsd (%rcx){1to4}, %xmm28
+
+// CHECK: vpabsd 2032(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0x62,0x7f]
+          vpabsd 2032(%rdx), %xmm28
+
+// CHECK: vpabsd 2048(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0xa2,0x00,0x08,0x00,0x00]
+          vpabsd 2048(%rdx), %xmm28
+
+// CHECK: vpabsd -2048(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0x62,0x80]
+          vpabsd -2048(%rdx), %xmm28
+
+// CHECK: vpabsd -2064(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0xa2,0xf0,0xf7,0xff,0xff]
+          vpabsd -2064(%rdx), %xmm28
+
+// CHECK: vpabsd 508(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0x62,0x7f]
+          vpabsd 508(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd 512(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0xa2,0x00,0x02,0x00,0x00]
+          vpabsd 512(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd -512(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0x62,0x80]
+          vpabsd -512(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd -516(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0xa2,0xfc,0xfd,0xff,0xff]
+          vpabsd -516(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd %ymm18, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x1e,0xca]
+          vpabsd %ymm18, %ymm25
+
+// CHECK: vpabsd %ymm18, %ymm25 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2a,0x1e,0xca]
+          vpabsd %ymm18, %ymm25 {%k2}
+
+// CHECK: vpabsd %ymm18, %ymm25 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaa,0x1e,0xca]
+          vpabsd %ymm18, %ymm25 {%k2} {z}
+
+// CHECK: vpabsd (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x09]
+          vpabsd (%rcx), %ymm25
+
+// CHECK: vpabsd 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x1e,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpabsd 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpabsd (%rcx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x09]
+          vpabsd (%rcx){1to8}, %ymm25
+
+// CHECK: vpabsd 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x4a,0x7f]
+          vpabsd 4064(%rdx), %ymm25
+
+// CHECK: vpabsd 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x8a,0x00,0x10,0x00,0x00]
+          vpabsd 4096(%rdx), %ymm25
+
+// CHECK: vpabsd -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x4a,0x80]
+          vpabsd -4096(%rdx), %ymm25
+
+// CHECK: vpabsd -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x8a,0xe0,0xef,0xff,0xff]
+          vpabsd -4128(%rdx), %ymm25
+
+// CHECK: vpabsd 508(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x4a,0x7f]
+          vpabsd 508(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsd 512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x8a,0x00,0x02,0x00,0x00]
+          vpabsd 512(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsd -512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x4a,0x80]
+          vpabsd -512(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsd -516(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x8a,0xfc,0xfd,0xff,0xff]
+          vpabsd -516(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsq %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x1f,0xde]
+          vpabsq %xmm22, %xmm19
+
+// CHECK: vpabsq %xmm22, %xmm19 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x0a,0x1f,0xde]
+          vpabsq %xmm22, %xmm19 {%k2}
+
+// CHECK: vpabsq %xmm22, %xmm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x8a,0x1f,0xde]
+          vpabsq %xmm22, %xmm19 {%k2} {z}
+
+// CHECK: vpabsq (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x19]
+          vpabsq (%rcx), %xmm19
+
+// CHECK: vpabsq 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x1f,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpabsq 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpabsq (%rcx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x19]
+          vpabsq (%rcx){1to2}, %xmm19
+
+// CHECK: vpabsq 2032(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x5a,0x7f]
+          vpabsq 2032(%rdx), %xmm19
+
+// CHECK: vpabsq 2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x9a,0x00,0x08,0x00,0x00]
+          vpabsq 2048(%rdx), %xmm19
+
+// CHECK: vpabsq -2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x5a,0x80]
+          vpabsq -2048(%rdx), %xmm19
+
+// CHECK: vpabsq -2064(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x9a,0xf0,0xf7,0xff,0xff]
+          vpabsq -2064(%rdx), %xmm19
+
+// CHECK: vpabsq 1016(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x5a,0x7f]
+          vpabsq 1016(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq 1024(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x9a,0x00,0x04,0x00,0x00]
+          vpabsq 1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq -1024(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x5a,0x80]
+          vpabsq -1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq -1032(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x9a,0xf8,0xfb,0xff,0xff]
+          vpabsq -1032(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq %ymm17, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x1f,0xf1]
+          vpabsq %ymm17, %ymm22
+
+// CHECK: vpabsq %ymm17, %ymm22 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2e,0x1f,0xf1]
+          vpabsq %ymm17, %ymm22 {%k6}
+
+// CHECK: vpabsq %ymm17, %ymm22 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xae,0x1f,0xf1]
+          vpabsq %ymm17, %ymm22 {%k6} {z}
+
+// CHECK: vpabsq (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x31]
+          vpabsq (%rcx), %ymm22
+
+// CHECK: vpabsq 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x1f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpabsq 291(%rax,%r14,8), %ymm22
+
+// CHECK: vpabsq (%rcx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x31]
+          vpabsq (%rcx){1to4}, %ymm22
+
+// CHECK: vpabsq 4064(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x72,0x7f]
+          vpabsq 4064(%rdx), %ymm22
+
+// CHECK: vpabsq 4096(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0xb2,0x00,0x10,0x00,0x00]
+          vpabsq 4096(%rdx), %ymm22
+
+// CHECK: vpabsq -4096(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x72,0x80]
+          vpabsq -4096(%rdx), %ymm22
+
+// CHECK: vpabsq -4128(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0xb2,0xe0,0xef,0xff,0xff]
+          vpabsq -4128(%rdx), %ymm22
+
+// CHECK: vpabsq 1016(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x72,0x7f]
+          vpabsq 1016(%rdx){1to4}, %ymm22
+
+// CHECK: vpabsq 1024(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0x00,0x04,0x00,0x00]
+          vpabsq 1024(%rdx){1to4}, %ymm22
+
+// CHECK: vpabsq -1024(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x72,0x80]
+          vpabsq -1024(%rdx){1to4}, %ymm22
+
+// CHECK: vpabsq -1032(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0xf8,0xfb,0xff,0xff]
+          vpabsq -1032(%rdx){1to4}, %ymm22
+
index 5473124..fc6df8c 100644 (file)
 // CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22
 // CHECK:  encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff]
           vpshufb -8256(%rdx), %zmm26, %zmm22
+
+// CHECK: vpabsb %zmm27, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x7d,0x48,0x1c,0xcb]
+          vpabsb %zmm27, %zmm17
+
+// CHECK: vpabsb %zmm27, %zmm17 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x4f,0x1c,0xcb]
+          vpabsb %zmm27, %zmm17 {%k7}
+
+// CHECK: vpabsb %zmm27, %zmm17 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xcf,0x1c,0xcb]
+          vpabsb %zmm27, %zmm17 {%k7} {z}
+
+// CHECK: vpabsb (%rcx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x09]
+          vpabsb (%rcx), %zmm17
+
+// CHECK: vpabsb 291(%rax,%r14,8), %zmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x1c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpabsb 291(%rax,%r14,8), %zmm17
+
+// CHECK: vpabsb 8128(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x4a,0x7f]
+          vpabsb 8128(%rdx), %zmm17
+
+// CHECK: vpabsb 8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x8a,0x00,0x20,0x00,0x00]
+          vpabsb 8192(%rdx), %zmm17
+
+// CHECK: vpabsb -8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x4a,0x80]
+          vpabsb -8192(%rdx), %zmm17
+
+// CHECK: vpabsb -8256(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x8a,0xc0,0xdf,0xff,0xff]
+          vpabsb -8256(%rdx), %zmm17
+
+// CHECK: vpabsw %zmm24, %zmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x48,0x1d,0xf0]
+          vpabsw %zmm24, %zmm30
+
+// CHECK: vpabsw %zmm24, %zmm30 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x4e,0x1d,0xf0]
+          vpabsw %zmm24, %zmm30 {%k6}
+
+// CHECK: vpabsw %zmm24, %zmm30 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xce,0x1d,0xf0]
+          vpabsw %zmm24, %zmm30 {%k6} {z}
+
+// CHECK: vpabsw (%rcx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0x31]
+          vpabsw (%rcx), %zmm30
+
+// CHECK: vpabsw 291(%rax,%r14,8), %zmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x1d,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpabsw 291(%rax,%r14,8), %zmm30
+
+// CHECK: vpabsw 8128(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0x72,0x7f]
+          vpabsw 8128(%rdx), %zmm30
+
+// CHECK: vpabsw 8192(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0x00,0x20,0x00,0x00]
+          vpabsw 8192(%rdx), %zmm30
+
+// CHECK: vpabsw -8192(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0x72,0x80]
+          vpabsw -8192(%rdx), %zmm30
+
+// CHECK: vpabsw -8256(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff]
+          vpabsw -8256(%rdx), %zmm30