From 52266388f8941adff5f7ca6c246d14a40a6ddbc0 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 4 May 2015 12:35:55 +0000 Subject: [PATCH] AVX-512: added integer "add" and "sub" instructions with saturation for SKX with intrinsics and tests by Asaf Badouh (asaf.badouh@intel.com) llvm-svn: 236418 --- llvm/include/llvm/IR/IntrinsicsX86.td | 72 + llvm/lib/Target/X86/X86ISelLowering.h | 6 +- llvm/lib/Target/X86/X86InstrAVX512.td | 8 + llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 + llvm/lib/Target/X86/X86IntrinsicsInfo.h | 24 + llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 190 +- llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 751 ++++++++ llvm/test/MC/X86/x86-64-avx512bw.s | 901 +++++++++ llvm/test/MC/X86/x86-64-avx512bw_vl.s | 2304 ++++++++++++++++++++++++ 9 files changed, 4257 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index c9d73d7..9a85a33 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3425,6 +3425,42 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padds_b_128 : GCCBuiltin<"__builtin_ia32_paddsb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padds_b_256 : GCCBuiltin<"__builtin_ia32_paddsb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padds_w_128 : GCCBuiltin<"__builtin_ia32_paddsw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padds_w_256 : GCCBuiltin<"__builtin_ia32_paddsw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_paddus_b_128 : GCCBuiltin<"__builtin_ia32_paddusb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_paddus_b_256 : GCCBuiltin<"__builtin_ia32_paddusb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_paddus_w_128 : GCCBuiltin<"__builtin_ia32_paddusw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_paddus_w_256 : GCCBuiltin<"__builtin_ia32_paddusw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; @@ -3461,6 +3497,42 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubs_b_128 : GCCBuiltin<"__builtin_ia32_psubsb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubs_b_256 : GCCBuiltin<"__builtin_ia32_psubsb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubs_w_128 : GCCBuiltin<"__builtin_ia32_psubsw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubs_w_256 : GCCBuiltin<"__builtin_ia32_psubsw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubus_b_128 : GCCBuiltin<"__builtin_ia32_psubusb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubus_b_256 : GCCBuiltin<"__builtin_ia32_psubusb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubus_w_128 : GCCBuiltin<"__builtin_ia32_psubusw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubus_w_256 : GCCBuiltin<"__builtin_ia32_psubusw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 5130c37..027a3ca 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -206,8 +206,12 @@ namespace llvm { FMUL_RND, FDIV_RND, - // Integer sub with unsigned saturation. + // Integer add/sub with unsigned saturation. + ADDUS, SUBUS, + // Integer add/sub with signed saturation. + ADDS, + SUBS, /// Integer horizontal add. HADD, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 8a9bd78..1fbea4b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3089,6 +3089,14 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, SSE_INTALU_ITINS_P, 1>; defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, SSE_INTALU_ITINS_P, 0>; +defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, + SSE_INTALU_ITINS_P, HasBWI, 1>; +defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, + SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index b5f56eb3..0a9f9d8 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -173,7 +173,10 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; +def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>; +def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index e633303..7f6be89 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -377,6 +377,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0), X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0), X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0), + X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0), + X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0), + X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0), + X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0), + X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0), X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), @@ -470,6 +482,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0), X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0), X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 0006efd..825885c 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -599,4 +599,192 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt ret <64 x i8> %res } -declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) \ No newline at end of file +declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) + +define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_adds_epi16_rr_512 + ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rrk_512 + ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512 + ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epi16_rm_512 + ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rmk_512 + ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512 + ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_subs_epi16_rr_512 + ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rrk_512 + ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512 + ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epi16_rm_512 + ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rmk_512 + ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512 + ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_adds_epu16_rr_512 + ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rrk_512 + ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512 + ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epu16_rm_512 + ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rmk_512 + ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512 + ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_subs_epu16_rr_512 + ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rrk_512 + ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512 + ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epu16_rm_512 + ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rmk_512 + ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512 + ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 0834bc6..5f58e3a 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1917,3 +1917,754 @@ define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %pt declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) +define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_adds_epi16_rr_128 + ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rrk_128 + ;CHECK: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} + %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rrkz_128 + ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epi16_rm_128 + ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rmk_128 + ;CHECK: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rmkz_128 + ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_adds_epi16_rr_256 + ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 + %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rrk_256 + ;CHECK: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} + %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rrkz_256 + ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epi16_rm_256 + ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rmk_256 + ;CHECK: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi16_rmkz_256 + ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_subs_epi16_rr_128 + ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rrk_128 + ;CHECK: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} + %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rrkz_128 + ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epi16_rm_128 + ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rmk_128 + ;CHECK: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rmkz_128 + ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_subs_epi16_rr_256 + ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 + %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rrk_256 + ;CHECK: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} + %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rrkz_256 + ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epi16_rm_256 + ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rmk_256 + ;CHECK: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi16_rmkz_256 + ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_adds_epu16_rr_128 + ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rrk_128 + ;CHECK: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} + %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rrkz_128 + ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epu16_rm_128 + ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rmk_128 + ;CHECK: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rmkz_128 + ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_adds_epu16_rr_256 + ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 + %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rrk_256 + ;CHECK: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} + %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rrkz_256 + ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epu16_rm_256 + ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rmk_256 + ;CHECK: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu16_rmkz_256 + ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_subs_epu16_rr_128 + ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rrk_128 + ;CHECK: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} + %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rrkz_128 + ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epu16_rm_128 + ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rmk_128 + ;CHECK: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rmkz_128 + ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_subs_epu16_rr_256 + ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 + %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rrk_256 + ;CHECK: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} + %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rrkz_256 + ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epu16_rm_256 + ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rmk_256 + ;CHECK: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu16_rmkz_256 + ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { + ;CHECK-LABEL: test_mask_adds_epi8_rr_128 + ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 + %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rrk_128 + ;CHECK: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} + %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rrkz_128 + ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epi8_rm_128 + ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rmk_128 + ;CHECK: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rmkz_128 + ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { + ;CHECK-LABEL: test_mask_adds_epi8_rr_256 + ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 + %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rrk_256 + ;CHECK: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} + %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rrkz_256 + ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epi8_rm_256 + ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rmk_256 + ;CHECK: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epi8_rmkz_256 + ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { + ;CHECK-LABEL: test_mask_subs_epi8_rr_128 + ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 + %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rrk_128 + ;CHECK: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} + %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rrkz_128 + ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epi8_rm_128 + ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rmk_128 + ;CHECK: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rmkz_128 + ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { + ;CHECK-LABEL: test_mask_subs_epi8_rr_256 + ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 + %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rrk_256 + ;CHECK: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} + %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rrkz_256 + ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epi8_rm_256 + ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rmk_256 + ;CHECK: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epi8_rmkz_256 + ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { + ;CHECK-LABEL: test_mask_adds_epu8_rr_128 + ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 + %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rrk_128 + ;CHECK: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} + %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rrkz_128 + ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epu8_rm_128 + ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rmk_128 + ;CHECK: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rmkz_128 + ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { + ;CHECK-LABEL: test_mask_adds_epu8_rr_256 + ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 + %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rrk_256 + ;CHECK: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} + %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rrkz_256 + ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_adds_epu8_rm_256 + ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rmk_256 + ;CHECK: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_adds_epu8_rmkz_256 + ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { + ;CHECK-LABEL: test_mask_subs_epu8_rr_128 + ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 + %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rrk_128 + ;CHECK: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} + %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rrkz_128 + ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epu8_rm_128 + ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rmk_128 + ;CHECK: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rmkz_128 + ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <16 x i8>, <16 x i8>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { + ;CHECK-LABEL: test_mask_subs_epu8_rr_256 + ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 + %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rrk_256 + ;CHECK: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} + %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rrkz_256 + ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { + ;CHECK-LABEL: test_mask_subs_epu8_rm_256 + ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rmk_256 + ;CHECK: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_subs_epu8_rmkz_256 + ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <32 x i8>, <32 x i8>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) \ No newline at end of file diff --git a/llvm/test/MC/X86/x86-64-avx512bw.s b/llvm/test/MC/X86/x86-64-avx512bw.s index 3f4855a..8b43dcb 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw.s +++ b/llvm/test/MC/X86/x86-64-avx512bw.s @@ -2335,3 +2335,904 @@ // CHECK: vpackuswb -8256(%rdx), %zmm18, %zmm23 // CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0xba,0xc0,0xdf,0xff,0xff] vpackuswb -8256(%rdx), %zmm18, %zmm23 + +// CHECK: vpaddsb %zmm20, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xec,0xdc] + vpaddsb %zmm20, %zmm19, %zmm19 + +// CHECK: vpaddsb %zmm20, %zmm19, %zmm19 {%k6} +// CHECK: encoding: [0x62,0xa1,0x65,0x46,0xec,0xdc] + vpaddsb %zmm20, %zmm19, %zmm19 {%k6} + +// CHECK: vpaddsb %zmm20, %zmm19, %zmm19 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x65,0xc6,0xec,0xdc] + vpaddsb %zmm20, %zmm19, %zmm19 {%k6} {z} + +// CHECK: vpaddsb (%rcx), %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x19] + vpaddsb (%rcx), %zmm19, %zmm19 + +// CHECK: vpaddsb 291(%rax,%r14,8), %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xec,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpaddsb 291(%rax,%r14,8), %zmm19, %zmm19 + +// CHECK: vpaddsb 8128(%rdx), %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x5a,0x7f] + vpaddsb 8128(%rdx), %zmm19, %zmm19 + +// CHECK: vpaddsb 8192(%rdx), %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x9a,0x00,0x20,0x00,0x00] + vpaddsb 8192(%rdx), %zmm19, %zmm19 + +// CHECK: vpaddsb -8192(%rdx), %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x5a,0x80] + vpaddsb -8192(%rdx), %zmm19, %zmm19 + +// CHECK: vpaddsb -8256(%rdx), %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x9a,0xc0,0xdf,0xff,0xff] + vpaddsb -8256(%rdx), %zmm19, %zmm19 + +// CHECK: vpaddsw %zmm22, %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xed,0xe6] + vpaddsw %zmm22, %zmm19, %zmm20 + +// CHECK: vpaddsw %zmm22, %zmm19, %zmm20 {%k2} +// CHECK: encoding: [0x62,0xa1,0x65,0x42,0xed,0xe6] + vpaddsw %zmm22, %zmm19, %zmm20 {%k2} + +// CHECK: vpaddsw %zmm22, %zmm19, %zmm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0x65,0xc2,0xed,0xe6] + vpaddsw %zmm22, %zmm19, %zmm20 {%k2} {z} + +// CHECK: vpaddsw (%rcx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0x21] + vpaddsw (%rcx), %zmm19, %zmm20 + +// CHECK: vpaddsw 291(%rax,%r14,8), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xed,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpaddsw 291(%rax,%r14,8), %zmm19, %zmm20 + +// CHECK: vpaddsw 8128(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0x62,0x7f] + vpaddsw 8128(%rdx), %zmm19, %zmm20 + +// CHECK: vpaddsw 8192(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0xa2,0x00,0x20,0x00,0x00] + vpaddsw 8192(%rdx), %zmm19, %zmm20 + +// CHECK: vpaddsw -8192(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0x62,0x80] + vpaddsw -8192(%rdx), %zmm19, %zmm20 + +// CHECK: vpaddsw -8256(%rdx), %zmm19, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0xa2,0xc0,0xdf,0xff,0xff] + vpaddsw -8256(%rdx), %zmm19, %zmm20 + +// CHECK: vpaddusb %zmm25, %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x01,0x15,0x40,0xdc,0xe9] + vpaddusb %zmm25, %zmm29, %zmm29 + +// CHECK: vpaddusb %zmm25, %zmm29, %zmm29 {%k1} +// CHECK: encoding: [0x62,0x01,0x15,0x41,0xdc,0xe9] + vpaddusb %zmm25, %zmm29, %zmm29 {%k1} + +// CHECK: vpaddusb %zmm25, %zmm29, %zmm29 {%k1} {z} +// CHECK: encoding: [0x62,0x01,0x15,0xc1,0xdc,0xe9] + vpaddusb %zmm25, %zmm29, %zmm29 {%k1} {z} + +// CHECK: vpaddusb (%rcx), %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0x29] + vpaddusb (%rcx), %zmm29, %zmm29 + +// CHECK: vpaddusb 291(%rax,%r14,8), %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x21,0x15,0x40,0xdc,0xac,0xf0,0x23,0x01,0x00,0x00] + vpaddusb 291(%rax,%r14,8), %zmm29, %zmm29 + +// CHECK: vpaddusb 8128(%rdx), %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0x6a,0x7f] + vpaddusb 8128(%rdx), %zmm29, %zmm29 + +// CHECK: vpaddusb 8192(%rdx), %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0xaa,0x00,0x20,0x00,0x00] + vpaddusb 8192(%rdx), %zmm29, %zmm29 + +// CHECK: vpaddusb -8192(%rdx), %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0x6a,0x80] + vpaddusb -8192(%rdx), %zmm29, %zmm29 + +// CHECK: vpaddusb -8256(%rdx), %zmm29, %zmm29 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0xaa,0xc0,0xdf,0xff,0xff] + vpaddusb -8256(%rdx), %zmm29, %zmm29 + +// CHECK: vpaddusw %zmm17, %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x35,0x40,0xdd,0xe9] + vpaddusw %zmm17, %zmm25, %zmm21 + +// CHECK: vpaddusw %zmm17, %zmm25, %zmm21 {%k4} +// CHECK: encoding: [0x62,0xa1,0x35,0x44,0xdd,0xe9] + vpaddusw %zmm17, %zmm25, %zmm21 {%k4} + +// CHECK: vpaddusw %zmm17, %zmm25, %zmm21 {%k4} {z} +// CHECK: encoding: [0x62,0xa1,0x35,0xc4,0xdd,0xe9] + vpaddusw %zmm17, %zmm25, %zmm21 {%k4} {z} + +// CHECK: vpaddusw (%rcx), %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0x29] + vpaddusw (%rcx), %zmm25, %zmm21 + +// CHECK: vpaddusw 291(%rax,%r14,8), %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x35,0x40,0xdd,0xac,0xf0,0x23,0x01,0x00,0x00] + vpaddusw 291(%rax,%r14,8), %zmm25, %zmm21 + +// CHECK: vpaddusw 8128(%rdx), %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0x6a,0x7f] + vpaddusw 8128(%rdx), %zmm25, %zmm21 + +// CHECK: vpaddusw 8192(%rdx), %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0xaa,0x00,0x20,0x00,0x00] + vpaddusw 8192(%rdx), %zmm25, %zmm21 + +// CHECK: vpaddusw -8192(%rdx), %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0x6a,0x80] + vpaddusw -8192(%rdx), %zmm25, %zmm21 + +// CHECK: vpaddusw -8256(%rdx), %zmm25, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0xaa,0xc0,0xdf,0xff,0xff] + vpaddusw -8256(%rdx), %zmm25, %zmm21 + +// CHECK: vpsubsb %zmm20, %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x21,0x3d,0x40,0xe8,0xcc] + vpsubsb %zmm20, %zmm24, %zmm25 + +// CHECK: vpsubsb %zmm20, %zmm24, %zmm25 {%k2} +// CHECK: encoding: [0x62,0x21,0x3d,0x42,0xe8,0xcc] + vpsubsb %zmm20, %zmm24, %zmm25 {%k2} + +// CHECK: vpsubsb %zmm20, %zmm24, %zmm25 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0x3d,0xc2,0xe8,0xcc] + vpsubsb %zmm20, %zmm24, %zmm25 {%k2} {z} + +// CHECK: vpsubsb (%rcx), %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x09] + vpsubsb (%rcx), %zmm24, %zmm25 + +// CHECK: vpsubsb 291(%rax,%r14,8), %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x21,0x3d,0x40,0xe8,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubsb 291(%rax,%r14,8), %zmm24, %zmm25 + +// CHECK: vpsubsb 8128(%rdx), %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x4a,0x7f] + vpsubsb 8128(%rdx), %zmm24, %zmm25 + +// CHECK: vpsubsb 8192(%rdx), %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x8a,0x00,0x20,0x00,0x00] + vpsubsb 8192(%rdx), %zmm24, %zmm25 + +// CHECK: vpsubsb -8192(%rdx), %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x4a,0x80] + vpsubsb -8192(%rdx), %zmm24, %zmm25 + +// CHECK: vpsubsb -8256(%rdx), %zmm24, %zmm25 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x8a,0xc0,0xdf,0xff,0xff] + vpsubsb -8256(%rdx), %zmm24, %zmm25 + +// CHECK: vpsubsw %zmm23, %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xe9,0xff] + vpsubsw %zmm23, %zmm22, %zmm23 + +// CHECK: vpsubsw %zmm23, %zmm22, %zmm23 {%k3} +// CHECK: encoding: [0x62,0xa1,0x4d,0x43,0xe9,0xff] + vpsubsw %zmm23, %zmm22, %zmm23 {%k3} + +// CHECK: vpsubsw %zmm23, %zmm22, %zmm23 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x4d,0xc3,0xe9,0xff] + vpsubsw %zmm23, %zmm22, %zmm23 {%k3} {z} + +// CHECK: vpsubsw (%rcx), %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0x39] + vpsubsw (%rcx), %zmm22, %zmm23 + +// CHECK: vpsubsw 291(%rax,%r14,8), %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xe9,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpsubsw 291(%rax,%r14,8), %zmm22, %zmm23 + +// CHECK: vpsubsw 8128(%rdx), %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0x7a,0x7f] + vpsubsw 8128(%rdx), %zmm22, %zmm23 + +// CHECK: vpsubsw 8192(%rdx), %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0xba,0x00,0x20,0x00,0x00] + vpsubsw 8192(%rdx), %zmm22, %zmm23 + +// CHECK: vpsubsw -8192(%rdx), %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0x7a,0x80] + vpsubsw -8192(%rdx), %zmm22, %zmm23 + +// CHECK: vpsubsw -8256(%rdx), %zmm22, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0xba,0xc0,0xdf,0xff,0xff] + vpsubsw -8256(%rdx), %zmm22, %zmm23 + +// CHECK: vpsubusb %zmm22, %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x21,0x55,0x40,0xd8,0xc6] + vpsubusb %zmm22, %zmm21, %zmm24 + +// CHECK: vpsubusb %zmm22, %zmm21, %zmm24 {%k4} +// CHECK: encoding: [0x62,0x21,0x55,0x44,0xd8,0xc6] + vpsubusb %zmm22, %zmm21, %zmm24 {%k4} + +// CHECK: vpsubusb %zmm22, %zmm21, %zmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x55,0xc4,0xd8,0xc6] + vpsubusb %zmm22, %zmm21, %zmm24 {%k4} {z} + +// CHECK: vpsubusb (%rcx), %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x01] + vpsubusb (%rcx), %zmm21, %zmm24 + +// CHECK: vpsubusb 291(%rax,%r14,8), %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x21,0x55,0x40,0xd8,0x84,0xf0,0x23,0x01,0x00,0x00] + vpsubusb 291(%rax,%r14,8), %zmm21, %zmm24 + +// CHECK: vpsubusb 8128(%rdx), %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x42,0x7f] + vpsubusb 8128(%rdx), %zmm21, %zmm24 + +// CHECK: vpsubusb 8192(%rdx), %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x82,0x00,0x20,0x00,0x00] + vpsubusb 8192(%rdx), %zmm21, %zmm24 + +// CHECK: vpsubusb -8192(%rdx), %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x42,0x80] + vpsubusb -8192(%rdx), %zmm21, %zmm24 + +// CHECK: vpsubusb -8256(%rdx), %zmm21, %zmm24 +// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x82,0xc0,0xdf,0xff,0xff] + vpsubusb -8256(%rdx), %zmm21, %zmm24 + +// CHECK: vpsubusw %zmm23, %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x21,0x75,0x40,0xd9,0xcf] + vpsubusw %zmm23, %zmm17, %zmm25 + +// CHECK: vpsubusw %zmm23, %zmm17, %zmm25 {%k1} +// CHECK: encoding: [0x62,0x21,0x75,0x41,0xd9,0xcf] + vpsubusw %zmm23, %zmm17, %zmm25 {%k1} + +// CHECK: vpsubusw %zmm23, %zmm17, %zmm25 {%k1} {z} +// CHECK: encoding: [0x62,0x21,0x75,0xc1,0xd9,0xcf] + vpsubusw %zmm23, %zmm17, %zmm25 {%k1} {z} + +// CHECK: vpsubusw (%rcx), %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x09] + vpsubusw (%rcx), %zmm17, %zmm25 + +// CHECK: vpsubusw 291(%rax,%r14,8), %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x21,0x75,0x40,0xd9,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubusw 291(%rax,%r14,8), %zmm17, %zmm25 + +// CHECK: vpsubusw 8128(%rdx), %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x4a,0x7f] + vpsubusw 8128(%rdx), %zmm17, %zmm25 + +// CHECK: vpsubusw 8192(%rdx), %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x8a,0x00,0x20,0x00,0x00] + vpsubusw 8192(%rdx), %zmm17, %zmm25 + +// CHECK: vpsubusw -8192(%rdx), %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x4a,0x80] + vpsubusw -8192(%rdx), %zmm17, %zmm25 + +// CHECK: vpsubusw -8256(%rdx), %zmm17, %zmm25 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x8a,0xc0,0xdf,0xff,0xff] + vpsubusw -8256(%rdx), %zmm17, %zmm25 + +// CHECK: vpaddb %zmm26, %zmm29, %zmm21 +// CHECK: encoding: [0x62,0x81,0x15,0x40,0xfc,0xea] + vpaddb %zmm26, %zmm29, %zmm21 + +// CHECK: vpaddb %zmm26, %zmm29, %zmm21 {%k5} +// CHECK: encoding: [0x62,0x81,0x15,0x45,0xfc,0xea] + vpaddb %zmm26, %zmm29, %zmm21 {%k5} + +// CHECK: vpaddb %zmm26, %zmm29, %zmm21 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x15,0xc5,0xfc,0xea] + vpaddb %zmm26, %zmm29, %zmm21 {%k5} {z} + +// CHECK: vpaddb (%rcx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0x29] + vpaddb (%rcx), %zmm29, %zmm21 + +// CHECK: vpaddb 4660(%rax,%r14,8), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x15,0x40,0xfc,0xac,0xf0,0x34,0x12,0x00,0x00] + vpaddb 4660(%rax,%r14,8), %zmm29, %zmm21 + +// CHECK: vpaddb 8128(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0x6a,0x7f] + vpaddb 8128(%rdx), %zmm29, %zmm21 + +// CHECK: vpaddb 8192(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0xaa,0x00,0x20,0x00,0x00] + vpaddb 8192(%rdx), %zmm29, %zmm21 + +// CHECK: vpaddb -8192(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0x6a,0x80] + vpaddb -8192(%rdx), %zmm29, %zmm21 + +// CHECK: vpaddb -8256(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0xaa,0xc0,0xdf,0xff,0xff] + vpaddb -8256(%rdx), %zmm29, %zmm21 + +// CHECK: vpaddsb %zmm19, %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xec,0xd3] + vpaddsb %zmm19, %zmm24, %zmm18 + +// CHECK: vpaddsb %zmm19, %zmm24, %zmm18 {%k1} +// CHECK: encoding: [0x62,0xa1,0x3d,0x41,0xec,0xd3] + vpaddsb %zmm19, %zmm24, %zmm18 {%k1} + +// CHECK: vpaddsb %zmm19, %zmm24, %zmm18 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x3d,0xc1,0xec,0xd3] + vpaddsb %zmm19, %zmm24, %zmm18 {%k1} {z} + +// CHECK: vpaddsb (%rcx), %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x11] + vpaddsb (%rcx), %zmm24, %zmm18 + +// CHECK: vpaddsb 4660(%rax,%r14,8), %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xec,0x94,0xf0,0x34,0x12,0x00,0x00] + vpaddsb 4660(%rax,%r14,8), %zmm24, %zmm18 + +// CHECK: vpaddsb 8128(%rdx), %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x52,0x7f] + vpaddsb 8128(%rdx), %zmm24, %zmm18 + +// CHECK: vpaddsb 8192(%rdx), %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x92,0x00,0x20,0x00,0x00] + vpaddsb 8192(%rdx), %zmm24, %zmm18 + +// CHECK: vpaddsb -8192(%rdx), %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x52,0x80] + vpaddsb -8192(%rdx), %zmm24, %zmm18 + +// CHECK: vpaddsb -8256(%rdx), %zmm24, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x92,0xc0,0xdf,0xff,0xff] + vpaddsb -8256(%rdx), %zmm24, %zmm18 + +// CHECK: vpaddsw %zmm28, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0x81,0x75,0x40,0xed,0xe4] + vpaddsw %zmm28, %zmm17, %zmm20 + +// CHECK: vpaddsw %zmm28, %zmm17, %zmm20 {%k2} +// CHECK: encoding: [0x62,0x81,0x75,0x42,0xed,0xe4] + vpaddsw %zmm28, %zmm17, %zmm20 {%k2} + +// CHECK: vpaddsw %zmm28, %zmm17, %zmm20 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x75,0xc2,0xed,0xe4] + vpaddsw %zmm28, %zmm17, %zmm20 {%k2} {z} + +// CHECK: vpaddsw (%rcx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0x21] + vpaddsw (%rcx), %zmm17, %zmm20 + +// CHECK: vpaddsw 4660(%rax,%r14,8), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x75,0x40,0xed,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpaddsw 4660(%rax,%r14,8), %zmm17, %zmm20 + +// CHECK: vpaddsw 8128(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0x62,0x7f] + vpaddsw 8128(%rdx), %zmm17, %zmm20 + +// CHECK: vpaddsw 8192(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0xa2,0x00,0x20,0x00,0x00] + vpaddsw 8192(%rdx), %zmm17, %zmm20 + +// CHECK: vpaddsw -8192(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0x62,0x80] + vpaddsw -8192(%rdx), %zmm17, %zmm20 + +// CHECK: vpaddsw -8256(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0xa2,0xc0,0xdf,0xff,0xff] + vpaddsw -8256(%rdx), %zmm17, %zmm20 + +// CHECK: vpaddusb %zmm22, %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xa1,0x25,0x40,0xdc,0xde] + vpaddusb %zmm22, %zmm27, %zmm19 + +// CHECK: vpaddusb %zmm22, %zmm27, %zmm19 {%k7} +// CHECK: encoding: [0x62,0xa1,0x25,0x47,0xdc,0xde] + vpaddusb %zmm22, %zmm27, %zmm19 {%k7} + +// CHECK: vpaddusb %zmm22, %zmm27, %zmm19 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x25,0xc7,0xdc,0xde] + vpaddusb %zmm22, %zmm27, %zmm19 {%k7} {z} + +// CHECK: vpaddusb (%rcx), %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x19] + vpaddusb (%rcx), %zmm27, %zmm19 + +// CHECK: vpaddusb 4660(%rax,%r14,8), %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xa1,0x25,0x40,0xdc,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpaddusb 4660(%rax,%r14,8), %zmm27, %zmm19 + +// CHECK: vpaddusb 8128(%rdx), %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x5a,0x7f] + vpaddusb 8128(%rdx), %zmm27, %zmm19 + +// CHECK: vpaddusb 8192(%rdx), %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x9a,0x00,0x20,0x00,0x00] + vpaddusb 8192(%rdx), %zmm27, %zmm19 + +// CHECK: vpaddusb -8192(%rdx), %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x5a,0x80] + vpaddusb -8192(%rdx), %zmm27, %zmm19 + +// CHECK: vpaddusb -8256(%rdx), %zmm27, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x9a,0xc0,0xdf,0xff,0xff] + vpaddusb -8256(%rdx), %zmm27, %zmm19 + +// CHECK: vpaddusw %zmm23, %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x21,0x45,0x40,0xdd,0xdf] + vpaddusw %zmm23, %zmm23, %zmm27 + +// CHECK: vpaddusw %zmm23, %zmm23, %zmm27 {%k7} +// CHECK: encoding: [0x62,0x21,0x45,0x47,0xdd,0xdf] + vpaddusw %zmm23, %zmm23, %zmm27 {%k7} + +// CHECK: vpaddusw %zmm23, %zmm23, %zmm27 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x45,0xc7,0xdd,0xdf] + vpaddusw %zmm23, %zmm23, %zmm27 {%k7} {z} + +// CHECK: vpaddusw (%rcx), %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x19] + vpaddusw (%rcx), %zmm23, %zmm27 + +// CHECK: vpaddusw 4660(%rax,%r14,8), %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x21,0x45,0x40,0xdd,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpaddusw 4660(%rax,%r14,8), %zmm23, %zmm27 + +// CHECK: vpaddusw 8128(%rdx), %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x5a,0x7f] + vpaddusw 8128(%rdx), %zmm23, %zmm27 + +// CHECK: vpaddusw 8192(%rdx), %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x9a,0x00,0x20,0x00,0x00] + vpaddusw 8192(%rdx), %zmm23, %zmm27 + +// CHECK: vpaddusw -8192(%rdx), %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x5a,0x80] + vpaddusw -8192(%rdx), %zmm23, %zmm27 + +// CHECK: vpaddusw -8256(%rdx), %zmm23, %zmm27 +// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x9a,0xc0,0xdf,0xff,0xff] + vpaddusw -8256(%rdx), %zmm23, %zmm27 + +// CHECK: vpsubsb %zmm18, %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xe8,0xca] + vpsubsb %zmm18, %zmm28, %zmm17 + +// CHECK: vpsubsb %zmm18, %zmm28, %zmm17 {%k5} +// CHECK: encoding: [0x62,0xa1,0x1d,0x45,0xe8,0xca] + vpsubsb %zmm18, %zmm28, %zmm17 {%k5} + +// CHECK: vpsubsb %zmm18, %zmm28, %zmm17 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x1d,0xc5,0xe8,0xca] + vpsubsb %zmm18, %zmm28, %zmm17 {%k5} {z} + +// CHECK: vpsubsb (%rcx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x09] + vpsubsb (%rcx), %zmm28, %zmm17 + +// CHECK: vpsubsb 4660(%rax,%r14,8), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xe8,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpsubsb 4660(%rax,%r14,8), %zmm28, %zmm17 + +// CHECK: vpsubsb 8128(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x4a,0x7f] + vpsubsb 8128(%rdx), %zmm28, %zmm17 + +// CHECK: vpsubsb 8192(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x8a,0x00,0x20,0x00,0x00] + vpsubsb 8192(%rdx), %zmm28, %zmm17 + +// CHECK: vpsubsb -8192(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x4a,0x80] + vpsubsb -8192(%rdx), %zmm28, %zmm17 + +// CHECK: vpsubsb -8256(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x8a,0xc0,0xdf,0xff,0xff] + vpsubsb -8256(%rdx), %zmm28, %zmm17 + +// CHECK: vpsubsw %zmm26, %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x01,0x3d,0x40,0xe9,0xf2] + vpsubsw %zmm26, %zmm24, %zmm30 + +// CHECK: vpsubsw %zmm26, %zmm24, %zmm30 {%k3} +// CHECK: encoding: [0x62,0x01,0x3d,0x43,0xe9,0xf2] + vpsubsw %zmm26, %zmm24, %zmm30 {%k3} + +// CHECK: vpsubsw %zmm26, %zmm24, %zmm30 {%k3} {z} +// CHECK: encoding: [0x62,0x01,0x3d,0xc3,0xe9,0xf2] + vpsubsw %zmm26, %zmm24, %zmm30 {%k3} {z} + +// CHECK: vpsubsw (%rcx), %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0x31] + vpsubsw (%rcx), %zmm24, %zmm30 + +// CHECK: vpsubsw 4660(%rax,%r14,8), %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x21,0x3d,0x40,0xe9,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpsubsw 4660(%rax,%r14,8), %zmm24, %zmm30 + +// CHECK: vpsubsw 8128(%rdx), %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0x72,0x7f] + vpsubsw 8128(%rdx), %zmm24, %zmm30 + +// CHECK: vpsubsw 8192(%rdx), %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0xb2,0x00,0x20,0x00,0x00] + vpsubsw 8192(%rdx), %zmm24, %zmm30 + +// CHECK: vpsubsw -8192(%rdx), %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0x72,0x80] + vpsubsw -8192(%rdx), %zmm24, %zmm30 + +// CHECK: vpsubsw -8256(%rdx), %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0xb2,0xc0,0xdf,0xff,0xff] + vpsubsw -8256(%rdx), %zmm24, %zmm30 + +// CHECK: vpsubusb %zmm28, %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x01,0x15,0x40,0xd8,0xe4] + vpsubusb %zmm28, %zmm29, %zmm28 + +// CHECK: vpsubusb %zmm28, %zmm29, %zmm28 {%k2} +// CHECK: encoding: [0x62,0x01,0x15,0x42,0xd8,0xe4] + vpsubusb %zmm28, %zmm29, %zmm28 {%k2} + +// CHECK: vpsubusb %zmm28, %zmm29, %zmm28 {%k2} {z} +// CHECK: encoding: [0x62,0x01,0x15,0xc2,0xd8,0xe4] + vpsubusb %zmm28, %zmm29, %zmm28 {%k2} {z} + +// CHECK: vpsubusb (%rcx), %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x21] + vpsubusb (%rcx), %zmm29, %zmm28 + +// CHECK: vpsubusb 4660(%rax,%r14,8), %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x21,0x15,0x40,0xd8,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpsubusb 4660(%rax,%r14,8), %zmm29, %zmm28 + +// CHECK: vpsubusb 8128(%rdx), %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x62,0x7f] + vpsubusb 8128(%rdx), %zmm29, %zmm28 + +// CHECK: vpsubusb 8192(%rdx), %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0xa2,0x00,0x20,0x00,0x00] + vpsubusb 8192(%rdx), %zmm29, %zmm28 + +// CHECK: vpsubusb -8192(%rdx), %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x62,0x80] + vpsubusb -8192(%rdx), %zmm29, %zmm28 + +// CHECK: vpsubusb -8256(%rdx), %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0xa2,0xc0,0xdf,0xff,0xff] + vpsubusb -8256(%rdx), %zmm29, %zmm28 + +// CHECK: vpsubusw %zmm19, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd9,0xf3] + vpsubusw %zmm19, %zmm28, %zmm22 + +// CHECK: vpsubusw %zmm19, %zmm28, %zmm22 {%k5} +// CHECK: encoding: [0x62,0xa1,0x1d,0x45,0xd9,0xf3] + vpsubusw %zmm19, %zmm28, %zmm22 {%k5} + +// CHECK: vpsubusw %zmm19, %zmm28, %zmm22 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x1d,0xc5,0xd9,0xf3] + vpsubusw %zmm19, %zmm28, %zmm22 {%k5} {z} + +// CHECK: vpsubusw (%rcx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0x31] + vpsubusw (%rcx), %zmm28, %zmm22 + +// CHECK: vpsubusw 4660(%rax,%r14,8), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd9,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpsubusw 4660(%rax,%r14,8), %zmm28, %zmm22 + +// CHECK: vpsubusw 8128(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0x72,0x7f] + vpsubusw 8128(%rdx), %zmm28, %zmm22 + +// CHECK: vpsubusw 8192(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0xb2,0x00,0x20,0x00,0x00] + vpsubusw 8192(%rdx), %zmm28, %zmm22 + +// CHECK: vpsubusw -8192(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0x72,0x80] + vpsubusw -8192(%rdx), %zmm28, %zmm22 + +// CHECK: vpsubusw -8256(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0xb2,0xc0,0xdf,0xff,0xff] + vpsubusw -8256(%rdx), %zmm28, %zmm22 + +// CHECK: vpaddsb %zmm25, %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x01,0x65,0x40,0xec,0xe1] + vpaddsb %zmm25, %zmm19, %zmm28 + +// CHECK: vpaddsb %zmm25, %zmm19, %zmm28 {%k4} +// CHECK: encoding: [0x62,0x01,0x65,0x44,0xec,0xe1] + vpaddsb %zmm25, %zmm19, %zmm28 {%k4} + +// CHECK: vpaddsb %zmm25, %zmm19, %zmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0x65,0xc4,0xec,0xe1] + vpaddsb %zmm25, %zmm19, %zmm28 {%k4} {z} + +// CHECK: vpaddsb (%rcx), %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0x21] + vpaddsb (%rcx), %zmm19, %zmm28 + +// CHECK: vpaddsb 291(%rax,%r14,8), %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x21,0x65,0x40,0xec,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpaddsb 291(%rax,%r14,8), %zmm19, %zmm28 + +// CHECK: vpaddsb 8128(%rdx), %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0x62,0x7f] + vpaddsb 8128(%rdx), %zmm19, %zmm28 + +// CHECK: vpaddsb 8192(%rdx), %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0xa2,0x00,0x20,0x00,0x00] + vpaddsb 8192(%rdx), %zmm19, %zmm28 + +// CHECK: vpaddsb -8192(%rdx), %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0x62,0x80] + vpaddsb -8192(%rdx), %zmm19, %zmm28 + +// CHECK: vpaddsb -8256(%rdx), %zmm19, %zmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0xa2,0xc0,0xdf,0xff,0xff] + vpaddsb -8256(%rdx), %zmm19, %zmm28 + +// CHECK: vpaddsw %zmm20, %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xed,0xe4] + vpaddsw %zmm20, %zmm22, %zmm20 + +// CHECK: vpaddsw %zmm20, %zmm22, %zmm20 {%k7} +// CHECK: encoding: [0x62,0xa1,0x4d,0x47,0xed,0xe4] + vpaddsw %zmm20, %zmm22, %zmm20 {%k7} + +// CHECK: vpaddsw %zmm20, %zmm22, %zmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x4d,0xc7,0xed,0xe4] + vpaddsw %zmm20, %zmm22, %zmm20 {%k7} {z} + +// CHECK: vpaddsw (%rcx), %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0x21] + vpaddsw (%rcx), %zmm22, %zmm20 + +// CHECK: vpaddsw 291(%rax,%r14,8), %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xed,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpaddsw 291(%rax,%r14,8), %zmm22, %zmm20 + +// CHECK: vpaddsw 8128(%rdx), %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0x62,0x7f] + vpaddsw 8128(%rdx), %zmm22, %zmm20 + +// CHECK: vpaddsw 8192(%rdx), %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0xa2,0x00,0x20,0x00,0x00] + vpaddsw 8192(%rdx), %zmm22, %zmm20 + +// CHECK: vpaddsw -8192(%rdx), %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0x62,0x80] + vpaddsw -8192(%rdx), %zmm22, %zmm20 + +// CHECK: vpaddsw -8256(%rdx), %zmm22, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0xa2,0xc0,0xdf,0xff,0xff] + vpaddsw -8256(%rdx), %zmm22, %zmm20 + +// CHECK: vpaddusb %zmm17, %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x21,0x25,0x40,0xdc,0xd1] + vpaddusb %zmm17, %zmm27, %zmm26 + +// CHECK: vpaddusb %zmm17, %zmm27, %zmm26 {%k3} +// CHECK: encoding: [0x62,0x21,0x25,0x43,0xdc,0xd1] + vpaddusb %zmm17, %zmm27, %zmm26 {%k3} + +// CHECK: vpaddusb %zmm17, %zmm27, %zmm26 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0x25,0xc3,0xdc,0xd1] + vpaddusb %zmm17, %zmm27, %zmm26 {%k3} {z} + +// CHECK: vpaddusb (%rcx), %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x11] + vpaddusb (%rcx), %zmm27, %zmm26 + +// CHECK: vpaddusb 291(%rax,%r14,8), %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x21,0x25,0x40,0xdc,0x94,0xf0,0x23,0x01,0x00,0x00] + vpaddusb 291(%rax,%r14,8), %zmm27, %zmm26 + +// CHECK: vpaddusb 8128(%rdx), %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x52,0x7f] + vpaddusb 8128(%rdx), %zmm27, %zmm26 + +// CHECK: vpaddusb 8192(%rdx), %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x92,0x00,0x20,0x00,0x00] + vpaddusb 8192(%rdx), %zmm27, %zmm26 + +// CHECK: vpaddusb -8192(%rdx), %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x52,0x80] + vpaddusb -8192(%rdx), %zmm27, %zmm26 + +// CHECK: vpaddusb -8256(%rdx), %zmm27, %zmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x92,0xc0,0xdf,0xff,0xff] + vpaddusb -8256(%rdx), %zmm27, %zmm26 + +// CHECK: vpaddusw %zmm20, %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xdd,0xec] + vpaddusw %zmm20, %zmm22, %zmm21 + +// CHECK: vpaddusw %zmm20, %zmm22, %zmm21 {%k7} +// CHECK: encoding: [0x62,0xa1,0x4d,0x47,0xdd,0xec] + vpaddusw %zmm20, %zmm22, %zmm21 {%k7} + +// CHECK: vpaddusw %zmm20, %zmm22, %zmm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x4d,0xc7,0xdd,0xec] + vpaddusw %zmm20, %zmm22, %zmm21 {%k7} {z} + +// CHECK: vpaddusw (%rcx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0x29] + vpaddusw (%rcx), %zmm22, %zmm21 + +// CHECK: vpaddusw 291(%rax,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xdd,0xac,0xf0,0x23,0x01,0x00,0x00] + vpaddusw 291(%rax,%r14,8), %zmm22, %zmm21 + +// CHECK: vpaddusw 8128(%rdx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0x6a,0x7f] + vpaddusw 8128(%rdx), %zmm22, %zmm21 + +// CHECK: vpaddusw 8192(%rdx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0xaa,0x00,0x20,0x00,0x00] + vpaddusw 8192(%rdx), %zmm22, %zmm21 + +// CHECK: vpaddusw -8192(%rdx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0x6a,0x80] + vpaddusw -8192(%rdx), %zmm22, %zmm21 + +// CHECK: vpaddusw -8256(%rdx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0xaa,0xc0,0xdf,0xff,0xff] + vpaddusw -8256(%rdx), %zmm22, %zmm21 + +// CHECK: vpsubsb %zmm28, %zmm21, %zmm19 +// CHECK: encoding: [0x62,0x81,0x55,0x40,0xe8,0xdc] + vpsubsb %zmm28, %zmm21, %zmm19 + +// CHECK: vpsubsb %zmm28, %zmm21, %zmm19 {%k2} +// CHECK: encoding: [0x62,0x81,0x55,0x42,0xe8,0xdc] + vpsubsb %zmm28, %zmm21, %zmm19 {%k2} + +// CHECK: vpsubsb %zmm28, %zmm21, %zmm19 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x55,0xc2,0xe8,0xdc] + vpsubsb %zmm28, %zmm21, %zmm19 {%k2} {z} + +// CHECK: vpsubsb (%rcx), %zmm21, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x19] + vpsubsb (%rcx), %zmm21, %zmm19 + +// CHECK: vpsubsb 291(%rax,%r14,8), %zmm21, %zmm19 +// CHECK: encoding: [0x62,0xa1,0x55,0x40,0xe8,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpsubsb 291(%rax,%r14,8), %zmm21, %zmm19 + +// CHECK: vpsubsb 8128(%rdx), %zmm21, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x5a,0x7f] + vpsubsb 8128(%rdx), %zmm21, %zmm19 + +// CHECK: vpsubsb 8192(%rdx), %zmm21, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x9a,0x00,0x20,0x00,0x00] + vpsubsb 8192(%rdx), %zmm21, %zmm19 + +// CHECK: vpsubsb -8192(%rdx), %zmm21, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x5a,0x80] + vpsubsb -8192(%rdx), %zmm21, %zmm19 + +// CHECK: vpsubsb -8256(%rdx), %zmm21, %zmm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x9a,0xc0,0xdf,0xff,0xff] + vpsubsb -8256(%rdx), %zmm21, %zmm19 + +// CHECK: vpsubsw %zmm23, %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xa1,0x45,0x40,0xe9,0xff] + vpsubsw %zmm23, %zmm23, %zmm23 + +// CHECK: vpsubsw %zmm23, %zmm23, %zmm23 {%k6} +// CHECK: encoding: [0x62,0xa1,0x45,0x46,0xe9,0xff] + vpsubsw %zmm23, %zmm23, %zmm23 {%k6} + +// CHECK: vpsubsw %zmm23, %zmm23, %zmm23 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x45,0xc6,0xe9,0xff] + vpsubsw %zmm23, %zmm23, %zmm23 {%k6} {z} + +// CHECK: vpsubsw (%rcx), %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0x39] + vpsubsw (%rcx), %zmm23, %zmm23 + +// CHECK: vpsubsw 291(%rax,%r14,8), %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xa1,0x45,0x40,0xe9,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpsubsw 291(%rax,%r14,8), %zmm23, %zmm23 + +// CHECK: vpsubsw 8128(%rdx), %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0x7a,0x7f] + vpsubsw 8128(%rdx), %zmm23, %zmm23 + +// CHECK: vpsubsw 8192(%rdx), %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0xba,0x00,0x20,0x00,0x00] + vpsubsw 8192(%rdx), %zmm23, %zmm23 + +// CHECK: vpsubsw -8192(%rdx), %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0x7a,0x80] + vpsubsw -8192(%rdx), %zmm23, %zmm23 + +// CHECK: vpsubsw -8256(%rdx), %zmm23, %zmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0xba,0xc0,0xdf,0xff,0xff] + vpsubsw -8256(%rdx), %zmm23, %zmm23 + +// CHECK: vpsubusb %zmm25, %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x01,0x15,0x40,0xd8,0xd9] + vpsubusb %zmm25, %zmm29, %zmm27 + +// CHECK: vpsubusb %zmm25, %zmm29, %zmm27 {%k4} +// CHECK: encoding: [0x62,0x01,0x15,0x44,0xd8,0xd9] + vpsubusb %zmm25, %zmm29, %zmm27 {%k4} + +// CHECK: vpsubusb %zmm25, %zmm29, %zmm27 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0x15,0xc4,0xd8,0xd9] + vpsubusb %zmm25, %zmm29, %zmm27 {%k4} {z} + +// CHECK: vpsubusb (%rcx), %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x19] + vpsubusb (%rcx), %zmm29, %zmm27 + +// CHECK: vpsubusb 291(%rax,%r14,8), %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x21,0x15,0x40,0xd8,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpsubusb 291(%rax,%r14,8), %zmm29, %zmm27 + +// CHECK: vpsubusb 8128(%rdx), %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x5a,0x7f] + vpsubusb 8128(%rdx), %zmm29, %zmm27 + +// CHECK: vpsubusb 8192(%rdx), %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x9a,0x00,0x20,0x00,0x00] + vpsubusb 8192(%rdx), %zmm29, %zmm27 + +// CHECK: vpsubusb -8192(%rdx), %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x5a,0x80] + vpsubusb -8192(%rdx), %zmm29, %zmm27 + +// CHECK: vpsubusb -8256(%rdx), %zmm29, %zmm27 +// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x9a,0xc0,0xdf,0xff,0xff] + vpsubusb -8256(%rdx), %zmm29, %zmm27 + +// CHECK: vpsubusw %zmm25, %zmm20, %zmm20 +// CHECK: encoding: [0x62,0x81,0x5d,0x40,0xd9,0xe1] + vpsubusw %zmm25, %zmm20, %zmm20 + +// CHECK: vpsubusw %zmm25, %zmm20, %zmm20 {%k6} +// CHECK: encoding: [0x62,0x81,0x5d,0x46,0xd9,0xe1] + vpsubusw %zmm25, %zmm20, %zmm20 {%k6} + +// CHECK: vpsubusw %zmm25, %zmm20, %zmm20 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0x5d,0xc6,0xd9,0xe1] + vpsubusw %zmm25, %zmm20, %zmm20 {%k6} {z} + +// CHECK: vpsubusw (%rcx), %zmm20, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0x21] + vpsubusw (%rcx), %zmm20, %zmm20 + +// CHECK: vpsubusw 291(%rax,%r14,8), %zmm20, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x5d,0x40,0xd9,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpsubusw 291(%rax,%r14,8), %zmm20, %zmm20 + +// CHECK: vpsubusw 8128(%rdx), %zmm20, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0x62,0x7f] + vpsubusw 8128(%rdx), %zmm20, %zmm20 + +// CHECK: vpsubusw 8192(%rdx), %zmm20, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0xa2,0x00,0x20,0x00,0x00] + vpsubusw 8192(%rdx), %zmm20, %zmm20 + +// CHECK: vpsubusw -8192(%rdx), %zmm20, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0x62,0x80] + vpsubusw -8192(%rdx), %zmm20, %zmm20 + +// CHECK: vpsubusw -8256(%rdx), %zmm20, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0xa2,0xc0,0xdf,0xff,0xff] + vpsubusw -8256(%rdx), %zmm20, %zmm20 + diff --git a/llvm/test/MC/X86/x86-64-avx512bw_vl.s b/llvm/test/MC/X86/x86-64-avx512bw_vl.s index b3883c8..f6ee1cc 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw_vl.s +++ b/llvm/test/MC/X86/x86-64-avx512bw_vl.s @@ -3327,3 +3327,2307 @@ // CHECK: vpackuswb -4128(%rdx), %ymm19, %ymm20 // CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x67,0xa2,0xe0,0xef,0xff,0xff] vpackuswb -4128(%rdx), %ymm19, %ymm20 + +// CHECK: vpaddsb %xmm27, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0x81,0x65,0x00,0xec,0xdb] + vpaddsb %xmm27, %xmm19, %xmm19 + +// CHECK: vpaddsb %xmm27, %xmm19, %xmm19 {%k2} +// CHECK: encoding: [0x62,0x81,0x65,0x02,0xec,0xdb] + vpaddsb %xmm27, %xmm19, %xmm19 {%k2} + +// CHECK: vpaddsb %xmm27, %xmm19, %xmm19 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x65,0x82,0xec,0xdb] + vpaddsb %xmm27, %xmm19, %xmm19 {%k2} {z} + +// CHECK: vpaddsb (%rcx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xec,0x19] + vpaddsb (%rcx), %xmm19, %xmm19 + +// CHECK: vpaddsb 291(%rax,%r14,8), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xa1,0x65,0x00,0xec,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpaddsb 291(%rax,%r14,8), %xmm19, %xmm19 + +// CHECK: vpaddsb 2032(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xec,0x5a,0x7f] + vpaddsb 2032(%rdx), %xmm19, %xmm19 + +// CHECK: vpaddsb 2048(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xec,0x9a,0x00,0x08,0x00,0x00] + vpaddsb 2048(%rdx), %xmm19, %xmm19 + +// CHECK: vpaddsb -2048(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xec,0x5a,0x80] + vpaddsb -2048(%rdx), %xmm19, %xmm19 + +// CHECK: vpaddsb -2064(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xec,0x9a,0xf0,0xf7,0xff,0xff] + vpaddsb -2064(%rdx), %xmm19, %xmm19 + +// CHECK: vpaddsb %ymm27, %ymm21, %ymm19 +// CHECK: encoding: [0x62,0x81,0x55,0x20,0xec,0xdb] + vpaddsb %ymm27, %ymm21, %ymm19 + +// CHECK: vpaddsb %ymm27, %ymm21, %ymm19 {%k4} +// CHECK: encoding: [0x62,0x81,0x55,0x24,0xec,0xdb] + vpaddsb %ymm27, %ymm21, %ymm19 {%k4} + +// CHECK: vpaddsb %ymm27, %ymm21, %ymm19 {%k4} {z} +// CHECK: encoding: [0x62,0x81,0x55,0xa4,0xec,0xdb] + vpaddsb %ymm27, %ymm21, %ymm19 {%k4} {z} + +// CHECK: vpaddsb (%rcx), %ymm21, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xec,0x19] + vpaddsb (%rcx), %ymm21, %ymm19 + +// CHECK: vpaddsb 291(%rax,%r14,8), %ymm21, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x55,0x20,0xec,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpaddsb 291(%rax,%r14,8), %ymm21, %ymm19 + +// CHECK: vpaddsb 4064(%rdx), %ymm21, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xec,0x5a,0x7f] + vpaddsb 4064(%rdx), %ymm21, %ymm19 + +// CHECK: vpaddsb 4096(%rdx), %ymm21, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xec,0x9a,0x00,0x10,0x00,0x00] + vpaddsb 4096(%rdx), %ymm21, %ymm19 + +// CHECK: vpaddsb -4096(%rdx), %ymm21, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xec,0x5a,0x80] + vpaddsb -4096(%rdx), %ymm21, %ymm19 + +// CHECK: vpaddsb -4128(%rdx), %ymm21, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xec,0x9a,0xe0,0xef,0xff,0xff] + vpaddsb -4128(%rdx), %ymm21, %ymm19 + +// CHECK: vpaddsw %xmm21, %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x21,0x25,0x00,0xed,0xed] + vpaddsw %xmm21, %xmm27, %xmm29 + +// CHECK: vpaddsw %xmm21, %xmm27, %xmm29 {%k1} +// CHECK: encoding: [0x62,0x21,0x25,0x01,0xed,0xed] + vpaddsw %xmm21, %xmm27, %xmm29 {%k1} + +// CHECK: vpaddsw %xmm21, %xmm27, %xmm29 {%k1} {z} +// CHECK: encoding: [0x62,0x21,0x25,0x81,0xed,0xed] + vpaddsw %xmm21, %xmm27, %xmm29 {%k1} {z} + +// CHECK: vpaddsw (%rcx), %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xed,0x29] + vpaddsw (%rcx), %xmm27, %xmm29 + +// CHECK: vpaddsw 291(%rax,%r14,8), %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x21,0x25,0x00,0xed,0xac,0xf0,0x23,0x01,0x00,0x00] + vpaddsw 291(%rax,%r14,8), %xmm27, %xmm29 + +// CHECK: vpaddsw 2032(%rdx), %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xed,0x6a,0x7f] + vpaddsw 2032(%rdx), %xmm27, %xmm29 + +// CHECK: vpaddsw 2048(%rdx), %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xed,0xaa,0x00,0x08,0x00,0x00] + vpaddsw 2048(%rdx), %xmm27, %xmm29 + +// CHECK: vpaddsw -2048(%rdx), %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xed,0x6a,0x80] + vpaddsw -2048(%rdx), %xmm27, %xmm29 + +// CHECK: vpaddsw -2064(%rdx), %xmm27, %xmm29 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xed,0xaa,0xf0,0xf7,0xff,0xff] + vpaddsw -2064(%rdx), %xmm27, %xmm29 + +// CHECK: vpaddsw %ymm26, %ymm17, %ymm22 +// CHECK: encoding: [0x62,0x81,0x75,0x20,0xed,0xf2] + vpaddsw %ymm26, %ymm17, %ymm22 + +// CHECK: vpaddsw %ymm26, %ymm17, %ymm22 {%k3} +// CHECK: encoding: [0x62,0x81,0x75,0x23,0xed,0xf2] + vpaddsw %ymm26, %ymm17, %ymm22 {%k3} + +// CHECK: vpaddsw %ymm26, %ymm17, %ymm22 {%k3} {z} +// CHECK: encoding: [0x62,0x81,0x75,0xa3,0xed,0xf2] + vpaddsw %ymm26, %ymm17, %ymm22 {%k3} {z} + +// CHECK: vpaddsw (%rcx), %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x75,0x20,0xed,0x31] + vpaddsw (%rcx), %ymm17, %ymm22 + +// CHECK: vpaddsw 291(%rax,%r14,8), %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x75,0x20,0xed,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpaddsw 291(%rax,%r14,8), %ymm17, %ymm22 + +// CHECK: vpaddsw 4064(%rdx), %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x75,0x20,0xed,0x72,0x7f] + vpaddsw 4064(%rdx), %ymm17, %ymm22 + +// CHECK: vpaddsw 4096(%rdx), %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x75,0x20,0xed,0xb2,0x00,0x10,0x00,0x00] + vpaddsw 4096(%rdx), %ymm17, %ymm22 + +// CHECK: vpaddsw -4096(%rdx), %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x75,0x20,0xed,0x72,0x80] + vpaddsw -4096(%rdx), %ymm17, %ymm22 + +// CHECK: vpaddsw -4128(%rdx), %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x75,0x20,0xed,0xb2,0xe0,0xef,0xff,0xff] + vpaddsw -4128(%rdx), %ymm17, %ymm22 + +// CHECK: vpaddusb %xmm26, %xmm28, %xmm21 +// CHECK: encoding: [0x62,0x81,0x1d,0x00,0xdc,0xea] + vpaddusb %xmm26, %xmm28, %xmm21 + +// CHECK: vpaddusb %xmm26, %xmm28, %xmm21 {%k5} +// CHECK: encoding: [0x62,0x81,0x1d,0x05,0xdc,0xea] + vpaddusb %xmm26, %xmm28, %xmm21 {%k5} + +// CHECK: vpaddusb %xmm26, %xmm28, %xmm21 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x1d,0x85,0xdc,0xea] + vpaddusb %xmm26, %xmm28, %xmm21 {%k5} {z} + +// CHECK: vpaddusb (%rcx), %xmm28, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0xdc,0x29] + vpaddusb (%rcx), %xmm28, %xmm21 + +// CHECK: vpaddusb 291(%rax,%r14,8), %xmm28, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x1d,0x00,0xdc,0xac,0xf0,0x23,0x01,0x00,0x00] + vpaddusb 291(%rax,%r14,8), %xmm28, %xmm21 + +// CHECK: vpaddusb 2032(%rdx), %xmm28, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0xdc,0x6a,0x7f] + vpaddusb 2032(%rdx), %xmm28, %xmm21 + +// CHECK: vpaddusb 2048(%rdx), %xmm28, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0xdc,0xaa,0x00,0x08,0x00,0x00] + vpaddusb 2048(%rdx), %xmm28, %xmm21 + +// CHECK: vpaddusb -2048(%rdx), %xmm28, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0xdc,0x6a,0x80] + vpaddusb -2048(%rdx), %xmm28, %xmm21 + +// CHECK: vpaddusb -2064(%rdx), %xmm28, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0xdc,0xaa,0xf0,0xf7,0xff,0xff] + vpaddusb -2064(%rdx), %xmm28, %xmm21 + +// CHECK: vpaddusb %ymm17, %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x45,0x20,0xdc,0xe1] + vpaddusb %ymm17, %ymm23, %ymm20 + +// CHECK: vpaddusb %ymm17, %ymm23, %ymm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0x45,0x21,0xdc,0xe1] + vpaddusb %ymm17, %ymm23, %ymm20 {%k1} + +// CHECK: vpaddusb %ymm17, %ymm23, %ymm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x45,0xa1,0xdc,0xe1] + vpaddusb %ymm17, %ymm23, %ymm20 {%k1} {z} + +// CHECK: vpaddusb (%rcx), %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xdc,0x21] + vpaddusb (%rcx), %ymm23, %ymm20 + +// CHECK: vpaddusb 291(%rax,%r14,8), %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x45,0x20,0xdc,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpaddusb 291(%rax,%r14,8), %ymm23, %ymm20 + +// CHECK: vpaddusb 4064(%rdx), %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xdc,0x62,0x7f] + vpaddusb 4064(%rdx), %ymm23, %ymm20 + +// CHECK: vpaddusb 4096(%rdx), %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xdc,0xa2,0x00,0x10,0x00,0x00] + vpaddusb 4096(%rdx), %ymm23, %ymm20 + +// CHECK: vpaddusb -4096(%rdx), %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xdc,0x62,0x80] + vpaddusb -4096(%rdx), %ymm23, %ymm20 + +// CHECK: vpaddusb -4128(%rdx), %ymm23, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xdc,0xa2,0xe0,0xef,0xff,0xff] + vpaddusb -4128(%rdx), %ymm23, %ymm20 + +// CHECK: vpaddusw %xmm24, %xmm27, %xmm23 +// CHECK: encoding: [0x62,0x81,0x25,0x00,0xdd,0xf8] + vpaddusw %xmm24, %xmm27, %xmm23 + +// CHECK: vpaddusw %xmm24, %xmm27, %xmm23 {%k2} +// CHECK: encoding: [0x62,0x81,0x25,0x02,0xdd,0xf8] + vpaddusw %xmm24, %xmm27, %xmm23 {%k2} + +// CHECK: vpaddusw %xmm24, %xmm27, %xmm23 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x25,0x82,0xdd,0xf8] + vpaddusw %xmm24, %xmm27, %xmm23 {%k2} {z} + +// CHECK: vpaddusw (%rcx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xdd,0x39] + vpaddusw (%rcx), %xmm27, %xmm23 + +// CHECK: vpaddusw 291(%rax,%r14,8), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xdd,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpaddusw 291(%rax,%r14,8), %xmm27, %xmm23 + +// CHECK: vpaddusw 2032(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xdd,0x7a,0x7f] + vpaddusw 2032(%rdx), %xmm27, %xmm23 + +// CHECK: vpaddusw 2048(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xdd,0xba,0x00,0x08,0x00,0x00] + vpaddusw 2048(%rdx), %xmm27, %xmm23 + +// CHECK: vpaddusw -2048(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xdd,0x7a,0x80] + vpaddusw -2048(%rdx), %xmm27, %xmm23 + +// CHECK: vpaddusw -2064(%rdx), %xmm27, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xdd,0xba,0xf0,0xf7,0xff,0xff] + vpaddusw -2064(%rdx), %xmm27, %xmm23 + +// CHECK: vpaddusw %ymm17, %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xa1,0x4d,0x20,0xdd,0xf9] + vpaddusw %ymm17, %ymm22, %ymm23 + +// CHECK: vpaddusw %ymm17, %ymm22, %ymm23 {%k1} +// CHECK: encoding: [0x62,0xa1,0x4d,0x21,0xdd,0xf9] + vpaddusw %ymm17, %ymm22, %ymm23 {%k1} + +// CHECK: vpaddusw %ymm17, %ymm22, %ymm23 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x4d,0xa1,0xdd,0xf9] + vpaddusw %ymm17, %ymm22, %ymm23 {%k1} {z} + +// CHECK: vpaddusw (%rcx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x20,0xdd,0x39] + vpaddusw (%rcx), %ymm22, %ymm23 + +// CHECK: vpaddusw 291(%rax,%r14,8), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xa1,0x4d,0x20,0xdd,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpaddusw 291(%rax,%r14,8), %ymm22, %ymm23 + +// CHECK: vpaddusw 4064(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x20,0xdd,0x7a,0x7f] + vpaddusw 4064(%rdx), %ymm22, %ymm23 + +// CHECK: vpaddusw 4096(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x20,0xdd,0xba,0x00,0x10,0x00,0x00] + vpaddusw 4096(%rdx), %ymm22, %ymm23 + +// CHECK: vpaddusw -4096(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x20,0xdd,0x7a,0x80] + vpaddusw -4096(%rdx), %ymm22, %ymm23 + +// CHECK: vpaddusw -4128(%rdx), %ymm22, %ymm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x20,0xdd,0xba,0xe0,0xef,0xff,0xff] + vpaddusw -4128(%rdx), %ymm22, %ymm23 + +// CHECK: vpsubsb %xmm27, %xmm17, %xmm17 +// CHECK: encoding: [0x62,0x81,0x75,0x00,0xe8,0xcb] + vpsubsb %xmm27, %xmm17, %xmm17 + +// CHECK: vpsubsb %xmm27, %xmm17, %xmm17 {%k7} +// CHECK: encoding: [0x62,0x81,0x75,0x07,0xe8,0xcb] + vpsubsb %xmm27, %xmm17, %xmm17 {%k7} + +// CHECK: vpsubsb %xmm27, %xmm17, %xmm17 {%k7} {z} +// CHECK: encoding: [0x62,0x81,0x75,0x87,0xe8,0xcb] + vpsubsb %xmm27, %xmm17, %xmm17 {%k7} {z} + +// CHECK: vpsubsb (%rcx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x75,0x00,0xe8,0x09] + vpsubsb (%rcx), %xmm17, %xmm17 + +// CHECK: vpsubsb 291(%rax,%r14,8), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x75,0x00,0xe8,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubsb 291(%rax,%r14,8), %xmm17, %xmm17 + +// CHECK: vpsubsb 2032(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x75,0x00,0xe8,0x4a,0x7f] + vpsubsb 2032(%rdx), %xmm17, %xmm17 + +// CHECK: vpsubsb 2048(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x75,0x00,0xe8,0x8a,0x00,0x08,0x00,0x00] + vpsubsb 2048(%rdx), %xmm17, %xmm17 + +// CHECK: vpsubsb -2048(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x75,0x00,0xe8,0x4a,0x80] + vpsubsb -2048(%rdx), %xmm17, %xmm17 + +// CHECK: vpsubsb -2064(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x75,0x00,0xe8,0x8a,0xf0,0xf7,0xff,0xff] + vpsubsb -2064(%rdx), %xmm17, %xmm17 + +// CHECK: vpsubsb %ymm27, %ymm25, %ymm17 +// CHECK: encoding: [0x62,0x81,0x35,0x20,0xe8,0xcb] + vpsubsb %ymm27, %ymm25, %ymm17 + +// CHECK: vpsubsb %ymm27, %ymm25, %ymm17 {%k2} +// CHECK: encoding: [0x62,0x81,0x35,0x22,0xe8,0xcb] + vpsubsb %ymm27, %ymm25, %ymm17 {%k2} + +// CHECK: vpsubsb %ymm27, %ymm25, %ymm17 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x35,0xa2,0xe8,0xcb] + vpsubsb %ymm27, %ymm25, %ymm17 {%k2} {z} + +// CHECK: vpsubsb (%rcx), %ymm25, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe8,0x09] + vpsubsb (%rcx), %ymm25, %ymm17 + +// CHECK: vpsubsb 291(%rax,%r14,8), %ymm25, %ymm17 +// CHECK: encoding: [0x62,0xa1,0x35,0x20,0xe8,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubsb 291(%rax,%r14,8), %ymm25, %ymm17 + +// CHECK: vpsubsb 4064(%rdx), %ymm25, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe8,0x4a,0x7f] + vpsubsb 4064(%rdx), %ymm25, %ymm17 + +// CHECK: vpsubsb 4096(%rdx), %ymm25, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe8,0x8a,0x00,0x10,0x00,0x00] + vpsubsb 4096(%rdx), %ymm25, %ymm17 + +// CHECK: vpsubsb -4096(%rdx), %ymm25, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe8,0x4a,0x80] + vpsubsb -4096(%rdx), %ymm25, %ymm17 + +// CHECK: vpsubsb -4128(%rdx), %ymm25, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe8,0x8a,0xe0,0xef,0xff,0xff] + vpsubsb -4128(%rdx), %ymm25, %ymm17 + +// CHECK: vpsubsw %xmm23, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x21,0x2d,0x00,0xe9,0xf7] + vpsubsw %xmm23, %xmm26, %xmm30 + +// CHECK: vpsubsw %xmm23, %xmm26, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x21,0x2d,0x07,0xe9,0xf7] + vpsubsw %xmm23, %xmm26, %xmm30 {%k7} + +// CHECK: vpsubsw %xmm23, %xmm26, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x2d,0x87,0xe9,0xf7] + vpsubsw %xmm23, %xmm26, %xmm30 {%k7} {z} + +// CHECK: vpsubsw (%rcx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0xe9,0x31] + vpsubsw (%rcx), %xmm26, %xmm30 + +// CHECK: vpsubsw 291(%rax,%r14,8), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x21,0x2d,0x00,0xe9,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpsubsw 291(%rax,%r14,8), %xmm26, %xmm30 + +// CHECK: vpsubsw 2032(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0xe9,0x72,0x7f] + vpsubsw 2032(%rdx), %xmm26, %xmm30 + +// CHECK: vpsubsw 2048(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0xe9,0xb2,0x00,0x08,0x00,0x00] + vpsubsw 2048(%rdx), %xmm26, %xmm30 + +// CHECK: vpsubsw -2048(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0xe9,0x72,0x80] + vpsubsw -2048(%rdx), %xmm26, %xmm30 + +// CHECK: vpsubsw -2064(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0xe9,0xb2,0xf0,0xf7,0xff,0xff] + vpsubsw -2064(%rdx), %xmm26, %xmm30 + +// CHECK: vpsubsw %ymm17, %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x15,0x20,0xe9,0xd9] + vpsubsw %ymm17, %ymm29, %ymm19 + +// CHECK: vpsubsw %ymm17, %ymm29, %ymm19 {%k2} +// CHECK: encoding: [0x62,0xa1,0x15,0x22,0xe9,0xd9] + vpsubsw %ymm17, %ymm29, %ymm19 {%k2} + +// CHECK: vpsubsw %ymm17, %ymm29, %ymm19 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0x15,0xa2,0xe9,0xd9] + vpsubsw %ymm17, %ymm29, %ymm19 {%k2} {z} + +// CHECK: vpsubsw (%rcx), %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x19] + vpsubsw (%rcx), %ymm29, %ymm19 + +// CHECK: vpsubsw 291(%rax,%r14,8), %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x15,0x20,0xe9,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpsubsw 291(%rax,%r14,8), %ymm29, %ymm19 + +// CHECK: vpsubsw 4064(%rdx), %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x5a,0x7f] + vpsubsw 4064(%rdx), %ymm29, %ymm19 + +// CHECK: vpsubsw 4096(%rdx), %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x9a,0x00,0x10,0x00,0x00] + vpsubsw 4096(%rdx), %ymm29, %ymm19 + +// CHECK: vpsubsw -4096(%rdx), %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x5a,0x80] + vpsubsw -4096(%rdx), %ymm29, %ymm19 + +// CHECK: vpsubsw -4128(%rdx), %ymm29, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x9a,0xe0,0xef,0xff,0xff] + vpsubsw -4128(%rdx), %ymm29, %ymm19 + +// CHECK: vpsubusb %xmm23, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xd8,0xcf] + vpsubusb %xmm23, %xmm27, %xmm17 + +// CHECK: vpsubusb %xmm23, %xmm27, %xmm17 {%k6} +// CHECK: encoding: [0x62,0xa1,0x25,0x06,0xd8,0xcf] + vpsubusb %xmm23, %xmm27, %xmm17 {%k6} + +// CHECK: vpsubusb %xmm23, %xmm27, %xmm17 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x25,0x86,0xd8,0xcf] + vpsubusb %xmm23, %xmm27, %xmm17 {%k6} {z} + +// CHECK: vpsubusb (%rcx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x09] + vpsubusb (%rcx), %xmm27, %xmm17 + +// CHECK: vpsubusb 291(%rax,%r14,8), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xd8,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubusb 291(%rax,%r14,8), %xmm27, %xmm17 + +// CHECK: vpsubusb 2032(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x4a,0x7f] + vpsubusb 2032(%rdx), %xmm27, %xmm17 + +// CHECK: vpsubusb 2048(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x8a,0x00,0x08,0x00,0x00] + vpsubusb 2048(%rdx), %xmm27, %xmm17 + +// CHECK: vpsubusb -2048(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x4a,0x80] + vpsubusb -2048(%rdx), %xmm27, %xmm17 + +// CHECK: vpsubusb -2064(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x8a,0xf0,0xf7,0xff,0xff] + vpsubusb -2064(%rdx), %xmm27, %xmm17 + +// CHECK: vpsubusb %ymm18, %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xa1,0x55,0x20,0xd8,0xca] + vpsubusb %ymm18, %ymm21, %ymm17 + +// CHECK: vpsubusb %ymm18, %ymm21, %ymm17 {%k5} +// CHECK: encoding: [0x62,0xa1,0x55,0x25,0xd8,0xca] + vpsubusb %ymm18, %ymm21, %ymm17 {%k5} + +// CHECK: vpsubusb %ymm18, %ymm21, %ymm17 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x55,0xa5,0xd8,0xca] + vpsubusb %ymm18, %ymm21, %ymm17 {%k5} {z} + +// CHECK: vpsubusb (%rcx), %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xd8,0x09] + vpsubusb (%rcx), %ymm21, %ymm17 + +// CHECK: vpsubusb 291(%rax,%r14,8), %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xa1,0x55,0x20,0xd8,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubusb 291(%rax,%r14,8), %ymm21, %ymm17 + +// CHECK: vpsubusb 4064(%rdx), %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xd8,0x4a,0x7f] + vpsubusb 4064(%rdx), %ymm21, %ymm17 + +// CHECK: vpsubusb 4096(%rdx), %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xd8,0x8a,0x00,0x10,0x00,0x00] + vpsubusb 4096(%rdx), %ymm21, %ymm17 + +// CHECK: vpsubusb -4096(%rdx), %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xd8,0x4a,0x80] + vpsubusb -4096(%rdx), %ymm21, %ymm17 + +// CHECK: vpsubusb -4128(%rdx), %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xd8,0x8a,0xe0,0xef,0xff,0xff] + vpsubusb -4128(%rdx), %ymm21, %ymm17 + +// CHECK: vpsubusw %xmm19, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x21,0x55,0x00,0xd9,0xcb] + vpsubusw %xmm19, %xmm21, %xmm25 + +// CHECK: vpsubusw %xmm19, %xmm21, %xmm25 {%k6} +// CHECK: encoding: [0x62,0x21,0x55,0x06,0xd9,0xcb] + vpsubusw %xmm19, %xmm21, %xmm25 {%k6} + +// CHECK: vpsubusw %xmm19, %xmm21, %xmm25 {%k6} {z} +// CHECK: encoding: [0x62,0x21,0x55,0x86,0xd9,0xcb] + vpsubusw %xmm19, %xmm21, %xmm25 {%k6} {z} + +// CHECK: vpsubusw (%rcx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xd9,0x09] + vpsubusw (%rcx), %xmm21, %xmm25 + +// CHECK: vpsubusw 291(%rax,%r14,8), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x21,0x55,0x00,0xd9,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpsubusw 291(%rax,%r14,8), %xmm21, %xmm25 + +// CHECK: vpsubusw 2032(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xd9,0x4a,0x7f] + vpsubusw 2032(%rdx), %xmm21, %xmm25 + +// CHECK: vpsubusw 2048(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xd9,0x8a,0x00,0x08,0x00,0x00] + vpsubusw 2048(%rdx), %xmm21, %xmm25 + +// CHECK: vpsubusw -2048(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xd9,0x4a,0x80] + vpsubusw -2048(%rdx), %xmm21, %xmm25 + +// CHECK: vpsubusw -2064(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xd9,0x8a,0xf0,0xf7,0xff,0xff] + vpsubusw -2064(%rdx), %xmm21, %xmm25 + +// CHECK: vpsubusw %ymm17, %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x21,0x75,0x20,0xd9,0xd1] + vpsubusw %ymm17, %ymm17, %ymm26 + +// CHECK: vpsubusw %ymm17, %ymm17, %ymm26 {%k4} +// CHECK: encoding: [0x62,0x21,0x75,0x24,0xd9,0xd1] + vpsubusw %ymm17, %ymm17, %ymm26 {%k4} + +// CHECK: vpsubusw %ymm17, %ymm17, %ymm26 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x75,0xa4,0xd9,0xd1] + vpsubusw %ymm17, %ymm17, %ymm26 {%k4} {z} + +// CHECK: vpsubusw (%rcx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x11] + vpsubusw (%rcx), %ymm17, %ymm26 + +// CHECK: vpsubusw 291(%rax,%r14,8), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x21,0x75,0x20,0xd9,0x94,0xf0,0x23,0x01,0x00,0x00] + vpsubusw 291(%rax,%r14,8), %ymm17, %ymm26 + +// CHECK: vpsubusw 4064(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x52,0x7f] + vpsubusw 4064(%rdx), %ymm17, %ymm26 + +// CHECK: vpsubusw 4096(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x92,0x00,0x10,0x00,0x00] + vpsubusw 4096(%rdx), %ymm17, %ymm26 + +// CHECK: vpsubusw -4096(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x52,0x80] + vpsubusw -4096(%rdx), %ymm17, %ymm26 + +// CHECK: vpsubusw -4128(%rdx), %ymm17, %ymm26 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x92,0xe0,0xef,0xff,0xff] + vpsubusw -4128(%rdx), %ymm17, %ymm26 + +// CHECK: vpaddsb %xmm23, %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xec,0xe7] + vpaddsb %xmm23, %xmm24, %xmm20 + +// CHECK: vpaddsb %xmm23, %xmm24, %xmm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0x3d,0x01,0xec,0xe7] + vpaddsb %xmm23, %xmm24, %xmm20 {%k1} + +// CHECK: vpaddsb %xmm23, %xmm24, %xmm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x3d,0x81,0xec,0xe7] + vpaddsb %xmm23, %xmm24, %xmm20 {%k1} {z} + +// CHECK: vpaddsb (%rcx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xec,0x21] + vpaddsb (%rcx), %xmm24, %xmm20 + +// CHECK: vpaddsb 4660(%rax,%r14,8), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xec,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpaddsb 4660(%rax,%r14,8), %xmm24, %xmm20 + +// CHECK: vpaddsb 2032(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xec,0x62,0x7f] + vpaddsb 2032(%rdx), %xmm24, %xmm20 + +// CHECK: vpaddsb 2048(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xec,0xa2,0x00,0x08,0x00,0x00] + vpaddsb 2048(%rdx), %xmm24, %xmm20 + +// CHECK: vpaddsb -2048(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xec,0x62,0x80] + vpaddsb -2048(%rdx), %xmm24, %xmm20 + +// CHECK: vpaddsb -2064(%rdx), %xmm24, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xec,0xa2,0xf0,0xf7,0xff,0xff] + vpaddsb -2064(%rdx), %xmm24, %xmm20 + +// CHECK: vpaddsb %ymm19, %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xec,0xd3] + vpaddsb %ymm19, %ymm20, %ymm18 + +// CHECK: vpaddsb %ymm19, %ymm20, %ymm18 {%k7} +// CHECK: encoding: [0x62,0xa1,0x5d,0x27,0xec,0xd3] + vpaddsb %ymm19, %ymm20, %ymm18 {%k7} + +// CHECK: vpaddsb %ymm19, %ymm20, %ymm18 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x5d,0xa7,0xec,0xd3] + vpaddsb %ymm19, %ymm20, %ymm18 {%k7} {z} + +// CHECK: vpaddsb (%rcx), %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xec,0x11] + vpaddsb (%rcx), %ymm20, %ymm18 + +// CHECK: vpaddsb 4660(%rax,%r14,8), %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xec,0x94,0xf0,0x34,0x12,0x00,0x00] + vpaddsb 4660(%rax,%r14,8), %ymm20, %ymm18 + +// CHECK: vpaddsb 4064(%rdx), %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xec,0x52,0x7f] + vpaddsb 4064(%rdx), %ymm20, %ymm18 + +// CHECK: vpaddsb 4096(%rdx), %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xec,0x92,0x00,0x10,0x00,0x00] + vpaddsb 4096(%rdx), %ymm20, %ymm18 + +// CHECK: vpaddsb -4096(%rdx), %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xec,0x52,0x80] + vpaddsb -4096(%rdx), %ymm20, %ymm18 + +// CHECK: vpaddsb -4128(%rdx), %ymm20, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xec,0x92,0xe0,0xef,0xff,0xff] + vpaddsb -4128(%rdx), %ymm20, %ymm18 + +// CHECK: vpaddsw %xmm24, %xmm23, %xmm23 +// CHECK: encoding: [0x62,0x81,0x45,0x00,0xed,0xf8] + vpaddsw %xmm24, %xmm23, %xmm23 + +// CHECK: vpaddsw %xmm24, %xmm23, %xmm23 {%k5} +// CHECK: encoding: [0x62,0x81,0x45,0x05,0xed,0xf8] + vpaddsw %xmm24, %xmm23, %xmm23 {%k5} + +// CHECK: vpaddsw %xmm24, %xmm23, %xmm23 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x45,0x85,0xed,0xf8] + vpaddsw %xmm24, %xmm23, %xmm23 {%k5} {z} + +// CHECK: vpaddsw (%rcx), %xmm23, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xed,0x39] + vpaddsw (%rcx), %xmm23, %xmm23 + +// CHECK: vpaddsw 4660(%rax,%r14,8), %xmm23, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x45,0x00,0xed,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpaddsw 4660(%rax,%r14,8), %xmm23, %xmm23 + +// CHECK: vpaddsw 2032(%rdx), %xmm23, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xed,0x7a,0x7f] + vpaddsw 2032(%rdx), %xmm23, %xmm23 + +// CHECK: vpaddsw 2048(%rdx), %xmm23, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xed,0xba,0x00,0x08,0x00,0x00] + vpaddsw 2048(%rdx), %xmm23, %xmm23 + +// CHECK: vpaddsw -2048(%rdx), %xmm23, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xed,0x7a,0x80] + vpaddsw -2048(%rdx), %xmm23, %xmm23 + +// CHECK: vpaddsw -2064(%rdx), %xmm23, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xed,0xba,0xf0,0xf7,0xff,0xff] + vpaddsw -2064(%rdx), %xmm23, %xmm23 + +// CHECK: vpaddsw %ymm19, %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x21,0x4d,0x20,0xed,0xeb] + vpaddsw %ymm19, %ymm22, %ymm29 + +// CHECK: vpaddsw %ymm19, %ymm22, %ymm29 {%k6} +// CHECK: encoding: [0x62,0x21,0x4d,0x26,0xed,0xeb] + vpaddsw %ymm19, %ymm22, %ymm29 {%k6} + +// CHECK: vpaddsw %ymm19, %ymm22, %ymm29 {%k6} {z} +// CHECK: encoding: [0x62,0x21,0x4d,0xa6,0xed,0xeb] + vpaddsw %ymm19, %ymm22, %ymm29 {%k6} {z} + +// CHECK: vpaddsw (%rcx), %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0xed,0x29] + vpaddsw (%rcx), %ymm22, %ymm29 + +// CHECK: vpaddsw 4660(%rax,%r14,8), %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x21,0x4d,0x20,0xed,0xac,0xf0,0x34,0x12,0x00,0x00] + vpaddsw 4660(%rax,%r14,8), %ymm22, %ymm29 + +// CHECK: vpaddsw 4064(%rdx), %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0xed,0x6a,0x7f] + vpaddsw 4064(%rdx), %ymm22, %ymm29 + +// CHECK: vpaddsw 4096(%rdx), %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0xed,0xaa,0x00,0x10,0x00,0x00] + vpaddsw 4096(%rdx), %ymm22, %ymm29 + +// CHECK: vpaddsw -4096(%rdx), %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0xed,0x6a,0x80] + vpaddsw -4096(%rdx), %ymm22, %ymm29 + +// CHECK: vpaddsw -4128(%rdx), %ymm22, %ymm29 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0xed,0xaa,0xe0,0xef,0xff,0xff] + vpaddsw -4128(%rdx), %ymm22, %ymm29 + +// CHECK: vpaddusb %xmm26, %xmm18, %xmm20 +// CHECK: encoding: [0x62,0x81,0x6d,0x00,0xdc,0xe2] + vpaddusb %xmm26, %xmm18, %xmm20 + +// CHECK: vpaddusb %xmm26, %xmm18, %xmm20 {%k6} +// CHECK: encoding: [0x62,0x81,0x6d,0x06,0xdc,0xe2] + vpaddusb %xmm26, %xmm18, %xmm20 {%k6} + +// CHECK: vpaddusb %xmm26, %xmm18, %xmm20 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0x6d,0x86,0xdc,0xe2] + vpaddusb %xmm26, %xmm18, %xmm20 {%k6} {z} + +// CHECK: vpaddusb (%rcx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0xdc,0x21] + vpaddusb (%rcx), %xmm18, %xmm20 + +// CHECK: vpaddusb 4660(%rax,%r14,8), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x6d,0x00,0xdc,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpaddusb 4660(%rax,%r14,8), %xmm18, %xmm20 + +// CHECK: vpaddusb 2032(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0xdc,0x62,0x7f] + vpaddusb 2032(%rdx), %xmm18, %xmm20 + +// CHECK: vpaddusb 2048(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0xdc,0xa2,0x00,0x08,0x00,0x00] + vpaddusb 2048(%rdx), %xmm18, %xmm20 + +// CHECK: vpaddusb -2048(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0xdc,0x62,0x80] + vpaddusb -2048(%rdx), %xmm18, %xmm20 + +// CHECK: vpaddusb -2064(%rdx), %xmm18, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0xdc,0xa2,0xf0,0xf7,0xff,0xff] + vpaddusb -2064(%rdx), %xmm18, %xmm20 + +// CHECK: vpaddusb %ymm24, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x01,0x2d,0x20,0xdc,0xc8] + vpaddusb %ymm24, %ymm26, %ymm25 + +// CHECK: vpaddusb %ymm24, %ymm26, %ymm25 {%k3} +// CHECK: encoding: [0x62,0x01,0x2d,0x23,0xdc,0xc8] + vpaddusb %ymm24, %ymm26, %ymm25 {%k3} + +// CHECK: vpaddusb %ymm24, %ymm26, %ymm25 {%k3} {z} +// CHECK: encoding: [0x62,0x01,0x2d,0xa3,0xdc,0xc8] + vpaddusb %ymm24, %ymm26, %ymm25 {%k3} {z} + +// CHECK: vpaddusb (%rcx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdc,0x09] + vpaddusb (%rcx), %ymm26, %ymm25 + +// CHECK: vpaddusb 4660(%rax,%r14,8), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x21,0x2d,0x20,0xdc,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpaddusb 4660(%rax,%r14,8), %ymm26, %ymm25 + +// CHECK: vpaddusb 4064(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdc,0x4a,0x7f] + vpaddusb 4064(%rdx), %ymm26, %ymm25 + +// CHECK: vpaddusb 4096(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdc,0x8a,0x00,0x10,0x00,0x00] + vpaddusb 4096(%rdx), %ymm26, %ymm25 + +// CHECK: vpaddusb -4096(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdc,0x4a,0x80] + vpaddusb -4096(%rdx), %ymm26, %ymm25 + +// CHECK: vpaddusb -4128(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdc,0x8a,0xe0,0xef,0xff,0xff] + vpaddusb -4128(%rdx), %ymm26, %ymm25 + +// CHECK: vpaddusw %xmm24, %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x01,0x15,0x00,0xdd,0xe0] + vpaddusw %xmm24, %xmm29, %xmm28 + +// CHECK: vpaddusw %xmm24, %xmm29, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x01,0x15,0x04,0xdd,0xe0] + vpaddusw %xmm24, %xmm29, %xmm28 {%k4} + +// CHECK: vpaddusw %xmm24, %xmm29, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0x15,0x84,0xdd,0xe0] + vpaddusw %xmm24, %xmm29, %xmm28 {%k4} {z} + +// CHECK: vpaddusw (%rcx), %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x00,0xdd,0x21] + vpaddusw (%rcx), %xmm29, %xmm28 + +// CHECK: vpaddusw 4660(%rax,%r14,8), %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x21,0x15,0x00,0xdd,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpaddusw 4660(%rax,%r14,8), %xmm29, %xmm28 + +// CHECK: vpaddusw 2032(%rdx), %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x00,0xdd,0x62,0x7f] + vpaddusw 2032(%rdx), %xmm29, %xmm28 + +// CHECK: vpaddusw 2048(%rdx), %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x00,0xdd,0xa2,0x00,0x08,0x00,0x00] + vpaddusw 2048(%rdx), %xmm29, %xmm28 + +// CHECK: vpaddusw -2048(%rdx), %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x00,0xdd,0x62,0x80] + vpaddusw -2048(%rdx), %xmm29, %xmm28 + +// CHECK: vpaddusw -2064(%rdx), %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x61,0x15,0x00,0xdd,0xa2,0xf0,0xf7,0xff,0xff] + vpaddusw -2064(%rdx), %xmm29, %xmm28 + +// CHECK: vpaddusw %ymm22, %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x21,0x2d,0x20,0xdd,0xde] + vpaddusw %ymm22, %ymm26, %ymm27 + +// CHECK: vpaddusw %ymm22, %ymm26, %ymm27 {%k3} +// CHECK: encoding: [0x62,0x21,0x2d,0x23,0xdd,0xde] + vpaddusw %ymm22, %ymm26, %ymm27 {%k3} + +// CHECK: vpaddusw %ymm22, %ymm26, %ymm27 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0x2d,0xa3,0xdd,0xde] + vpaddusw %ymm22, %ymm26, %ymm27 {%k3} {z} + +// CHECK: vpaddusw (%rcx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdd,0x19] + vpaddusw (%rcx), %ymm26, %ymm27 + +// CHECK: vpaddusw 4660(%rax,%r14,8), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x21,0x2d,0x20,0xdd,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpaddusw 4660(%rax,%r14,8), %ymm26, %ymm27 + +// CHECK: vpaddusw 4064(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdd,0x5a,0x7f] + vpaddusw 4064(%rdx), %ymm26, %ymm27 + +// CHECK: vpaddusw 4096(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdd,0x9a,0x00,0x10,0x00,0x00] + vpaddusw 4096(%rdx), %ymm26, %ymm27 + +// CHECK: vpaddusw -4096(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdd,0x5a,0x80] + vpaddusw -4096(%rdx), %ymm26, %ymm27 + +// CHECK: vpaddusw -4128(%rdx), %ymm26, %ymm27 +// CHECK: encoding: [0x62,0x61,0x2d,0x20,0xdd,0x9a,0xe0,0xef,0xff,0xff] + vpaddusw -4128(%rdx), %ymm26, %ymm27 + +// CHECK: vpsubsb %xmm18, %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0xe8,0xfa] + vpsubsb %xmm18, %xmm22, %xmm23 + +// CHECK: vpsubsb %xmm18, %xmm22, %xmm23 {%k5} +// CHECK: encoding: [0x62,0xa1,0x4d,0x05,0xe8,0xfa] + vpsubsb %xmm18, %xmm22, %xmm23 {%k5} + +// CHECK: vpsubsb %xmm18, %xmm22, %xmm23 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x4d,0x85,0xe8,0xfa] + vpsubsb %xmm18, %xmm22, %xmm23 {%k5} {z} + +// CHECK: vpsubsb (%rcx), %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xe8,0x39] + vpsubsb (%rcx), %xmm22, %xmm23 + +// CHECK: vpsubsb 4660(%rax,%r14,8), %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0xe8,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpsubsb 4660(%rax,%r14,8), %xmm22, %xmm23 + +// CHECK: vpsubsb 2032(%rdx), %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xe8,0x7a,0x7f] + vpsubsb 2032(%rdx), %xmm22, %xmm23 + +// CHECK: vpsubsb 2048(%rdx), %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xe8,0xba,0x00,0x08,0x00,0x00] + vpsubsb 2048(%rdx), %xmm22, %xmm23 + +// CHECK: vpsubsb -2048(%rdx), %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xe8,0x7a,0x80] + vpsubsb -2048(%rdx), %xmm22, %xmm23 + +// CHECK: vpsubsb -2064(%rdx), %xmm22, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xe8,0xba,0xf0,0xf7,0xff,0xff] + vpsubsb -2064(%rdx), %xmm22, %xmm23 + +// CHECK: vpsubsb %ymm24, %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x01,0x55,0x20,0xe8,0xe0] + vpsubsb %ymm24, %ymm21, %ymm28 + +// CHECK: vpsubsb %ymm24, %ymm21, %ymm28 {%k5} +// CHECK: encoding: [0x62,0x01,0x55,0x25,0xe8,0xe0] + vpsubsb %ymm24, %ymm21, %ymm28 {%k5} + +// CHECK: vpsubsb %ymm24, %ymm21, %ymm28 {%k5} {z} +// CHECK: encoding: [0x62,0x01,0x55,0xa5,0xe8,0xe0] + vpsubsb %ymm24, %ymm21, %ymm28 {%k5} {z} + +// CHECK: vpsubsb (%rcx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0x55,0x20,0xe8,0x21] + vpsubsb (%rcx), %ymm21, %ymm28 + +// CHECK: vpsubsb 4660(%rax,%r14,8), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x21,0x55,0x20,0xe8,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpsubsb 4660(%rax,%r14,8), %ymm21, %ymm28 + +// CHECK: vpsubsb 4064(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0x55,0x20,0xe8,0x62,0x7f] + vpsubsb 4064(%rdx), %ymm21, %ymm28 + +// CHECK: vpsubsb 4096(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0x55,0x20,0xe8,0xa2,0x00,0x10,0x00,0x00] + vpsubsb 4096(%rdx), %ymm21, %ymm28 + +// CHECK: vpsubsb -4096(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0x55,0x20,0xe8,0x62,0x80] + vpsubsb -4096(%rdx), %ymm21, %ymm28 + +// CHECK: vpsubsb -4128(%rdx), %ymm21, %ymm28 +// CHECK: encoding: [0x62,0x61,0x55,0x20,0xe8,0xa2,0xe0,0xef,0xff,0xff] + vpsubsb -4128(%rdx), %ymm21, %ymm28 + +// CHECK: vpsubsw %xmm18, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xe9,0xda] + vpsubsw %xmm18, %xmm27, %xmm19 + +// CHECK: vpsubsw %xmm18, %xmm27, %xmm19 {%k4} +// CHECK: encoding: [0x62,0xa1,0x25,0x04,0xe9,0xda] + vpsubsw %xmm18, %xmm27, %xmm19 {%k4} + +// CHECK: vpsubsw %xmm18, %xmm27, %xmm19 {%k4} {z} +// CHECK: encoding: [0x62,0xa1,0x25,0x84,0xe9,0xda] + vpsubsw %xmm18, %xmm27, %xmm19 {%k4} {z} + +// CHECK: vpsubsw (%rcx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xe9,0x19] + vpsubsw (%rcx), %xmm27, %xmm19 + +// CHECK: vpsubsw 4660(%rax,%r14,8), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xe9,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpsubsw 4660(%rax,%r14,8), %xmm27, %xmm19 + +// CHECK: vpsubsw 2032(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xe9,0x5a,0x7f] + vpsubsw 2032(%rdx), %xmm27, %xmm19 + +// CHECK: vpsubsw 2048(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xe9,0x9a,0x00,0x08,0x00,0x00] + vpsubsw 2048(%rdx), %xmm27, %xmm19 + +// CHECK: vpsubsw -2048(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xe9,0x5a,0x80] + vpsubsw -2048(%rdx), %xmm27, %xmm19 + +// CHECK: vpsubsw -2064(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xe9,0x9a,0xf0,0xf7,0xff,0xff] + vpsubsw -2064(%rdx), %xmm27, %xmm19 + +// CHECK: vpsubsw %ymm25, %ymm29, %ymm18 +// CHECK: encoding: [0x62,0x81,0x15,0x20,0xe9,0xd1] + vpsubsw %ymm25, %ymm29, %ymm18 + +// CHECK: vpsubsw %ymm25, %ymm29, %ymm18 {%k5} +// CHECK: encoding: [0x62,0x81,0x15,0x25,0xe9,0xd1] + vpsubsw %ymm25, %ymm29, %ymm18 {%k5} + +// CHECK: vpsubsw %ymm25, %ymm29, %ymm18 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x15,0xa5,0xe9,0xd1] + vpsubsw %ymm25, %ymm29, %ymm18 {%k5} {z} + +// CHECK: vpsubsw (%rcx), %ymm29, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x11] + vpsubsw (%rcx), %ymm29, %ymm18 + +// CHECK: vpsubsw 4660(%rax,%r14,8), %ymm29, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x15,0x20,0xe9,0x94,0xf0,0x34,0x12,0x00,0x00] + vpsubsw 4660(%rax,%r14,8), %ymm29, %ymm18 + +// CHECK: vpsubsw 4064(%rdx), %ymm29, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x52,0x7f] + vpsubsw 4064(%rdx), %ymm29, %ymm18 + +// CHECK: vpsubsw 4096(%rdx), %ymm29, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x92,0x00,0x10,0x00,0x00] + vpsubsw 4096(%rdx), %ymm29, %ymm18 + +// CHECK: vpsubsw -4096(%rdx), %ymm29, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x52,0x80] + vpsubsw -4096(%rdx), %ymm29, %ymm18 + +// CHECK: vpsubsw -4128(%rdx), %ymm29, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xe9,0x92,0xe0,0xef,0xff,0xff] + vpsubsw -4128(%rdx), %ymm29, %ymm18 + +// CHECK: vpsubusb %xmm23, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x00,0xd8,0xe7] + vpsubusb %xmm23, %xmm26, %xmm20 + +// CHECK: vpsubusb %xmm23, %xmm26, %xmm20 {%k5} +// CHECK: encoding: [0x62,0xa1,0x2d,0x05,0xd8,0xe7] + vpsubusb %xmm23, %xmm26, %xmm20 {%k5} + +// CHECK: vpsubusb %xmm23, %xmm26, %xmm20 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x2d,0x85,0xd8,0xe7] + vpsubusb %xmm23, %xmm26, %xmm20 {%k5} {z} + +// CHECK: vpsubusb (%rcx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xd8,0x21] + vpsubusb (%rcx), %xmm26, %xmm20 + +// CHECK: vpsubusb 4660(%rax,%r14,8), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x00,0xd8,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpsubusb 4660(%rax,%r14,8), %xmm26, %xmm20 + +// CHECK: vpsubusb 2032(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xd8,0x62,0x7f] + vpsubusb 2032(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubusb 2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xd8,0xa2,0x00,0x08,0x00,0x00] + vpsubusb 2048(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubusb -2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xd8,0x62,0x80] + vpsubusb -2048(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubusb -2064(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xd8,0xa2,0xf0,0xf7,0xff,0xff] + vpsubusb -2064(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubusb %ymm19, %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x21,0x1d,0x20,0xd8,0xf3] + vpsubusb %ymm19, %ymm28, %ymm30 + +// CHECK: vpsubusb %ymm19, %ymm28, %ymm30 {%k3} +// CHECK: encoding: [0x62,0x21,0x1d,0x23,0xd8,0xf3] + vpsubusb %ymm19, %ymm28, %ymm30 {%k3} + +// CHECK: vpsubusb %ymm19, %ymm28, %ymm30 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0x1d,0xa3,0xd8,0xf3] + vpsubusb %ymm19, %ymm28, %ymm30 {%k3} {z} + +// CHECK: vpsubusb (%rcx), %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0xd8,0x31] + vpsubusb (%rcx), %ymm28, %ymm30 + +// CHECK: vpsubusb 4660(%rax,%r14,8), %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x21,0x1d,0x20,0xd8,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpsubusb 4660(%rax,%r14,8), %ymm28, %ymm30 + +// CHECK: vpsubusb 4064(%rdx), %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0xd8,0x72,0x7f] + vpsubusb 4064(%rdx), %ymm28, %ymm30 + +// CHECK: vpsubusb 4096(%rdx), %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0xd8,0xb2,0x00,0x10,0x00,0x00] + vpsubusb 4096(%rdx), %ymm28, %ymm30 + +// CHECK: vpsubusb -4096(%rdx), %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0xd8,0x72,0x80] + vpsubusb -4096(%rdx), %ymm28, %ymm30 + +// CHECK: vpsubusb -4128(%rdx), %ymm28, %ymm30 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0xd8,0xb2,0xe0,0xef,0xff,0xff] + vpsubusb -4128(%rdx), %ymm28, %ymm30 + +// CHECK: vpsubusw %xmm21, %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x21,0x45,0x00,0xd9,0xd5] + vpsubusw %xmm21, %xmm23, %xmm26 + +// CHECK: vpsubusw %xmm21, %xmm23, %xmm26 {%k6} +// CHECK: encoding: [0x62,0x21,0x45,0x06,0xd9,0xd5] + vpsubusw %xmm21, %xmm23, %xmm26 {%k6} + +// CHECK: vpsubusw %xmm21, %xmm23, %xmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x21,0x45,0x86,0xd9,0xd5] + vpsubusw %xmm21, %xmm23, %xmm26 {%k6} {z} + +// CHECK: vpsubusw (%rcx), %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x61,0x45,0x00,0xd9,0x11] + vpsubusw (%rcx), %xmm23, %xmm26 + +// CHECK: vpsubusw 4660(%rax,%r14,8), %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x21,0x45,0x00,0xd9,0x94,0xf0,0x34,0x12,0x00,0x00] + vpsubusw 4660(%rax,%r14,8), %xmm23, %xmm26 + +// CHECK: vpsubusw 2032(%rdx), %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x61,0x45,0x00,0xd9,0x52,0x7f] + vpsubusw 2032(%rdx), %xmm23, %xmm26 + +// CHECK: vpsubusw 2048(%rdx), %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x61,0x45,0x00,0xd9,0x92,0x00,0x08,0x00,0x00] + vpsubusw 2048(%rdx), %xmm23, %xmm26 + +// CHECK: vpsubusw -2048(%rdx), %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x61,0x45,0x00,0xd9,0x52,0x80] + vpsubusw -2048(%rdx), %xmm23, %xmm26 + +// CHECK: vpsubusw -2064(%rdx), %xmm23, %xmm26 +// CHECK: encoding: [0x62,0x61,0x45,0x00,0xd9,0x92,0xf0,0xf7,0xff,0xff] + vpsubusw -2064(%rdx), %xmm23, %xmm26 + +// CHECK: vpsubusw %ymm24, %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x01,0x5d,0x20,0xd9,0xc8] + vpsubusw %ymm24, %ymm20, %ymm25 + +// CHECK: vpsubusw %ymm24, %ymm20, %ymm25 {%k1} +// CHECK: encoding: [0x62,0x01,0x5d,0x21,0xd9,0xc8] + vpsubusw %ymm24, %ymm20, %ymm25 {%k1} + +// CHECK: vpsubusw %ymm24, %ymm20, %ymm25 {%k1} {z} +// CHECK: encoding: [0x62,0x01,0x5d,0xa1,0xd9,0xc8] + vpsubusw %ymm24, %ymm20, %ymm25 {%k1} {z} + +// CHECK: vpsubusw (%rcx), %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x61,0x5d,0x20,0xd9,0x09] + vpsubusw (%rcx), %ymm20, %ymm25 + +// CHECK: vpsubusw 4660(%rax,%r14,8), %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x21,0x5d,0x20,0xd9,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpsubusw 4660(%rax,%r14,8), %ymm20, %ymm25 + +// CHECK: vpsubusw 4064(%rdx), %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x61,0x5d,0x20,0xd9,0x4a,0x7f] + vpsubusw 4064(%rdx), %ymm20, %ymm25 + +// CHECK: vpsubusw 4096(%rdx), %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x61,0x5d,0x20,0xd9,0x8a,0x00,0x10,0x00,0x00] + vpsubusw 4096(%rdx), %ymm20, %ymm25 + +// CHECK: vpsubusw -4096(%rdx), %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x61,0x5d,0x20,0xd9,0x4a,0x80] + vpsubusw -4096(%rdx), %ymm20, %ymm25 + +// CHECK: vpsubusw -4128(%rdx), %ymm20, %ymm25 +// CHECK: encoding: [0x62,0x61,0x5d,0x20,0xd9,0x8a,0xe0,0xef,0xff,0xff] + vpsubusw -4128(%rdx), %ymm20, %ymm25 + +// CHECK: vpaddsb %xmm25, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x01,0x65,0x00,0xec,0xf1] + vpaddsb %xmm25, %xmm19, %xmm30 + +// CHECK: vpaddsb %xmm25, %xmm19, %xmm30 {%k6} +// CHECK: encoding: [0x62,0x01,0x65,0x06,0xec,0xf1] + vpaddsb %xmm25, %xmm19, %xmm30 {%k6} + +// CHECK: vpaddsb %xmm25, %xmm19, %xmm30 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x65,0x86,0xec,0xf1] + vpaddsb %xmm25, %xmm19, %xmm30 {%k6} {z} + +// CHECK: vpaddsb (%rcx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xec,0x31] + vpaddsb (%rcx), %xmm19, %xmm30 + +// CHECK: vpaddsb 291(%rax,%r14,8), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x21,0x65,0x00,0xec,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpaddsb 291(%rax,%r14,8), %xmm19, %xmm30 + +// CHECK: vpaddsb 2032(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xec,0x72,0x7f] + vpaddsb 2032(%rdx), %xmm19, %xmm30 + +// CHECK: vpaddsb 2048(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xec,0xb2,0x00,0x08,0x00,0x00] + vpaddsb 2048(%rdx), %xmm19, %xmm30 + +// CHECK: vpaddsb -2048(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xec,0x72,0x80] + vpaddsb -2048(%rdx), %xmm19, %xmm30 + +// CHECK: vpaddsb -2064(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xec,0xb2,0xf0,0xf7,0xff,0xff] + vpaddsb -2064(%rdx), %xmm19, %xmm30 + +// CHECK: vpaddsb %ymm28, %ymm29, %ymm22 +// CHECK: encoding: [0x62,0x81,0x15,0x20,0xec,0xf4] + vpaddsb %ymm28, %ymm29, %ymm22 + +// CHECK: vpaddsb %ymm28, %ymm29, %ymm22 {%k4} +// CHECK: encoding: [0x62,0x81,0x15,0x24,0xec,0xf4] + vpaddsb %ymm28, %ymm29, %ymm22 {%k4} + +// CHECK: vpaddsb %ymm28, %ymm29, %ymm22 {%k4} {z} +// CHECK: encoding: [0x62,0x81,0x15,0xa4,0xec,0xf4] + vpaddsb %ymm28, %ymm29, %ymm22 {%k4} {z} + +// CHECK: vpaddsb (%rcx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xec,0x31] + vpaddsb (%rcx), %ymm29, %ymm22 + +// CHECK: vpaddsb 291(%rax,%r14,8), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x15,0x20,0xec,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpaddsb 291(%rax,%r14,8), %ymm29, %ymm22 + +// CHECK: vpaddsb 4064(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xec,0x72,0x7f] + vpaddsb 4064(%rdx), %ymm29, %ymm22 + +// CHECK: vpaddsb 4096(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xec,0xb2,0x00,0x10,0x00,0x00] + vpaddsb 4096(%rdx), %ymm29, %ymm22 + +// CHECK: vpaddsb -4096(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xec,0x72,0x80] + vpaddsb -4096(%rdx), %ymm29, %ymm22 + +// CHECK: vpaddsb -4128(%rdx), %ymm29, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x15,0x20,0xec,0xb2,0xe0,0xef,0xff,0xff] + vpaddsb -4128(%rdx), %ymm29, %ymm22 + +// CHECK: vpaddsw %xmm19, %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x65,0x00,0xed,0xe3] + vpaddsw %xmm19, %xmm19, %xmm20 + +// CHECK: vpaddsw %xmm19, %xmm19, %xmm20 {%k7} +// CHECK: encoding: [0x62,0xa1,0x65,0x07,0xed,0xe3] + vpaddsw %xmm19, %xmm19, %xmm20 {%k7} + +// CHECK: vpaddsw %xmm19, %xmm19, %xmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x65,0x87,0xed,0xe3] + vpaddsw %xmm19, %xmm19, %xmm20 {%k7} {z} + +// CHECK: vpaddsw (%rcx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xed,0x21] + vpaddsw (%rcx), %xmm19, %xmm20 + +// CHECK: vpaddsw 291(%rax,%r14,8), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x65,0x00,0xed,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpaddsw 291(%rax,%r14,8), %xmm19, %xmm20 + +// CHECK: vpaddsw 2032(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xed,0x62,0x7f] + vpaddsw 2032(%rdx), %xmm19, %xmm20 + +// CHECK: vpaddsw 2048(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xed,0xa2,0x00,0x08,0x00,0x00] + vpaddsw 2048(%rdx), %xmm19, %xmm20 + +// CHECK: vpaddsw -2048(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xed,0x62,0x80] + vpaddsw -2048(%rdx), %xmm19, %xmm20 + +// CHECK: vpaddsw -2064(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xed,0xa2,0xf0,0xf7,0xff,0xff] + vpaddsw -2064(%rdx), %xmm19, %xmm20 + +// CHECK: vpaddsw %ymm19, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xed,0xe3] + vpaddsw %ymm19, %ymm27, %ymm20 + +// CHECK: vpaddsw %ymm19, %ymm27, %ymm20 {%k3} +// CHECK: encoding: [0x62,0xa1,0x25,0x23,0xed,0xe3] + vpaddsw %ymm19, %ymm27, %ymm20 {%k3} + +// CHECK: vpaddsw %ymm19, %ymm27, %ymm20 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x25,0xa3,0xed,0xe3] + vpaddsw %ymm19, %ymm27, %ymm20 {%k3} {z} + +// CHECK: vpaddsw (%rcx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xed,0x21] + vpaddsw (%rcx), %ymm27, %ymm20 + +// CHECK: vpaddsw 291(%rax,%r14,8), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xed,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpaddsw 291(%rax,%r14,8), %ymm27, %ymm20 + +// CHECK: vpaddsw 4064(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xed,0x62,0x7f] + vpaddsw 4064(%rdx), %ymm27, %ymm20 + +// CHECK: vpaddsw 4096(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xed,0xa2,0x00,0x10,0x00,0x00] + vpaddsw 4096(%rdx), %ymm27, %ymm20 + +// CHECK: vpaddsw -4096(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xed,0x62,0x80] + vpaddsw -4096(%rdx), %ymm27, %ymm20 + +// CHECK: vpaddsw -4128(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xed,0xa2,0xe0,0xef,0xff,0xff] + vpaddsw -4128(%rdx), %ymm27, %ymm20 + +// CHECK: vpaddusb %xmm19, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x21,0x55,0x00,0xdc,0xcb] + vpaddusb %xmm19, %xmm21, %xmm25 + +// CHECK: vpaddusb %xmm19, %xmm21, %xmm25 {%k7} +// CHECK: encoding: [0x62,0x21,0x55,0x07,0xdc,0xcb] + vpaddusb %xmm19, %xmm21, %xmm25 {%k7} + +// CHECK: vpaddusb %xmm19, %xmm21, %xmm25 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x55,0x87,0xdc,0xcb] + vpaddusb %xmm19, %xmm21, %xmm25 {%k7} {z} + +// CHECK: vpaddusb (%rcx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xdc,0x09] + vpaddusb (%rcx), %xmm21, %xmm25 + +// CHECK: vpaddusb 291(%rax,%r14,8), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x21,0x55,0x00,0xdc,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpaddusb 291(%rax,%r14,8), %xmm21, %xmm25 + +// CHECK: vpaddusb 2032(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xdc,0x4a,0x7f] + vpaddusb 2032(%rdx), %xmm21, %xmm25 + +// CHECK: vpaddusb 2048(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xdc,0x8a,0x00,0x08,0x00,0x00] + vpaddusb 2048(%rdx), %xmm21, %xmm25 + +// CHECK: vpaddusb -2048(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xdc,0x4a,0x80] + vpaddusb -2048(%rdx), %xmm21, %xmm25 + +// CHECK: vpaddusb -2064(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0xdc,0x8a,0xf0,0xf7,0xff,0xff] + vpaddusb -2064(%rdx), %xmm21, %xmm25 + +// CHECK: vpaddusb %ymm25, %ymm21, %ymm21 +// CHECK: encoding: [0x62,0x81,0x55,0x20,0xdc,0xe9] + vpaddusb %ymm25, %ymm21, %ymm21 + +// CHECK: vpaddusb %ymm25, %ymm21, %ymm21 {%k1} +// CHECK: encoding: [0x62,0x81,0x55,0x21,0xdc,0xe9] + vpaddusb %ymm25, %ymm21, %ymm21 {%k1} + +// CHECK: vpaddusb %ymm25, %ymm21, %ymm21 {%k1} {z} +// CHECK: encoding: [0x62,0x81,0x55,0xa1,0xdc,0xe9] + vpaddusb %ymm25, %ymm21, %ymm21 {%k1} {z} + +// CHECK: vpaddusb (%rcx), %ymm21, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xdc,0x29] + vpaddusb (%rcx), %ymm21, %ymm21 + +// CHECK: vpaddusb 291(%rax,%r14,8), %ymm21, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x55,0x20,0xdc,0xac,0xf0,0x23,0x01,0x00,0x00] + vpaddusb 291(%rax,%r14,8), %ymm21, %ymm21 + +// CHECK: vpaddusb 4064(%rdx), %ymm21, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xdc,0x6a,0x7f] + vpaddusb 4064(%rdx), %ymm21, %ymm21 + +// CHECK: vpaddusb 4096(%rdx), %ymm21, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xdc,0xaa,0x00,0x10,0x00,0x00] + vpaddusb 4096(%rdx), %ymm21, %ymm21 + +// CHECK: vpaddusb -4096(%rdx), %ymm21, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xdc,0x6a,0x80] + vpaddusb -4096(%rdx), %ymm21, %ymm21 + +// CHECK: vpaddusb -4128(%rdx), %ymm21, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x55,0x20,0xdc,0xaa,0xe0,0xef,0xff,0xff] + vpaddusb -4128(%rdx), %ymm21, %ymm21 + +// CHECK: vpaddusw %xmm26, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x01,0x75,0x00,0xdd,0xc2] + vpaddusw %xmm26, %xmm17, %xmm24 + +// CHECK: vpaddusw %xmm26, %xmm17, %xmm24 {%k6} +// CHECK: encoding: [0x62,0x01,0x75,0x06,0xdd,0xc2] + vpaddusw %xmm26, %xmm17, %xmm24 {%k6} + +// CHECK: vpaddusw %xmm26, %xmm17, %xmm24 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x75,0x86,0xdd,0xc2] + vpaddusw %xmm26, %xmm17, %xmm24 {%k6} {z} + +// CHECK: vpaddusw (%rcx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xdd,0x01] + vpaddusw (%rcx), %xmm17, %xmm24 + +// CHECK: vpaddusw 291(%rax,%r14,8), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x21,0x75,0x00,0xdd,0x84,0xf0,0x23,0x01,0x00,0x00] + vpaddusw 291(%rax,%r14,8), %xmm17, %xmm24 + +// CHECK: vpaddusw 2032(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xdd,0x42,0x7f] + vpaddusw 2032(%rdx), %xmm17, %xmm24 + +// CHECK: vpaddusw 2048(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xdd,0x82,0x00,0x08,0x00,0x00] + vpaddusw 2048(%rdx), %xmm17, %xmm24 + +// CHECK: vpaddusw -2048(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xdd,0x42,0x80] + vpaddusw -2048(%rdx), %xmm17, %xmm24 + +// CHECK: vpaddusw -2064(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xdd,0x82,0xf0,0xf7,0xff,0xff] + vpaddusw -2064(%rdx), %xmm17, %xmm24 + +// CHECK: vpaddusw %ymm22, %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xdd,0xf6] + vpaddusw %ymm22, %ymm27, %ymm22 + +// CHECK: vpaddusw %ymm22, %ymm27, %ymm22 {%k5} +// CHECK: encoding: [0x62,0xa1,0x25,0x25,0xdd,0xf6] + vpaddusw %ymm22, %ymm27, %ymm22 {%k5} + +// CHECK: vpaddusw %ymm22, %ymm27, %ymm22 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x25,0xa5,0xdd,0xf6] + vpaddusw %ymm22, %ymm27, %ymm22 {%k5} {z} + +// CHECK: vpaddusw (%rcx), %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xdd,0x31] + vpaddusw (%rcx), %ymm27, %ymm22 + +// CHECK: vpaddusw 291(%rax,%r14,8), %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xdd,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpaddusw 291(%rax,%r14,8), %ymm27, %ymm22 + +// CHECK: vpaddusw 4064(%rdx), %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xdd,0x72,0x7f] + vpaddusw 4064(%rdx), %ymm27, %ymm22 + +// CHECK: vpaddusw 4096(%rdx), %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xdd,0xb2,0x00,0x10,0x00,0x00] + vpaddusw 4096(%rdx), %ymm27, %ymm22 + +// CHECK: vpaddusw -4096(%rdx), %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xdd,0x72,0x80] + vpaddusw -4096(%rdx), %ymm27, %ymm22 + +// CHECK: vpaddusw -4128(%rdx), %ymm27, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xdd,0xb2,0xe0,0xef,0xff,0xff] + vpaddusw -4128(%rdx), %ymm27, %ymm22 + +// CHECK: vpsubsb %xmm19, %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x21,0x65,0x00,0xe8,0xe3] + vpsubsb %xmm19, %xmm19, %xmm28 + +// CHECK: vpsubsb %xmm19, %xmm19, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x21,0x65,0x04,0xe8,0xe3] + vpsubsb %xmm19, %xmm19, %xmm28 {%k4} + +// CHECK: vpsubsb %xmm19, %xmm19, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x65,0x84,0xe8,0xe3] + vpsubsb %xmm19, %xmm19, %xmm28 {%k4} {z} + +// CHECK: vpsubsb (%rcx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xe8,0x21] + vpsubsb (%rcx), %xmm19, %xmm28 + +// CHECK: vpsubsb 291(%rax,%r14,8), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x21,0x65,0x00,0xe8,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpsubsb 291(%rax,%r14,8), %xmm19, %xmm28 + +// CHECK: vpsubsb 2032(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xe8,0x62,0x7f] + vpsubsb 2032(%rdx), %xmm19, %xmm28 + +// CHECK: vpsubsb 2048(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xe8,0xa2,0x00,0x08,0x00,0x00] + vpsubsb 2048(%rdx), %xmm19, %xmm28 + +// CHECK: vpsubsb -2048(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xe8,0x62,0x80] + vpsubsb -2048(%rdx), %xmm19, %xmm28 + +// CHECK: vpsubsb -2064(%rdx), %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0xe8,0xa2,0xf0,0xf7,0xff,0xff] + vpsubsb -2064(%rdx), %xmm19, %xmm28 + +// CHECK: vpsubsb %ymm18, %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x45,0x20,0xe8,0xd2] + vpsubsb %ymm18, %ymm23, %ymm18 + +// CHECK: vpsubsb %ymm18, %ymm23, %ymm18 {%k6} +// CHECK: encoding: [0x62,0xa1,0x45,0x26,0xe8,0xd2] + vpsubsb %ymm18, %ymm23, %ymm18 {%k6} + +// CHECK: vpsubsb %ymm18, %ymm23, %ymm18 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x45,0xa6,0xe8,0xd2] + vpsubsb %ymm18, %ymm23, %ymm18 {%k6} {z} + +// CHECK: vpsubsb (%rcx), %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe8,0x11] + vpsubsb (%rcx), %ymm23, %ymm18 + +// CHECK: vpsubsb 291(%rax,%r14,8), %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x45,0x20,0xe8,0x94,0xf0,0x23,0x01,0x00,0x00] + vpsubsb 291(%rax,%r14,8), %ymm23, %ymm18 + +// CHECK: vpsubsb 4064(%rdx), %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe8,0x52,0x7f] + vpsubsb 4064(%rdx), %ymm23, %ymm18 + +// CHECK: vpsubsb 4096(%rdx), %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe8,0x92,0x00,0x10,0x00,0x00] + vpsubsb 4096(%rdx), %ymm23, %ymm18 + +// CHECK: vpsubsb -4096(%rdx), %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe8,0x52,0x80] + vpsubsb -4096(%rdx), %ymm23, %ymm18 + +// CHECK: vpsubsb -4128(%rdx), %ymm23, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe8,0x92,0xe0,0xef,0xff,0xff] + vpsubsb -4128(%rdx), %ymm23, %ymm18 + +// CHECK: vpsubsw %xmm19, %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x21,0x3d,0x00,0xe9,0xd3] + vpsubsw %xmm19, %xmm24, %xmm26 + +// CHECK: vpsubsw %xmm19, %xmm24, %xmm26 {%k7} +// CHECK: encoding: [0x62,0x21,0x3d,0x07,0xe9,0xd3] + vpsubsw %xmm19, %xmm24, %xmm26 {%k7} + +// CHECK: vpsubsw %xmm19, %xmm24, %xmm26 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x3d,0x87,0xe9,0xd3] + vpsubsw %xmm19, %xmm24, %xmm26 {%k7} {z} + +// CHECK: vpsubsw (%rcx), %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0xe9,0x11] + vpsubsw (%rcx), %xmm24, %xmm26 + +// CHECK: vpsubsw 291(%rax,%r14,8), %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x21,0x3d,0x00,0xe9,0x94,0xf0,0x23,0x01,0x00,0x00] + vpsubsw 291(%rax,%r14,8), %xmm24, %xmm26 + +// CHECK: vpsubsw 2032(%rdx), %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0xe9,0x52,0x7f] + vpsubsw 2032(%rdx), %xmm24, %xmm26 + +// CHECK: vpsubsw 2048(%rdx), %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0xe9,0x92,0x00,0x08,0x00,0x00] + vpsubsw 2048(%rdx), %xmm24, %xmm26 + +// CHECK: vpsubsw -2048(%rdx), %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0xe9,0x52,0x80] + vpsubsw -2048(%rdx), %xmm24, %xmm26 + +// CHECK: vpsubsw -2064(%rdx), %xmm24, %xmm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0xe9,0x92,0xf0,0xf7,0xff,0xff] + vpsubsw -2064(%rdx), %xmm24, %xmm26 + +// CHECK: vpsubsw %ymm19, %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xe9,0xe3] + vpsubsw %ymm19, %ymm27, %ymm20 + +// CHECK: vpsubsw %ymm19, %ymm27, %ymm20 {%k3} +// CHECK: encoding: [0x62,0xa1,0x25,0x23,0xe9,0xe3] + vpsubsw %ymm19, %ymm27, %ymm20 {%k3} + +// CHECK: vpsubsw %ymm19, %ymm27, %ymm20 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x25,0xa3,0xe9,0xe3] + vpsubsw %ymm19, %ymm27, %ymm20 {%k3} {z} + +// CHECK: vpsubsw (%rcx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xe9,0x21] + vpsubsw (%rcx), %ymm27, %ymm20 + +// CHECK: vpsubsw 291(%rax,%r14,8), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xe9,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpsubsw 291(%rax,%r14,8), %ymm27, %ymm20 + +// CHECK: vpsubsw 4064(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xe9,0x62,0x7f] + vpsubsw 4064(%rdx), %ymm27, %ymm20 + +// CHECK: vpsubsw 4096(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xe9,0xa2,0x00,0x10,0x00,0x00] + vpsubsw 4096(%rdx), %ymm27, %ymm20 + +// CHECK: vpsubsw -4096(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xe9,0x62,0x80] + vpsubsw -4096(%rdx), %ymm27, %ymm20 + +// CHECK: vpsubsw -4128(%rdx), %ymm27, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xe9,0xa2,0xe0,0xef,0xff,0xff] + vpsubsw -4128(%rdx), %ymm27, %ymm20 + +// CHECK: vpsubusb %xmm25, %xmm19, %xmm20 +// CHECK: encoding: [0x62,0x81,0x65,0x00,0xd8,0xe1] + vpsubusb %xmm25, %xmm19, %xmm20 + +// CHECK: vpsubusb %xmm25, %xmm19, %xmm20 {%k6} +// CHECK: encoding: [0x62,0x81,0x65,0x06,0xd8,0xe1] + vpsubusb %xmm25, %xmm19, %xmm20 {%k6} + +// CHECK: vpsubusb %xmm25, %xmm19, %xmm20 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0x65,0x86,0xd8,0xe1] + vpsubusb %xmm25, %xmm19, %xmm20 {%k6} {z} + +// CHECK: vpsubusb (%rcx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xd8,0x21] + vpsubusb (%rcx), %xmm19, %xmm20 + +// CHECK: vpsubusb 291(%rax,%r14,8), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x65,0x00,0xd8,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpsubusb 291(%rax,%r14,8), %xmm19, %xmm20 + +// CHECK: vpsubusb 2032(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xd8,0x62,0x7f] + vpsubusb 2032(%rdx), %xmm19, %xmm20 + +// CHECK: vpsubusb 2048(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xd8,0xa2,0x00,0x08,0x00,0x00] + vpsubusb 2048(%rdx), %xmm19, %xmm20 + +// CHECK: vpsubusb -2048(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xd8,0x62,0x80] + vpsubusb -2048(%rdx), %xmm19, %xmm20 + +// CHECK: vpsubusb -2064(%rdx), %xmm19, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x00,0xd8,0xa2,0xf0,0xf7,0xff,0xff] + vpsubusb -2064(%rdx), %xmm19, %xmm20 + +// CHECK: vpsubusb %ymm19, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x65,0x20,0xd8,0xeb] + vpsubusb %ymm19, %ymm19, %ymm21 + +// CHECK: vpsubusb %ymm19, %ymm19, %ymm21 {%k3} +// CHECK: encoding: [0x62,0xa1,0x65,0x23,0xd8,0xeb] + vpsubusb %ymm19, %ymm19, %ymm21 {%k3} + +// CHECK: vpsubusb %ymm19, %ymm19, %ymm21 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x65,0xa3,0xd8,0xeb] + vpsubusb %ymm19, %ymm19, %ymm21 {%k3} {z} + +// CHECK: vpsubusb (%rcx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xd8,0x29] + vpsubusb (%rcx), %ymm19, %ymm21 + +// CHECK: vpsubusb 291(%rax,%r14,8), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x65,0x20,0xd8,0xac,0xf0,0x23,0x01,0x00,0x00] + vpsubusb 291(%rax,%r14,8), %ymm19, %ymm21 + +// CHECK: vpsubusb 4064(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xd8,0x6a,0x7f] + vpsubusb 4064(%rdx), %ymm19, %ymm21 + +// CHECK: vpsubusb 4096(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xd8,0xaa,0x00,0x10,0x00,0x00] + vpsubusb 4096(%rdx), %ymm19, %ymm21 + +// CHECK: vpsubusb -4096(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xd8,0x6a,0x80] + vpsubusb -4096(%rdx), %ymm19, %ymm21 + +// CHECK: vpsubusb -4128(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xd8,0xaa,0xe0,0xef,0xff,0xff] + vpsubusb -4128(%rdx), %ymm19, %ymm21 + +// CHECK: vpsubusw %xmm22, %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x5d,0x00,0xd9,0xfe] + vpsubusw %xmm22, %xmm20, %xmm23 + +// CHECK: vpsubusw %xmm22, %xmm20, %xmm23 {%k1} +// CHECK: encoding: [0x62,0xa1,0x5d,0x01,0xd9,0xfe] + vpsubusw %xmm22, %xmm20, %xmm23 {%k1} + +// CHECK: vpsubusw %xmm22, %xmm20, %xmm23 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x5d,0x81,0xd9,0xfe] + vpsubusw %xmm22, %xmm20, %xmm23 {%k1} {z} + +// CHECK: vpsubusw (%rcx), %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x5d,0x00,0xd9,0x39] + vpsubusw (%rcx), %xmm20, %xmm23 + +// CHECK: vpsubusw 291(%rax,%r14,8), %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x5d,0x00,0xd9,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpsubusw 291(%rax,%r14,8), %xmm20, %xmm23 + +// CHECK: vpsubusw 2032(%rdx), %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x5d,0x00,0xd9,0x7a,0x7f] + vpsubusw 2032(%rdx), %xmm20, %xmm23 + +// CHECK: vpsubusw 2048(%rdx), %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x5d,0x00,0xd9,0xba,0x00,0x08,0x00,0x00] + vpsubusw 2048(%rdx), %xmm20, %xmm23 + +// CHECK: vpsubusw -2048(%rdx), %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x5d,0x00,0xd9,0x7a,0x80] + vpsubusw -2048(%rdx), %xmm20, %xmm23 + +// CHECK: vpsubusw -2064(%rdx), %xmm20, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x5d,0x00,0xd9,0xba,0xf0,0xf7,0xff,0xff] + vpsubusw -2064(%rdx), %xmm20, %xmm23 + +// CHECK: vpsubusw %ymm28, %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x01,0x75,0x20,0xd9,0xdc] + vpsubusw %ymm28, %ymm17, %ymm27 + +// CHECK: vpsubusw %ymm28, %ymm17, %ymm27 {%k3} +// CHECK: encoding: [0x62,0x01,0x75,0x23,0xd9,0xdc] + vpsubusw %ymm28, %ymm17, %ymm27 {%k3} + +// CHECK: vpsubusw %ymm28, %ymm17, %ymm27 {%k3} {z} +// CHECK: encoding: [0x62,0x01,0x75,0xa3,0xd9,0xdc] + vpsubusw %ymm28, %ymm17, %ymm27 {%k3} {z} + +// CHECK: vpsubusw (%rcx), %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x19] + vpsubusw (%rcx), %ymm17, %ymm27 + +// CHECK: vpsubusw 291(%rax,%r14,8), %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x21,0x75,0x20,0xd9,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpsubusw 291(%rax,%r14,8), %ymm17, %ymm27 + +// CHECK: vpsubusw 4064(%rdx), %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x5a,0x7f] + vpsubusw 4064(%rdx), %ymm17, %ymm27 + +// CHECK: vpsubusw 4096(%rdx), %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x9a,0x00,0x10,0x00,0x00] + vpsubusw 4096(%rdx), %ymm17, %ymm27 + +// CHECK: vpsubusw -4096(%rdx), %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x5a,0x80] + vpsubusw -4096(%rdx), %ymm17, %ymm27 + +// CHECK: vpsubusw -4128(%rdx), %ymm17, %ymm27 +// CHECK: encoding: [0x62,0x61,0x75,0x20,0xd9,0x9a,0xe0,0xef,0xff,0xff] + vpsubusw -4128(%rdx), %ymm17, %ymm27 + +// CHECK: vpaddsb %xmm18, %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x45,0x00,0xec,0xca] + vpaddsb %xmm18, %xmm23, %xmm17 + +// CHECK: vpaddsb %xmm18, %xmm23, %xmm17 {%k2} +// CHECK: encoding: [0x62,0xa1,0x45,0x02,0xec,0xca] + vpaddsb %xmm18, %xmm23, %xmm17 {%k2} + +// CHECK: vpaddsb %xmm18, %xmm23, %xmm17 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0x45,0x82,0xec,0xca] + vpaddsb %xmm18, %xmm23, %xmm17 {%k2} {z} + +// CHECK: vpaddsb (%rcx), %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xec,0x09] + vpaddsb (%rcx), %xmm23, %xmm17 + +// CHECK: vpaddsb 4660(%rax,%r14,8), %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x45,0x00,0xec,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpaddsb 4660(%rax,%r14,8), %xmm23, %xmm17 + +// CHECK: vpaddsb 2032(%rdx), %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xec,0x4a,0x7f] + vpaddsb 2032(%rdx), %xmm23, %xmm17 + +// CHECK: vpaddsb 2048(%rdx), %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xec,0x8a,0x00,0x08,0x00,0x00] + vpaddsb 2048(%rdx), %xmm23, %xmm17 + +// CHECK: vpaddsb -2048(%rdx), %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xec,0x4a,0x80] + vpaddsb -2048(%rdx), %xmm23, %xmm17 + +// CHECK: vpaddsb -2064(%rdx), %xmm23, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x45,0x00,0xec,0x8a,0xf0,0xf7,0xff,0xff] + vpaddsb -2064(%rdx), %xmm23, %xmm17 + +// CHECK: vpaddsb %ymm19, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0xec,0xe3] + vpaddsb %ymm19, %ymm26, %ymm20 + +// CHECK: vpaddsb %ymm19, %ymm26, %ymm20 {%k2} +// CHECK: encoding: [0x62,0xa1,0x2d,0x22,0xec,0xe3] + vpaddsb %ymm19, %ymm26, %ymm20 {%k2} + +// CHECK: vpaddsb %ymm19, %ymm26, %ymm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0x2d,0xa2,0xec,0xe3] + vpaddsb %ymm19, %ymm26, %ymm20 {%k2} {z} + +// CHECK: vpaddsb (%rcx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xec,0x21] + vpaddsb (%rcx), %ymm26, %ymm20 + +// CHECK: vpaddsb 4660(%rax,%r14,8), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0xec,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpaddsb 4660(%rax,%r14,8), %ymm26, %ymm20 + +// CHECK: vpaddsb 4064(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xec,0x62,0x7f] + vpaddsb 4064(%rdx), %ymm26, %ymm20 + +// CHECK: vpaddsb 4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xec,0xa2,0x00,0x10,0x00,0x00] + vpaddsb 4096(%rdx), %ymm26, %ymm20 + +// CHECK: vpaddsb -4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xec,0x62,0x80] + vpaddsb -4096(%rdx), %ymm26, %ymm20 + +// CHECK: vpaddsb -4128(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xec,0xa2,0xe0,0xef,0xff,0xff] + vpaddsb -4128(%rdx), %ymm26, %ymm20 + +// CHECK: vpaddsw %xmm26, %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x01,0x75,0x00,0xed,0xd2] + vpaddsw %xmm26, %xmm17, %xmm26 + +// CHECK: vpaddsw %xmm26, %xmm17, %xmm26 {%k6} +// CHECK: encoding: [0x62,0x01,0x75,0x06,0xed,0xd2] + vpaddsw %xmm26, %xmm17, %xmm26 {%k6} + +// CHECK: vpaddsw %xmm26, %xmm17, %xmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x75,0x86,0xed,0xd2] + vpaddsw %xmm26, %xmm17, %xmm26 {%k6} {z} + +// CHECK: vpaddsw (%rcx), %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xed,0x11] + vpaddsw (%rcx), %xmm17, %xmm26 + +// CHECK: vpaddsw 4660(%rax,%r14,8), %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x21,0x75,0x00,0xed,0x94,0xf0,0x34,0x12,0x00,0x00] + vpaddsw 4660(%rax,%r14,8), %xmm17, %xmm26 + +// CHECK: vpaddsw 2032(%rdx), %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xed,0x52,0x7f] + vpaddsw 2032(%rdx), %xmm17, %xmm26 + +// CHECK: vpaddsw 2048(%rdx), %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xed,0x92,0x00,0x08,0x00,0x00] + vpaddsw 2048(%rdx), %xmm17, %xmm26 + +// CHECK: vpaddsw -2048(%rdx), %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xed,0x52,0x80] + vpaddsw -2048(%rdx), %xmm17, %xmm26 + +// CHECK: vpaddsw -2064(%rdx), %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0xed,0x92,0xf0,0xf7,0xff,0xff] + vpaddsw -2064(%rdx), %xmm17, %xmm26 + +// CHECK: vpaddsw %ymm23, %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x21,0x25,0x20,0xed,0xe7] + vpaddsw %ymm23, %ymm27, %ymm28 + +// CHECK: vpaddsw %ymm23, %ymm27, %ymm28 {%k4} +// CHECK: encoding: [0x62,0x21,0x25,0x24,0xed,0xe7] + vpaddsw %ymm23, %ymm27, %ymm28 {%k4} + +// CHECK: vpaddsw %ymm23, %ymm27, %ymm28 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x25,0xa4,0xed,0xe7] + vpaddsw %ymm23, %ymm27, %ymm28 {%k4} {z} + +// CHECK: vpaddsw (%rcx), %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x61,0x25,0x20,0xed,0x21] + vpaddsw (%rcx), %ymm27, %ymm28 + +// CHECK: vpaddsw 4660(%rax,%r14,8), %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x21,0x25,0x20,0xed,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpaddsw 4660(%rax,%r14,8), %ymm27, %ymm28 + +// CHECK: vpaddsw 4064(%rdx), %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x61,0x25,0x20,0xed,0x62,0x7f] + vpaddsw 4064(%rdx), %ymm27, %ymm28 + +// CHECK: vpaddsw 4096(%rdx), %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x61,0x25,0x20,0xed,0xa2,0x00,0x10,0x00,0x00] + vpaddsw 4096(%rdx), %ymm27, %ymm28 + +// CHECK: vpaddsw -4096(%rdx), %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x61,0x25,0x20,0xed,0x62,0x80] + vpaddsw -4096(%rdx), %ymm27, %ymm28 + +// CHECK: vpaddsw -4128(%rdx), %ymm27, %ymm28 +// CHECK: encoding: [0x62,0x61,0x25,0x20,0xed,0xa2,0xe0,0xef,0xff,0xff] + vpaddsw -4128(%rdx), %ymm27, %ymm28 + +// CHECK: vpaddusb %xmm18, %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x55,0x00,0xdc,0xca] + vpaddusb %xmm18, %xmm21, %xmm17 + +// CHECK: vpaddusb %xmm18, %xmm21, %xmm17 {%k4} +// CHECK: encoding: [0x62,0xa1,0x55,0x04,0xdc,0xca] + vpaddusb %xmm18, %xmm21, %xmm17 {%k4} + +// CHECK: vpaddusb %xmm18, %xmm21, %xmm17 {%k4} {z} +// CHECK: encoding: [0x62,0xa1,0x55,0x84,0xdc,0xca] + vpaddusb %xmm18, %xmm21, %xmm17 {%k4} {z} + +// CHECK: vpaddusb (%rcx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0xdc,0x09] + vpaddusb (%rcx), %xmm21, %xmm17 + +// CHECK: vpaddusb 4660(%rax,%r14,8), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x55,0x00,0xdc,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpaddusb 4660(%rax,%r14,8), %xmm21, %xmm17 + +// CHECK: vpaddusb 2032(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0xdc,0x4a,0x7f] + vpaddusb 2032(%rdx), %xmm21, %xmm17 + +// CHECK: vpaddusb 2048(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0xdc,0x8a,0x00,0x08,0x00,0x00] + vpaddusb 2048(%rdx), %xmm21, %xmm17 + +// CHECK: vpaddusb -2048(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0xdc,0x4a,0x80] + vpaddusb -2048(%rdx), %xmm21, %xmm17 + +// CHECK: vpaddusb -2064(%rdx), %xmm21, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0xdc,0x8a,0xf0,0xf7,0xff,0xff] + vpaddusb -2064(%rdx), %xmm21, %xmm17 + +// CHECK: vpaddusb %ymm24, %ymm20, %ymm21 +// CHECK: encoding: [0x62,0x81,0x5d,0x20,0xdc,0xe8] + vpaddusb %ymm24, %ymm20, %ymm21 + +// CHECK: vpaddusb %ymm24, %ymm20, %ymm21 {%k2} +// CHECK: encoding: [0x62,0x81,0x5d,0x22,0xdc,0xe8] + vpaddusb %ymm24, %ymm20, %ymm21 {%k2} + +// CHECK: vpaddusb %ymm24, %ymm20, %ymm21 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x5d,0xa2,0xdc,0xe8] + vpaddusb %ymm24, %ymm20, %ymm21 {%k2} {z} + +// CHECK: vpaddusb (%rcx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xdc,0x29] + vpaddusb (%rcx), %ymm20, %ymm21 + +// CHECK: vpaddusb 4660(%rax,%r14,8), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xdc,0xac,0xf0,0x34,0x12,0x00,0x00] + vpaddusb 4660(%rax,%r14,8), %ymm20, %ymm21 + +// CHECK: vpaddusb 4064(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xdc,0x6a,0x7f] + vpaddusb 4064(%rdx), %ymm20, %ymm21 + +// CHECK: vpaddusb 4096(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xdc,0xaa,0x00,0x10,0x00,0x00] + vpaddusb 4096(%rdx), %ymm20, %ymm21 + +// CHECK: vpaddusb -4096(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xdc,0x6a,0x80] + vpaddusb -4096(%rdx), %ymm20, %ymm21 + +// CHECK: vpaddusb -4128(%rdx), %ymm20, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xdc,0xaa,0xe0,0xef,0xff,0xff] + vpaddusb -4128(%rdx), %ymm20, %ymm21 + +// CHECK: vpaddusw %xmm28, %xmm24, %xmm22 +// CHECK: encoding: [0x62,0x81,0x3d,0x00,0xdd,0xf4] + vpaddusw %xmm28, %xmm24, %xmm22 + +// CHECK: vpaddusw %xmm28, %xmm24, %xmm22 {%k6} +// CHECK: encoding: [0x62,0x81,0x3d,0x06,0xdd,0xf4] + vpaddusw %xmm28, %xmm24, %xmm22 {%k6} + +// CHECK: vpaddusw %xmm28, %xmm24, %xmm22 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0x3d,0x86,0xdd,0xf4] + vpaddusw %xmm28, %xmm24, %xmm22 {%k6} {z} + +// CHECK: vpaddusw (%rcx), %xmm24, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xdd,0x31] + vpaddusw (%rcx), %xmm24, %xmm22 + +// CHECK: vpaddusw 4660(%rax,%r14,8), %xmm24, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xdd,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpaddusw 4660(%rax,%r14,8), %xmm24, %xmm22 + +// CHECK: vpaddusw 2032(%rdx), %xmm24, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xdd,0x72,0x7f] + vpaddusw 2032(%rdx), %xmm24, %xmm22 + +// CHECK: vpaddusw 2048(%rdx), %xmm24, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xdd,0xb2,0x00,0x08,0x00,0x00] + vpaddusw 2048(%rdx), %xmm24, %xmm22 + +// CHECK: vpaddusw -2048(%rdx), %xmm24, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xdd,0x72,0x80] + vpaddusw -2048(%rdx), %xmm24, %xmm22 + +// CHECK: vpaddusw -2064(%rdx), %xmm24, %xmm22 +// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xdd,0xb2,0xf0,0xf7,0xff,0xff] + vpaddusw -2064(%rdx), %xmm24, %xmm22 + +// CHECK: vpaddusw %ymm25, %ymm19, %ymm21 +// CHECK: encoding: [0x62,0x81,0x65,0x20,0xdd,0xe9] + vpaddusw %ymm25, %ymm19, %ymm21 + +// CHECK: vpaddusw %ymm25, %ymm19, %ymm21 {%k2} +// CHECK: encoding: [0x62,0x81,0x65,0x22,0xdd,0xe9] + vpaddusw %ymm25, %ymm19, %ymm21 {%k2} + +// CHECK: vpaddusw %ymm25, %ymm19, %ymm21 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x65,0xa2,0xdd,0xe9] + vpaddusw %ymm25, %ymm19, %ymm21 {%k2} {z} + +// CHECK: vpaddusw (%rcx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xdd,0x29] + vpaddusw (%rcx), %ymm19, %ymm21 + +// CHECK: vpaddusw 4660(%rax,%r14,8), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x65,0x20,0xdd,0xac,0xf0,0x34,0x12,0x00,0x00] + vpaddusw 4660(%rax,%r14,8), %ymm19, %ymm21 + +// CHECK: vpaddusw 4064(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xdd,0x6a,0x7f] + vpaddusw 4064(%rdx), %ymm19, %ymm21 + +// CHECK: vpaddusw 4096(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xdd,0xaa,0x00,0x10,0x00,0x00] + vpaddusw 4096(%rdx), %ymm19, %ymm21 + +// CHECK: vpaddusw -4096(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xdd,0x6a,0x80] + vpaddusw -4096(%rdx), %ymm19, %ymm21 + +// CHECK: vpaddusw -4128(%rdx), %ymm19, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0xdd,0xaa,0xe0,0xef,0xff,0xff] + vpaddusw -4128(%rdx), %ymm19, %ymm21 + +// CHECK: vpsubsb %xmm22, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x00,0xe8,0xe6] + vpsubsb %xmm22, %xmm26, %xmm20 + +// CHECK: vpsubsb %xmm22, %xmm26, %xmm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0x2d,0x01,0xe8,0xe6] + vpsubsb %xmm22, %xmm26, %xmm20 {%k1} + +// CHECK: vpsubsb %xmm22, %xmm26, %xmm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x2d,0x81,0xe8,0xe6] + vpsubsb %xmm22, %xmm26, %xmm20 {%k1} {z} + +// CHECK: vpsubsb (%rcx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xe8,0x21] + vpsubsb (%rcx), %xmm26, %xmm20 + +// CHECK: vpsubsb 4660(%rax,%r14,8), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x00,0xe8,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpsubsb 4660(%rax,%r14,8), %xmm26, %xmm20 + +// CHECK: vpsubsb 2032(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xe8,0x62,0x7f] + vpsubsb 2032(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubsb 2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xe8,0xa2,0x00,0x08,0x00,0x00] + vpsubsb 2048(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubsb -2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xe8,0x62,0x80] + vpsubsb -2048(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubsb -2064(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x00,0xe8,0xa2,0xf0,0xf7,0xff,0xff] + vpsubsb -2064(%rdx), %xmm26, %xmm20 + +// CHECK: vpsubsb %ymm26, %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x01,0x6d,0x20,0xe8,0xca] + vpsubsb %ymm26, %ymm18, %ymm25 + +// CHECK: vpsubsb %ymm26, %ymm18, %ymm25 {%k6} +// CHECK: encoding: [0x62,0x01,0x6d,0x26,0xe8,0xca] + vpsubsb %ymm26, %ymm18, %ymm25 {%k6} + +// CHECK: vpsubsb %ymm26, %ymm18, %ymm25 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x6d,0xa6,0xe8,0xca] + vpsubsb %ymm26, %ymm18, %ymm25 {%k6} {z} + +// CHECK: vpsubsb (%rcx), %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x61,0x6d,0x20,0xe8,0x09] + vpsubsb (%rcx), %ymm18, %ymm25 + +// CHECK: vpsubsb 4660(%rax,%r14,8), %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x21,0x6d,0x20,0xe8,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpsubsb 4660(%rax,%r14,8), %ymm18, %ymm25 + +// CHECK: vpsubsb 4064(%rdx), %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x61,0x6d,0x20,0xe8,0x4a,0x7f] + vpsubsb 4064(%rdx), %ymm18, %ymm25 + +// CHECK: vpsubsb 4096(%rdx), %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x61,0x6d,0x20,0xe8,0x8a,0x00,0x10,0x00,0x00] + vpsubsb 4096(%rdx), %ymm18, %ymm25 + +// CHECK: vpsubsb -4096(%rdx), %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x61,0x6d,0x20,0xe8,0x4a,0x80] + vpsubsb -4096(%rdx), %ymm18, %ymm25 + +// CHECK: vpsubsb -4128(%rdx), %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x61,0x6d,0x20,0xe8,0x8a,0xe0,0xef,0xff,0xff] + vpsubsb -4128(%rdx), %ymm18, %ymm25 + +// CHECK: vpsubsw %xmm28, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x01,0x25,0x00,0xe9,0xd4] + vpsubsw %xmm28, %xmm27, %xmm26 + +// CHECK: vpsubsw %xmm28, %xmm27, %xmm26 {%k5} +// CHECK: encoding: [0x62,0x01,0x25,0x05,0xe9,0xd4] + vpsubsw %xmm28, %xmm27, %xmm26 {%k5} + +// CHECK: vpsubsw %xmm28, %xmm27, %xmm26 {%k5} {z} +// CHECK: encoding: [0x62,0x01,0x25,0x85,0xe9,0xd4] + vpsubsw %xmm28, %xmm27, %xmm26 {%k5} {z} + +// CHECK: vpsubsw (%rcx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xe9,0x11] + vpsubsw (%rcx), %xmm27, %xmm26 + +// CHECK: vpsubsw 4660(%rax,%r14,8), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x21,0x25,0x00,0xe9,0x94,0xf0,0x34,0x12,0x00,0x00] + vpsubsw 4660(%rax,%r14,8), %xmm27, %xmm26 + +// CHECK: vpsubsw 2032(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xe9,0x52,0x7f] + vpsubsw 2032(%rdx), %xmm27, %xmm26 + +// CHECK: vpsubsw 2048(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xe9,0x92,0x00,0x08,0x00,0x00] + vpsubsw 2048(%rdx), %xmm27, %xmm26 + +// CHECK: vpsubsw -2048(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xe9,0x52,0x80] + vpsubsw -2048(%rdx), %xmm27, %xmm26 + +// CHECK: vpsubsw -2064(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0xe9,0x92,0xf0,0xf7,0xff,0xff] + vpsubsw -2064(%rdx), %xmm27, %xmm26 + +// CHECK: vpsubsw %ymm21, %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x35,0x20,0xe9,0xf5] + vpsubsw %ymm21, %ymm25, %ymm22 + +// CHECK: vpsubsw %ymm21, %ymm25, %ymm22 {%k3} +// CHECK: encoding: [0x62,0xa1,0x35,0x23,0xe9,0xf5] + vpsubsw %ymm21, %ymm25, %ymm22 {%k3} + +// CHECK: vpsubsw %ymm21, %ymm25, %ymm22 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x35,0xa3,0xe9,0xf5] + vpsubsw %ymm21, %ymm25, %ymm22 {%k3} {z} + +// CHECK: vpsubsw (%rcx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe9,0x31] + vpsubsw (%rcx), %ymm25, %ymm22 + +// CHECK: vpsubsw 4660(%rax,%r14,8), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xa1,0x35,0x20,0xe9,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpsubsw 4660(%rax,%r14,8), %ymm25, %ymm22 + +// CHECK: vpsubsw 4064(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe9,0x72,0x7f] + vpsubsw 4064(%rdx), %ymm25, %ymm22 + +// CHECK: vpsubsw 4096(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe9,0xb2,0x00,0x10,0x00,0x00] + vpsubsw 4096(%rdx), %ymm25, %ymm22 + +// CHECK: vpsubsw -4096(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe9,0x72,0x80] + vpsubsw -4096(%rdx), %ymm25, %ymm22 + +// CHECK: vpsubsw -4128(%rdx), %ymm25, %ymm22 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0xe9,0xb2,0xe0,0xef,0xff,0xff] + vpsubsw -4128(%rdx), %ymm25, %ymm22 + +// CHECK: vpsubusb %xmm26, %xmm27, %xmm18 +// CHECK: encoding: [0x62,0x81,0x25,0x00,0xd8,0xd2] + vpsubusb %xmm26, %xmm27, %xmm18 + +// CHECK: vpsubusb %xmm26, %xmm27, %xmm18 {%k2} +// CHECK: encoding: [0x62,0x81,0x25,0x02,0xd8,0xd2] + vpsubusb %xmm26, %xmm27, %xmm18 {%k2} + +// CHECK: vpsubusb %xmm26, %xmm27, %xmm18 {%k2} {z} +// CHECK: encoding: [0x62,0x81,0x25,0x82,0xd8,0xd2] + vpsubusb %xmm26, %xmm27, %xmm18 {%k2} {z} + +// CHECK: vpsubusb (%rcx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x11] + vpsubusb (%rcx), %xmm27, %xmm18 + +// CHECK: vpsubusb 4660(%rax,%r14,8), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xd8,0x94,0xf0,0x34,0x12,0x00,0x00] + vpsubusb 4660(%rax,%r14,8), %xmm27, %xmm18 + +// CHECK: vpsubusb 2032(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x52,0x7f] + vpsubusb 2032(%rdx), %xmm27, %xmm18 + +// CHECK: vpsubusb 2048(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x92,0x00,0x08,0x00,0x00] + vpsubusb 2048(%rdx), %xmm27, %xmm18 + +// CHECK: vpsubusb -2048(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x52,0x80] + vpsubusb -2048(%rdx), %xmm27, %xmm18 + +// CHECK: vpsubusb -2064(%rdx), %xmm27, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd8,0x92,0xf0,0xf7,0xff,0xff] + vpsubusb -2064(%rdx), %xmm27, %xmm18 + +// CHECK: vpsubusb %ymm19, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x21,0x65,0x20,0xd8,0xcb] + vpsubusb %ymm19, %ymm19, %ymm25 + +// CHECK: vpsubusb %ymm19, %ymm19, %ymm25 {%k6} +// CHECK: encoding: [0x62,0x21,0x65,0x26,0xd8,0xcb] + vpsubusb %ymm19, %ymm19, %ymm25 {%k6} + +// CHECK: vpsubusb %ymm19, %ymm19, %ymm25 {%k6} {z} +// CHECK: encoding: [0x62,0x21,0x65,0xa6,0xd8,0xcb] + vpsubusb %ymm19, %ymm19, %ymm25 {%k6} {z} + +// CHECK: vpsubusb (%rcx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0xd8,0x09] + vpsubusb (%rcx), %ymm19, %ymm25 + +// CHECK: vpsubusb 4660(%rax,%r14,8), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x21,0x65,0x20,0xd8,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpsubusb 4660(%rax,%r14,8), %ymm19, %ymm25 + +// CHECK: vpsubusb 4064(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0xd8,0x4a,0x7f] + vpsubusb 4064(%rdx), %ymm19, %ymm25 + +// CHECK: vpsubusb 4096(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0xd8,0x8a,0x00,0x10,0x00,0x00] + vpsubusb 4096(%rdx), %ymm19, %ymm25 + +// CHECK: vpsubusb -4096(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0xd8,0x4a,0x80] + vpsubusb -4096(%rdx), %ymm19, %ymm25 + +// CHECK: vpsubusb -4128(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0xd8,0x8a,0xe0,0xef,0xff,0xff] + vpsubusb -4128(%rdx), %ymm19, %ymm25 + +// CHECK: vpsubusw %xmm25, %xmm27, %xmm21 +// CHECK: encoding: [0x62,0x81,0x25,0x00,0xd9,0xe9] + vpsubusw %xmm25, %xmm27, %xmm21 + +// CHECK: vpsubusw %xmm25, %xmm27, %xmm21 {%k5} +// CHECK: encoding: [0x62,0x81,0x25,0x05,0xd9,0xe9] + vpsubusw %xmm25, %xmm27, %xmm21 {%k5} + +// CHECK: vpsubusw %xmm25, %xmm27, %xmm21 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x25,0x85,0xd9,0xe9] + vpsubusw %xmm25, %xmm27, %xmm21 {%k5} {z} + +// CHECK: vpsubusw (%rcx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd9,0x29] + vpsubusw (%rcx), %xmm27, %xmm21 + +// CHECK: vpsubusw 4660(%rax,%r14,8), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x25,0x00,0xd9,0xac,0xf0,0x34,0x12,0x00,0x00] + vpsubusw 4660(%rax,%r14,8), %xmm27, %xmm21 + +// CHECK: vpsubusw 2032(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd9,0x6a,0x7f] + vpsubusw 2032(%rdx), %xmm27, %xmm21 + +// CHECK: vpsubusw 2048(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd9,0xaa,0x00,0x08,0x00,0x00] + vpsubusw 2048(%rdx), %xmm27, %xmm21 + +// CHECK: vpsubusw -2048(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd9,0x6a,0x80] + vpsubusw -2048(%rdx), %xmm27, %xmm21 + +// CHECK: vpsubusw -2064(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x25,0x00,0xd9,0xaa,0xf0,0xf7,0xff,0xff] + vpsubusw -2064(%rdx), %xmm27, %xmm21 + +// CHECK: vpsubusw %ymm17, %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x21,0x35,0x20,0xd9,0xd9] + vpsubusw %ymm17, %ymm25, %ymm27 + +// CHECK: vpsubusw %ymm17, %ymm25, %ymm27 {%k4} +// CHECK: encoding: [0x62,0x21,0x35,0x24,0xd9,0xd9] + vpsubusw %ymm17, %ymm25, %ymm27 {%k4} + +// CHECK: vpsubusw %ymm17, %ymm25, %ymm27 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x35,0xa4,0xd9,0xd9] + vpsubusw %ymm17, %ymm25, %ymm27 {%k4} {z} + +// CHECK: vpsubusw (%rcx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x19] + vpsubusw (%rcx), %ymm25, %ymm27 + +// CHECK: vpsubusw 4660(%rax,%r14,8), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x21,0x35,0x20,0xd9,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpsubusw 4660(%rax,%r14,8), %ymm25, %ymm27 + +// CHECK: vpsubusw 4064(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x5a,0x7f] + vpsubusw 4064(%rdx), %ymm25, %ymm27 + +// CHECK: vpsubusw 4096(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x9a,0x00,0x10,0x00,0x00] + vpsubusw 4096(%rdx), %ymm25, %ymm27 + +// CHECK: vpsubusw -4096(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x5a,0x80] + vpsubusw -4096(%rdx), %ymm25, %ymm27 + +// CHECK: vpsubusw -4128(%rdx), %ymm25, %ymm27 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x9a,0xe0,0xef,0xff,0xff] + vpsubusw -4128(%rdx), %ymm25, %ymm27 -- 2.7.4