From 3880f2a36396bec311ba6f1e70021d1edd6fa614 Mon Sep 17 00:00:00 2001 From: Coby Tayree Date: Tue, 21 Nov 2017 10:04:28 +0000 Subject: [PATCH] [x86][icelake]VNNI Introducing Vector Neural Network Instructions, consisting of: vpdpbusd{s} vpdpwssd{s} Differential Revision: https://reviews.llvm.org/D40208 llvm-svn: 318746 --- llvm/include/llvm/IR/IntrinsicsX86.td | 103 +++ llvm/lib/Support/Host.cpp | 3 + llvm/lib/Target/X86/X86.td | 3 + llvm/lib/Target/X86/X86ISelLowering.cpp | 4 + llvm/lib/Target/X86/X86ISelLowering.h | 6 + llvm/lib/Target/X86/X86InstrAVX512.td | 44 ++ llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 9 + llvm/lib/Target/X86/X86InstrInfo.td | 1 + llvm/lib/Target/X86/X86IntrinsicsInfo.h | 26 + llvm/lib/Target/X86/X86Subtarget.cpp | 1 + llvm/lib/Target/X86/X86Subtarget.h | 4 + llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll | 195 +++++ llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll | 98 +++ llvm/test/MC/X86/avx512vl_vnni-encoding.s | 898 ++++++++++++++++++++++ llvm/test/MC/X86/avx512vnni-encoding.s | 450 +++++++++++ 15 files changed, 1845 insertions(+) create mode 100644 llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll create mode 100644 llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll create mode 100644 llvm/test/MC/X86/avx512vl_vnni-encoding.s create mode 100644 llvm/test/MC/X86/avx512vnni-encoding.s diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index f63194b..aeed363 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2728,6 +2728,109 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } +// VNNI +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_vpdpbusd_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusd_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusd128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusd_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusd_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusd256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusd_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusd_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusd512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpdpbusds_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusds128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusds_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusds128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusds_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusds256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusds_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusds256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusds_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusds512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusds_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusds512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpdpwssd_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssd_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssd128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssd_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssd_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssd256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssd_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssd_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssd512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpdpwssds_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssds128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssds_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssds128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssds_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssds256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssds_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssds256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssds_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssds512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssds_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssds512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // XOP diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index f80ec6b..c5436f7 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1266,6 +1266,9 @@ bool sys::getHostCPUFeatures(StringMap &Features) { // VPCLMULQDQ (carry-less multiplication quadword) Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; + // Enable Vector Neural Network Instructions + Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; + bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 0218693..e0745ec 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -160,6 +160,9 @@ def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", [FeatureAVX512]>; def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">; +def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", + "Enable AVX-512 Vector Neural Network Instructions", + [FeatureAVX512]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ac4b1d6..6aebfb7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25250,6 +25250,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; case X86ISD::LWPINS: return "X86ISD::LWPINS"; case X86ISD::MGATHER: return "X86ISD::MGATHER"; + case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD"; + case X86ISD::VPDPBUSDS: return "X86ISD::VPDPBUSDS"; + case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD"; + case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS"; } return nullptr; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 1327cf2..b79addf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -481,6 +481,12 @@ namespace llvm { // op0 x op1 + op2. VPMADD52L, VPMADD52H, + // VNNI + VPDPBUSD, + VPDPBUSDS, + VPDPWSSD, + VPDPWSSDS, + // FMA nodes. // We use the target independent ISD::FMA for the non-inverted case. FNMADD, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 9c6e923b..fa04425 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10160,3 +10160,47 @@ defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", avx512vl_i8_info, defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; +//===----------------------------------------------------------------------===// +// VNNI +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in +multiclass VNNI_rmb Op, string OpStr, SDNode OpNode, + X86VectorVTInfo VTI> { + defm r : AVX512_maskable_3src, + EVEX_4V, T8PD; + defm m : AVX512_maskable_3src, + EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD; + defm mb : AVX512_maskable_3src, + EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD; +} + +multiclass VNNI_common Op, string OpStr, SDNode OpNode> { + let Predicates = [HasVNNI] in + defm Z : VNNI_rmb, EVEX_V512; + let Predicates = [HasVNNI, HasVLX] in { + defm Z256 : VNNI_rmb, EVEX_V256; + defm Z128 : VNNI_rmb, EVEX_V128; + } +} + +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>; + diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index c98aa3b..263babd 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -533,6 +533,15 @@ def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>; def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>; + +// VNNI +def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; +def X86Vpdpbusd : SDNode<"X86ISD::VPDPBUSD", SDTVnni>; +def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>; +def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>; +def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>; + def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>; def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>; def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 97f11a3..8fb56b7 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -832,6 +832,7 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; +def HasVNNI : Predicate<"Subtarget->hasVNNI()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 11f338b..bc1a5ec 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1157,6 +1157,19 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_128, FMA_OP_MASK, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_256, FMA_OP_MASK, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_512, FMA_OP_MASK, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_128, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_256, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_512, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_128, FMA_OP_MASK, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_256, FMA_OP_MASK, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_512, FMA_OP_MASK, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_128, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, @@ -1377,6 +1390,19 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_128, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_256, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_512, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_128, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_256, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_512, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_128, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_256, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_512, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_128, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ, X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ, diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index eb73b12..76e7f7b 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -325,6 +325,7 @@ void X86Subtarget::initializeEnvironment() { HasVLX = false; HasADX = false; HasPKU = false; + HasVNNI = false; HasSHA = false; HasPRFCHW = false; HasRDSEED = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index e1711ec..a10b4c0 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -304,6 +304,9 @@ protected: /// Processor has PKU extenstions bool HasPKU; + /// Processor has AVX-512 Vector Neural Network Instructions + bool HasVNNI; + /// Processor supports MPX - Memory Protection Extensions bool HasMPX; @@ -530,6 +533,7 @@ public: bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } bool hasPKU() const { return HasPKU; } + bool hasVNNI() const { return HasVNNI; } bool hasMPX() const { return HasMPX; } bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } bool hasCLWB() const { return HasCLWB; } diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll new file mode 100644 index 0000000..10e82ee --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll @@ -0,0 +1,195 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vnni,+avx512vl| FileCheck %s + +declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) +declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 +; CHECK-NEXT: vpdpbusd (%rdi), %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vmovaps %ymm0, %ymm4 +; CHECK-NEXT: vpdpbusd %ymm2, %ymm1, %ymm4 +; CHECK-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddd %ymm0, %ymm4, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %x2 = load <8 x i32>, <8 x i32>* %x2p + %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1) + %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) +declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 +; CHECK-NEXT: vpdpbusd (%rdi), %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vmovaps %xmm0, %xmm4 +; CHECK-NEXT: vpdpbusd %xmm2, %xmm1, %xmm4 +; CHECK-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %x2 = load <4 x i32>, <4 x i32>* %x2p + %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1) + %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) + %res3 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) +declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 +; CHECK-NEXT: vpdpbusds (%rdi), %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vmovaps %ymm0, %ymm4 +; CHECK-NEXT: vpdpbusds %ymm2, %ymm1, %ymm4 +; CHECK-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddd %ymm0, %ymm4, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %x2 = load <8 x i32>, <8 x i32>* %x2p + %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1) + %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) +declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 +; CHECK-NEXT: vpdpbusds (%rdi), %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vmovaps %xmm0, %xmm4 +; CHECK-NEXT: vpdpbusds %xmm2, %xmm1, %xmm4 +; CHECK-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %x2 = load <4 x i32>, <4 x i32>* %x2p + %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1) + %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) + %res3 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) +declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 +; CHECK-NEXT: vpdpwssd (%rdi), %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vmovaps %ymm0, %ymm4 +; CHECK-NEXT: vpdpwssd %ymm2, %ymm1, %ymm4 +; CHECK-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddd %ymm0, %ymm4, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %x2 = load <8 x i32>, <8 x i32>* %x2p + %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1) + %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) +declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 +; CHECK-NEXT: vpdpwssd (%rdi), %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vmovaps %xmm0, %xmm4 +; CHECK-NEXT: vpdpwssd %xmm2, %xmm1, %xmm4 +; CHECK-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %x2 = load <4 x i32>, <4 x i32>* %x2p + %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1) + %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) + %res3 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + + +declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) +declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 +; CHECK-NEXT: vpdpwssds (%rdi), %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vmovaps %ymm0, %ymm4 +; CHECK-NEXT: vpdpwssds %ymm2, %ymm1, %ymm4 +; CHECK-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} +; CHECK-NEXT: vpaddd %ymm0, %ymm4, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %x2 = load <8 x i32>, <8 x i32>* %x2p + %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1) + %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) +declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 +; CHECK-NEXT: vpdpwssds (%rdi), %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vmovaps %xmm0, %xmm4 +; CHECK-NEXT: vpdpwssds %xmm2, %xmm1, %xmm4 +; CHECK-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %x2 = load <4 x i32>, <4 x i32>* %x2p + %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1) + %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) + %res3 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll new file mode 100644 index 0000000..3cd1011 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll @@ -0,0 +1,98 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vnni | FileCheck %s + +declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm4 +; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %x2 = load <16 x i32>, <16 x i32>* %x2p + %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) + %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm4 +; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %x2 = load <16 x i32>, <16 x i32>* %x2p + %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) + %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vpdpwssd %zmm2, %zmm1, %zmm4 +; CHECK-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %x2 = load <16 x i32>, <16 x i32>* %x2p + %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) + %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vpdpwssds %zmm2, %zmm1, %zmm4 +; CHECK-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %x2 = load <16 x i32>, <16 x i32>* %x2p + %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) + %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + diff --git a/llvm/test/MC/X86/avx512vl_vnni-encoding.s b/llvm/test/MC/X86/avx512vl_vnni-encoding.s new file mode 100644 index 0000000..9c38803 --- /dev/null +++ b/llvm/test/MC/X86/avx512vl_vnni-encoding.s @@ -0,0 +1,898 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vnni,+avx512vl --show-encoding < %s | FileCheck %s + +// CHECK: vpdpbusd %xmm3, %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0xcb] + vpdpbusd %xmm3, %xmm2, %xmm1 + +// CHECK: vpdpbusds %xmm3, %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0xcb] + vpdpbusds %xmm3, %xmm2, %xmm1 + +// CHECK: vpdpwssd %xmm3, %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0xcb] + vpdpwssd %xmm3, %xmm2, %xmm1 + +// CHECK: vpdpwssds %xmm3, %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0xcb] + vpdpwssds %xmm3, %xmm2, %xmm1 + +// CHECK: vpdpbusd %xmm23, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xef] + vpdpbusd %xmm23, %xmm22, %xmm21 + +// CHECK: vpdpbusds %xmm23, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xef] + vpdpbusds %xmm23, %xmm22, %xmm21 + +// CHECK: vpdpwssd %xmm23, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xef] + vpdpwssd %xmm23, %xmm22, %xmm21 + +// CHECK: vpdpwssds %xmm23, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xef] + vpdpwssds %xmm23, %xmm22, %xmm21 + +// CHECK: vpdpbusd %xmm3, %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0xcb] + vpdpbusd %xmm3, %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds %xmm3, %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0xcb] + vpdpbusds %xmm3, %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd %xmm3, %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0xcb] + vpdpwssd %xmm3, %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds %xmm3, %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0xcb] + vpdpwssds %xmm3, %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd %xmm23, %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xef] + vpdpbusd %xmm23, %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds %xmm23, %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xef] + vpdpbusds %xmm23, %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd %xmm23, %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xef] + vpdpwssd %xmm23, %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds %xmm23, %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xef] + vpdpwssds %xmm23, %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd (%rcx), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0x09] + vpdpbusd (%rcx), %xmm2, %xmm1 + +// CHECK: vpdpbusd -64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0x4c,0x24,0xfc] + vpdpbusd -64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpbusd 64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0x4c,0x24,0x04] + vpdpbusd 64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpbusds (%rcx), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0x09] + vpdpbusds (%rcx), %xmm2, %xmm1 + +// CHECK: vpdpbusds -64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0x4c,0x24,0xfc] + vpdpbusds -64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpbusds 64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0x4c,0x24,0x04] + vpdpbusds 64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpwssd (%rcx), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0x09] + vpdpwssd (%rcx), %xmm2, %xmm1 + +// CHECK: vpdpwssd -64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0x4c,0x24,0xfc] + vpdpwssd -64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpwssd 64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0x4c,0x24,0x04] + vpdpwssd 64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpwssds (%rcx), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0x09] + vpdpwssds (%rcx), %xmm2, %xmm1 + +// CHECK: vpdpwssds -64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0x4c,0x24,0xfc] + vpdpwssds -64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpwssds 64(%rsp), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0x4c,0x24,0x04] + vpdpwssds 64(%rsp), %xmm2, %xmm1 + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %xmm2, %xmm1 + +// CHECK: vpdpbusd (%rcx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x50,0x29] + vpdpbusd (%rcx), %xmm22, %xmm21 + +// CHECK: vpdpbusd -64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x50,0x6c,0x24,0xfc] + vpdpbusd -64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpbusd 64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x50,0x6c,0x24,0x04] + vpdpbusd 64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpbusds (%rcx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x51,0x29] + vpdpbusds (%rcx), %xmm22, %xmm21 + +// CHECK: vpdpbusds -64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x51,0x6c,0x24,0xfc] + vpdpbusds -64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpbusds 64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x51,0x6c,0x24,0x04] + vpdpbusds 64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpwssd (%rcx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x52,0x29] + vpdpwssd (%rcx), %xmm22, %xmm21 + +// CHECK: vpdpwssd -64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x52,0x6c,0x24,0xfc] + vpdpwssd -64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpwssd 64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x52,0x6c,0x24,0x04] + vpdpwssd 64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpwssds (%rcx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x53,0x29] + vpdpwssds (%rcx), %xmm22, %xmm21 + +// CHECK: vpdpwssds -64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x53,0x6c,0x24,0xfc] + vpdpwssds -64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpwssds 64(%rsp), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x53,0x6c,0x24,0x04] + vpdpwssds 64(%rsp), %xmm22, %xmm21 + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %xmm22, %xmm21 + +// CHECK: vpdpbusd (%rcx), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0x09] + vpdpbusd (%rcx), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd -64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0x4c,0x24,0xfc] + vpdpbusd -64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd 64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0x4c,0x24,0x04] + vpdpbusd 64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds (%rcx), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0x09] + vpdpbusds (%rcx), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds -64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0x4c,0x24,0xfc] + vpdpbusds -64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds 64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0x4c,0x24,0x04] + vpdpbusds 64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd (%rcx), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0x09] + vpdpwssd (%rcx), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd -64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0x4c,0x24,0xfc] + vpdpwssd -64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd 64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0x4c,0x24,0x04] + vpdpwssd 64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds (%rcx), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0x09] + vpdpwssds (%rcx), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds -64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0x4c,0x24,0xfc] + vpdpwssds -64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds 64(%rsp), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0x4c,0x24,0x04] + vpdpwssds 64(%rsp), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2} + +// CHECK: vpdpbusd (%rcx), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x50,0x29] + vpdpbusd (%rcx), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd -64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x50,0x6c,0x24,0xfc] + vpdpbusd -64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd 64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x50,0x6c,0x24,0x04] + vpdpbusd 64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds (%rcx), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x51,0x29] + vpdpbusds (%rcx), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds -64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x51,0x6c,0x24,0xfc] + vpdpbusds -64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds 64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x51,0x6c,0x24,0x04] + vpdpbusds 64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd (%rcx), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x52,0x29] + vpdpwssd (%rcx), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd -64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x52,0x6c,0x24,0xfc] + vpdpwssd -64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd 64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x52,0x6c,0x24,0x04] + vpdpwssd 64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds (%rcx), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x53,0x29] + vpdpwssds (%rcx), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds -64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x53,0x6c,0x24,0xfc] + vpdpwssds -64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds 64(%rsp), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x53,0x6c,0x24,0x04] + vpdpwssds 64(%rsp), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2} + +// CHECK: vpdpbusd %ymm3, %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0xcb] + vpdpbusd %ymm3, %ymm2, %ymm1 + +// CHECK: vpdpbusds %ymm3, %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0xcb] + vpdpbusds %ymm3, %ymm2, %ymm1 + +// CHECK: vpdpwssd %ymm3, %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0xcb] + vpdpwssd %ymm3, %ymm2, %ymm1 + +// CHECK: vpdpwssds %ymm3, %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0xcb] + vpdpwssds %ymm3, %ymm2, %ymm1 + +// CHECK: vpdpbusd %ymm23, %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xef] + vpdpbusd %ymm23, %ymm22, %ymm21 + +// CHECK: vpdpbusds %ymm23, %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xef] + vpdpbusds %ymm23, %ymm22, %ymm21 + +// CHECK: vpdpwssd %ymm23, %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xef] + vpdpwssd %ymm23, %ymm22, %ymm21 + +// CHECK: vpdpwssds %ymm23, %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xef] + vpdpwssds %ymm23, %ymm22, %ymm21 + +// CHECK: vpdpbusd %ymm3, %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0xcb] + vpdpbusd %ymm3, %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds %ymm3, %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0xcb] + vpdpbusds %ymm3, %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd %ymm3, %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0xcb] + vpdpwssd %ymm3, %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds %ymm3, %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0xcb] + vpdpwssds %ymm3, %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd %ymm23, %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xef] + vpdpbusd %ymm23, %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds %ymm23, %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xef] + vpdpbusds %ymm23, %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd %ymm23, %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xef] + vpdpwssd %ymm23, %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds %ymm23, %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xef] + vpdpwssds %ymm23, %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusd (%rcx), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0x09] + vpdpbusd (%rcx), %ymm2, %ymm1 + +// CHECK: vpdpbusd -128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0x4c,0x24,0xfc] + vpdpbusd -128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpbusd 128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0x4c,0x24,0x04] + vpdpbusd 128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpbusds (%rcx), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0x09] + vpdpbusds (%rcx), %ymm2, %ymm1 + +// CHECK: vpdpbusds -128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0x4c,0x24,0xfc] + vpdpbusds -128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpbusds 128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0x4c,0x24,0x04] + vpdpbusds 128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpwssd (%rcx), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0x09] + vpdpwssd (%rcx), %ymm2, %ymm1 + +// CHECK: vpdpwssd -128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0x4c,0x24,0xfc] + vpdpwssd -128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpwssd 128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0x4c,0x24,0x04] + vpdpwssd 128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpwssds (%rcx), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0x09] + vpdpwssds (%rcx), %ymm2, %ymm1 + +// CHECK: vpdpwssds -128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0x4c,0x24,0xfc] + vpdpwssds -128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpwssds 128(%rsp), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0x4c,0x24,0x04] + vpdpwssds 128(%rsp), %ymm2, %ymm1 + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %ymm2, %ymm1 + +// CHECK: vpdpbusd (%rcx), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x50,0x29] + vpdpbusd (%rcx), %ymm22, %ymm21 + +// CHECK: vpdpbusd -128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x50,0x6c,0x24,0xfc] + vpdpbusd -128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpbusd 128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x50,0x6c,0x24,0x04] + vpdpbusd 128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpbusds (%rcx), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x51,0x29] + vpdpbusds (%rcx), %ymm22, %ymm21 + +// CHECK: vpdpbusds -128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x51,0x6c,0x24,0xfc] + vpdpbusds -128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpbusds 128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x51,0x6c,0x24,0x04] + vpdpbusds 128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpwssd (%rcx), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x52,0x29] + vpdpwssd (%rcx), %ymm22, %ymm21 + +// CHECK: vpdpwssd -128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x52,0x6c,0x24,0xfc] + vpdpwssd -128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpwssd 128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x52,0x6c,0x24,0x04] + vpdpwssd 128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpwssds (%rcx), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x53,0x29] + vpdpwssds (%rcx), %ymm22, %ymm21 + +// CHECK: vpdpwssds -128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x53,0x6c,0x24,0xfc] + vpdpwssds -128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpwssds 128(%rsp), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x53,0x6c,0x24,0x04] + vpdpwssds 128(%rsp), %ymm22, %ymm21 + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %ymm22, %ymm21 + +// CHECK: vpdpbusd (%rcx), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0x09] + vpdpbusd (%rcx), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd -128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0x4c,0x24,0xfc] + vpdpbusd -128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd 128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0x4c,0x24,0x04] + vpdpbusd 128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds (%rcx), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0x09] + vpdpbusds (%rcx), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds -128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0x4c,0x24,0xfc] + vpdpbusds -128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds 128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0x4c,0x24,0x04] + vpdpbusds 128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd (%rcx), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0x09] + vpdpwssd (%rcx), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd -128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0x4c,0x24,0xfc] + vpdpwssd -128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd 128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0x4c,0x24,0x04] + vpdpwssd 128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds (%rcx), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0x09] + vpdpwssds (%rcx), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds -128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0x4c,0x24,0xfc] + vpdpwssds -128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds 128(%rsp), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0x4c,0x24,0x04] + vpdpwssds 128(%rsp), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2} + +// CHECK: vpdpbusd (%rcx), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x50,0x29] + vpdpbusd (%rcx), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusd -128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x50,0x6c,0x24,0xfc] + vpdpbusd -128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusd 128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x50,0x6c,0x24,0x04] + vpdpbusd 128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds (%rcx), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x51,0x29] + vpdpbusds (%rcx), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds -128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x51,0x6c,0x24,0xfc] + vpdpbusds -128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds 128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x51,0x6c,0x24,0x04] + vpdpbusds 128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd (%rcx), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x52,0x29] + vpdpwssd (%rcx), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd -128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x52,0x6c,0x24,0xfc] + vpdpwssd -128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd 128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x52,0x6c,0x24,0x04] + vpdpwssd 128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds (%rcx), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x53,0x29] + vpdpwssds (%rcx), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds -128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x53,0x6c,0x24,0xfc] + vpdpwssds -128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds 128(%rsp), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x53,0x6c,0x24,0x04] + vpdpwssds 128(%rsp), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2} + diff --git a/llvm/test/MC/X86/avx512vnni-encoding.s b/llvm/test/MC/X86/avx512vnni-encoding.s new file mode 100644 index 0000000..f12930e --- /dev/null +++ b/llvm/test/MC/X86/avx512vnni-encoding.s @@ -0,0 +1,450 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vnni --show-encoding < %s | FileCheck %s + +// CHECK: vpdpbusd %zmm3, %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0xcb] + vpdpbusd %zmm3, %zmm2, %zmm1 + +// CHECK: vpdpbusds %zmm3, %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0xcb] + vpdpbusds %zmm3, %zmm2, %zmm1 + +// CHECK: vpdpwssd %zmm3, %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0xcb] + vpdpwssd %zmm3, %zmm2, %zmm1 + +// CHECK: vpdpwssds %zmm3, %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0xcb] + vpdpwssds %zmm3, %zmm2, %zmm1 + +// CHECK: vpdpbusd %zmm23, %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xef] + vpdpbusd %zmm23, %zmm22, %zmm21 + +// CHECK: vpdpbusds %zmm23, %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xef] + vpdpbusds %zmm23, %zmm22, %zmm21 + +// CHECK: vpdpwssd %zmm23, %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xef] + vpdpwssd %zmm23, %zmm22, %zmm21 + +// CHECK: vpdpwssds %zmm23, %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xef] + vpdpwssds %zmm23, %zmm22, %zmm21 + +// CHECK: vpdpbusd %zmm3, %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0xcb] + vpdpbusd %zmm3, %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds %zmm3, %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0xcb] + vpdpbusds %zmm3, %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd %zmm3, %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0xcb] + vpdpwssd %zmm3, %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds %zmm3, %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0xcb] + vpdpwssds %zmm3, %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd %zmm23, %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xef] + vpdpbusd %zmm23, %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds %zmm23, %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xef] + vpdpbusds %zmm23, %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd %zmm23, %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xef] + vpdpwssd %zmm23, %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds %zmm23, %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xef] + vpdpwssds %zmm23, %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusd (%rcx), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0x09] + vpdpbusd (%rcx), %zmm2, %zmm1 + +// CHECK: vpdpbusd -256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0x4c,0x24,0xfc] + vpdpbusd -256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpbusd 256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0x4c,0x24,0x04] + vpdpbusd 256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpbusds (%rcx), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0x09] + vpdpbusds (%rcx), %zmm2, %zmm1 + +// CHECK: vpdpbusds -256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0x4c,0x24,0xfc] + vpdpbusds -256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpbusds 256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0x4c,0x24,0x04] + vpdpbusds 256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpwssd (%rcx), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0x09] + vpdpwssd (%rcx), %zmm2, %zmm1 + +// CHECK: vpdpwssd -256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0x4c,0x24,0xfc] + vpdpwssd -256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpwssd 256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0x4c,0x24,0x04] + vpdpwssd 256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpwssds (%rcx), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0x09] + vpdpwssds (%rcx), %zmm2, %zmm1 + +// CHECK: vpdpwssds -256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0x4c,0x24,0xfc] + vpdpwssds -256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpwssds 256(%rsp), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0x4c,0x24,0x04] + vpdpwssds 256(%rsp), %zmm2, %zmm1 + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %zmm2, %zmm1 + +// CHECK: vpdpbusd (%rcx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x50,0x29] + vpdpbusd (%rcx), %zmm22, %zmm21 + +// CHECK: vpdpbusd -256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x50,0x6c,0x24,0xfc] + vpdpbusd -256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpbusd 256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x50,0x6c,0x24,0x04] + vpdpbusd 256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpbusds (%rcx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x51,0x29] + vpdpbusds (%rcx), %zmm22, %zmm21 + +// CHECK: vpdpbusds -256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x51,0x6c,0x24,0xfc] + vpdpbusds -256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpbusds 256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x51,0x6c,0x24,0x04] + vpdpbusds 256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpwssd (%rcx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x52,0x29] + vpdpwssd (%rcx), %zmm22, %zmm21 + +// CHECK: vpdpwssd -256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x52,0x6c,0x24,0xfc] + vpdpwssd -256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpwssd 256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x52,0x6c,0x24,0x04] + vpdpwssd 256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpwssds (%rcx), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x53,0x29] + vpdpwssds (%rcx), %zmm22, %zmm21 + +// CHECK: vpdpwssds -256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x53,0x6c,0x24,0xfc] + vpdpwssds -256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpwssds 256(%rsp), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x53,0x6c,0x24,0x04] + vpdpwssds 256(%rsp), %zmm22, %zmm21 + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %zmm22, %zmm21 + +// CHECK: vpdpbusd (%rcx), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0x09] + vpdpbusd (%rcx), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd -256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0x4c,0x24,0xfc] + vpdpbusd -256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd 256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0x4c,0x24,0x04] + vpdpbusd 256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds (%rcx), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0x09] + vpdpbusds (%rcx), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds -256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0x4c,0x24,0xfc] + vpdpbusds -256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds 256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0x4c,0x24,0x04] + vpdpbusds 256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd (%rcx), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0x09] + vpdpwssd (%rcx), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd -256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0x4c,0x24,0xfc] + vpdpwssd -256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd 256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0x4c,0x24,0x04] + vpdpwssd 256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds (%rcx), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0x09] + vpdpwssds (%rcx), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds -256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0x4c,0x24,0xfc] + vpdpwssds -256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds 256(%rsp), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0x4c,0x24,0x04] + vpdpwssds 256(%rsp), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2} + +// CHECK: vpdpbusd (%rcx), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x50,0x29] + vpdpbusd (%rcx), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusd -256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x50,0x6c,0x24,0xfc] + vpdpbusd -256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusd 256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x50,0x6c,0x24,0x04] + vpdpbusd 256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusd 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusd 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusd -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusd -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusd -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusd -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds (%rcx), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x51,0x29] + vpdpbusds (%rcx), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds -256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x51,0x6c,0x24,0xfc] + vpdpbusds -256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds 256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x51,0x6c,0x24,0x04] + vpdpbusds 256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpbusds 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpbusds -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpbusds -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpbusds -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd (%rcx), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x52,0x29] + vpdpwssd (%rcx), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd -256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x52,0x6c,0x24,0xfc] + vpdpwssd -256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd 256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x52,0x6c,0x24,0x04] + vpdpwssd 256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssd 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssd -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssd -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssd -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds (%rcx), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x53,0x29] + vpdpwssds (%rcx), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds -256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x53,0x6c,0x24,0xfc] + vpdpwssds -256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds 256(%rsp), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x53,0x6c,0x24,0x04] + vpdpwssds 256(%rsp), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xac,0xf1,0x00,0x00,0x00,0x10] + vpdpwssds 268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpdpwssds -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + +// CHECK: vpdpwssds -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpdpwssds -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2} + -- 2.7.4