Enable AVX512_BF16 instructions, which are supported for BFLOAT16 in Cooper Lake
authorLuo, Yuanke <yuanke.luo@intel.com>
Mon, 6 May 2019 08:22:37 +0000 (08:22 +0000)
committerLuo, Yuanke <yuanke.luo@intel.com>
Mon, 6 May 2019 08:22:37 +0000 (08:22 +0000)
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS  instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data.
VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data.
VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
For more details about BF16 isa, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference

Author: LiuTianle

Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, RKSimon, spatel

Reviewed By: craig.topper

Subscribers: kristina, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60550

llvm-svn: 360017

28 files changed:
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/Support/Host.cpp
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/lib/Target/X86/X86InstrInfo.td
llvm/lib/Target/X86/X86IntrinsicsInfo.h
llvm/lib/Target/X86/X86Subtarget.h
test/CodeGen/X86/avx512bf16-intrinsics.ll [new file with mode: 0644]
test/CodeGen/X86/avx512bf16-vl-intrinsics.ll [new file with mode: 0644]
test/MC/Disassembler/X86/avx512bf16-att.txt [new file with mode: 0644]
test/MC/Disassembler/X86/avx512bf16-intel.txt [new file with mode: 0644]
test/MC/Disassembler/X86/avx512bf16vl-att.txt [new file with mode: 0644]
test/MC/Disassembler/X86/avx512bf16vl-intel.txt [new file with mode: 0644]
test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt [new file with mode: 0644]
test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt [new file with mode: 0644]
test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt [new file with mode: 0644]
test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt [new file with mode: 0644]
test/MC/X86/avx512_bf16-encoding.s [new file with mode: 0644]
test/MC/X86/avx512_bf16_vl-encoding.s [new file with mode: 0644]
test/MC/X86/intel-syntax-avx512_bf16.s [new file with mode: 0644]
test/MC/X86/intel-syntax-avx512_bf16_vl.s [new file with mode: 0644]
test/MC/X86/intel-syntax-x86-64-avx512_bf16.s [new file with mode: 0644]
test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s [new file with mode: 0644]
test/MC/X86/x86-64-avx512_bf16-encoding.s [new file with mode: 0644]
test/MC/X86/x86-64-avx512_bf16_vl-encoding.s [new file with mode: 0644]

index 06b603a..2635e3d 100644 (file)
@@ -4834,3 +4834,41 @@ let TargetPrefix = "x86" in {
   def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
               Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
 }
+
+let TargetPrefix = "x86" in {
+  def int_x86_avx512bf16_cvtne2ps2bf16_128:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+              [IntrNoMem]>;
+  // Intrinsic must be masked due to it producing less than 128 bits of results.
+  def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+              Intrinsic<[llvm_v8i16_ty],
+                        [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_128:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+              Intrinsic<[llvm_v4f32_ty],
+              [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_256:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+              Intrinsic<[llvm_v8f32_ty],
+              [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_512:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+              Intrinsic<[llvm_v16f32_ty],
+              [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}
index 6936270..4a7eff3 100644 (file)
@@ -1375,6 +1375,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   // detecting features using the "-march=native" flag.
   // For more info, see X86 ISA docs.
   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
+  bool HasLeaf7Subleaf1 =
+      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
 
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
index fe23a29..a799c1f 100644 (file)
@@ -167,6 +167,9 @@ def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                           "Enable AVX-512 Vector Neural Network Instructions",
                                       [FeatureAVX512]>;
+def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
+                           "Support bfloat16 floating point",
+                                      [FeatureBWI]>;
 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                        "Enable AVX-512 Bit Algorithms",
                         [FeatureBWI]>;
index 053fe90..fc100fe 100644 (file)
@@ -22624,6 +22624,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                          PassThru, Mask);
 
     }
+    case CVTNEPS2BF16_MASK: {
+      SDValue Src = Op.getOperand(1);
+      SDValue PassThru = Op.getOperand(2);
+      SDValue Mask = Op.getOperand(3);
+
+      if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+      // Break false dependency.
+      if (PassThru.isUndef())
+        PassThru = DAG.getConstant(0, dl, PassThru.getValueType());
+
+      return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+                         Mask);
+    }
     default:
       break;
     }
@@ -28073,6 +28088,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CVTS2UI:            return "X86ISD::CVTS2UI";
   case X86ISD::CVTS2SI_RND:        return "X86ISD::CVTS2SI_RND";
   case X86ISD::CVTS2UI_RND:        return "X86ISD::CVTS2UI_RND";
+  case X86ISD::CVTNE2PS2BF16:      return "X86ISD::CVTNE2PS2BF16";
+  case X86ISD::CVTNEPS2BF16:       return "X86ISD::CVTNEPS2BF16";
+  case X86ISD::MCVTNEPS2BF16:      return "X86ISD::MCVTNEPS2BF16";
+  case X86ISD::DPBF16PS:           return "X86ISD::DPBF16PS";
   case X86ISD::LWPINS:             return "X86ISD::LWPINS";
   case X86ISD::MGATHER:            return "X86ISD::MGATHER";
   case X86ISD::MSCATTER:           return "X86ISD::MSCATTER";
index 78e414b..9b2f059 100644 (file)
@@ -509,6 +509,19 @@ namespace llvm {
       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
       MCVTSI2P, MCVTUI2P,
 
+      // Vector float to bfloat16.
+      // Convert TWO packed single data to one packed BF16 data
+      CVTNE2PS2BF16, 
+      // Convert packed single data to packed BF16 data
+      CVTNEPS2BF16,
+      // Masked version of above.
+      // SRC, PASSTHRU, MASK
+      MCVTNEPS2BF16,
+
+      // Dot product of BF16 pairs to accumulated into
+      // packed single precision.
+      DPBF16PS,
+
       // Save xmm argument registers to the stack, according to %al. An operator
       // is needed so that this can be expanded with control flow.
       VASTART_SAVE_XMM_REGS,
index d857b8e..d0d255b 100644 (file)
@@ -12647,3 +12647,143 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                      Sched<[SchedWriteFMA.ZMM.Folded]>;
 }
 
+multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
+                             X86SchedWriteWidths sched,
+                             AVX512VLVectorVTInfo _SrcVTInfo,
+                             AVX512VLVectorVTInfo _DstVTInfo,
+                             SDNode OpNode, Predicate prd,
+                             bit IsCommutable = 0> {
+  let Predicates = [prd] in
+    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
+                                   _SrcVTInfo.info512, _DstVTInfo.info512,
+                                   _SrcVTInfo.info512, IsCommutable>,
+                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
+  let Predicates = [HasVLX, prd] in {
+    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
+                                      _SrcVTInfo.info256, _DstVTInfo.info256,
+                                      _SrcVTInfo.info256, IsCommutable>,
+                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
+    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
+                                      _SrcVTInfo.info128, _DstVTInfo.info128,
+                                      _SrcVTInfo.info128, IsCommutable>,
+                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
+  }
+}
+
+defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
+                                        SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF
+                                        avx512vl_f32_info, avx512vl_i16_info,
+                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
+
+// Truncate Float to BFloat16
+multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
+                             X86SchedWriteWidths sched> {
+  let Predicates = [HasBF16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
+                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasBF16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
+                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
+                               VK4WM>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
+                               X86cvtneps2bf16,
+                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
+
+    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+                    VR128X:$src), 0>;
+    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
+                    f128mem:$src), 0, "intel">;
+    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+                    VR256X:$src), 0>;
+    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
+                    f256mem:$src), 0, "intel">;
+  }
+}
+
+defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
+                                       SchedWriteCvtPD2PS>, T8XS,
+                                       EVEX_CD8<32, CD8VF>;
+
+let Predicates = [HasBF16, HasVLX] in {
+  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
+            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
+            (VCVTNEPS2BF16Z128rm addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
+                                     (X86VBroadcast (loadf32 addr:$src))))),
+            (VCVTNEPS2BF16Z128rmb addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+                              (v8i16 VR128X:$src0), VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
+  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                           (ins _.RC:$src2, _.RC:$src3),
+                           OpcodeStr, "$src3, $src2", "$src2, $src3",
+                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+                           EVEX_4V;
+
+  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                               (ins _.RC:$src2, _.MemOp:$src3),
+                               OpcodeStr, "$src3, $src2", "$src2, $src3",
+                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+                               (src_v.VT (bitconvert
+                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
+
+  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
+                  OpcodeStr,
+                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
+                  !strconcat("$src2, ${src3}", _.BroadcastStr),
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+                  (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
+                  EVEX_B, EVEX_4V;
+
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                 AVX512VLVectorVTInfo _,
+                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
+  let Predicates = [prd] in {
+    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
+                                   src_v.info512>, EVEX_V512;
+  }
+  let Predicates = [HasVLX, prd] in {
+    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
+                                   src_v.info256>, EVEX_V256;
+    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
+                                   src_v.info128>, EVEX_V128;
+  }
+}
+
+defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
+                                       avx512vl_f32_info, avx512vl_i32_info,
+                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
index 4d4d5fa..d79959e 100644 (file)
@@ -664,6 +664,25 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
                                              SDTCisOpSmallerThanOp<0, 1>,
                                              SDTCisVT<2, i32>]>>;
 
+// cvt fp to bfloat16
+def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16",
+                       SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                            SDTCisSameAs<1,2>]>>;
+def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
+                       SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+                                            SDTCVecEltisVT<1, f32>,
+                                            SDTCisSameAs<0, 2>,
+                                            SDTCVecEltisVT<3, i1>,
+                                            SDTCisSameNumEltsAs<1, 3>]>>;
+def X86cvtneps2bf16 :  SDNode<"X86ISD::CVTNEPS2BF16",
+                       SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i16>,
+                                            SDTCVecEltisVT<1, f32>]>>;
+def X86dpbf16ps :      SDNode<"X86ISD::DPBF16PS",
+                       SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+                                            SDTCisSameAs<0,1>,
+                                            SDTCVecEltisVT<2, i32>,
+                                            SDTCisSameAs<2,3>]>>;
+
 // galois field arithmetic
 def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
 def X86GF2P8affineqb    : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
index 0176c2d..56bc050 100644 (file)
@@ -835,6 +835,7 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
 def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
 def PKU        : Predicate<"Subtarget->hasPKU()">;
 def HasVNNI    : Predicate<"Subtarget->hasVNNI()">;
+def HasBF16      : Predicate<"Subtarget->hasBF16()">;
 
 def HasBITALG    : Predicate<"Subtarget->hasBITALG()">;
 def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;
index 5416876..40141d8 100644 (file)
@@ -19,6 +19,7 @@
 namespace llvm {
 
 enum IntrinsicType : uint16_t {
+  CVTNEPS2BF16_MASK,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
   INTR_TYPE_3OP_IMM8,
@@ -981,6 +982,16 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
   X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
   X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+  // bfloat16
+  X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_128, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_256, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_512, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_256, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_512, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_128, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+  X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+  X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+  X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16),
   X86_INTRINSIC_DATA(bmi_bextr_32,         INTR_TYPE_2OP, X86ISD::BEXTR, 0),
   X86_INTRINSIC_DATA(bmi_bextr_64,         INTR_TYPE_2OP, X86ISD::BEXTR, 0),
   X86_INTRINSIC_DATA(bmi_bzhi_32,          INTR_TYPE_2OP, X86ISD::BZHI, 0),
index 0ff9d54..3b11bb1 100644 (file)
@@ -353,6 +353,9 @@ protected:
   /// Processor has AVX-512 Vector Neural Network Instructions
   bool HasVNNI = false;
 
+  /// Processor has AVX-512 bfloat16 floating-point extensions
+  bool HasBF16 = false;
+
   /// Processor has AVX-512 Bit Algorithms instructions
   bool HasBITALG = false;
 
@@ -668,6 +671,7 @@ public:
   bool hasVLX() const { return HasVLX; }
   bool hasPKU() const { return HasPKU; }
   bool hasVNNI() const { return HasVNNI; }
+  bool hasBF16() const { return HasBF16; }
   bool hasBITALG() const { return HasBITALG; }
   bool hasMPX() const { return HasMPX; }
   bool hasSHSTK() const { return HasSHSTK; }
diff --git a/test/CodeGen/X86/avx512bf16-intrinsics.ll b/test/CodeGen/X86/avx512bf16-intrinsics.ll
new file mode 100644 (file)
index 0000000..7b64c57
--- /dev/null
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bf16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float>, <16 x float>) #3
+
+define <8 x i64> @test_mm512_cvtne2ps2bf16_512(<16 x float> %A, <16 x float> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm512_cvtne2ps2bf16_512:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0x72,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4
+  %1 = bitcast <32 x i16> %0 to <8 x i64>
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_maskz_cvtne2ps2bf16_512(<16 x float> %A, <16 x float> %B, i32 %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_maskz_cvtne2ps2bf16_512:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xc9,0x72,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_cvtne2ps2bf16_512:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xc9,0x72,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4
+  %1 = bitcast i32 %U to <32 x i1>
+  %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+  %3 = bitcast <32 x i16> %2 to <8 x i64>
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @test_mm512_mask_cvtne2ps2bf16_512(<8 x i64> %C, i32 %U, <16 x float> %A, <16 x float> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_mask_cvtne2ps2bf16_512:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtne2ps2bf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x72,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_cvtne2ps2bf16_512:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtne2ps2bf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x72,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4
+  %1 = bitcast <8 x i64> %C to <32 x i16>
+  %2 = bitcast i32 %U to <32 x i1>
+  %3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1
+  %4 = bitcast <32 x i16> %3 to <8 x i64>
+  ret <8 x i64> %4
+}
+
+declare <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float>) #3
+
+define <4 x i64> @test_mm512_cvtneps2bf16_512(<16 x float> %A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm512_cvtneps2bf16_512:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcvtneps2bf16 %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x72,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4
+  %1 = bitcast <16 x i16> %0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm512_maskz_cvtneps2bf16_512(<16 x float> %A, i16 %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_maskz_cvtneps2bf16_512:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:   kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtneps2bf16 %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x72,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_cvtneps2bf16_512:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x72,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4
+  %1 = bitcast i16 %U to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+  %3 = bitcast <16 x i16> %2 to <4 x i64>
+  ret <4 x i64> %3
+}
+
+define <4 x i64> @test_mm512_mask_cvtneps2bf16_512(<4 x i64> %C, i16 %U, <16 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_mask_cvtneps2bf16_512:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:   kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtneps2bf16 %zmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x72,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_cvtneps2bf16_512:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %zmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x72,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4
+  %1 = bitcast <4 x i64> %C to <16 x i16>
+  %2 = bitcast i16 %U to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+  %4 = bitcast <16 x i16> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+declare <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float>, <16 x i32>, <16 x i32>) #3
+
+define <16 x float> @test_mm512_dpbf16ps_512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm512_dpbf16ps_512:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdpbf16ps       %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x52,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4
+  ret <16 x float> %0
+}
+
+define <16 x float> @test_mm512_maskz_dpbf16ps_512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B, i16 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_maskz_dpbf16ps_512:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovw   4(%esp), %k1            # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdpbf16ps       %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x52,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_dpbf16ps_512:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdpbf16ps       %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x52,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4
+  %1 = bitcast i16 %U to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
+  ret <16 x float> %2
+}
+define <16 x float> @test_mm512_mask_dpbf16ps_512(i16 zeroext %U, <16 x float> %E, <16 x i32> %A, <16 x i32> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_mask_dpbf16ps_512:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovw   4(%esp), %k1            # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdpbf16ps       %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x52,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_dpbf16ps_512:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdpbf16ps       %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x52,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4
+  %1 = bitcast i16 %U to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %E
+  ret <16 x float> %2
+}
diff --git a/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll b/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
new file mode 100644 (file)
index 0000000..b497ff7
--- /dev/null
@@ -0,0 +1,358 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bf16 -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>) #1
+
+define <2 x i64> @test_mm_cvtne2ps2bf16_128(<4 x float> %A, <4 x float> %B) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm_cvtne2ps2bf16_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7f,0x08,0x72,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2
+  %1 = bitcast <8 x i16> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_maskz_cvtne2ps2bf16_128(<4 x float> %A, <4 x float> %B, i8 zeroext %U) local_unnamed_addr #0 {
+; X86-LABEL: test_mm_maskz_cvtne2ps2bf16_128:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0x89,0x72,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_maskz_cvtne2ps2bf16_128:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0x89,0x72,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2
+  %1 = bitcast i8 %U to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+  %3 = bitcast <8 x i16> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm_mask_cvtne2ps2bf16_128(<2 x i64> %C, i8 zeroext %U, <4 x float> %A, <4 x float> %B) local_unnamed_addr #0 {
+; X86-LABEL: test_mm_mask_cvtne2ps2bf16_128:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtne2ps2bf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x09,0x72,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_mask_cvtne2ps2bf16_128:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtne2ps2bf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x09,0x72,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2
+  %1 = bitcast <2 x i64> %C to <8 x i16>
+  %2 = bitcast i8 %U to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+  %4 = bitcast <8 x i16> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+declare <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float>, <8 x float>) #3
+
+define <4 x i64> @test_mm256_cvtne2ps2bf16_256(<8 x float> %A, <8 x float> %B) local_unnamed_addr #1 {
+; CHECK-LABEL: test_mm256_cvtne2ps2bf16_256:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7f,0x28,0x72,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4
+  %1 = bitcast <16 x i16> %0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_maskz_cvtne2ps2bf16_256(<8 x float> %A, <8 x float> %B, i16 zeroext %U) local_unnamed_addr #1 {
+; X86-LABEL: test_mm256_maskz_cvtne2ps2bf16_256:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xa9,0x72,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_maskz_cvtne2ps2bf16_256:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xa9,0x72,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4
+  %1 = bitcast i16 %U to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+  %3 = bitcast <16 x i16> %2 to <4 x i64>
+  ret <4 x i64> %3
+}
+
+define <4 x i64> @test_mm256_mask_cvtne2ps2bf16_256(<4 x i64> %C, i16 zeroext %U, <8 x float> %A, <8 x float> %B) local_unnamed_addr #1 {
+; X86-LABEL: test_mm256_mask_cvtne2ps2bf16_256:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtne2ps2bf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x77,0x29,0x72,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_mask_cvtne2ps2bf16_256:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtne2ps2bf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x77,0x29,0x72,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4
+  %1 = bitcast <4 x i64> %C to <16 x i16>
+  %2 = bitcast i16 %U to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+  %4 = bitcast <16 x i16> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>) #3
+
+define <2 x i64> @test_mm256_cvtneps2bf16_256(<8 x float> %A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_cvtneps2bf16_256:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4
+  %1 = bitcast <8 x i16> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm256_maskz_cvtneps2bf16_256(<8 x float> %A, i8 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_maskz_cvtneps2bf16_256:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtneps2bf16 %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x72,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_maskz_cvtneps2bf16_256:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x72,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4
+  %1 = bitcast i8 %U to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+  %3 = bitcast <8 x i16> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm256_mask_cvtneps2bf16_256(<2 x i64> %C, i8 zeroext %U, <8 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_mask_cvtneps2bf16_256:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtneps2bf16 %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x72,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_mask_cvtneps2bf16_256:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x72,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4
+  %1 = bitcast <2 x i64> %C to <8 x i16>
+  %2 = bitcast i8 %U to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+  %4 = bitcast <8 x i16> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float>, <8 x i16>, <4 x i1>) #3
+
+define <2 x i64> @test_mm128_cvtneps2bf16_128(<4 x float> %A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm128_cvtneps2bf16_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #4
+  %1 = bitcast <8 x i16> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm128_maskz_cvtneps2bf16_128(<4 x float> %A, i8 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_maskz_cvtneps2bf16_128:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtneps2bf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x72,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_maskz_cvtneps2bf16_128:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x72,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = bitcast i8 %U to <8 x i1>
+  %1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> zeroinitializer, <4 x i1> %1) #4
+  %3 = bitcast <8 x i16> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm128_mask_cvtneps2bf16_128(<2 x i64> %C, i8 zeroext %U, <4 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_mask_cvtneps2bf16_128:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtneps2bf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x72,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_mask_cvtneps2bf16_128:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x72,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = bitcast i8 %U to <8 x i1>
+  %1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = bitcast <2 x i64> %C to <8 x i16>
+  %3 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> %2, <4 x i1> %1) #4
+  %4 = bitcast <8 x i16> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+; Make sure we don't fold a select into the 128 bit form of cvtneps2bf16. It
+; always writes zeros to bits 127:64 regardless of mask.
+define <2 x i64> @test_mm128_cvtneps2bf16_128_select(<2 x i64> %C, i8 zeroext %U, <4 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_cvtneps2bf16_128_select:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vcvtneps2bf16 %xmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc9]
+; X86-NEXT:    vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_cvtneps2bf16_128_select:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtneps2bf16 %xmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc9]
+; X64-NEXT:    vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = bitcast i8 %U to <8 x i1>
+  %1 = bitcast <2 x i64> %C to <8 x i16>
+  %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #4
+  %3 = select <8 x i1> %0, <8 x i16> %2, <8 x i16> %1
+  %4 = bitcast <8 x i16> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+declare <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float>, <8 x i32>, <8 x i32>) #3
+
+define <8 x float> @test_mm256_dpbf16ps_256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_dpbf16ps_256:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdpbf16ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x52,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4
+  ret <8 x float> %0
+}
+
+define <8 x float> @test_mm256_maskz_dpbf16ps_256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B, i8 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_maskz_dpbf16ps_256:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xa9,0x52,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_maskz_dpbf16ps_256:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xa9,0x52,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4
+  %1 = bitcast i8 %U to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer
+  ret <8 x float> %2
+}
+define <8 x float> @test_mm256_mask_dpbf16ps_256(i8 zeroext %U, <8 x float> %E, <8 x i32> %A, <8 x i32> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_mask_dpbf16ps_256:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x76,0x29,0x52,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_mask_dpbf16ps_256:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x76,0x29,0x52,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4
+  %1 = bitcast i8 %U to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %E
+  ret <8 x float> %2
+}
+
+declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float>, <4 x i32>, <4 x i32>) #3
+
+define <4 x float> @test_mm128_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm128_dpbf16ps_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x52,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4x i32> %B) #4
+  ret <4 x float> %0
+}
+
+define <4 x float> @test_mm128_maskz_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B, i4 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_maskz_dpbf16ps_128:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_maskz_dpbf16ps_128:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4
+  %1 = bitcast i4 %U to <4 x i1>
+  %2 = select <4 x i1> %1, <4 x float> %0, <4 x float> zeroinitializer
+  ret <4 x float> %2
+}
+define <4 x float> @test_mm128_mask_dpbf16ps_128(i4 zeroext %U, <4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_mask_dpbf16ps_128:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_mask_dpbf16ps_128:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4
+  %1 = bitcast i4 %U to <4 x i1>
+  %2 = select <4 x i1> %1, <4 x float> %0, <4 x float> %E
+  ret <4 x float> %2
+}
diff --git a/test/MC/Disassembler/X86/avx512bf16-att.txt b/test/MC/Disassembler/X86/avx512bf16-att.txt
new file mode 100644 (file)
index 0000000..a6dc4b6
--- /dev/null
@@ -0,0 +1,82 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %zmm4, %zmm3, %zmm2
+0x62,0xf2,0x67,0x48,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %zmm4, %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x67,0x4f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x67,0xcf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16  268435456(%esp,%esi,8), %zmm3, %zmm2
+0x62,0xf2,0x67,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16  291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x67,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  (%eax){1to16}, %zmm3, %zmm2
+0x62,0xf2,0x67,0x58,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16  -2048(,%ebp,2), %zmm3, %zmm2
+0x62,0xf2,0x67,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16  8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x67,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16  -512(%edx){1to16}, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x67,0xdf,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 %zmm3, %ymm2
+0x62,0xf2,0x7e,0x48,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %zmm3, %ymm2 {%k7}
+0x62,0xf2,0x7e,0x4f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %zmm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x7e,0xcf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16  268435456(%esp,%esi,8), %ymm2
+0x62,0xf2,0x7e,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16  291(%edi,%eax,4), %ymm2 {%k7}
+0x62,0xf2,0x7e,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16  (%eax){1to16}, %ymm2
+0x62,0xf2,0x7e,0x58,0x72,0x10
+
+# CHECK: vcvtneps2bf16  -2048(,%ebp,2), %ymm2
+0x62,0xf2,0x7e,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16  8128(%ecx), %ymm2 {%k7} {z}
+0x62,0xf2,0x7e,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16  -512(%edx){1to16}, %ymm2 {%k7} {z}
+0x62,0xf2,0x7e,0xdf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps %zmm4, %zmm3, %zmm2
+0x62,0xf2,0x66,0x48,0x52,0xd4
+
+# CHECK: vdpbf16ps %zmm4, %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x66,0x4f,0x52,0xd4
+
+# CHECK: vdpbf16ps %zmm4, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x66,0xcf,0x52,0xd4
+
+# CHECK: vdpbf16ps  268435456(%esp,%esi,8), %zmm3, %zmm2
+0x62,0xf2,0x66,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps  291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x66,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps  (%eax){1to16}, %zmm3, %zmm2
+0x62,0xf2,0x66,0x58,0x52,0x10
+
+# CHECK: vdpbf16ps  -2048(,%ebp,2), %zmm3, %zmm2
+0x62,0xf2,0x66,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps  8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x66,0xcf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps  -512(%edx){1to16}, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x66,0xdf,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/avx512bf16-intel.txt b/test/MC/Disassembler/X86/avx512bf16-intel.txt
new file mode 100644 (file)
index 0000000..53d44fa
--- /dev/null
@@ -0,0 +1,82 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf2,0x67,0x4f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf2,0x67,0xcf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, dword ptr [eax]{1to16}
+0x62,0xf2,0x67,0x58,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7} {z}, zmm3, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x67,0xdf,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 ymm2, zmm3
+0x62,0xf2,0x7e,0x48,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 ymm2 {k7}, zmm3
+0x62,0xf2,0x7e,0x4f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 ymm2 {k7} {z}, zmm3
+0x62,0xf2,0x7e,0xcf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 ymm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x7e,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x7e,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 ymm2, dword ptr [eax]{1to16}
+0x62,0xf2,0x7e,0x58,0x72,0x10
+
+# CHECK: vcvtneps2bf16 ymm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x7e,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16 ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x7e,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 ymm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x7e,0xdf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0x52,0xd4
+
+# CHECK: vdpbf16ps zmm2 {k7}, zmm3, zmm4
+0x62,0xf2,0x66,0x4f,0x52,0xd4
+
+# CHECK: vdpbf16ps zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf2,0x66,0xcf,0x52,0xd4
+
+# CHECK: vdpbf16ps zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps zmm2, zmm3, dword ptr [eax]{1to16}
+0x62,0xf2,0x66,0x58,0x52,0x10
+
+# CHECK: vdpbf16ps zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0xcf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps zmm2 {k7} {z}, zmm3, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x66,0xdf,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/avx512bf16vl-att.txt b/test/MC/Disassembler/X86/avx512bf16vl-att.txt
new file mode 100644 (file)
index 0000000..ea5e84f
--- /dev/null
@@ -0,0 +1,157 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %ymm4, %ymm3, %ymm2
+0x62,0xf2,0x67,0x28,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %ymm4, %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x67,0x2f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x67,0xaf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %xmm4, %xmm3, %xmm2
+0x62,0xf2,0x67,0x08,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %xmm4, %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x67,0x0f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x67,0x8f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16  268435456(%esp,%esi,8), %ymm3, %ymm2
+0x62,0xf2,0x67,0x28,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16  291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x67,0x2f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  (%eax){1to8}, %ymm3, %ymm2
+0x62,0xf2,0x67,0x38,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16  -1024(,%ebp,2), %ymm3, %ymm2
+0x62,0xf2,0x67,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16  4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x67,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16  -512(%edx){1to8}, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x67,0xbf,0x72,0x52,0x80
+
+# CHECK: vcvtne2ps2bf16  268435456(%esp,%esi,8), %xmm3, %xmm2
+0x62,0xf2,0x67,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16  291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x67,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  (%eax){1to4}, %xmm3, %xmm2
+0x62,0xf2,0x67,0x18,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16  -512(,%ebp,2), %xmm3, %xmm2
+0x62,0xf2,0x67,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16  2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x67,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16  -512(%edx){1to4}, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x67,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 %xmm3, %xmm2
+0x62,0xf2,0x7e,0x08,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x7e,0x0f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0x8f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %ymm3, %xmm2
+0x62,0xf2,0x7e,0x28,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %ymm3, %xmm2 {%k7}
+0x62,0xf2,0x7e,0x2f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %ymm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0xaf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16x  268435456(%esp,%esi,8), %xmm2
+0x62,0xf2,0x7e,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16x  291(%edi,%eax,4), %xmm2 {%k7}
+0x62,0xf2,0x7e,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16  (%eax){1to4}, %xmm2
+0x62,0xf2,0x7e,0x18,0x72,0x10
+
+# CHECK: vcvtneps2bf16x  -512(,%ebp,2), %xmm2
+0x62,0xf2,0x7e,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16x  2032(%ecx), %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16  -512(%edx){1to4}, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16  (%eax){1to8}, %xmm2
+0x62,0xf2,0x7e,0x38,0x72,0x10
+
+# CHECK: vcvtneps2bf16y  -1024(,%ebp,2), %xmm2
+0x62,0xf2,0x7e,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16y  4064(%ecx), %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16  -512(%edx){1to8}, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0xbf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps %ymm4, %ymm3, %ymm2
+0x62,0xf2,0x66,0x28,0x52,0xd4
+
+# CHECK: vdpbf16ps %ymm4, %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x66,0x2f,0x52,0xd4
+
+# CHECK: vdpbf16ps %ymm4, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x66,0xaf,0x52,0xd4
+
+# CHECK: vdpbf16ps %xmm4, %xmm3, %xmm2
+0x62,0xf2,0x66,0x08,0x52,0xd4
+
+# CHECK: vdpbf16ps %xmm4, %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x66,0x0f,0x52,0xd4
+
+# CHECK: vdpbf16ps %xmm4, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x66,0x8f,0x52,0xd4
+
+# CHECK: vdpbf16ps  268435456(%esp,%esi,8), %ymm3, %ymm2
+0x62,0xf2,0x66,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps  291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x66,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps  (%eax){1to8}, %ymm3, %ymm2
+0x62,0xf2,0x66,0x38,0x52,0x10
+
+# CHECK: vdpbf16ps  -1024(,%ebp,2), %ymm3, %ymm2
+0x62,0xf2,0x66,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps  4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x66,0xaf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps  -512(%edx){1to8}, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x66,0xbf,0x52,0x52,0x80
+
+# CHECK: vdpbf16ps  268435456(%esp,%esi,8), %xmm3, %xmm2
+0x62,0xf2,0x66,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps  291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x66,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps  (%eax){1to4}, %xmm3, %xmm2
+0x62,0xf2,0x66,0x18,0x52,0x10
+
+# CHECK: vdpbf16ps  -512(,%ebp,2), %xmm3, %xmm2
+0x62,0xf2,0x66,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps  2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x66,0x8f,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps  -512(%edx){1to4}, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x66,0x9f,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/avx512bf16vl-intel.txt b/test/MC/Disassembler/X86/avx512bf16vl-intel.txt
new file mode 100644 (file)
index 0000000..3d9dcb1
--- /dev/null
@@ -0,0 +1,157 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf2,0x67,0x2f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf2,0x67,0xaf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf2,0x67,0x0f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf2,0x67,0x8f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x2f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, dword ptr [eax]{1to8}
+0x62,0xf2,0x67,0x38,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7} {z}, ymm3, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x67,0xbf,0x72,0x52,0x80
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, dword ptr [eax]{1to4}
+0x62,0xf2,0x67,0x18,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7} {z}, xmm3, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x67,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 xmm2, xmm3
+0x62,0xf2,0x7e,0x08,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7}, xmm3
+0x62,0xf2,0x7e,0x0f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, xmm3
+0x62,0xf2,0x7e,0x8f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2, ymm3
+0x62,0xf2,0x7e,0x28,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7}, ymm3
+0x62,0xf2,0x7e,0x2f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, ymm3
+0x62,0xf2,0x7e,0xaf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x7e,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x7e,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm2, dword ptr [eax]{1to4}
+0x62,0xf2,0x7e,0x18,0x72,0x10
+
+# CHECK: vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x7e,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x7e,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x7e,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 xmm2, dword ptr [eax]{1to8}
+0x62,0xf2,0x7e,0x38,0x72,0x10
+
+# CHECK: vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x7e,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x7e,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x7e,0xbf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0x52,0xd4
+
+# CHECK: vdpbf16ps ymm2 {k7}, ymm3, ymm4
+0x62,0xf2,0x66,0x2f,0x52,0xd4
+
+# CHECK: vdpbf16ps ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf2,0x66,0xaf,0x52,0xd4
+
+# CHECK: vdpbf16ps xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0x52,0xd4
+
+# CHECK: vdpbf16ps xmm2 {k7}, xmm3, xmm4
+0x62,0xf2,0x66,0x0f,0x52,0xd4
+
+# CHECK: vdpbf16ps xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf2,0x66,0x8f,0x52,0xd4
+
+# CHECK: vdpbf16ps ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps ymm2, ymm3, dword ptr [eax]{1to8}
+0x62,0xf2,0x66,0x38,0x52,0x10
+
+# CHECK: vdpbf16ps ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0xaf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps ymm2 {k7} {z}, ymm3, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x66,0xbf,0x52,0x52,0x80
+
+# CHECK: vdpbf16ps xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps xmm2, xmm3, dword ptr [eax]{1to4}
+0x62,0xf2,0x66,0x18,0x52,0x10
+
+# CHECK: vdpbf16ps xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x8f,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps xmm2 {k7} {z}, xmm3, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x66,0x9f,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt
new file mode 100644 (file)
index 0000000..2b633a9
--- /dev/null
@@ -0,0 +1,82 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %zmm24, %zmm23, %zmm22
+0x62,0x82,0x47,0x40,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %zmm24, %zmm23, %zmm22 {%k7}
+0x62,0x82,0x47,0x47,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+0x62,0x82,0x47,0xc7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16  268435456(%rbp,%r14,8), %zmm23, %zmm22
+0x62,0xa2,0x47,0x40,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16  291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+0x62,0xc2,0x47,0x47,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  (%rip){1to16}, %zmm23, %zmm22
+0x62,0xe2,0x47,0x50,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  -2048(,%rbp,2), %zmm23, %zmm22
+0x62,0xe2,0x47,0x40,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16  8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x47,0xc7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16  -512(%rdx){1to16}, %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x47,0xd7,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 %zmm23, %ymm22
+0x62,0xa2,0x7e,0x48,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %zmm23, %ymm22 {%k7}
+0x62,0xa2,0x7e,0x4f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %zmm23, %ymm22 {%k7} {z}
+0x62,0xa2,0x7e,0xcf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16  268435456(%rbp,%r14,8), %ymm22
+0x62,0xa2,0x7e,0x48,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16  291(%r8,%rax,4), %ymm22 {%k7}
+0x62,0xc2,0x7e,0x4f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16  (%rip){1to16}, %ymm22
+0x62,0xe2,0x7e,0x58,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16  -2048(,%rbp,2), %ymm22
+0x62,0xe2,0x7e,0x48,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16  8128(%rcx), %ymm22 {%k7} {z}
+0x62,0xe2,0x7e,0xcf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16  -512(%rdx){1to16}, %ymm22 {%k7} {z}
+0x62,0xe2,0x7e,0xdf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps %zmm24, %zmm23, %zmm22
+0x62,0x82,0x46,0x40,0x52,0xf0
+
+# CHECK: vdpbf16ps %zmm24, %zmm23, %zmm22 {%k7}
+0x62,0x82,0x46,0x47,0x52,0xf0
+
+# CHECK: vdpbf16ps %zmm24, %zmm23, %zmm22 {%k7} {z}
+0x62,0x82,0x46,0xc7,0x52,0xf0
+
+# CHECK: vdpbf16ps  268435456(%rbp,%r14,8), %zmm23, %zmm22
+0x62,0xa2,0x46,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps  291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+0x62,0xc2,0x46,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps  (%rip){1to16}, %zmm23, %zmm22
+0x62,0xe2,0x46,0x50,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps  -2048(,%rbp,2), %zmm23, %zmm22
+0x62,0xe2,0x46,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps  8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x46,0xc7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps  -512(%rdx){1to16}, %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x46,0xd7,0x52,0x72,0x80
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt
new file mode 100644 (file)
index 0000000..8bb3be9
--- /dev/null
@@ -0,0 +1,83 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x82,0x47,0x47,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x82,0x47,0xc7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x47,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, dword ptr [rip]{1to16}
+0x62,0xe2,0x47,0x50,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0xc7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7} {z}, zmm23, dword ptr [rdx - 512]{1to16}
+0x62,0xe2,0x47,0xd7,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 ymm22, zmm23
+0x62,0xa2,0x7e,0x48,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 ymm22 {k7}, zmm23
+0x62,0xa2,0x7e,0x4f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 ymm22 {k7} {z}, zmm23
+0x62,0xa2,0x7e,0xcf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x7e,0x48,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x7e,0x4f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 ymm22, dword ptr [rip]{1to16}
+0x62,0xe2,0x7e,0x58,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 ymm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x7e,0x48,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16 ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x7e,0xcf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 ymm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0xe2,0x7e,0xdf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0x52,0xf0
+
+# CHECK: vdpbf16ps zmm22 {k7}, zmm23, zmm24
+0x62,0x82,0x46,0x47,0x52,0xf0
+
+# CHECK: vdpbf16ps zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x82,0x46,0xc7,0x52,0xf0
+
+# CHECK: vdpbf16ps zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps zmm22, zmm23, dword ptr [rip]{1to16}
+0x62,0xe2,0x46,0x50,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0xc7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps zmm22 {k7} {z}, zmm23, dword ptr [rdx - 512]{1to16}
+0x62,0xe2,0x46,0xd7,0x52,0x72,0x80
+
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt
new file mode 100644 (file)
index 0000000..e2bfc98
--- /dev/null
@@ -0,0 +1,158 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %ymm24, %ymm23, %ymm22
+0x62,0x82,0x47,0x20,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %ymm24, %ymm23, %ymm22 {%k7}
+0x62,0x82,0x47,0x27,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+0x62,0x82,0x47,0xa7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %xmm24, %xmm23, %xmm22
+0x62,0x82,0x47,0x00,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %xmm24, %xmm23, %xmm22 {%k7}
+0x62,0x82,0x47,0x07,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+0x62,0x82,0x47,0x87,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16  268435456(%rbp,%r14,8), %ymm23, %ymm22
+0x62,0xa2,0x47,0x20,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16  291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+0x62,0xc2,0x47,0x27,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  (%rip){1to8}, %ymm23, %ymm22
+0x62,0xe2,0x47,0x30,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  -1024(,%rbp,2), %ymm23, %ymm22
+0x62,0xe2,0x47,0x20,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16  4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x47,0xa7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16  -512(%rdx){1to8}, %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x47,0xb7,0x72,0x72,0x80
+
+# CHECK: vcvtne2ps2bf16  268435456(%rbp,%r14,8), %xmm23, %xmm22
+0x62,0xa2,0x47,0x00,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16  291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+0x62,0xc2,0x47,0x07,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  (%rip){1to4}, %xmm23, %xmm22
+0x62,0xe2,0x47,0x10,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16  -512(,%rbp,2), %xmm23, %xmm22
+0x62,0xe2,0x47,0x00,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16  2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x47,0x87,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16  -512(%rdx){1to4}, %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x47,0x97,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 %xmm23, %xmm22
+0x62,0xa2,0x7e,0x08,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %xmm23, %xmm22 {%k7}
+0x62,0xa2,0x7e,0x0f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %xmm23, %xmm22 {%k7} {z}
+0x62,0xa2,0x7e,0x8f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %ymm23, %xmm22
+0x62,0xa2,0x7e,0x28,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %ymm23, %xmm22 {%k7}
+0x62,0xa2,0x7e,0x2f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %ymm23, %xmm22 {%k7} {z}
+0x62,0xa2,0x7e,0xaf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16x  268435456(%rbp,%r14,8), %xmm22
+0x62,0xa2,0x7e,0x08,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16x  291(%r8,%rax,4), %xmm22 {%k7}
+0x62,0xc2,0x7e,0x0f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16  (%rip){1to4}, %xmm22
+0x62,0xe2,0x7e,0x18,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16x  -512(,%rbp,2), %xmm22
+0x62,0xe2,0x7e,0x08,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16x  2032(%rcx), %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0x8f,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16  -512(%rdx){1to4}, %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0x9f,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16  (%rip){1to8}, %xmm22
+0x62,0xe2,0x7e,0x38,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16y  -1024(,%rbp,2), %xmm22
+0x62,0xe2,0x7e,0x28,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16y  4064(%rcx), %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0xaf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16  -512(%rdx){1to8}, %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0xbf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps %ymm24, %ymm23, %ymm22
+0x62,0x82,0x46,0x20,0x52,0xf0
+
+# CHECK: vdpbf16ps %ymm24, %ymm23, %ymm22 {%k7}
+0x62,0x82,0x46,0x27,0x52,0xf0
+
+# CHECK: vdpbf16ps %ymm24, %ymm23, %ymm22 {%k7} {z}
+0x62,0x82,0x46,0xa7,0x52,0xf0
+
+# CHECK: vdpbf16ps %xmm24, %xmm23, %xmm22
+0x62,0x82,0x46,0x00,0x52,0xf0
+
+# CHECK: vdpbf16ps %xmm24, %xmm23, %xmm22 {%k7}
+0x62,0x82,0x46,0x07,0x52,0xf0
+
+# CHECK: vdpbf16ps %xmm24, %xmm23, %xmm22 {%k7} {z}
+0x62,0x82,0x46,0x87,0x52,0xf0
+
+# CHECK: vdpbf16ps  268435456(%rbp,%r14,8), %ymm23, %ymm22
+0x62,0xa2,0x46,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps  291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+0x62,0xc2,0x46,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps  (%rip){1to8}, %ymm23, %ymm22
+0x62,0xe2,0x46,0x30,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps  -1024(,%rbp,2), %ymm23, %ymm22
+0x62,0xe2,0x46,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps  4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x46,0xa7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps  -512(%rdx){1to8}, %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x46,0xb7,0x52,0x72,0x80
+
+# CHECK: vdpbf16ps  268435456(%rbp,%r14,8), %xmm23, %xmm22
+0x62,0xa2,0x46,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps  291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+0x62,0xc2,0x46,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps  (%rip){1to4}, %xmm23, %xmm22
+0x62,0xe2,0x46,0x10,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps  -512(,%rbp,2), %xmm23, %xmm22
+0x62,0xe2,0x46,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps  2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x46,0x87,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps  -512(%rdx){1to4}, %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x46,0x97,0x52,0x72,0x80
+
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt
new file mode 100644 (file)
index 0000000..4def95e
--- /dev/null
@@ -0,0 +1,158 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x82,0x47,0x27,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x82,0x47,0xa7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x82,0x47,0x07,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x82,0x47,0x87,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x27,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, dword ptr [rip]{1to8}
+0x62,0xe2,0x47,0x30,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0xa7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7} {z}, ymm23, dword ptr [rdx - 512]{1to8}
+0x62,0xe2,0x47,0xb7,0x72,0x72,0x80
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x07,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, dword ptr [rip]{1to4}
+0x62,0xe2,0x47,0x10,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x87,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7} {z}, xmm23, dword ptr [rdx - 512]{1to4}
+0x62,0xe2,0x47,0x97,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 xmm22, xmm23
+0x62,0xa2,0x7e,0x08,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7}, xmm23
+0x62,0xa2,0x7e,0x0f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, xmm23
+0x62,0xa2,0x7e,0x8f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22, ymm23
+0x62,0xa2,0x7e,0x28,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7}, ymm23
+0x62,0xa2,0x7e,0x2f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, ymm23
+0x62,0xa2,0x7e,0xaf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x7e,0x08,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x7e,0x0f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm22, dword ptr [rip]{1to4}
+0x62,0xe2,0x7e,0x18,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x7e,0x08,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x7e,0x8f,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+0x62,0xe2,0x7e,0x9f,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 xmm22, dword ptr [rip]{1to8}
+0x62,0xe2,0x7e,0x38,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x7e,0x28,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x7e,0xaf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+0x62,0xe2,0x7e,0xbf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0x52,0xf0
+
+# CHECK: vdpbf16ps ymm22 {k7}, ymm23, ymm24
+0x62,0x82,0x46,0x27,0x52,0xf0
+
+# CHECK: vdpbf16ps ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x82,0x46,0xa7,0x52,0xf0
+
+# CHECK: vdpbf16ps xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0x52,0xf0
+
+# CHECK: vdpbf16ps xmm22 {k7}, xmm23, xmm24
+0x62,0x82,0x46,0x07,0x52,0xf0
+
+# CHECK: vdpbf16ps xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x82,0x46,0x87,0x52,0xf0
+
+# CHECK: vdpbf16ps ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps ymm22, ymm23, dword ptr [rip]{1to8}
+0x62,0xe2,0x46,0x30,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0xa7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps ymm22 {k7} {z}, ymm23, dword ptr [rdx - 512]{1to8}
+0x62,0xe2,0x46,0xb7,0x52,0x72,0x80
+
+# CHECK: vdpbf16ps xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps xmm22, xmm23, dword ptr [rip]{1to4}
+0x62,0xe2,0x46,0x10,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x87,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps xmm22 {k7} {z}, xmm23, dword ptr [rdx - 512]{1to4}
+0x62,0xe2,0x46,0x97,0x52,0x72,0x80
+
diff --git a/test/MC/X86/avx512_bf16-encoding.s b/test/MC/X86/avx512_bf16-encoding.s
new file mode 100644 (file)
index 0000000..675bc33
--- /dev/null
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xf4]
+          vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x4f,0x72,0xf4]
+          vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x57,0xcf,0x72,0xf4]
+          vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16   (%ecx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x31]
+          vcvtne2ps2bf16   (%ecx), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   291(%esp,%esi,8), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16   291(%esp,%esi,8), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   268435456(%esp,%esi,8), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16   268435456(%esp,%esi,8), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   -64(%esp), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x74,0x24,0xff]
+          vcvtne2ps2bf16   -64(%esp), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   (%eax){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x30]
+          vcvtne2ps2bf16   (%eax){1to16}, %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   8128(%edx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   8128(%edx), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   -8192(%edx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -8192(%edx), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   508(%edx){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   508(%edx){1to16}, %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16   -512(%edx){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -512(%edx){1to16}, %zmm5, %zmm6
+
+// CHECK: vcvtneps2bf16 %zmm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0xf5]
+          vcvtneps2bf16 %zmm5, %ymm6
+
+// CHECK: vcvtneps2bf16   268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16   268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtneps2bf16   (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x58,0x72,0x31]
+          vcvtneps2bf16   (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtneps2bf16   8128(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0x71,0x7f]
+          vcvtneps2bf16   8128(%ecx), %ymm6
+
+// CHECK: vcvtneps2bf16   -512(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xdf,0x72,0x72,0x80]
+          vcvtneps2bf16   -512(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vdpbf16ps %zmm4, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0xf4]
+          vdpbf16ps %zmm4, %zmm5, %zmm6
+
+// CHECK: vdpbf16ps   268435456(%esp,%esi,8), %zmm5, %zmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x56,0x4f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vdpbf16ps   268435456(%esp,%esi,8), %zmm5, %zmm6 {%k7}
+
+// CHECK: vdpbf16ps   (%ecx){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x58,0x52,0x31]
+          vdpbf16ps   (%ecx){1to16}, %zmm5, %zmm6
+
+// CHECK: vdpbf16ps   8128(%ecx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0x71,0x7f]
+          vdpbf16ps   8128(%ecx), %zmm5, %zmm6
+
+// CHECK: vdpbf16ps   -512(%edx){1to16}, %zmm5, %zmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x56,0xdf,0x52,0x72,0x80]
+          vdpbf16ps   -512(%edx){1to16}, %zmm5, %zmm6 {%k7} {z}
+
diff --git a/test/MC/X86/avx512_bf16_vl-encoding.s b/test/MC/X86/avx512_bf16_vl-encoding.s
new file mode 100644 (file)
index 0000000..ffa42e2
--- /dev/null
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xf4]
+          vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x57,0x8f,0x72,0xf4]
+          vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16   (%ecx), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x31]
+          vcvtne2ps2bf16   (%ecx), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   291(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16   291(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16   268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   -16(%esp), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x74,0x24,0xff]
+          vcvtne2ps2bf16   -16(%esp), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   (%eax){1to4}, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x30]
+          vcvtne2ps2bf16   (%eax){1to4}, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   2032(%edx), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   2032(%edx), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   -2048(%edx), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -2048(%edx), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   508(%edx){1to4}, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   508(%edx){1to4}, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   -512(%edx){1to4}, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -512(%edx){1to4}, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xf4]
+          vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x57,0xaf,0x72,0xf4]
+          vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16   (%ecx), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x31]
+          vcvtne2ps2bf16   (%ecx), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   291(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16   291(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16   268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   -32(%esp), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x74,0x24,0xff]
+          vcvtne2ps2bf16   -32(%esp), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   (%eax){1to8}, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x30]
+          vcvtne2ps2bf16   (%eax){1to8}, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   4064(%edx), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   4064(%edx), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   -4096(%edx), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -4096(%edx), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   508(%edx){1to8}, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   508(%edx){1to8}, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16   -512(%edx){1to8}, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -512(%edx){1to8}, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtneps2bf16 %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0xf5]
+          vcvtneps2bf16 %xmm5, %xmm6
+
+// CHECK: vcvtneps2bf16x  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16x  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtneps2bf16   (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x18,0x72,0x31]
+          vcvtneps2bf16   (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtneps2bf16x  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0x71,0x7f]
+          vcvtneps2bf16x  2032(%ecx), %xmm6
+
+// CHECK: vcvtneps2bf16   -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x9f,0x72,0x72,0x80]
+          vcvtneps2bf16   -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtneps2bf16 %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0xf5]
+          vcvtneps2bf16 %ymm5, %xmm6
+
+// CHECK: vcvtneps2bf16y  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16y  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtneps2bf16   (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x38,0x72,0x31]
+          vcvtneps2bf16   (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtneps2bf16y  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0x71,0x7f]
+          vcvtneps2bf16y  4064(%ecx), %xmm6
+
+// CHECK: vcvtneps2bf16   -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xbf,0x72,0x72,0x80]
+          vcvtneps2bf16   -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vdpbf16ps %ymm4, %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0xf4]
+          vdpbf16ps %ymm4, %ymm5, %ymm6
+
+// CHECK: vdpbf16ps   268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x56,0x2f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vdpbf16ps   268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+
+// CHECK: vdpbf16ps   (%ecx){1to8}, %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x38,0x52,0x31]
+          vdpbf16ps   (%ecx){1to8}, %ymm5, %ymm6
+
+// CHECK: vdpbf16ps   4064(%ecx), %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0x71,0x7f]
+          vdpbf16ps   4064(%ecx), %ymm5, %ymm6
+
+// CHECK: vdpbf16ps   -512(%edx){1to8}, %ymm5, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x56,0xbf,0x52,0x72,0x80]
+          vdpbf16ps   -512(%edx){1to8}, %ymm5, %ymm6 {%k7} {z}
+
+// CHECK: vdpbf16ps %xmm4, %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0xf4]
+          vdpbf16ps %xmm4, %xmm5, %xmm6
+
+// CHECK: vdpbf16ps   268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x56,0x0f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vdpbf16ps   268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+
+// CHECK: vdpbf16ps   (%ecx){1to4}, %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x18,0x52,0x31]
+          vdpbf16ps   (%ecx){1to4}, %xmm5, %xmm6
+
+// CHECK: vdpbf16ps   2032(%ecx), %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0x71,0x7f]
+          vdpbf16ps   2032(%ecx), %xmm5, %xmm6
+
+// CHECK: vdpbf16ps   -512(%edx){1to4}, %xmm5, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x56,0x9f,0x52,0x72,0x80]
+          vdpbf16ps   -512(%edx){1to4}, %xmm5, %xmm6 {%k7} {z}
+
diff --git a/test/MC/X86/intel-syntax-avx512_bf16.s b/test/MC/X86/intel-syntax-avx512_bf16.s
new file mode 100644 (file)
index 0000000..3c9fca9
--- /dev/null
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xf4]
+          vcvtne2ps2bf16 zmm6, zmm5, zmm4
+
+// CHECK: vcvtne2ps2bf16 zmm6 {k7}, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x4f,0x72,0xf4]
+          vcvtne2ps2bf16 zmm6 {k7}, zmm5, zmm4
+
+// CHECK: vcvtne2ps2bf16 zmm6 {k7} {z}, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0xcf,0x72,0xf4]
+          vcvtne2ps2bf16 zmm6 {k7} {z}, zmm5, zmm4
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [ecx]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x31]
+          vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [ecx]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 291]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 291]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp - 4]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+          vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp - 4]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x30]
+          vcvtne2ps2bf16 zmm6, zmm5, dword ptr [eax]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx + 8128]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x80]
+          vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx - 8192]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx + 508]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx + 508]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x80]
+          vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm6, zmm5
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0xf5]
+          vcvtneps2bf16 ymm6, zmm5
+
+// CHECK: vcvtneps2bf16 ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16 ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtneps2bf16 ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x58,0x72,0x31]
+          vcvtneps2bf16 ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0x71,0x7f]
+          vcvtneps2bf16 ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtneps2bf16 ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xdf,0x72,0x72,0x80]
+          vcvtneps2bf16 ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vdpbf16ps zmm6, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0xf4]
+          vdpbf16ps zmm6, zmm5, zmm4
+
+// CHECK: vdpbf16ps zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x56,0x4f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vdpbf16ps zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdpbf16ps zmm6, zmm5, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x56,0x58,0x52,0x31]
+          vdpbf16ps zmm6, zmm5, dword ptr [ecx]{1to16}
+
+// CHECK: vdpbf16ps zmm6, zmm5, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0x71,0x7f]
+          vdpbf16ps zmm6, zmm5, zmmword ptr [ecx + 8128]
+
+// CHECK: vdpbf16ps zmm6 {k7} {z}, zmm5, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x56,0xdf,0x52,0x72,0x80]
+          vdpbf16ps zmm6 {k7} {z}, zmm5, dword ptr [edx - 512]{1to16}
+
diff --git a/test/MC/X86/intel-syntax-avx512_bf16_vl.s b/test/MC/X86/intel-syntax-avx512_bf16_vl.s
new file mode 100644 (file)
index 0000000..d0310fd
--- /dev/null
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xf4]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmm4
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7} {z}, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x8f,0x72,0xf4]
+          vcvtne2ps2bf16 xmm6 {k7} {z}, xmm5, xmm4
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [ecx]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x31]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [ecx]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 291]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 291]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp - 4]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp - 4]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x30]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [eax]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx + 2032]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x80]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx - 2048]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx + 508]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx + 508]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x80]
+          vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xf4]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymm4
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7} {z}, ymm5, ymm4
+// CHECK: encoding: [0x62,0xf2,0x57,0xaf,0x72,0xf4]
+          vcvtne2ps2bf16 ymm6 {k7} {z}, ymm5, ymm4
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [ecx]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x31]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [ecx]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 291]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 291]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp - 4]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp - 4]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x30]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [eax]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx + 4064]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x80]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx - 4096]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx + 508]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx + 508]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x80]
+          vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx - 512]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm6, xmm5
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0xf5]
+          vcvtneps2bf16 xmm6, xmm5
+
+// CHECK: vcvtneps2bf16 xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16 xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm6, dword ptr [ecx]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x18,0x72,0x31]
+          vcvtneps2bf16 xmm6, dword ptr [ecx]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0x71,0x7f]
+          vcvtneps2bf16 xmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x9f,0x72,0x72,0x80]
+          vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm6, ymm5
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0xf5]
+          vcvtneps2bf16 xmm6, ymm5
+
+// CHECK: vcvtneps2bf16 xmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16 xmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm6, dword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x38,0x72,0x31]
+          vcvtneps2bf16 xmm6, dword ptr [ecx]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0x71,0x7f]
+          vcvtneps2bf16 xmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xbf,0x72,0x72,0x80]
+          vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+
+// CHECK: vdpbf16ps ymm6, ymm5, ymm4
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0xf4]
+          vdpbf16ps ymm6, ymm5, ymm4
+
+// CHECK: vdpbf16ps ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x56,0x2f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vdpbf16ps ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdpbf16ps ymm6, ymm5, dword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x56,0x38,0x52,0x31]
+          vdpbf16ps ymm6, ymm5, dword ptr [ecx]{1to8}
+
+// CHECK: vdpbf16ps ymm6, ymm5, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0x71,0x7f]
+          vdpbf16ps ymm6, ymm5, ymmword ptr [ecx + 4064]
+
+// CHECK: vdpbf16ps ymm6 {k7} {z}, ymm5, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x56,0xbf,0x52,0x72,0x80]
+          vdpbf16ps ymm6 {k7} {z}, ymm5, dword ptr [edx - 512]{1to8}
+
+// CHECK: vdpbf16ps xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0xf4]
+          vdpbf16ps xmm6, xmm5, xmm4
+
+// CHECK: vdpbf16ps xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x56,0x0f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vdpbf16ps xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdpbf16ps xmm6, xmm5, dword ptr [ecx]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x56,0x18,0x52,0x31]
+          vdpbf16ps xmm6, xmm5, dword ptr [ecx]{1to4}
+
+// CHECK: vdpbf16ps xmm6, xmm5, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0x71,0x7f]
+          vdpbf16ps xmm6, xmm5, xmmword ptr [ecx + 2032]
+
+// CHECK: vdpbf16ps xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x56,0x9f,0x52,0x72,0x80]
+          vdpbf16ps xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]{1to4}
+
diff --git a/test/MC/X86/intel-syntax-x86-64-avx512_bf16.s b/test/MC/X86/intel-syntax-x86-64-avx512_bf16.s
new file mode 100644 (file)
index 0000000..faaeaa7
--- /dev/null
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x40,0x72,0xf4]
+          vcvtne2ps2bf16 zmm30, zmm29, zmm28
+
+// CHECK: vcvtne2ps2bf16 zmm30 {k7}, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x47,0x72,0xf4]
+          vcvtne2ps2bf16 zmm30 {k7}, zmm29, zmm28
+
+// CHECK: vcvtne2ps2bf16 zmm30 {k7} {z}, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x17,0xc7,0x72,0xf4]
+          vcvtne2ps2bf16 zmm30 {k7} {z}, zmm29, zmm28
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rcx]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x31]
+          vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rcx]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 291]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 268435456]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rsp - 4]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+          vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rsp - 4]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rcx]{1to16}
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x31]
+          vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rcx]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx + 8128]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx + 8128]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x80]
+          vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx - 8192]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx + 508]{1to16}
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx + 508]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x80]
+          vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm30, zmm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x72,0xf5]
+          vcvtneps2bf16 ymm30, zmm29
+
+// CHECK: vcvtneps2bf16 ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x7e,0x4f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16 ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtneps2bf16 ymm30, dword ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x42,0x7e,0x58,0x72,0x31]
+          vcvtneps2bf16 ymm30, dword ptr [r9]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm30, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x72,0x71,0x7f]
+          vcvtneps2bf16 ymm30, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvtneps2bf16 ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0x62,0x7e,0xdf,0x72,0x72,0x80]
+          vcvtneps2bf16 ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vdpbf16ps zmm30, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x16,0x40,0x52,0xf4]
+          vdpbf16ps zmm30, zmm29, zmm28
+
+// CHECK: vdpbf16ps zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x16,0x47,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vdpbf16ps zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdpbf16ps zmm30, zmm29, dword ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x42,0x16,0x50,0x52,0x31]
+          vdpbf16ps zmm30, zmm29, dword ptr [r9]{1to16}
+
+// CHECK: vdpbf16ps zmm30, zmm29, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0x62,0x16,0x40,0x52,0x71,0x7f]
+          vdpbf16ps zmm30, zmm29, zmmword ptr [rcx + 8128]
+
+// CHECK: vdpbf16ps zmm30 {k7} {z}, zmm29, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0x62,0x16,0xd7,0x52,0x72,0x80]
+          vdpbf16ps zmm30 {k7} {z}, zmm29, dword ptr [rdx - 512]{1to16}
+
diff --git a/test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s b/test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s
new file mode 100644 (file)
index 0000000..e8c5311
--- /dev/null
@@ -0,0 +1,178 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x00,0x72,0xf4]
+          vcvtne2ps2bf16 xmm30, xmm29, xmm28
+
+// CHECK: vcvtne2ps2bf16 xmm30 {k7}, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x07,0x72,0xf4]
+          vcvtne2ps2bf16 xmm30 {k7}, xmm29, xmm28
+
+// CHECK: vcvtne2ps2bf16 xmm30 {k7} {z}, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x87,0x72,0xf4]
+          vcvtne2ps2bf16 xmm30 {k7} {z}, xmm29, xmm28
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rcx]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x31]
+          vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rcx]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 291]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 268435456]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rsp - 4]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+          vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rsp - 4]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rcx]{1to4}
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x31]
+          vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rcx]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx + 2032]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx + 2032]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x80]
+          vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx - 2048]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx + 508]{1to4}
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx + 508]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x80]
+          vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x17,0x20,0x72,0xf4]
+          vcvtne2ps2bf16 ymm30, ymm29, ymm28
+
+// CHECK: vcvtne2ps2bf16 ymm30 {k7}, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x17,0x27,0x72,0xf4]
+          vcvtne2ps2bf16 ymm30 {k7}, ymm29, ymm28
+
+// CHECK: vcvtne2ps2bf16 ymm30 {k7} {z}, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x17,0xa7,0x72,0xf4]
+          vcvtne2ps2bf16 ymm30 {k7} {z}, ymm29, ymm28
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rcx]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x31]
+          vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rcx]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 291]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 268435456]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rsp - 4]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+          vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rsp - 4]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rcx]{1to8}
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x31]
+          vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rcx]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx + 4064]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx + 4064]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x80]
+          vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx - 4096]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx + 508]{1to8}
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x7f]
+          vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx + 508]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x80]
+          vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm30, xmm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x72,0xf5]
+          vcvtneps2bf16 xmm30, xmm29
+
+// CHECK: vcvtneps2bf16 xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16 xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x42,0x7e,0x18,0x72,0x31]
+          vcvtneps2bf16 xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x72,0x71,0x7f]
+          vcvtneps2bf16 xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x62,0x7e,0x9f,0x72,0x72,0x80]
+          vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm30, ymm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x72,0xf5]
+          vcvtneps2bf16 xmm30, ymm29
+
+// CHECK: vcvtneps2bf16 xmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x7e,0x2f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16 xmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x42,0x7e,0x38,0x72,0x31]
+          vcvtneps2bf16 xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x72,0x71,0x7f]
+          vcvtneps2bf16 xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x62,0x7e,0xbf,0x72,0x72,0x80]
+          vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vdpbf16ps ymm30, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x16,0x20,0x52,0xf4]
+          vdpbf16ps ymm30, ymm29, ymm28
+
+// CHECK: vdpbf16ps ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vdpbf16ps ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdpbf16ps ymm30, ymm29, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x42,0x16,0x30,0x52,0x31]
+          vdpbf16ps ymm30, ymm29, dword ptr [r9]{1to8}
+
+// CHECK: vdpbf16ps ymm30, ymm29, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x62,0x16,0x20,0x52,0x71,0x7f]
+          vdpbf16ps ymm30, ymm29, ymmword ptr [rcx + 4064]
+
+// CHECK: vdpbf16ps ymm30 {k7} {z}, ymm29, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x62,0x16,0xb7,0x52,0x72,0x80]
+          vdpbf16ps ymm30 {k7} {z}, ymm29, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vdpbf16ps xmm30, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x16,0x00,0x52,0xf4]
+          vdpbf16ps xmm30, xmm29, xmm28
+
+// CHECK: vdpbf16ps xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vdpbf16ps xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdpbf16ps xmm30, xmm29, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x42,0x16,0x10,0x52,0x31]
+          vdpbf16ps xmm30, xmm29, dword ptr [r9]{1to4}
+
+// CHECK: vdpbf16ps xmm30, xmm29, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x62,0x16,0x00,0x52,0x71,0x7f]
+          vdpbf16ps xmm30, xmm29, xmmword ptr [rcx + 2032]
+
+// CHECK: vdpbf16ps xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x62,0x16,0x97,0x52,0x72,0x80]
+          vdpbf16ps xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]{1to4}
+
diff --git a/test/MC/X86/x86-64-avx512_bf16-encoding.s b/test/MC/X86/x86-64-avx512_bf16-encoding.s
new file mode 100644 (file)
index 0000000..dcd8f79
--- /dev/null
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x02,0x17,0x40,0x72,0xf4]
+          vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x17,0x47,0x72,0xf4]
+          vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x17,0xc7,0x72,0xf4]
+          vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16   (%rcx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x31]
+          vcvtne2ps2bf16   (%rcx), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   291(%rax,%r14,8), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16   291(%rax,%r14,8), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   268435456(%rax,%r14,8), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16   268435456(%rax,%r14,8), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   -64(%rsp), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x74,0x24,0xff]
+          vcvtne2ps2bf16   -64(%rsp), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   (%rcx){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x31]
+          vcvtne2ps2bf16   (%rcx){1to16}, %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   8128(%rdx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   8128(%rdx), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   -8192(%rdx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -8192(%rdx), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   508(%rdx){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   508(%rdx){1to16}, %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16   -512(%rdx){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -512(%rdx){1to16}, %zmm29, %zmm30
+
+// CHECK: vcvtneps2bf16 %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x72,0xf5]
+          vcvtneps2bf16 %zmm29, %ymm30
+
+// CHECK: vcvtneps2bf16   268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7e,0x4f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16   268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtneps2bf16   (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x42,0x7e,0x58,0x72,0x31]
+          vcvtneps2bf16   (%r9){1to16}, %ymm30
+
+// CHECK: vcvtneps2bf16   8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x72,0x71,0x7f]
+          vcvtneps2bf16   8128(%rcx), %ymm30
+
+// CHECK: vcvtneps2bf16   -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0xdf,0x72,0x72,0x80]
+          vcvtneps2bf16   -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vdpbf16ps %zmm28, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x02,0x16,0x40,0x52,0xf4]
+          vdpbf16ps %zmm28, %zmm29, %zmm30
+
+// CHECK: vdpbf16ps   268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x16,0x47,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vdpbf16ps   268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
+
+// CHECK: vdpbf16ps   (%r9){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x42,0x16,0x50,0x52,0x31]
+          vdpbf16ps   (%r9){1to16}, %zmm29, %zmm30
+
+// CHECK: vdpbf16ps   8128(%rcx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x16,0x40,0x52,0x71,0x7f]
+          vdpbf16ps   8128(%rcx), %zmm29, %zmm30
+
+// CHECK: vdpbf16ps   -512(%rdx){1to16}, %zmm29, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x16,0xd7,0x52,0x72,0x80]
+          vdpbf16ps   -512(%rdx){1to16}, %zmm29, %zmm30 {%k7} {z}
+
diff --git a/test/MC/X86/x86-64-avx512_bf16_vl-encoding.s b/test/MC/X86/x86-64-avx512_bf16_vl-encoding.s
new file mode 100644 (file)
index 0000000..041a690
--- /dev/null
@@ -0,0 +1,178 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x17,0x00,0x72,0xf4]
+          vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x17,0x07,0x72,0xf4]
+          vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x17,0x87,0x72,0xf4]
+          vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16   (%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x31]
+          vcvtne2ps2bf16   (%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   291(%rax,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16   291(%rax,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   268435456(%rax,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16   268435456(%rax,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   -16(%rsp), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x74,0x24,0xff]
+          vcvtne2ps2bf16   -16(%rsp), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   (%rcx){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x31]
+          vcvtne2ps2bf16   (%rcx){1to4}, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   2032(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   2032(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   -2048(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -2048(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   508(%rdx){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   508(%rdx){1to4}, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16   -512(%rdx){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -512(%rdx){1to4}, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x17,0x20,0x72,0xf4]
+          vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x17,0x27,0x72,0xf4]
+          vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x17,0xa7,0x72,0xf4]
+          vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16   (%rcx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x31]
+          vcvtne2ps2bf16   (%rcx), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   291(%rax,%r14,8), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtne2ps2bf16   291(%rax,%r14,8), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   268435456(%rax,%r14,8), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+          vcvtne2ps2bf16   268435456(%rax,%r14,8), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   -32(%rsp), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x74,0x24,0xff]
+          vcvtne2ps2bf16   -32(%rsp), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   (%rcx){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x31]
+          vcvtne2ps2bf16   (%rcx){1to8}, %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   4064(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   4064(%rdx), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   -4096(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -4096(%rdx), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   508(%rdx){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x7f]
+          vcvtne2ps2bf16   508(%rdx){1to8}, %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16   -512(%rdx){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x80]
+          vcvtne2ps2bf16   -512(%rdx){1to8}, %ymm29, %ymm30
+
+// CHECK: vcvtneps2bf16 %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x72,0xf5]
+          vcvtneps2bf16 %xmm29, %xmm30
+
+// CHECK: vcvtneps2bf16x  268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16x  268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtneps2bf16   (%r9){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x42,0x7e,0x18,0x72,0x31]
+          vcvtneps2bf16   (%r9){1to4}, %xmm30
+
+// CHECK: vcvtneps2bf16x  2032(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x72,0x71,0x7f]
+          vcvtneps2bf16x  2032(%rcx), %xmm30
+
+// CHECK: vcvtneps2bf16   -512(%rdx){1to4}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0x9f,0x72,0x72,0x80]
+          vcvtneps2bf16   -512(%rdx){1to4}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtneps2bf16 %ymm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x72,0xf5]
+          vcvtneps2bf16 %ymm29, %xmm30
+
+// CHECK: vcvtneps2bf16y  268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7e,0x2f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtneps2bf16y  268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtneps2bf16   (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x42,0x7e,0x38,0x72,0x31]
+          vcvtneps2bf16   (%r9){1to8}, %xmm30
+
+// CHECK: vcvtneps2bf16y  4064(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x72,0x71,0x7f]
+          vcvtneps2bf16y  4064(%rcx), %xmm30
+
+// CHECK: vcvtneps2bf16   -512(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0xbf,0x72,0x72,0x80]
+          vcvtneps2bf16   -512(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vdpbf16ps %ymm28, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x16,0x20,0x52,0xf4]
+          vdpbf16ps %ymm28, %ymm29, %ymm30
+
+// CHECK: vdpbf16ps   268435456(%rbp,%r14,8), %ymm29, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vdpbf16ps   268435456(%rbp,%r14,8), %ymm29, %ymm30 {%k7}
+
+// CHECK: vdpbf16ps   (%r9){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x42,0x16,0x30,0x52,0x31]
+          vdpbf16ps   (%r9){1to8}, %ymm29, %ymm30
+
+// CHECK: vdpbf16ps   4064(%rcx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x16,0x20,0x52,0x71,0x7f]
+          vdpbf16ps   4064(%rcx), %ymm29, %ymm30
+
+// CHECK: vdpbf16ps   -512(%rdx){1to8}, %ymm29, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x16,0xb7,0x52,0x72,0x80]
+          vdpbf16ps   -512(%rdx){1to8}, %ymm29, %ymm30 {%k7} {z}
+
+// CHECK: vdpbf16ps %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x16,0x00,0x52,0xf4]
+          vdpbf16ps %xmm28, %xmm29, %xmm30
+
+// CHECK: vdpbf16ps   268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vdpbf16ps   268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vdpbf16ps   (%r9){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x42,0x16,0x10,0x52,0x31]
+          vdpbf16ps   (%r9){1to4}, %xmm29, %xmm30
+
+// CHECK: vdpbf16ps   2032(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x16,0x00,0x52,0x71,0x7f]
+          vdpbf16ps   2032(%rcx), %xmm29, %xmm30
+
+// CHECK: vdpbf16ps   -512(%rdx){1to4}, %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x16,0x97,0x52,0x72,0x80]
+          vdpbf16ps   -512(%rdx){1to4}, %xmm29, %xmm30 {%k7} {z}
+