void AArch64TargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts,
MacroBuilder &Builder) const {
+ // FIXME: Armv8.1 makes __ARM_FEATURE_CRC32 mandatory. Handle it here.
Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
}
void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Also include the Armv8.3 defines
- // FIXME: Armv8.4 makes some extensions mandatory. Handle them here.
+ // FIXME: Armv8.4 makes __ARM_FEATURE_ATOMICS, defined in GCC, mandatory.
+ // Add and handle it here.
getTargetDefinesARMV83A(Opts, Builder);
}
void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Also include the Armv8.4 defines
- // FIXME: Armv8.5 makes some extensions mandatory. Handle them here.
getTargetDefinesARMV84A(Opts, Builder);
}
+/// Emit the target macros for Armv8.6-A. No 8.6-specific macro is defined
+/// here yet (see the FIXME below); the function only chains to the Armv8.5-A
+/// defines.
+void AArch64TargetInfo::getTargetDefinesARMV86A(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ // Also include the Armv8.5 defines
+ // FIXME: Armv8.6 makes the following extensions mandatory:
+ // - __ARM_FEATURE_BF16
+ // - __ARM_FEATURE_MATMUL_INT8
+ // Handle them here.
+ getTargetDefinesARMV85A(Opts, Builder);
+}
void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
case llvm::AArch64::ArchKind::ARMV8_5A:
getTargetDefinesARMV85A(Opts, Builder);
break;
+ case llvm::AArch64::ArchKind::ARMV8_6A:
+ getTargetDefinesARMV86A(Opts, Builder);
+ break;
}
// All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
ArchKind = llvm::AArch64::ArchKind::ARMV8_4A;
if (Feature == "+v8.5a")
ArchKind = llvm::AArch64::ArchKind::ARMV8_5A;
+ if (Feature == "+v8.6a")
+ ArchKind = llvm::AArch64::ArchKind::ARMV8_6A;
if (Feature == "+fullfp16")
HasFullFP16 = true;
if (Feature == "+dotprod")
MacroBuilder &Builder) const;
void getTargetDefinesARMV85A(const LangOptions &Opts,
MacroBuilder &Builder) const;
+ void getTargetDefinesARMV86A(const LangOptions &Opts,
+ MacroBuilder &Builder) const;
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
return "8_4A";
case llvm::ARM::ArchKind::ARMV8_5A:
return "8_5A";
+ case llvm::ARM::ArchKind::ARMV8_6A:
+ return "8_6A";
case llvm::ARM::ArchKind::ARMV8MBaseline:
return "8M_BASE";
case llvm::ARM::ArchKind::ARMV8MMainline:
case llvm::ARM::ArchKind::ARMV8_3A:
case llvm::ARM::ArchKind::ARMV8_4A:
case llvm::ARM::ArchKind::ARMV8_5A:
+ case llvm::ARM::ArchKind::ARMV8_6A:
getTargetDefinesARMV83A(Opts, Builder);
break;
}
// RUN: %clang -target aarch64 -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-FP16 %s
// GENERICV85A-FP16: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.5a" "-target-feature" "+fullfp16"
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
+// RUN: %clang -target aarch64 -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
+// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
+// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
+// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
+// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
+// GENERICV86A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a"
+
+// RUN: %clang -target aarch64_be -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
+// RUN: %clang -target aarch64_be -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
+// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
+// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
+// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
+// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
+// GENERICV86A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a"
+
+// The SVE extension is an optional extension for Armv8-A.
+// RUN: %clang -target aarch64 -march=armv8a+sve -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-SVE %s
+// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-SVE %s
+// GENERICV8A-SVE: "-target-feature" "+sve"
+// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
+// GENERICV8A-NOSVE-NOT: "-target-feature" "+sve"
+
+// The BFloat16 extension is a mandatory component of the Armv8.6-A extensions, but is permitted as an
+// optional feature for any implementation of Armv8.2-A to Armv8.5-A (inclusive).
+// RUN: %clang -target aarch64 -march=armv8.5a+bf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16 %s
+// GENERICV85A-BF16: "-target-feature" "+bf16"
+// RUN: %clang -target aarch64 -march=armv8.5a+bf16+nobf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16-NO-BF16 %s
+// GENERICV85A-BF16-NO-BF16: "-target-feature" "-bf16"
+// RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s
+// GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve"
+
// fullfp16 is off by default for v8a, feature must not be mentioned
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
// RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
// RUN: %clang -target arm -march=armebv8.5-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V85A %s
// CHECK-BE-V85A: "-cc1"{{.*}} "-triple" "armebv8.5{{.*}}" "-target-cpu" "generic"
+// RUN: %clang -target armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// RUN: %clang -target arm -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// RUN: %clang -target arm -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// RUN: %clang -target armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// RUN: %clang -target arm -mlittle-endian -march=armv8.6-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
+// CHECK-V86A: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic"
+
+// RUN: %clang -target armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
+// RUN: %clang -target armv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
+// RUN: %clang -target armeb -march=armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
+// RUN: %clang -target armeb -march=armebv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
+// RUN: %clang -target arm -march=armebv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
+// RUN: %clang -target arm -march=armebv8.6-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
+// CHECK-BE-V86A: "-cc1"{{.*}} "-triple" "armebv8.6{{.*}}" "-target-cpu" "generic"
+
// Once we have CPUs with optional v8.2-A FP16, we will need a way to turn it
// on and off. Cortex-A53 is a placeholder for now.
// RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s
// RUN: %clang -target armv8a-linux-eabi -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V85A-FP16 %s
// CHECK-V85A-FP16: "-cc1"{{.*}} "-triple" "armv8.5{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+fullfp16"
+// RUN: %clang -target armv8a-linux-eabi -march=armv8.6-a+bf16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V86A-BF16 %s
+// CHECK-V86A-BF16: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+bf16"
+
// RUN: %clang -target arm -march=armv8.2-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
// RUN: %clang -target arm -march=armv8.2-a+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
// RUN: %clang -target arm -march=armv8.2-a+fp16+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
// CHECK-V85A: #define __ARM_ARCH_8_5A__ 1
// CHECK-V85A: #define __ARM_ARCH_PROFILE 'A'
+// RUN: %clang -target armv8.6a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V86A %s
+// CHECK-V86A: #define __ARM_ARCH 8
+// CHECK-V86A: #define __ARM_ARCH_8_6A__ 1
+// CHECK-V86A: #define __ARM_ARCH_PROFILE 'A'
+
// RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s
// CHECK-SOFTVFP-NOT: #define __ARM_FP 0x
enum SubArchType {
NoSubArch,
+ ARMSubArch_v8_6a,
ARMSubArch_v8_5a,
ARMSubArch_v8_4a,
ARMSubArch_v8_3a,
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
+AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
+ AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16")
+AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm")
AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
#undef AARCH64_ARCH_EXT_NAME
AEK_SVE2SHA3 = 1 << 26,
AEK_SVE2BITPERM = 1 << 27,
AEK_TME = 1 << 28,
+ AEK_BF16 = 1 << 29,
+ AEK_I8MM = 1 << 30,
};
enum class ArchKind {
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD))
+ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr)
ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
+ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16")
ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob")
ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, "+cdecp0", "-cdecp0")
AEK_SB = 1 << 17,
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
- AEK_CDECP0 = 1 << 20,
- AEK_CDECP1 = 1 << 21,
- AEK_CDECP2 = 1 << 22,
- AEK_CDECP3 = 1 << 23,
- AEK_CDECP4 = 1 << 24,
- AEK_CDECP5 = 1 << 25,
- AEK_CDECP6 = 1 << 26,
- AEK_CDECP7 = 1 << 27,
+ AEK_BF16 = 1 << 20,
+ AEK_CDECP0 = 1 << 21,
+ AEK_CDECP1 = 1 << 22,
+ AEK_CDECP2 = 1 << 23,
+ AEK_CDECP3 = 1 << 24,
+ AEK_CDECP4 = 1 << 25,
+ AEK_CDECP5 = 1 << 26,
+ AEK_CDECP6 = 1 << 27,
+ AEK_CDECP7 = 1 << 28,
// Unsupported extensions.
AEK_OS = 1ULL << 59,
Features.push_back("+v8.4a");
if (AK == ArchKind::ARMV8_5A)
Features.push_back("+v8.5a");
+ // Armv8.6-A maps to the "+v8.6a" subtarget feature.
+ if (AK == ArchKind::ARMV8_6A)
+ Features.push_back("+v8.6a");
return AK != ArchKind::INVALID;
}
case ArchKind::ARMV8_3A:
case ArchKind::ARMV8_4A:
case ArchKind::ARMV8_5A:
+ case ArchKind::ARMV8_6A:
case ArchKind::ARMV8R:
case ArchKind::ARMV8MBaseline:
case ArchKind::ARMV8MMainline:
case ArchKind::ARMV8_3A:
case ArchKind::ARMV8_4A:
case ArchKind::ARMV8_5A:
+ case ArchKind::ARMV8_6A:
return ProfileKind::A;
case ArchKind::ARMV2:
case ArchKind::ARMV2A:
.Case("v8.3a", "v8.3-a")
.Case("v8.4a", "v8.4-a")
.Case("v8.5a", "v8.5-a")
+ .Case("v8.6a", "v8.6-a")
.Case("v8r", "v8-r")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
return Triple::ARMSubArch_v8_4a;
case ARM::ArchKind::ARMV8_5A:
return Triple::ARMSubArch_v8_5a;
+ case ARM::ArchKind::ARMV8_6A:
+ return Triple::ARMSubArch_v8_6a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;
+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
+ "true", "Enable BFloat16 Extension" >;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
- FeatureBranchTargetId]
->;
+ FeatureBranchTargetId]>;
+
+def HasV8_6aOps : SubtargetFeature<
+ "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions",
+ [HasV8_5aOps, FeatureBF16]>;
//===----------------------------------------------------------------------===//
// Register File Description
let Inst{4-0} = Rd;
}
+
+//----------------------------------------------------------------------------
+// Armv8.6 BFloat16 Extension
+//----------------------------------------------------------------------------
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in {
+
+// Instruction format for the vector (non-indexed) BFloat16 dot product.
+// It derives from BaseSIMDThreeSameVectorTied with an empty pattern list and
+// sets AsmString so that $Rd is printed with the kind1 suffix while $Rn and
+// $Rm are both printed with the kind2 suffix.
+// AccumType and InputType are not referenced anywhere in this class body.
+class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType>
+ : BaseSIMDThreeSameVectorTied<Q, U, 0b010, 0b11111, RegType, asm, kind1, []> {
+ let AsmString = !strconcat(asm,
+ "{\t$Rd" # kind1 # ", $Rn" # kind2 #
+ ", $Rm" # kind2 # "}");
+}
+
+multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
+ def v4f16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64,
+ v2f32, v8i8>;
+ def v8f16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128,
+ v4f32, v16i8>;
+}
+
+// Instruction format for the indexed (by-element) BFloat16 dot product.
+// It derives from BaseSIMDIndexedTied with an empty pattern list; the third
+// register operand is always V128 and the lane operand is VectorIndexS.
+// The 2-bit lane index is split across the encoding: idx{0} is placed in
+// Inst{21} and idx{1} in Inst{11}.
+// AccumType and InputType are not referenced anywhere in this class body.
+class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
+ string dst_kind, string lhs_kind,
+ string rhs_kind,
+ RegisterOperand RegType,
+ ValueType AccumType,
+ ValueType InputType>
+ : BaseSIMDIndexedTied<Q, U, 0b0, 0b01, 0b1111,
+ RegType, RegType, V128, VectorIndexS,
+ asm, "", dst_kind, lhs_kind, rhs_kind,
+ []> {
+
+ bits<2> idx;
+ let Inst{21} = idx{0}; // L
+ let Inst{11} = idx{1}; // H
+}
+
+multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
+
+ def v4f16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h",
+ ".2h", V64, v2f32, v8i8>;
+ def v8f16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h",
+ ".2h", V128, v4f32, v16i8>;
+}
+
+class SIMDBF16MLAL<bit Q, string asm>
+ : BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
+ []> { // TODO: Add intrinsics
+ let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
+}
+
+// Indexed instruction format printed as "$Rd.4s, $Rn.8h, $Rm.h$idx"; it is
+// instantiated by the BFMLALBIdx (Q = 0) and BFMLALTIdx (Q = 1) records.
+// Q is written to Inst{30}. The 3-bit lane index is split across the
+// encoding: idx{1-0} go to Inst{21-20} and idx{2} goes to Inst{11}.
+// Rm is only 4 bits wide (operand class V128_lo) and fills Inst{19-16}.
+// The input $Rd is tied to the result $dst via the "$Rd = $dst" constraint.
+class SIMDBF16MLALIndex<bit Q, string asm>
+ : I<(outs V128:$dst),
+ (ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm,
+ "{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst",
+ []>, // TODO: Add intrinsics
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<4> Rm;
+ bits<3> idx;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-22} = 0b00111111;
+ let Inst{21-20} = idx{1-0};
+ let Inst{19-16} = Rm;
+ let Inst{15-12} = 0b1111;
+ let Inst{11} = idx{2}; // H
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SIMDThreeSameVectorBF16MatrixMul<string asm>
+ : BaseSIMDThreeSameVectorTied<1, 1, 0b010, 0b11101,
+ V128, asm, ".4s",
+ []> {
+ let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
+ ", $Rm", ".8h", "}");
+}
+
+class SIMD_BFCVTN
+ : BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128,
+ "bfcvtn", ".4h", ".4s",
+ []>;
+
+class SIMD_BFCVTN2
+ : BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128,
+ "bfcvtn2", ".8h", ".4s",
+ []>;
+
+// Scalar conversion format with a fixed 22-bit opcode in Inst{31-10}, the
+// source register in Inst{9-5} and the destination register in Inst{4-0}.
+// It has no selection patterns and is scheduled as WriteFCvt.
+// NOTE(review): the operands go from an FPR32 input to an FPR16 output, so
+// the class name reads as the opposite direction of the operand list --
+// confirm the intended naming.
+class BF16ToSinglePrecision<string asm>
+ : I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", []>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-10} = 0b0001111001100011010000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
+
// ARMv8.2-A Dot Product Instructions (Indexed)
class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
string lhs_kind, string rhs_kind,
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
+def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
+ AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
AssemblerPredicate<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">,
+ AssemblerPredicate<(all_of FeatureBF16), "bf16">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
}
+// ARMv8.6-A BFloat16 instructions (predicated on the BF16 feature)
+let Predicates = [HasBF16] in {
+defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
+defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
+def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
+def BFMLALB : SIMDBF16MLAL<0, "bfmlalb">;
+def BFMLALT : SIMDBF16MLAL<1, "bfmlalt">;
+def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb">;
+def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt">;
+def BFCVTN : SIMD_BFCVTN;
+def BFCVTN2 : SIMD_BFCVTN2;
+def BFCVT : BF16ToSinglePrecision<"bfcvt">;
+}
+
// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
+ // SVE BFloat16 instruction records; every def in this scope requires both
+ // the HasBF16 and HasSVE predicates.
+ // NOTE(review): the BFMMLA_B_* / BFMMLA_T_* records carry the "bfmlalb" /
+ // "bfmlalt" mnemonics rather than "bfmmla" -- the record names look
+ // misleading; confirm before anything else starts referring to them.
+ let Predicates = [HasBF16, HasSVE] in {
+ def BFDOT_ZZZ : sve_bfloat_dot<"bfdot">;
+ def BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot">;
+ def BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla">;
+ def BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb">;
+ def BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt">;
+ def BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb">;
+ def BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt">;
+ def BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt">;
+ def BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt">;
+ }
+
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
+ bool HasV8_6aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasMTE = false;
bool HasTME = false;
+ // Armv8.6-A Extensions
+ bool HasBF16 = false;
+
// Arm SVE2 extensions
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
+ // Armv8.6-A Extensions
+ bool hasBF16() const { return HasBF16; }
+
bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
Str += "ARMv8.4a";
else if (FBS[AArch64::HasV8_5aOps])
Str += "ARMv8.5a";
+ else if (FBS[AArch64::HasV8_6aOps])
+ Str += "ARMv8.6a";
else {
auto ext = std::find_if(std::begin(ExtensionMap),
std::end(ExtensionMap),
break;
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
+ case AArch64::ArchKind::ARMV8_6A:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
break;
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
+ case AArch64::ArchKind::ARMV8_6A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME)>;
}
+//===----------------------------------------------------------------------===//
+// SVE BFloat16 Group
+//===----------------------------------------------------------------------===//
+
+// Common base for the SVE BFloat16 formats that share the fixed prefix in
+// Inst{31-21}. The caller supplies the 2-bit opc (Inst{15-14}), the asm
+// operand string and the input operand dag. Inst{20-16} is left unset here;
+// the derived classes in this file fill it with Zm (and a lane index where
+// applicable). The result $Zda is tied to the input named $_Zda, which the
+// derived classes provide as the first entry of iops.
+// No patterns are attached and the scheduling list is empty.
+class sve_bfloat_dot_base<bits<2> opc, string asm, string ops, dag iops>
+: I<(outs ZPR32:$Zda), iops, asm, ops, "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-21} = 0b01100100011;
+ let Inst{15-14} = opc;
+ let Inst{13-10} = 0b0000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ElementSizeH;
+}
+
+class sve_bfloat_dot<string asm>
+: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm",
+ (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> {
+ bits<5> Zm;
+ let Inst{20-16} = Zm;
+}
+
+class sve_bfloat_dot_indexed<string asm>
+: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
+ (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> {
+ bits<2> iop;
+ bits<3> Zm;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+}
+
+class sve_bfloat_matmul<string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
+ asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zm;
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-21} = 0b01100100011;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b111001;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ElementSizeH;
+}
+
+// Reuses the operand list and field layout of sve_bfloat_matmul but overrides
+// three parts of its fixed encoding: Inst{23} becomes 1, Inst{14-13} become
+// 0b00, and Inst{10} is taken from BT (0 for "bfmlalb", 1 for "bfmlalt" in
+// the records that instantiate this class).
+class sve_bfloat_matmul_longvecl<bit BT, string asm>
+: sve_bfloat_matmul<asm> {
+ let Inst{23} = 0b1;
+ let Inst{14-13} = 0b00;
+ let Inst{10} = BT;
+}
+
+class sve_bfloat_matmul_longvecl_idx<bit BT, string asm>
+: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
+ (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> {
+ bits<3> iop;
+ bits<3> Zm;
+ let Inst{23} = 0b1;
+ let Inst{20-19} = iop{2-1};
+ let Inst{18-16} = Zm;
+ let Inst{11} = iop{0};
+ let Inst{10} = BT;
+}
+
+// Predicated (merging, "$Pg/m") SVE BFloat16 conversion format taking a ZPR32
+// source and producing a ZPR16 result that is tied to the $_Zd input.
+// N is written to Inst{24}; the records in this patch pass 1 for "bfcvt" and
+// 0 for "bfcvtnt". The governing predicate is restricted to 3 bits
+// (PPR3bAny) and occupies Inst{12-10}.
+// No patterns are attached and the scheduling list is empty.
+class sve_bfloat_convert<bit N, string asm>
+: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<3> Pg;
+ bits<5> Zn;
+ let Inst{31-25} = 0b0110010;
+ let Inst{24} = N;
+ let Inst{23-13} = 0b10001010101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = DestructiveOther;
+ // NOTE(review): the reason this format is marked as having side effects is
+ // not visible here -- confirm it is intentional.
+ let hasSideEffects = 1;
+ let ElementSize = ElementSizeS;
+}
+
/// Addressing modes
def am_sve_indexed_s4 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
def am_sve_indexed_s6 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
"Enable v8.5a Speculation Barrier" >;
+// Armv8.6-A extensions
+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
+ "Enable support for BFloat16 instructions", [FeatureNEON]>;
+
// Armv8.1-M extensions
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
"Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureSB]>;
+def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
+ "Support ARM v8.6a instructions",
+ [HasV8_5aOps, FeatureBF16]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
+def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
(VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+
+// ARMv8.6a BFloat16 instructions.
+let Predicates = [HasBF16, HasNEON] in {
+class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
+ dag oops, dag iops>
+ : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
+ N3RegFrm, IIC_VDOTPROD, "", "", []> {
+ let hasNoSchedulingInfo = 1;
+ let DecoderNamespace = "VFPV8";
+}
+
+class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
+ : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
+ (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm)> {
+ let Constraints = "$dst = $Vd";
+ let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
+ let DecoderNamespace = "VFPV8";
+}
+
+// Indexed (by-lane) variant of the BF16 VDOT format. It emits a single
+// unnamed record built on BF16VDOT whose $Vm operand is always DPR_VFP2 and
+// whose one-bit lane selector is encoded in Inst{5}. The accumulator input
+// $Vd is tied to the result $dst.
+// AccumTy, InputTy and RHS are accepted but not referenced in this body.
+multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
+ ValueType InputTy, dag RHS> {
+
+ def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
+ (ins RegTy:$Vd, RegTy:$Vn,
+ DPR_VFP2:$Vm, VectorIndex32:$lane)> {
+ bit lane;
+ let Inst{5} = lane;
+ let Constraints = "$dst = $Vd";
+ let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
+ let DecoderNamespace = "VFPV8";
+ }
+
+}
+
+def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>;
+def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>;
+
+defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8, (v2f32 DPR_VFP2:$Vm)>;
+defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+
+class BF16MM<bit Q, RegisterClass RegTy,
+ string opc>
+ : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
+ (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
+ N3RegFrm, IIC_VDOTPROD, "", "", []> {
+ let Constraints = "$dst = $Vd";
+ let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
+ let DecoderNamespace = "VFPV8";
+ let hasNoSchedulingInfo = 1;
+}
+
+def VMMLA : BF16MM<1, QPR, "vmmla">;
+
+class VBF16MALQ<bit T, string suffix>
+ : N3VCP8<0b00, 0b11, T, 1,
+ (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
+ NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
+ []> { // TODO: Add intrinsics
+ let Constraints = "$dst = $Vd";
+ let DecoderNamespace = "VFPV8";
+ let hasNoSchedulingInfo = 1;
+}
+
+def VBF16MALTQ: VBF16MALQ<1, "t">;
+def VBF16MALBQ: VBF16MALQ<0, "b">;
+
+multiclass VBF16MALQI<bit T, string suffix> {
+ def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
+ (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
+ IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bits<2> idx;
+ let Inst{5} = idx{1};
+ let Inst{3} = idx{0};
+ let Constraints = "$dst = $Vd";
+ let DecoderNamespace = "VFPV8";
+ let hasNoSchedulingInfo = 1;
+ }
+
+}
+
+defm VBF16MALTQI: VBF16MALQI<1, "t">;
+defm VBF16MALBQI: VBF16MALQI<0, "b">;
+
+let hasNoSchedulingInfo = 1 in {
+def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
+ (outs DPR:$Vd), (ins QPR:$Vm),
+ NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
+}
+}
+// End of BFloat16 instructions
} // End of 'let Constraints = "$a = $dst" in'
+// BFloat16 - Single precision, unary, predicated
+// VFP-format record printed as "<opc>.bf16.f32 $Sd, $Sm" and gated on
+// HasBF16. The extra $dst input is tied to $Sd via the register constraint.
+// Both 5-bit register numbers are split across the encoding: the upper four
+// bits of Sm go to Inst{3-0} with Sm{0} in Inst{5}, and the upper four bits
+// of Sd go to Inst{15-12} with Sd{0} in Inst{22}. op7_6 fills Inst{7-6} and
+// is what distinguishes the instantiations (0b01 for "vcvtb", 0b11 for
+// "vcvtt").
+class BF16_VCVT<string opc, bits<2> op7_6>
+ : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
+ VFPUnaryFrm, NoItinerary,
+ opc, ".bf16.f32\t$Sd, $Sm", []>,
+ RegConstraint<"$dst = $Sd">,
+ Requires<[HasBF16]>,
+ Sched<[]> {
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101; // opcode1
+ let Inst{21-20} = 0b11; // opcode2
+ let Inst{19-16} = 0b0011; // opcode3
+ let Inst{11-8} = 0b1001;
+ let Inst{7-6} = op7_6;
+ let Inst{4} = 0;
+ let DecoderNamespace = "VFPV8";
+}
+
+def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>;
+def BF16_VCVTT : BF16_VCVT<"vcvtt", 0b11>;
+
//===----------------------------------------------------------------------===//
// FP Multiply-Accumulate Operations.
//
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
+def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
+ AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">;
AssemblerPredicate<(all_of FeatureFullFP16),"full half-float">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
AssemblerPredicate<(all_of FeatureFP16FML),"full half-float fml">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">,
+ AssemblerPredicate<(all_of FeatureBF16),"BFloat16 floating point extension">;
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
AssemblerPredicate<(all_of FeatureHWDivThumb), "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
ARMv83a,
ARMv84a,
ARMv85a,
+ ARMv86a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
+ bool HasV8_6aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
/// HasFP16FML - True if subtarget supports half-precision FP fml operations
bool HasFP16FML = false;
+ /// HasBF16 - True if subtarget supports BFloat16 floating point operations
+ bool HasBF16 = false;
+
/// HasD32 - True if subtarget has the full 32 double precision
/// FP registers for VFPv3.
bool HasD32 = false;
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
+ bool hasV8_6aOps() const { return HasV8_6aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+ Mnemonic == "vfmat" || Mnemonic == "vfmab" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "sb" || Mnemonic == "ssbb" ||
Mnemonic == "pssbb" ||
Mnemonic == "bfcsel" || Mnemonic == "wls" ||
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
+ case ARM::ArchKind::ARMV8_6A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
--- /dev/null
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s | FileCheck %s
+
+bfcvt z0.s, p0/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.s, p0/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.h, p0/m, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.h, p0/m, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.h, p0/z, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bfcvt z0.h, p0/z, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.h, p8/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: bfcvt z0.h, p8/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/m, z7.h
+bfcvt z0.h, p0/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+
+bfcvt z0.H, p0/m, z1.S
+// CHECK-INST: bfcvt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0.S, p0/m, z2.S
+// CHECK-INST: movprfx z0.s, p0/m, z2.s
+// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfcvt z0.H, p0/m, z1.S
+// CHECK-INST: bfcvt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z2
+// CHECK-INST: movprfx z0, z2
+// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfcvt z0.H, p0/m, z1.S
+// CHECK-INST: bfcvt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
+// CHECK-ERROR: instruction requires: bf16 sve
--- /dev/null
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s | FileCheck %s
+
+bfcvtnt z0.s, p0/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvtnt z0.s, p0/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvtnt z0.h, p0/m, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvtnt z0.h, p0/z, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bfcvtnt z0.h, p0/z, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvtnt z0.h, p8/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/m, z7.h
+bfcvtnt z0.h, p0/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+
+bfcvtnt z0.H, p0/m, z1.S
+// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0.S, p0/m, z2.S
+// CHECK-INST: movprfx z0.s, p0/m, z2.s
+// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfcvtnt z0.H, p0/m, z1.S
+// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z2
+// CHECK-INST: movprfx z0, z2
+// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfcvtnt z0.H, p0/m, z1.S
+// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
--- /dev/null
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s | FileCheck %s
+
+bfdot z0.s, z1.s, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfdot z0.s, z1.s, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfdot z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.s, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h
+// CHECK-NEXT: bfdot z0.s, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/m, z7.s
+bfdot z0.s, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bfdot z0.s, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.s, z1.s, z2.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfdot z0.s, z1.s, z2.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.h, z1.h, z2.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfdot z0.h, z1.h, z2.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.s, z1.h, z2.s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h
+// CHECK-NEXT: bfdot z0.s, z1.h, z2.s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.s, z1.h, z8.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bfdot z0.s, z1.h, z8.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfdot z0.s, z1.h, z2.h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/m, z7.s
+bfdot z0.s, z1.h, z2.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+
+bfdot z0.S, z1.H, z2.H
+// CHECK-INST: bfdot z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0x80,0x62,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfdot z0.S, z1.H, z2.H[0]
+// CHECK-INST: bfdot z0.s, z1.h, z2.h[0]
+// CHECK-ENCODING: [0x20,0x40,0x62,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfdot z0.S, z1.H, z2.H[3]
+// CHECK-INST: bfdot z0.s, z1.h, z2.h[3]
+// CHECK-ENCODING: [0x20,0x40,0x7a,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfdot z0.S, z1.H, z2.H
+// CHECK-INST: bfdot z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0x80,0x62,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfdot z0.S, z1.H, z2.H[0]
+// CHECK-INST: bfdot z0.s, z1.h, z2.h[0]
+// CHECK-ENCODING: [0x20,0x40,0x62,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfdot z0.S, z1.H, z2.H[3]
+// CHECK-INST: bfdot z0.s, z1.h, z2.h[3]
+// CHECK-ENCODING: [0x20,0x40,0x7a,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
--- /dev/null
+// RUN: not llvm-mc -o - -triple=aarch64 -mattr=+sve,+bf16 2>&1 %s | FileCheck %s
+
+bfmlalb z0.S, z1.H, z7.H[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: bfmlalb z0.S, z1.H, z7.H[8]
+// CHECK-NEXT: ^
+
+bfmlalb z0.S, z1.H, z8.H[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bfmlalb z0.S, z1.H, z8.H[7]
+// CHECK-NEXT: ^
+
+bfmlalt z0.S, z1.H, z7.H[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.H[8]
+// CHECK-NEXT: ^
+
+bfmlalt z0.S, z1.H, z8.H[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bfmlalt z0.S, z1.H, z8.H[7]
+// CHECK-NEXT: ^
+
+bfmlalt z0.S, z1.H, z7.2h[2]
+// CHECK: error: invalid vector kind qualifier
+// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.2h[2]
+// CHECK-NEXT: ^
+
+bfmlalt z0.S, z1.H, z2.s[2]
+// CHECK: error: Invalid restricted vector register, expected z0.h..z7.h
+// CHECK-NEXT: bfmlalt z0.S, z1.H, z2.s[2]
+// CHECK-NEXT: ^
+
+bfmlalt z0.S, z1.s, z2.h[2]
+// CHECK: error: invalid element width
+// CHECK-NEXT: bfmlalt z0.S, z1.s, z2.h[2]
+// CHECK-NEXT: ^
+
+movprfx z0.s, p0/m, z7.s
+bfmlalt z0.s, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx
+// CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h
+// CHECK-NEXT: ^
--- /dev/null
+// RUN: llvm-mc -o - -triple=aarch64 -show-encoding -mattr=+sve,+bf16 %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -o - -triple=aarch64 -show-encoding %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+
+bfmlalb z0.S, z1.H, z2.H
+// CHECK-INST: bfmlalb z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0x80,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalt z0.S, z1.H, z2.H
+// CHECK-INST: bfmlalt z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0x84,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalb z0.S, z1.H, z2.H[0]
+// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0]
+// CHECK-ENCODING: [0x20,0x40,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalt z0.S, z1.H, z2.H[0]
+// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0]
+// CHECK-ENCODING: [0x20,0x44,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalb z0.S, z1.H, z2.H[7]
+// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7]
+// CHECK-ENCODING: [0x20,0x48,0xfa,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalt z0.S, z1.H, z2.H[7]
+// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7]
+// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalt z0.S, z1.H, z7.H[7]
+// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7]
+// CHECK-ENCODING: [0x20,0x4c,0xff,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalb z10.S, z21.H, z14.H
+// CHECK-INST: bfmlalb z10.s, z21.h, z14.h
+// CHECK-ENCODING: [0xaa,0x82,0xee,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalt z14.S, z10.H, z21.H
+// CHECK-INST: bfmlalt z14.s, z10.h, z21.h
+// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+bfmlalb z21.s, z14.h, z3.h[2]
+// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2]
+// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalb z0.S, z1.H, z2.H
+// CHECK-INST: bfmlalb z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0x80,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalt z0.S, z1.H, z2.H
+// CHECK-INST: bfmlalt z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0x84,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalb z0.S, z1.H, z2.H[0]
+// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0]
+// CHECK-ENCODING: [0x20,0x40,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalt z0.S, z1.H, z2.H[0]
+// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0]
+// CHECK-ENCODING: [0x20,0x44,0xe2,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalb z0.S, z1.H, z2.H[7]
+// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7]
+// CHECK-ENCODING: [0x20,0x48,0xfa,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalt z0.S, z1.H, z2.H[7]
+// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7]
+// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalt z0.S, z1.H, z7.H[7]
+// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7]
+// CHECK-ENCODING: [0x20,0x4c,0xff,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z10, z7
+// CHECK-INST: movprfx z10, z7
+// CHECK-ENCODING: [0xea,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalb z10.S, z21.H, z14.H
+// CHECK-INST: bfmlalb z10.s, z21.h, z14.h
+// CHECK-ENCODING: [0xaa,0x82,0xee,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z14, z7
+// CHECK-INST: movprfx z14, z7
+// CHECK-ENCODING: [0xee,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalt z14.S, z10.H, z21.H
+// CHECK-INST: bfmlalt z14.s, z10.h, z21.h
+// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+movprfx z21, z7
+// CHECK-INST: movprfx z21, z7
+// CHECK-ENCODING: [0xf5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmlalb z21.s, z14.h, z3.h[2]
+// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2]
+// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
--- /dev/null
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s | FileCheck %s
+
+bfmmla z0.s, z1.s, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfmmla z0.s, z1.s, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfmmla z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfmmla z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfmmla z0.s, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfmmla z0.s, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/m, z7.s
+bfmmla z0.s, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bfmmla z0.s, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- /dev/null
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+
+bfmmla z0.S, z1.H, z2.H
+// CHECK-INST: bfmmla z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0xe4,0x62,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+
+bfmmla z0.S, z1.H, z2.H
+// CHECK-INST: bfmmla z0.s, z1.h, z2.h
+// CHECK-ENCODING: [0x20,0xe4,0x62,0x64]
+// CHECK-ERROR: instruction requires: bf16 sve
--- /dev/null
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+bf16 < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-bf16 < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
+// RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
+
+
+bfdot v2.2s, v3.4h, v4.4h
+bfdot v2.4s, v3.8h, v4.8h
+// CHECK: bfdot v2.2s, v3.4h, v4.4h // encoding: [0x62,0xfc,0x44,0x2e]
+// CHECK: bfdot v2.4s, v3.8h, v4.8h // encoding: [0x62,0xfc,0x44,0x6e]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.4h
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.8h
+
+bfdot v2.2s, v3.4h, v4.2h[0]
+bfdot v2.2s, v3.4h, v4.2h[1]
+bfdot v2.2s, v3.4h, v4.2h[2]
+bfdot v2.2s, v3.4h, v4.2h[3]
+// CHECK: bfdot v2.2s, v3.4h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x0f]
+// CHECK: bfdot v2.2s, v3.4h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x0f]
+// CHECK: bfdot v2.2s, v3.4h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x0f]
+// CHECK: bfdot v2.2s, v3.4h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x0f]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[0]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[1]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[2]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[3]
+
+
+bfdot v2.4s, v3.8h, v4.2h[0]
+bfdot v2.4s, v3.8h, v4.2h[1]
+bfdot v2.4s, v3.8h, v4.2h[2]
+bfdot v2.4s, v3.8h, v4.2h[3]
+// CHECK: bfdot v2.4s, v3.8h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x4f]
+// CHECK: bfdot v2.4s, v3.8h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x4f]
+// CHECK: bfdot v2.4s, v3.8h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x4f]
+// CHECK: bfdot v2.4s, v3.8h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x4f]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[0]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[1]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[2]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[3]
+
+
+bfmmla v2.4s, v3.8h, v4.8h
+bfmmla v3.4s, v4.8h, v5.8h
+// CHECK: bfmmla v2.4s, v3.8h, v4.8h // encoding: [0x62,0xec,0x44,0x6e]
+// CHECK: bfmmla v3.4s, v4.8h, v5.8h // encoding: [0x83,0xec,0x45,0x6e]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfmmla v2.4s, v3.8h, v4.8h
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfmmla v3.4s, v4.8h, v5.8h
+
+bfcvtn v5.4h, v5.4s
+bfcvtn2 v5.8h, v5.4s
+// CHECK: bfcvtn v5.4h, v5.4s // encoding: [0xa5,0x68,0xa1,0x0e]
+// CHECK: bfcvtn2 v5.8h, v5.4s // encoding: [0xa5,0x68,0xa1,0x4e]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfcvtn v5.4h, v5.4s
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfcvtn2 v5.8h, v5.4s
+
+bfcvt h5, s3
+// CHECK: bfcvt h5, s3 // encoding: [0x65,0x40,0x63,0x1e]
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfcvt h5, s3
+
+bfmlalb V10.4S, V21.8h, V14.8H
+bfmlalt V21.4S, V14.8h, V10.8H
+// CHECK: bfmlalb v10.4s, v21.8h, v14.8h // encoding: [0xaa,0xfe,0xce,0x2e]
+// CHECK-NEXT: bfmlalt v21.4s, v14.8h, v10.8h // encoding: [0xd5,0xfd,0xca,0x6e]
+// NOBF16: error: instruction requires: bf16
+// NOBF16-NEXT: bfmlalb V10.4S, V21.8h, V14.8H
+// NOBF16-NEXT: ^
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfmlalt V21.4S, V14.8h, V10.8H
+// NOBF16-NEXT: ^
+
+bfmlalb V14.4S, V21.8H, V10.H[1]
+bfmlalb V14.4S, V21.8H, V10.H[2]
+bfmlalb V14.4S, V21.8H, V10.H[7]
+bfmlalt V21.4S, V10.8H, V14.H[1]
+bfmlalt V21.4S, V10.8H, V14.H[2]
+bfmlalt V21.4S, V10.8H, V14.H[7]
+// CHECK: bfmlalb v14.4s, v21.8h, v10.h[1] // encoding: [0xae,0xf2,0xda,0x0f]
+// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[2] // encoding: [0xae,0xf2,0xea,0x0f]
+// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[7] // encoding: [0xae,0xfa,0xfa,0x0f]
+// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[1] // encoding: [0x55,0xf1,0xde,0x4f]
+// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[2] // encoding: [0x55,0xf1,0xee,0x4f]
+// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[7] // encoding: [0x55,0xf9,0xfe,0x4f]
+// NOBF16: error: instruction requires: bf16
+// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[1]
+// NOBF16-NEXT: ^
+// NOBF16: error: instruction requires: bf16
+// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[2]
+// NOBF16-NEXT: ^
+// NOBF16: error: instruction requires: bf16
+// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[7]
+// NOBF16-NEXT: ^
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[1]
+// NOBF16-NEXT: ^
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[2]
+// NOBF16-NEXT: ^
+// NOBF16: instruction requires: bf16
+// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[7]
+// NOBF16-NEXT: ^
--- /dev/null
+// RUN: not llvm-mc -triple arm -mattr=+bf16,-neon %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL
+// RUN: not llvm-mc -triple arm -mattr=-bf16 %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NOBF16,ALL
+// RUN: not llvm-mc -triple arm %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL
+//
+vdot.bf16 d3, d4, d5
+vdot.bf16 q0, q1, q2
+vdot.bf16 d3, d4, d5[1]
+vdot.bf16 q0, q1, d5[1]
+vmmla.bf16 q0, q1, q2
+vcvt.bf16.f32 d1, q3
+vcvtbeq.bf16.f32 s1, s3
+vcvttne.bf16.f32 s1, s3
+// NOBF16: error: instruction requires: BFloat16 floating point extension
+// NOBF16-NEXT: vdot.bf16 d3, d4, d5
+// NOBF16-NEXT: ^
+// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
+// NOBF16-NEXT: vdot.bf16 q0, q1, q2
+// NOBF16-NEXT: ^
+// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
+// NOBF16-NEXT: vdot.bf16 d3, d4, d5[1]
+// NOBF16-NEXT: ^
+// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
+// NOBF16-NEXT: vdot.bf16 q0, q1, d5[1]
+// NOBF16-NEXT: ^
+// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
+// NOBF16-NEXT: vmmla.bf16 q0, q1, q2
+// NOBF16-NEXT: ^
+// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
+// NOBF16-NEXT: vcvt.bf16.f32 d1, q3
+// NOBF16-NEXT: ^
+
+// NONEON: error: instruction requires: BFloat16 floating point extension NEON
+// NONEON-NEXT: vdot.bf16 d3, d4, d5
+// NONEON-NEXT: ^
+// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
+// NONEON-NEXT: vdot.bf16 q0, q1, q2
+// NONEON-NEXT: ^
+// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
+// NONEON-NEXT: vdot.bf16 d3, d4, d5[1]
+// NONEON-NEXT: ^
+// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
+// NONEON-NEXT: vdot.bf16 q0, q1, d5[1]
+// NONEON-NEXT: ^
+// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
+// NONEON-NEXT: vmmla.bf16 q0, q1, q2
+// NONEON-NEXT: ^
+// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
+// NONEON-NEXT: vcvt.bf16.f32 d1, q3
+// NONEON-NEXT: ^
+
+
+// ALL-NEXT: error: instruction requires: BFloat16 floating point extension
+// ALL-NEXT: vcvtbeq.bf16.f32 s1, s3
+// ALL-NEXT: ^
+// ALL-NEXT: error: instruction requires: BFloat16 floating point extension
+// ALL-NEXT: vcvttne.bf16.f32 s1, s3
+// ALL-NEXT: ^
--- /dev/null
+// RUN: not llvm-mc -o - -triple arm -mattr=+v8.6a -show-encoding %s 2>&1 | FileCheck %s
+vfmat.bf16 d0, d0, d0
+vfmat.bf16 d0, d0, q0
+vfmat.bf16 d0, q0, d0
+vfmat.bf16 q0, d0, d0
+vfmat.bf16 q0, q0, d0
+vfmat.bf16 q0, d0, q0
+vfmat.bf16 d0, q0, q0
+vfmat.bf16 q0, q0, q0[3]
+vfmat.bf16 q0, q0, q0[3]
+vfmat.bf16 q0, d0, d0[0]
+vfmat.bf16 d0, q0, d0[0]
+vfmat.bf16 q0, d0, d0[9]
+
+vfmab.bf16 d0, d0, d0
+vfmab.bf16 d0, d0, q0
+vfmab.bf16 d0, q0, d0
+vfmab.bf16 q0, d0, d0
+vfmab.bf16 q0, q0, d0
+vfmab.bf16 q0, d0, q0
+vfmab.bf16 d0, q0, q0
+vfmab.bf16 q0, q0, q0[3]
+vfmab.bf16 q0, q0, q0[3]
+vfmab.bf16 q0, d0, d0[0]
+vfmab.bf16 d0, q0, d0[0]
+vfmab.bf16 q0, d0, d0[9]
+
+//CHECK:error: invalid instruction
+//CHECK-NEXT:vfmat.bf16 d0, d0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmat.bf16 d0, d0, q0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmat.bf16 d0, q0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmat.bf16 q0, d0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
+//CHECK-NEXT:vfmat.bf16 q0, q0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:note: too few operands for instruction
+//CHECK-NEXT:vfmat.bf16 q0, q0, d0
+//CHECK-NEXT: ^
+//CHECK-NEXT:note: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmat.bf16 q0, q0, d0
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmat.bf16 q0, d0, q0
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmat.bf16 d0, q0, q0
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
+//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
+//CHECK-NEXT:^
+//CHECK-NEXT:note: operand must be a register in range [d0, d7]
+//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:note: too many operands for instruction
+//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
+//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
+//CHECK-NEXT:^
+//CHECK-NEXT:note: operand must be a register in range [d0, d7]
+//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:note: too many operands for instruction
+//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmat.bf16 q0, d0, d0[0]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmat.bf16 d0, q0, d0[0]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmat.bf16 q0, d0, d0[9]
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmab.bf16 d0, d0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmab.bf16 d0, d0, q0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmab.bf16 d0, q0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmab.bf16 q0, d0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
+//CHECK-NEXT:vfmab.bf16 q0, q0, d0
+//CHECK-NEXT:^
+//CHECK-NEXT:note: too few operands for instruction
+//CHECK-NEXT:vfmab.bf16 q0, q0, d0
+//CHECK-NEXT: ^
+//CHECK-NEXT:note: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmab.bf16 q0, q0, d0
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmab.bf16 q0, d0, q0
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmab.bf16 d0, q0, q0
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
+//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
+//CHECK-NEXT:^
+//CHECK-NEXT:note: operand must be a register in range [d0, d7]
+//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:note: too many operands for instruction
+//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
+//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
+//CHECK-NEXT:^
+//CHECK-NEXT:note: operand must be a register in range [d0, d7]
+//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:note: too many operands for instruction
+//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmab.bf16 q0, d0, d0[0]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: operand must be a register in range [q0, q15]
+//CHECK-NEXT:vfmab.bf16 d0, q0, d0[0]
+//CHECK-NEXT: ^
+//CHECK-NEXT:error: invalid instruction
+//CHECK-NEXT:vfmab.bf16 q0, d0, d0[9]
--- /dev/null
+// RUN: llvm-mc -triple arm -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
+// RUN: llvm-mc -triple arm -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK
+
+vdot.bf16 d3, d4, d5
+// CHECK: vdot.bf16 d3, d4, d5 @ encoding: [0x05,0x3d,0x04,0xfc]
+vdot.bf16 q0, q1, q2
+// CHECK-NEXT: vdot.bf16 q0, q1, q2 @ encoding: [0x44,0x0d,0x02,0xfc]
+vdot.bf16 d3, d4, d5[1]
+// CHECK-NEXT: vdot.bf16 d3, d4, d5[1] @ encoding: [0x25,0x3d,0x04,0xfe]
+vdot.bf16 q0, q1, d5[1]
+// CHECK-NEXT: vdot.bf16 q0, q1, d5[1] @ encoding: [0x65,0x0d,0x02,0xfe]
+vmmla.bf16 q0, q1, q2
+// CHECK-NEXT: vmmla.bf16 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xfc]
+vcvt.bf16.f32 d1, q3
+// CHECK-NEXT: vcvt.bf16.f32 d1, q3 @ encoding: [0x46,0x16,0xb6,0xf3]
+vcvtbeq.bf16.f32 s1, s3
+// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0x61,0x09,0xf3,0x0e]
+vcvttne.bf16.f32 s1, s3
+// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 @ encoding: [0xe1,0x09,0xf3,0x1e]
+vfmat.bf16 q0, q0, q0
+//CHECK-NEXT: vfmat.bf16 q0, q0, q0 @ encoding: [0x50,0x08,0x30,0xfc]
+vfmat.bf16 q0, q0, q15
+//CHECK-NEXT: vfmat.bf16 q0, q0, q15 @ encoding: [0x7e,0x08,0x30,0xfc]
+vfmat.bf16 q0, q15, q0
+//CHECK-NEXT: vfmat.bf16 q0, q15, q0 @ encoding: [0xd0,0x08,0x3e,0xfc]
+vfmat.bf16 q0, q15, q15
+//CHECK-NEXT: vfmat.bf16 q0, q15, q15 @ encoding: [0xfe,0x08,0x3e,0xfc]
+vfmat.bf16 q7, q0, q0
+//CHECK-NEXT: vfmat.bf16 q7, q0, q0 @ encoding: [0x50,0xe8,0x30,0xfc]
+vfmat.bf16 q8, q0, q0
+//CHECK-NEXT: vfmat.bf16 q8, q0, q0 @ encoding: [0x50,0x08,0x70,0xfc]
+vfmab.bf16 q0, q0, q0
+//CHECK-NEXT: vfmab.bf16 q0, q0, q0 @ encoding: [0x10,0x08,0x30,0xfc]
+vfmab.bf16 q0, q0, q15
+//CHECK-NEXT: vfmab.bf16 q0, q0, q15 @ encoding: [0x3e,0x08,0x30,0xfc]
+vfmab.bf16 q0, q15, q0
+//CHECK-NEXT: vfmab.bf16 q0, q15, q0 @ encoding: [0x90,0x08,0x3e,0xfc]
+vfmab.bf16 q0, q15, q15
+//CHECK-NEXT: vfmab.bf16 q0, q15, q15 @ encoding: [0xbe,0x08,0x3e,0xfc]
+vfmab.bf16 q7, q0, q0
+//CHECK-NEXT: vfmab.bf16 q7, q0, q0 @ encoding: [0x10,0xe8,0x30,0xfc]
+vfmab.bf16 q8, q0, q0
+//CHECK-NEXT: vfmab.bf16 q8, q0, q0 @ encoding: [0x10,0x08,0x70,0xfc]
+vfmat.bf16 q0, q0, d0[0]
+//CHECK-NEXT: vfmat.bf16 q0, q0, d0[0] @ encoding: [0x50,0x08,0x30,0xfe]
+vfmat.bf16 q0, q0, d0[3]
+//CHECK-NEXT: vfmat.bf16 q0, q0, d0[3] @ encoding: [0x78,0x08,0x30,0xfe]
+vfmat.bf16 q0, q0, d7[0]
+//CHECK-NEXT: vfmat.bf16 q0, q0, d7[0] @ encoding: [0x57,0x08,0x30,0xfe]
+vfmab.bf16 q0, q0, d0[0]
+//CHECK-NEXT: vfmab.bf16 q0, q0, d0[0] @ encoding: [0x10,0x08,0x30,0xfe]
+vfmab.bf16 q0, q0, d0[3]
+//CHECK-NEXT: vfmab.bf16 q0, q0, d0[3] @ encoding: [0x38,0x08,0x30,0xfe]
+vfmab.bf16 q0, q0, d7[0]
+//CHECK-NEXT: vfmab.bf16 q0, q0, d7[0] @ encoding: [0x17,0x08,0x30,0xfe]
--- /dev/null
+// RUN: not llvm-mc -triple thumbv8 -mattr=-bf16 < %s 2>&1 | FileCheck %s
+
+vdot.bf16 d3, d4, d5
+// CHECK: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vdot.bf16 d3, d4, d5
+
+vdot.bf16 q0, q1, q2
+// CHECK: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vdot.bf16 q0, q1, q2
+
+vdot.bf16 d3, d4, d5[1]
+// CHECK: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vdot.bf16 d3, d4, d5[1]
+
+vdot.bf16 q0, q1, d5[1]
+// CHECK: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vdot.bf16 q0, q1, d5[1]
+
+vmmla.bf16 q0, q1, q2
+// CHECK: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vmmla.bf16 q0, q1, q2
+
+vcvt.bf16.f32 d1, q3
+// CHECK: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vcvt.bf16.f32 d1, q3
+
+vcvtbeq.bf16.f32 s1, s3
+// CHECK: note: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3
+vcvttne.bf16.f32 s1, s3
+// CHECK: note: instruction requires: BFloat16 floating point extension
+// CHECK-NEXT: vcvttne.bf16.f32 s1, s3
--- /dev/null
+// RUN: llvm-mc -triple thumbv8 -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
+// RUN: llvm-mc -triple thumbv8 -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK
+
+vcvt.bf16.f32 d1, q3
+// CHECK: vcvt.bf16.f32 d1, q3 @ encoding: [0xb6,0xff,0x46,0x16]
+
+it eq
+vcvtbeq.bf16.f32 s1, s3
+// CHECK: it eq @ encoding: [0x08,0xbf]
+// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0x61,0x09]
+
+it ne
+vcvttne.bf16.f32 s1, s3
+// CHECK: it ne @ encoding: [0x18,0xbf]
+// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0xe1,0x09]
--- /dev/null
+# RUN: llvm-mc -triple=aarch64 -mattr=+bf16 -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8.6a -disassemble < %s | FileCheck %s
+# RUN: not llvm-mc -triple=aarch64 -mattr=-bf16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
+# RUN: not llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
+
+
+[0x62,0xfc,0x44,0x2e]
+[0x62,0xfc,0x44,0x6e]
+# CHECK: bfdot v2.2s, v3.4h, v4.4h
+# CHECK: bfdot v2.4s, v3.8h, v4.8h
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xfc,0x44,0x2e]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xfc,0x44,0x6e]
+
+[0x62,0xf0,0x44,0x4f]
+[0x62,0xf0,0x64,0x4f]
+[0x62,0xf8,0x44,0x4f]
+[0x62,0xf8,0x64,0x4f]
+# CHECK: bfdot v2.4s, v3.8h, v4.2h[0]
+# CHECK: bfdot v2.4s, v3.8h, v4.2h[1]
+# CHECK: bfdot v2.4s, v3.8h, v4.2h[2]
+# CHECK: bfdot v2.4s, v3.8h, v4.2h[3]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf0,0x44,0x4f]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf0,0x64,0x4f]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf8,0x44,0x4f]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf8,0x64,0x4f]
+
+
+[0x62,0xf0,0x44,0x0f]
+[0x62,0xf0,0x64,0x0f]
+[0x62,0xf8,0x44,0x0f]
+[0x62,0xf8,0x64,0x0f]
+# CHECK: bfdot v2.2s, v3.4h, v4.2h[0]
+# CHECK: bfdot v2.2s, v3.4h, v4.2h[1]
+# CHECK: bfdot v2.2s, v3.4h, v4.2h[2]
+# CHECK: bfdot v2.2s, v3.4h, v4.2h[3]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf0,0x44,0x0f]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf0,0x64,0x0f]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf8,0x44,0x0f]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xf8,0x64,0x0f]
+
+
+[0x62,0xec,0x44,0x6e]
+[0x83,0xec,0x45,0x6e]
+# CHECK: bfmmla v2.4s, v3.8h, v4.8h
+# CHECK: bfmmla v3.4s, v4.8h, v5.8h
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xec,0x44,0x6e]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x83,0xec,0x45,0x6e]
+
+
+[0xa5,0x68,0xa1,0x0e]
+[0xa5,0x68,0xa1,0x4e]
+# CHECK: bfcvtn v5.4h, v5.4s
+# CHECK: bfcvtn2 v5.8h, v5.4s
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0xa5,0x68,0xa1,0x0e]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0xa5,0x68,0xa1,0x4e]
+
+[0x65, 0x40, 0x63, 0x1e]
+# CHECK: bfcvt h5, s3
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x65, 0x40, 0x63, 0x1e]
--- /dev/null
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
+# RUN: llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
+#
+# Tests BFloat16 instruction decodings.
+# Without BFloat16 enabled, some of these get disassembled to coprocessor instructions.
+[0x25,0x3d,0x04,0xfe]
+# CHECK: vdot.bf16 d3, d4, d5[1]
+# NOBF16: cdp2 p13, #0, c3, c4, c5, #1
+#
+[0x65,0x0d,0x02,0xfe]
+# CHECK-NEXT: vdot.bf16 q0, q1, d5[1]
+# NOBF16-NEXT: cdp2 p13, #0, c0, c2, c5, #3
+#
+[0x61,0x09,0xf3,0x0e]
+# CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3
+# NOBF16-NEXT: cdpeq p9, #15, c0, c3, c1, #3
+#
+[0xe1,0x09,0xf3,0x1e]
+# CHECK-NEXT: vcvttne.bf16.f32 s1, s3
+# NOBF16-NEXT: cdpne p9, #15, c0, c3, c1, #7
+#
+[0x50,0x08,0x30,0xfc]
+# CHECK-NEXT: vfmat.bf16 q0, q0, q0
+# NOBF16-NEXT: ldc2 p8, c0, [r0], #-320
+#
+[0x7e,0x08,0x30,0xfc]
+# CHECK-NEXT: vfmat.bf16 q0, q0, q15
+# NOBF16-NEXT: ldc2 p8, c0, [r0], #-504
+#
+[0xd0,0x08,0x3e,0xfc]
+# CHECK-NEXT: vfmat.bf16 q0, q15, q0
+# NOBF16-NEXT: ldc2 p8, c0, [lr], #-832
+#
+[0xfe,0x08,0x3e,0xfc]
+# CHECK-NEXT: vfmat.bf16 q0, q15, q15
+# NOBF16-NEXT: ldc2 p8, c0, [lr], #-1016
+#
+[0xd0,0x08,0x30,0xfc]
+# CHECK-NEXT: vfmat.bf16 q0, q8, q0
+# NOBF16-NEXT: ldc2 p8, c0, [r0], #-832
+#
+[0x50,0xe8,0x30,0xfc]
+# CHECK-NEXT: vfmat.bf16 q7, q0, q0
+# NOBF16-NEXT: ldc2 p8, c14, [r0], #-320
+#
+[0x50,0x08,0x70,0xfc]
+# CHECK-NEXT: vfmat.bf16 q8, q0, q0
+# NOBF16-NEXT: ldc2l p8, c0, [r0], #-320
+#
+[0x10,0x08,0x30,0xfc]
+# CHECK-NEXT: vfmab.bf16 q0, q0, q0
+# NOBF16-NEXT: ldc2 p8, c0, [r0], #-64
+#
+[0x3e,0x08,0x30,0xfc]
+# CHECK-NEXT: vfmab.bf16 q0, q0, q15
+# NOBF16-NEXT: ldc2 p8, c0, [r0], #-248
+#
+[0x90,0x08,0x3e,0xfc]
+# CHECK-NEXT: vfmab.bf16 q0, q15, q0
+# NOBF16-NEXT: ldc2 p8, c0, [lr], #-576
+#
+[0xbe,0x08,0x3e,0xfc]
+# CHECK-NEXT: vfmab.bf16 q0, q15, q15
+# NOBF16-NEXT: ldc2 p8, c0, [lr], #-760
+#
+[0x90,0x08,0x30,0xfc]
+# CHECK-NEXT: vfmab.bf16 q0, q8, q0
+# NOBF16-NEXT: ldc2 p8, c0, [r0], #-576
+#
+[0x10,0xe8,0x30,0xfc]
+# CHECK-NEXT: vfmab.bf16 q7, q0, q0
+# NOBF16-NEXT: ldc2 p8, c14, [r0], #-64
+#
+[0x10,0x08,0x70,0xfc]
+# CHECK-NEXT: vfmab.bf16 q8, q0, q0
+# NOBF16-NEXT: ldc2l p8, c0, [r0], #-64
+#
+[0x50,0x08,0x30,0xfe]
+# CHECK-NEXT: vfmat.bf16 q0, q0, d0[0]
+# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #2
+#
+[0x78,0x08,0x30,0xfe]
+# CHECK-NEXT: vfmat.bf16 q0, q0, d0[3]
+# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #3
+[0x57,0x08,0x30,0xfe]
+#
+# CHECK-NEXT: vfmat.bf16 q0, q0, d7[0]
+# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #2
+[0x10,0x08,0x30,0xfe]
+#
+# CHECK-NEXT: vfmab.bf16 q0, q0, d0[0]
+# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #0
+[0x38,0x08,0x30,0xfe]
+#
+# CHECK-NEXT: vfmab.bf16 q0, q0, d0[3]
+# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #1
+#
+[0x17,0x08,0x30,0xfe]
+# CHECK-NEXT: vfmab.bf16 q0, q0, d7[0]
+# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #0
--- /dev/null
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16
+# RUN: not llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16
+
+[0x05,0x3d,0x04,0xfc]
+# CHECK: vdot.bf16 d3, d4, d5
+# CHECK-NOBF16: warning: invalid instruction encoding
+
+[0x44,0x0d,0x02,0xfc]
+# CHECK: vdot.bf16 q0, q1, q2
+# CHECK-NOBF16: warning: invalid instruction encoding
+
+[0x44,0x0c,0x02,0xfc]
+# CHECK: vmmla.bf16 q0, q1, q2
+# CHECK-NOBF16: warning: invalid instruction encoding
+
+[0x46,0x16,0xb6,0xf3]
+# CHECK: vcvt.bf16.f32 d1, q3
+# CHECK-NOBF16: warning: invalid instruction encoding
--- /dev/null
+# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
+
+[0x04,0xfc,0x05,0x3d]
+[0x02,0xfc,0x44,0x0d]
+# CHECK: vdot.bf16 d3, d4, d5
+# CHECK: vdot.bf16 q0, q1, q2
+
+[0x04,0xfe,0x25,0x3d]
+# CHECK: vdot.bf16 d3, d4, d5[1]
+
+[0x02,0xfe,0x65,0x0d]
+# CHECK: vdot.bf16 q0, q1, d5[1]
+
+[0x02,0xfc,0x44,0x0c]
+# CHECK: vmmla.bf16 q0, q1, q2
+
+[0xb6,0xff,0x46,0x16]
+# CHECK: vcvt.bf16.f32 d1, q3
+
+[0xf3,0xee,0x61,0x09]
+# CHECK: vcvtb.bf16.f32 s1, s3
+
+[0xf3,0xee,0xe1,0x09]
+# CHECK: vcvtt.bf16.f32 s1, s3
--- /dev/null
+# RUN: not llvm-mc -triple thumbv8-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -triple thumbv8-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s
+
+[0x04,0xfc,0x05,0x3d]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0x04,0xfc,0x05,0x3d]
+
+[0x02,0xfc,0x44,0x0d]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0x02,0xfc,0x44,0x0d]
+
+
+[0x04,0xfe,0x25,0x3d]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0x04,0xfe,0x25,0x3d]
+
+
+[0x02,0xfe,0x65,0x0d]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0x02,0xfe,0x65,0x0d]
+
+
+[0x02,0xfc,0x44,0x0c]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0x02,0xfc,0x44,0x0c]
+
+
+[0xb6,0xff,0x46,0x16]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0xb6,0xff,0x46,0x16]
+
+
+[0xf3,0xee,0x61,0x09]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0xf3,0xee,0x61,0x09]
+
+
+[0xf3,0xee,0xe1,0x09]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0xf3,0xee,0xe1,0x09]
"armv7e-m", "armv7em", "armv8-a", "armv8", "armv8a",
"armv8l", "armv8.1-a", "armv8.1a", "armv8.2-a", "armv8.2a",
"armv8.3-a", "armv8.3a", "armv8.4-a", "armv8.4a", "armv8.5-a",
- "armv8.5a", "armv8-r", "armv8r", "armv8-m.base", "armv8m.base",
- "armv8-m.main", "armv8m.main", "iwmmxt", "iwmmxt2", "xscale",
- "armv8.1-m.main",
+ "armv8.5a", "armv8.6-a", "armv8.6a", "armv8-r", "armv8r",
+ "armv8-m.base", "armv8m.base", "armv8-m.main", "armv8m.main", "iwmmxt",
+ "iwmmxt2", "xscale", "armv8.1-m.main",
};
bool testARMCPU(StringRef CPUName, StringRef ExpectedArch,
testARMArch("armv8.5-a", "generic", "v8.5a",
ARMBuildAttrs::CPUArch::v8_A));
EXPECT_TRUE(
+ testARMArch("armv8.6-a", "generic", "v8.6a",
+ ARMBuildAttrs::CPUArch::v8_A));
+ EXPECT_TRUE(
testARMArch("armv8-r", "cortex-r52", "v8r",
ARMBuildAttrs::CPUArch::v8_R));
EXPECT_TRUE(
"v7", "v7a", "v7ve", "v7hl", "v7l", "v7-r", "v7r", "v7-m",
"v7m", "v7k", "v7s", "v7e-m", "v7em", "v8-a", "v8", "v8a",
"v8l", "v8.1-a", "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a",
- "v8.4a", "v8.5-a","v8.5a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main"
+ "v8.4a", "v8.5-a","v8.5a", "v8.6-a", "v8.6a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main"
};
for (unsigned i = 0; i < array_lengthof(Arch); i++) {
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
+ case ARM::ArchKind::ARMV8_6A:
EXPECT_EQ(ARM::ProfileKind::A, ARM::parseArchProfile(ARMArch[i]));
break;
default:
ARMBuildAttrs::CPUArch::v8_A));
EXPECT_TRUE(testAArch64Arch("armv8.5-a", "generic", "v8.5a",
ARMBuildAttrs::CPUArch::v8_A));
+ EXPECT_TRUE(testAArch64Arch("armv8.6-a", "generic", "v8.6a",
+ ARMBuildAttrs::CPUArch::v8_A));
}
bool testAArch64Extension(StringRef CPUName, AArch64::ArchKind AK,