// Decode AArch64 features from string like +[no]featureA+[no]featureB+...
static bool DecodeAArch64Features(const Driver &D, StringRef text,
- std::vector<StringRef> &Features) {
+ std::vector<StringRef> &Features,
+ llvm::AArch64::ArchKind ArchKind) {
SmallVector<StringRef, 8> Split;
text.split(Split, StringRef("+"), -1, false);
D.Diag(clang::diag::err_drv_no_neon_modifier);
else
return false;
+
+ // +sve implies +f32mm only if the base architecture is v8.6A; in general,
+ // sve implies neither f32mm nor f64mm.
+ if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A) && Feature == "sve")
+ Features.push_back("+f32mm");
}
return true;
}
std::vector<StringRef> &Features) {
std::pair<StringRef, StringRef> Split = Mcpu.split("+");
CPU = Split.first;
+ llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::ARMV8A;
if (CPU == "native")
CPU = llvm::sys::getHostCPUName();
if (CPU == "generic") {
Features.push_back("+neon");
} else {
- llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseCPUArch(CPU);
+ ArchKind = llvm::AArch64::parseCPUArch(CPU);
if (!llvm::AArch64::getArchFeatures(ArchKind, Features))
return false;
return false;
}
- if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))
- return false;
+ if (Split.second.size() &&
+ !DecodeAArch64Features(D, Split.second, Features, ArchKind))
+ return false;
- return true;
+ return true;
}
static bool
llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first);
if (ArchKind == llvm::AArch64::ArchKind::INVALID ||
!llvm::AArch64::getArchFeatures(ArchKind, Features) ||
- (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)))
+ (Split.second.size() &&
+ !DecodeAArch64Features(D, Split.second, Features, ArchKind)))
return false;
return true;
// RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s
// GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve"
+// The 8-bit integer matrix multiply extension is a mandatory component of the
+// Armv8.6-A extensions, but is permitted as an optional feature for any
+// implementation of Armv8.2-A to Armv8.5-A (inclusive).
+// RUN: %clang -target aarch64 -march=armv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=NO-I8MM %s
+// RUN: %clang -target aarch64 -march=armv8.5a+i8mm -### -c %s 2>&1 | FileCheck -check-prefix=I8MM %s
+// NO-I8MM-NOT: "-target-feature" "+i8mm"
+// I8MM: "-target-feature" "+i8mm"
+
+// The 32-bit floating point matrix multiply extension is enabled by default
+// for armv8.6-a targets (or later) with SVE, and can optionally be enabled for
+// any target from armv8.2a onwards (its use with earlier targets is not
+// rejected).
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F32MM %s
+// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
+// RUN: %clang -target aarch64 -march=armv8.5a+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
+// NO-F32MM-NOT: "-target-feature" "+f32mm"
+// F32MM: "-target-feature" "+f32mm"
+
+// The 64-bit floating point matrix multiply extension is not currently enabled
+// by default for any targets, because it requires an SVE vector length >= 256
+// bits. Once a CPU with such a vector length is added, it can be enabled by
+// default, but for now it can only be used by adding the +f64mm feature.
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F64MM %s
+// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=NO-F64MM %s
+// RUN: %clang -target aarch64 -march=armv8.6a+f64mm -### -c %s 2>&1 | FileCheck -check-prefix=F64MM %s
+// NO-F64MM-NOT: "-target-feature" "+f64mm"
+// F64MM: "-target-feature" "+f64mm"
+
// fullfp16 is off by default for v8a, feature must not be mentioned
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
// RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
--- /dev/null
+// RUN: %clang -### -target arm-none-none-eabi -march=armv8.5a+i8mm %s 2>&1 | FileCheck %s
+// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.5a+i8mm %s 2>&1 | FileCheck %s
+// CHECK: "-target-feature" "+i8mm"
+// CHECK-NOT: "-target-feature" "-i8mm"
+
+// RUN: %clang -### -target arm-none-none-eabi -march=armv8.6a+noi8mm %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.6a+noi8mm %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+// NOI8MM: "-target-feature" "-i8mm"
+// NOI8MM-NOT: "-target-feature" "+i8mm"
+
+// RUN: %clang -### -target arm-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT
+// RUN: %clang -### -target aarch64-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT
+// ABSENT-NOT: "-target-feature" "+i8mm"
+// ABSENT-NOT: "-target-feature" "-i8mm"
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16")
AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm")
+AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm")
+AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm")
AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
#undef AARCH64_ARCH_EXT_NAME
namespace AArch64 {
// Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
+enum ArchExtKind : uint64_t {
AEK_INVALID = 0,
AEK_NONE = 1,
AEK_CRC = 1 << 1,
AEK_TME = 1 << 28,
AEK_BF16 = 1 << 29,
AEK_I8MM = 1 << 30,
+ AEK_F32MM = 1ULL << 31,
+ AEK_F64MM = 1ULL << 32,
};
enum class ArchKind {
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
- ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
+ ARM::AEK_I8MM))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16")
ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
+ARM_ARCH_EXT_NAME("i8mm", ARM::AEK_I8MM, "+i8mm", "-i8mm")
ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob")
ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, "+cdecp0", "-cdecp0")
ARM_ARCH_EXT_NAME("cdecp1", ARM::AEK_CDECP1, "+cdecp1", "-cdecp1")
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
AEK_BF16 = 1 << 20,
- AEK_CDECP0 = 1 << 21,
- AEK_CDECP1 = 1 << 22,
- AEK_CDECP2 = 1 << 23,
- AEK_CDECP3 = 1 << 24,
- AEK_CDECP4 = 1 << 25,
- AEK_CDECP5 = 1 << 26,
- AEK_CDECP6 = 1 << 27,
- AEK_CDECP7 = 1 << 28,
+ AEK_I8MM = 1 << 21,
+ AEK_CDECP0 = 1 << 22,
+ AEK_CDECP1 = 1 << 23,
+ AEK_CDECP2 = 1 << 24,
+ AEK_CDECP3 = 1 << 25,
+ AEK_CDECP4 = 1 << 26,
+ AEK_CDECP5 = 1 << 27,
+ AEK_CDECP6 = 1 << 28,
+ AEK_CDECP7 = 1 << 29,
// Unsupported extensions.
AEK_OS = 1ULL << 59,
{"maverick", "maverick", nullptr, nullptr},
{"xscale", "noxscale", nullptr, nullptr},
{"sb", "nosb", "+sb", "-sb"},
+ {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
{"mve", "nomve", "+mve", "-mve"},
{"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
{"tme", "notme", "+tme", "-tme"},
{"ssbs", "nossbs", "+ssbs", "-ssbs"},
{"sb", "nosb", "+sb", "-sb"},
- {"predres", "nopredres", "+predres", "-predres"}
+ {"predres", "nopredres", "+predres", "-predres"},
+ {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
+ {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
+ {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
};
for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {