From c2c2cc13601374f987cb03dfc8ef841c64b14024 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Wed, 28 Oct 2020 11:07:17 +0000 Subject: [PATCH] [ARM][AArch64] Adding Neoverse V1 CPU support Add support for the Neoverse V1 CPU to the ARM and AArch64 backends. This is based on patches from Mark Murray and Victor Campos. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D90765 --- clang/test/Driver/aarch64-cpus.c | 2 ++ clang/test/Driver/arm-cortex-cpus.c | 16 ++++++++++++++++ llvm/include/llvm/MC/SubtargetFeature.h | 2 +- llvm/include/llvm/Support/AArch64TargetParser.def | 4 ++++ llvm/include/llvm/Support/ARMTargetParser.def | 2 ++ llvm/lib/Target/AArch64/AArch64.td | 21 +++++++++++++++++++++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + llvm/lib/Target/AArch64/AArch64Subtarget.h | 5 +++-- llvm/lib/Target/ARM/ARM.td | 12 ++++++++++++ llvm/lib/Target/ARM/ARMSubtarget.cpp | 1 + llvm/lib/Target/ARM/ARMSubtarget.h | 1 + llvm/test/CodeGen/AArch64/cpus.ll | 1 + llvm/unittests/Support/TargetParserTest.cpp | 18 ++++++++++++++++-- 13 files changed, 81 insertions(+), 5 deletions(-) diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index 9cdf346..1397468 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -177,6 +177,8 @@ // CORTEXX1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x1" // RUN: %clang -target aarch64 -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXA78 %s // CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78" +// RUN: %clang -target aarch64 -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-V1 %s +// NEOVERSE-V1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-v1" // RUN: %clang -target aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXR82 %s // CORTEXR82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-r82" diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index 4481ba5..5df8723 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c @@ -840,6 +840,22 @@ // CHECK-CORTEX-A76AE-SOFT: "-target-feature" "+soft-float" // CHECK-CORTEX-A76AE-SOFT: "-target-feature" "+soft-float-abi" +// RUN: %clang -target arm -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV84A %s +// RUN: %clang -target arm -mcpu=neoverse-v1 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV84A %s +// CHECK-CPUV84A: "-cc1"{{.*}} "-triple" "armv8.4a-{{.*}} + +// RUN: %clang -target armeb -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV84A %s +// RUN: %clang -target arm -mcpu=neoverse-v1 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV84A %s +// CHECK-BE-CPUV84A: "-cc1"{{.*}} "-triple" "armebv8.4a-{{.*}} + +// RUN: %clang -target arm -mcpu=neoverse-v1 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV84A-THUMB %s +// RUN: %clang -target arm -mcpu=neoverse-v1 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV84A-THUMB %s +// CHECK-CPUV84A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8.4a-{{.*}} + +// RUN: %clang -target armeb -mcpu=neoverse-v1 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV84A-THUMB %s +// RUN: %clang -target arm -mcpu=neoverse-v1 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV84A-THUMB %s +// CHECK-BE-CPUV84A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8.4a-{{.*}} + // RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-x1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-X1 %s // RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-x1 -mfpu=crypto-neon-fp-armv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-X1-MFPU %s // CHECK-CORTEX-X1: "-cc1"{{.*}} "-triple" "armv8.2a-{{.*}} "-target-cpu" "cortex-x1" diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h index 01ea794..cc36b25 100644 --- a/llvm/include/llvm/MC/SubtargetFeature.h +++ b/llvm/include/llvm/MC/SubtargetFeature.h @@ -30,7 +30,7 @@ namespace llvm { class raw_ostream; class Triple; -const unsigned MAX_SUBTARGET_WORDS = 3; +const unsigned MAX_SUBTARGET_WORDS = 4; const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64; /// Container class for subtarget features. diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index e6bc1a2..cbf0d5d 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -150,6 +150,10 @@ AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 | + AArch64::AEK_DOTPROD )) AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) AARCH64_CPU_NAME("apple-a7", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 9f51c841..35c94fd 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -300,6 +300,8 @@ ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) +ARM_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, + (ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 0e16a4e..5913010 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -903,6 +903,26 @@ def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", FeatureSSBS, ]>; +def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", + "NeoverseV1", + "Neoverse V1 ARM processors", [ + HasV8_4aOps, + FeatureBF16, + FeatureCacheDeepPersist, + FeatureCrypto, + FeatureFPARMv8, + FeatureFP16FML, + FeatureFullFP16, + FeatureFuseAES, + FeatureMatMulInt8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureRandGen, + FeatureSPE, + FeatureSSBS, + FeatureSVE]>; + def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ FeatureCrypto, @@ -1044,6 +1064,7 @@ def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>; def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; +def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>; def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>; def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index a389bfb..fdf979b 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -151,6 +151,7 @@ void AArch64Subtarget::initializeProperties() { PrefFunctionLogAlignment = 3; break; case NeoverseN1: + case NeoverseV1: PrefFunctionLogAlignment = 4; break; case Saphira: diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 135dee0..0710781 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -64,14 +64,15 @@ public: Kryo, NeoverseE1, NeoverseN1, + NeoverseV1, Saphira, ThunderX2T99, ThunderX, ThunderXT81, ThunderXT83, ThunderXT88, - TSV110, - ThunderX3T110 + ThunderX3T110, + TSV110 }; protected: diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 58e393a..fac6fea 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -601,6 +601,9 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78", def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", []>; +def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily", + "NeoverseV1", "Neoverse-V1 ARM processors", []>; + def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", "Qualcomm Krait processors", []>; def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", @@ -1279,6 +1282,15 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1, FeatureFullFP16, FeatureDotProd]>; +def : ProcNoItin<"neoverse-v1", [ARMv84a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureBF16, + FeatureMatMulInt8]>; + def : ProcNoItin<"neoverse-n1", [ARMv82a, FeatureHWDivThumb, FeatureHWDivARM, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 5d0c5f7..0026901 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -314,6 +314,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { PreISelOperandLatencyAdjustment = 1; break; case NeoverseN1: + case NeoverseV1: break; case Swift: MaxInterleaveFactor = 2; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 75c3b52..4c29b2b 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -76,6 +76,7 @@ protected: Krait, Kryo, NeoverseN1, + NeoverseV1, Swift }; enum ARMProcClassEnum { diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll index 3d4ad97..863e1e7 100644 --- a/llvm/test/CodeGen/AArch64/cpus.ll +++ b/llvm/test/CodeGen/AArch64/cpus.ll @@ -20,6 +20,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-x1 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=neoverse-e1 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=neoverse-n1 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=neoverse-v1 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m3 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m4 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m5 2>&1 | FileCheck %s diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 4b167dc..f0eee09 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -280,6 +280,12 @@ TEST(TargetParserTest, testARMCPU) { ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 | ARM::AEK_RAS | ARM::AEK_DOTPROD, "8.2-A")); + EXPECT_TRUE(testARMCPU("neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", + ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | + ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | + ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD, + "8.4-A")); EXPECT_TRUE(testARMCPU("cyclone", "armv8-a", "crypto-neon-fp-armv8", ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | @@ -322,7 +328,7 @@ TEST(TargetParserTest, testARMCPU) { "7-S")); } -static constexpr unsigned NumARMCPUArchs = 89; +static constexpr unsigned NumARMCPUArchs = 90; TEST(TargetParserTest, testARMCPUArchList) { SmallVector List; @@ -882,6 +888,14 @@ TEST(TargetParserTest, testAArch64CPU) { AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A")); EXPECT_TRUE(testAArch64CPU( + "neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", + AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_RCPC | AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_BF16, + "8.4-A")); + EXPECT_TRUE(testAArch64CPU( "cortex-r82", "armv8-r", "crypto-neon-fp-armv8", AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS | AArch64::AEK_CRYPTO | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | @@ -1034,7 +1048,7 @@ TEST(TargetParserTest, testAArch64CPU) { "8.2-A")); } -static constexpr unsigned NumAArch64CPUArchs = 43; +static constexpr unsigned NumAArch64CPUArchs = 44; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector List; -- 2.7.4