From 09b8545fc51316d0fecf34c9e753b8a20358a3e8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 27 Mar 2022 15:24:40 +0200 Subject: [PATCH] Add initial support for M1 on Linux, Phytium FT2xxx series, ARM Cortex 510/710/X1/X2 --- Makefile.arm64 | 44 ++++++++++++++++++++++++++++++ TargetList.txt | 7 +++++ c_check | 1 + cpuid_arm64.c | 76 +++++++++++++++++++++++++++++++++++++++++++++------ getarch.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++---------- param.h | 4 +-- 6 files changed, 193 insertions(+), 25 deletions(-) diff --git a/Makefile.arm64 b/Makefile.arm64 index 2eade8d..96e31a4 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 endif endif +ifeq ($(CORE), FT2000) +CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 +endif +endif + # Use a72 tunings because Neoverse-N1 is only available # in GCC>=9 ifeq ($(CORE), NEOVERSEN1) @@ -229,6 +236,43 @@ endif endif endif +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) +ifeq ($(CORE), CORTEXX1) +CCOMMON_OPT += -march=armv9-a -mtune=cortex-x1 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv9-a -mtune=cortex-x1 +endif +endif +endif + +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) +ifeq ($(CORE), CORTEXX2) +CCOMMON_OPT += -march=armv9-a -mtune=cortex-x2 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv9-a -mtune=cortex-x2 +endif +endif +endif + +#ifeq (1, $(filter 1,$(ISCLANG))) +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) +ifeq ($(CORE), CORTEXA510) +CCOMMON_OPT += -march=armv8.4-a+sve +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8.4-a+sve +endif +endif +endif + +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) +ifeq ($(CORE), CORTEXA710) +CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710 +endif +endif +endif + endif endif diff --git 
a/TargetList.txt b/TargetList.txt index a5a07a6..a297fd0 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -92,6 +92,10 @@ CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 +CORTEXA510 +CORTEXA710 +CORTEXX1 +CORTEXX2 NEOVERSEN1 NEOVERSEV1 NEOVERSEN2 @@ -103,6 +107,9 @@ THUNDERX2T99 TSV110 THUNDERX3T110 VORTEX +A64FX +ARMV8SVE +FT2000 9.System Z: ZARCH_GENERIC diff --git a/c_check b/c_check index e10ddfe..f9d3f2c 100644 --- a/c_check +++ b/c_check @@ -316,6 +316,7 @@ if ($architecture ne $hostarch) { } $cross = 1 if ($os ne $hostos); +$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && ($ENV{TERMUX_APP_PID} ne "")); $openmp = "" if $ENV{USE_OPENMP} != 1; diff --git a/cpuid_arm64.c b/cpuid_arm64.c index cc3a828..89ec186 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -45,6 +45,10 @@ size_t length64=sizeof(value64); #define CPU_NEOVERSEN1 11 #define CPU_NEOVERSEV1 16 #define CPU_NEOVERSEN2 17 +#define CPU_CORTEXX1 18 +#define CPU_CORTEXX2 19 +#define CPU_CORTEXA510 20 +#define CPU_CORTEXA710 21 // Qualcomm #define CPU_FALKOR 6 // Cavium @@ -59,6 +63,8 @@ size_t length64=sizeof(value64); #define CPU_VORTEX 13 // Fujitsu #define CPU_A64FX 15 +// Phytium +#define CPU_FT2000 22 static char *cpuname[] = { "UNKNOWN", @@ -73,12 +79,17 @@ static char *cpuname[] = { "TSV110", "EMAG8180", "NEOVERSEN1", - "NEOVERSEV1" - "NEOVERSEN2" "THUNDERX3T110", "VORTEX", "CORTEXA55", - "A64FX" + "A64FX", + "NEOVERSEV1", + "NEOVERSEN2", + "CORTEXX1", + "CORTEXX2", + "CORTEXA510", + "CORTEXA710", + "FT2000" }; static char *cpuname_lower[] = { @@ -94,12 +105,17 @@ static char *cpuname_lower[] = { "tsv110", "emag8180", "neoversen1", - "neoversev1", - "neoversen2", "thunderx3t110", "vortex", "cortexa55", - "a64fx" + "a64fx", + "neoversev1", + "neoversen2", + "cortexx1", + "cortexx2", + "cortexa510", + "cortexa710", + "ft2000" }; int get_feature(char *search) @@ -182,6 +198,14 @@ int detect(void) return CPU_NEOVERSEN2; else if (strstr(cpu_part, "0xd05")) return CPU_CORTEXA55; + else if 
(strstr(cpu_part, "0xd46")) + return CPU_CORTEXA510; + else if (strstr(cpu_part, "0xd47")) + return CPU_CORTEXA710; + else if (strstr(cpu_part, "0xd44")) + return CPU_CORTEXX1; + else if (strstr(cpu_part, "0xd48")) + return CPU_CORTEXX2; } // Qualcomm else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) @@ -202,6 +226,13 @@ int detect(void) // Fujitsu else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) return CPU_A64FX; + // Apple + else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022")) + return CPU_VORTEX; + // Phytium + else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") + || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663"))) + return CPU_FT2000; } p = (char *) NULL ; @@ -382,7 +413,24 @@ void get_cpuconfig(void) printf("#define DTB_DEFAULT_ENTRIES 48\n"); printf("#define DTB_SIZE 4096\n"); break; - + case CPU_CORTEXA510: + case CPU_CORTEXA710: + case CPU_CORTEXX1: + case CPU_CORTEXX2: + printf("#define ARMV9\n"); + printf("#define %s\n", cpuname[d]); + printf("#define L1_CODE_SIZE 65536\n"); + printf("#define L1_CODE_LINESIZE 64\n"); + printf("#define L1_CODE_ASSOCIATIVE 4\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L1_DATA_ASSOCIATIVE 4\n"); + printf("#define L2_SIZE 1048576\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define L2_ASSOCIATIVE 8\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + break; case CPU_FALKOR: printf("#define FALKOR\n"); printf("#define L1_CODE_SIZE 65536\n"); @@ -469,9 +517,9 @@ void get_cpuconfig(void) printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_SIZE 4096 \n"); break; -#ifdef __APPLE__ case CPU_VORTEX: printf("#define VORTEX \n"); +#ifdef __APPLE__ sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); printf("#define L1_CODE_SIZE %lld \n",value64); 
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); @@ -480,10 +528,10 @@ void get_cpuconfig(void) printf("#define L1_DATA_SIZE %lld \n",value64); sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); printf("#define L2_SIZE %lld \n",value64); +#endif printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_SIZE 4096 \n"); break; -#endif case CPU_A64FX: printf("#define A64FX\n"); printf("#define L1_CODE_SIZE 65535\n"); @@ -494,6 +542,16 @@ void get_cpuconfig(void) printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 4096\n"); break; + case CPU_FT2000: + printf("#define FT2000\n"); + printf("#define L1_CODE_SIZE 32768\n"); + printf("#define L1_DATA_SIZE 32768\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L2_SIZE 33554432\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + break; } get_cpucount(); } diff --git a/getarch.c b/getarch.c index e49eac1..26a2dd4 100644 --- a/getarch.c +++ b/getarch.c @@ -1232,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "cortexa53" #define CORENAME "CORTEXA53" -#else #endif #ifdef FORCE_CORTEXA57 @@ -1248,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "cortexa57" #define CORENAME "CORTEXA57" -#else #endif #ifdef FORCE_CORTEXA72 @@ -1264,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "cortexa72" #define CORENAME "CORTEXA72" -#else #endif #ifdef FORCE_CORTEXA73 @@ -1280,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "cortexa73" #define CORENAME "CORTEXA73" -#else +#endif + +#ifdef FORCE_CORTEXX1 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "CORTEXX1" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DCORTEXX1 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" +#define LIBNAME "cortexx1" +#define CORENAME "CORTEXX1" +#endif + +#ifdef FORCE_CORTEXX2 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "CORTEXX2" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DCORTEXX2 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" +#define LIBNAME "cortexx2" +#define CORENAME "CORTEXX2" +#endif + +#ifdef FORCE_CORTEXA510 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "CORTEXA510" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DCORTEXA510 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" +#define LIBNAME "cortexa510" +#define CORENAME "CORTEXA510" +#endif + +#ifdef FORCE_CORTEXA710 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "CORTEXA710" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DCORTEXA710 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" +#define LIBNAME 
"cortexa710" +#define CORENAME "CORTEXA710" #endif #ifdef FORCE_NEOVERSEN1 @@ -1297,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-march=armv8.2-a -mtune=neoverse-n1" #define LIBNAME "neoversen1" #define CORENAME "NEOVERSEN1" -#else #endif #ifdef FORCE_NEOVERSEV1 @@ -1314,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-march=armv8.4-a -mtune=neoverse-v1" #define LIBNAME "neoversev1" #define CORENAME "NEOVERSEV1" -#else #endif @@ -1332,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-march=armv8.5-a -mtune=neoverse-n2" #define LIBNAME "neoversen2" #define CORENAME "NEOVERSEN2" -#else #endif #ifdef FORCE_CORTEXA55 @@ -1348,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "cortexa55" #define CORENAME "CORTEXA55" -#else #endif #ifdef FORCE_FALKOR @@ -1364,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "falkor" #define CORENAME "FALKOR" -#else #endif #ifdef FORCE_THUNDERX @@ -1379,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "thunderx" #define CORENAME "THUNDERX" -#else #endif #ifdef FORCE_THUNDERX2T99 @@ -1397,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "thunderx2t99" #define CORENAME "THUNDERX2T99" -#else #endif #ifdef FORCE_TSV110 @@ -1413,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "tsv110" #define CORENAME "TSV110" -#else #endif #ifdef FORCE_EMAG8180 @@ -1448,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" #define LIBNAME "thunderx3t110" #define CORENAME "THUNDERX3T110" -#else #endif #ifdef FORCE_VORTEX @@ -1480,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" #define LIBNAME "a64fx" #define CORENAME "A64FX" -#else +#endif + +#ifdef FORCE_FT2000 +#define ARMV8 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "FT2000" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DFT2000 " \ + "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ + "-DL2_SIZE=33554426-DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" +#define LIBNAME "ft2000" +#define CORENAME "FT2000" #endif #ifdef FORCE_ZARCH_GENERIC diff --git a/param.h b/param.h index f5cbe96..792c178 100644 --- a/param.h +++ b/param.h @@ -3130,7 +3130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(CORTEXA57) || \ defined(CORTEXA72) || defined(CORTEXA73) || \ - defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) + defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 -#elif defined(ARMV8SVE) || defined(A64FX) +#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510) /* When all BLAS3 routines are implemeted with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions seperated. */ -- 2.7.4