Add initial support for M1 on Linux, Phytium FT2xxx series, ARM Cortex 510/710/X1/X2
author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Sun, 27 Mar 2022 13:24:40 +0000 (15:24 +0200)
committer: GitHub <noreply@github.com>
Sun, 27 Mar 2022 13:24:40 +0000 (15:24 +0200)
Makefile.arm64
TargetList.txt
c_check
cpuid_arm64.c
getarch.c
param.h

diff --git a/Makefile.arm64 b/Makefile.arm64
index 2eade8d..96e31a4 100644 (file)
--- a/Makefile.arm64
+++ b/Makefile.arm64
@@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
 endif
 endif
 
+ifeq ($(CORE), FT2000) # Phytium FT2000: compiled as armv8-a with Cortex-A72 tuning
+CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
+endif
+endif
+
 # Use a72 tunings because Neoverse-N1 is only available
 # in GCC>=9
 ifeq ($(CORE), NEOVERSEN1)
@@ -229,6 +236,43 @@ endif
 endif
 endif
 
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) # only for GCC >= 11 or clang
+ifeq ($(CORE), CORTEXX1) # NOTE(review): Cortex-X1 may be Armv8.2-A rather than v9 - confirm -march
+CCOMMON_OPT += -march=armv9-a -mtune=cortex-x1
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv9-a -mtune=cortex-x1
+endif
+endif
+endif
+
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) # only for GCC >= 11 or clang
+ifeq ($(CORE), CORTEXX2) # NOTE(review): armv9-a / cortex-x2 may need GCC 12+ - confirm the guard above
+CCOMMON_OPT += -march=armv9-a -mtune=cortex-x2
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv9-a -mtune=cortex-x2
+endif
+endif
+endif
+
+# Cortex-A510 gets only -march (no -mtune). NOTE(review): a clang-only guard was tried here - confirm GCC >= 11 is enough.
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
+ifeq ($(CORE), CORTEXA510)
+CCOMMON_OPT += -march=armv8.4-a+sve
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.4-a+sve
+endif
+endif
+endif
+
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) # only for GCC >= 11 or clang
+ifeq ($(CORE), CORTEXA710) # compilers spell the tuning target with hyphens: cortex-a710
+CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710
+endif
+endif
+endif
+
 endif
 
 endif
diff --git a/TargetList.txt b/TargetList.txt
index a5a07a6..a297fd0 100644 (file)
--- a/TargetList.txt
+++ b/TargetList.txt
@@ -92,6 +92,10 @@ CORTEXA53
 CORTEXA57
 CORTEXA72
 CORTEXA73
+CORTEXA510
+CORTEXA710
+CORTEXX1
+CORTEXX2
 NEOVERSEN1
 NEOVERSEV1
 NEOVERSEN2
@@ -103,6 +107,9 @@ THUNDERX2T99
 TSV110
 THUNDERX3T110
 VORTEX
+A64FX
+ARMV8SVE
+FT2000
 
 9.System Z:
 ZARCH_GENERIC
diff --git a/c_check b/c_check
index e10ddfe..f9d3f2c 100644 (file)
--- a/c_check
+++ b/c_check
@@ -316,6 +316,7 @@ if ($architecture ne $hostarch) {
 }
 
 $cross = 1 if ($os ne $hostos);
+$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne ""));  # TERMUX_APP_PID set: not treated as a cross build; string test, since env values are strings
 
 $openmp = "" if $ENV{USE_OPENMP} != 1;
 
diff --git a/cpuid_arm64.c b/cpuid_arm64.c
index cc3a828..89ec186 100644 (file)
--- a/cpuid_arm64.c
+++ b/cpuid_arm64.c
@@ -45,6 +45,10 @@ size_t length64=sizeof(value64);
 #define CPU_NEOVERSEN1    11
 #define CPU_NEOVERSEV1    16
 #define CPU_NEOVERSEN2    17
+#define CPU_CORTEXX1      18
+#define CPU_CORTEXX2     19
+#define CPU_CORTEXA510   20
+#define CPU_CORTEXA710    21
 // Qualcomm
 #define CPU_FALKOR        6
 // Cavium
@@ -59,6 +63,8 @@ size_t length64=sizeof(value64);
 #define CPU_VORTEX       13
 // Fujitsu
 #define CPU_A64FX       15
+// Phytium
+#define CPU_FT2000       22
 
 static char *cpuname[] = {
   "UNKNOWN",
@@ -73,12 +79,17 @@ static char *cpuname[] = {
   "TSV110",
   "EMAG8180",
   "NEOVERSEN1",
-  "NEOVERSEV1"
-  "NEOVERSEN2"
   "THUNDERX3T110",
   "VORTEX",
   "CORTEXA55",
-  "A64FX"
+  "A64FX",
+  "NEOVERSEV1",
+  "NEOVERSEN2",
+  "CORTEXX1",
+  "CORTEXX2",
+  "CORTEXA510",
+  "CORTEXA710",
+  "FT2000"
 };
 
 static char *cpuname_lower[] = {
@@ -94,12 +105,17 @@ static char *cpuname_lower[] = {
   "tsv110",
   "emag8180",
   "neoversen1",
-  "neoversev1",
-  "neoversen2",
   "thunderx3t110",
   "vortex",
   "cortexa55",
-  "a64fx"
+  "a64fx",
+  "neoversev1",
+  "neoversen2",
+  "cortexx1",
+  "cortexx2",
+  "cortexa510",
+  "cortexa710",
+  "ft2000"
 };
 
 int get_feature(char *search)
@@ -182,6 +198,14 @@ int detect(void)
         return CPU_NEOVERSEN2;
       else if (strstr(cpu_part, "0xd05"))
        return CPU_CORTEXA55;
+      else if (strstr(cpu_part, "0xd46"))
+        return CPU_CORTEXA510;
+      else if (strstr(cpu_part, "0xd47"))
+       return CPU_CORTEXA710;
+      else if (strstr(cpu_part, "0xd44"))
+        return CPU_CORTEXX1;
+      else if (strstr(cpu_part, "0xd4c"))
+       return CPU_CORTEXX2;
     }
     // Qualcomm
     else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@@ -202,6 +226,13 @@ int detect(void)
     // Fujitsu
     else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
                         return CPU_A64FX;
+    // Apple
+    else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
+                       return CPU_VORTEX;
+   // Phytium
+   else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") 
+                       || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
+                       return CPU_FT2000;
        }
 
        p = (char *) NULL ;
@@ -382,7 +413,24 @@ void get_cpuconfig(void)
                 printf("#define DTB_DEFAULT_ENTRIES 48\n");
                 printf("#define DTB_SIZE 4096\n");
                 break;
-
+           case CPU_CORTEXA510:
+           case CPU_CORTEXA710:
+           case CPU_CORTEXX1:
+           case CPU_CORTEXX2:
+               printf("#define ARMV9\n");
+                printf("#define %s\n", cpuname[d]);
+                printf("#define L1_CODE_SIZE 65536\n");
+                printf("#define L1_CODE_LINESIZE 64\n");
+                printf("#define L1_CODE_ASSOCIATIVE 4\n");
+                printf("#define L1_DATA_SIZE 65536\n");
+                printf("#define L1_DATA_LINESIZE 64\n");
+                printf("#define L1_DATA_ASSOCIATIVE 4\n");
+                printf("#define L2_SIZE 1048576\n");
+                printf("#define L2_LINESIZE 64\n");
+                printf("#define L2_ASSOCIATIVE 8\n");
+               printf("#define DTB_DEFAULT_ENTRIES 64\n");
+               printf("#define DTB_SIZE 4096\n");
+               break;
            case CPU_FALKOR:
                printf("#define FALKOR\n");
                printf("#define L1_CODE_SIZE 65536\n");
@@ -469,9 +517,9 @@ void get_cpuconfig(void)
                printf("#define DTB_DEFAULT_ENTRIES  64       \n");
                printf("#define DTB_SIZE             4096     \n");
                break;
-#ifdef __APPLE__
            case CPU_VORTEX:
                printf("#define VORTEX                        \n");
+#ifdef __APPLE__
                sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
                printf("#define L1_CODE_SIZE         %lld       \n",value64);
                sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
@@ -480,10 +528,10 @@ void get_cpuconfig(void)
                printf("#define L1_DATA_SIZE         %lld       \n",value64);
                sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
                printf("#define L2_SIZE      %lld       \n",value64);
+#endif 
                printf("#define DTB_DEFAULT_ENTRIES  64       \n");
                printf("#define DTB_SIZE             4096     \n");
                break;
-#endif                 
            case CPU_A64FX:
                printf("#define A64FX\n");
                printf("#define L1_CODE_SIZE 65535\n");
@@ -494,6 +542,16 @@ void get_cpuconfig(void)
                printf("#define DTB_DEFAULT_ENTRIES 64\n");
                printf("#define DTB_SIZE 4096\n");
                break;
+           case CPU_FT2000:
+               printf("#define FT2000\n");
+               printf("#define L1_CODE_SIZE 32768\n");
+               printf("#define L1_DATA_SIZE 32768\n");
+               printf("#define L1_DATA_LINESIZE 64\n");
+               printf("#define L2_SIZE 33554432\n");
+               printf("#define L2_LINESIZE 64\n");
+               printf("#define DTB_DEFAULT_ENTRIES 64\n");
+               printf("#define DTB_SIZE 4096\n");
+               break;
        }
        get_cpucount();
 }
index e49eac1..26a2dd4 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -1232,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "cortexa53"
 #define CORENAME  "CORTEXA53"
-#else
 #endif
 
 #ifdef FORCE_CORTEXA57
@@ -1248,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "cortexa57"
 #define CORENAME  "CORTEXA57"
-#else
 #endif
 
 #ifdef FORCE_CORTEXA72
@@ -1264,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "cortexa72"
 #define CORENAME  "CORTEXA72"
-#else
 #endif
 
 #ifdef FORCE_CORTEXA73
@@ -1280,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "cortexa73"
 #define CORENAME  "CORTEXA73"
-#else
+#endif
+
+#ifdef FORCE_CORTEXX1 /* Forced-target settings for CORTEXX1. NOTE(review): L1D/L2 sizes below (32768/262144) differ from what get_cpuconfig() in cpuid_arm64.c prints for this core (65536/1048576); also confirm HAVE_SVE/ARMV9 really apply to Cortex-X1. */
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "CORTEXX1"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DCORTEXX1 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME   "cortexx1"
+#define CORENAME  "CORTEXX1"
+#endif
+
+#ifdef FORCE_CORTEXX2
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "CORTEXX2"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DCORTEXX2 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME   "cortexx2"
+#define CORENAME  "CORTEXX2"
+#endif
+
+#ifdef FORCE_CORTEXA510
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "CORTEXA510"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DCORTEXA510 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME   "cortexa510"
+#define CORENAME  "CORTEXA510"
+#endif
+
+#ifdef FORCE_CORTEXA710
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "CORTEXA710"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DCORTEXA710 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME   "cortexa710"
+#define CORENAME  "CORTEXA710"
 #endif
 
 #ifdef FORCE_NEOVERSEN1
@@ -1297,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-march=armv8.2-a -mtune=neoverse-n1"
 #define LIBNAME   "neoversen1"
 #define CORENAME  "NEOVERSEN1"
-#else
 #endif
 
 #ifdef FORCE_NEOVERSEV1
@@ -1314,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-march=armv8.4-a -mtune=neoverse-v1"
 #define LIBNAME   "neoversev1"
 #define CORENAME  "NEOVERSEV1"
-#else
 #endif
 
 
@@ -1332,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-march=armv8.5-a -mtune=neoverse-n2"
 #define LIBNAME   "neoversen2"
 #define CORENAME  "NEOVERSEN2"
-#else
 #endif
 
 #ifdef FORCE_CORTEXA55
@@ -1348,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "cortexa55"
 #define CORENAME  "CORTEXA55"
-#else
 #endif
 
 #ifdef FORCE_FALKOR
@@ -1364,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "falkor"
 #define CORENAME  "FALKOR"
-#else
 #endif
 
 #ifdef FORCE_THUNDERX
@@ -1379,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "thunderx"
 #define CORENAME  "THUNDERX"
-#else
 #endif
 
 #ifdef FORCE_THUNDERX2T99
@@ -1397,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "thunderx2t99"
 #define CORENAME  "THUNDERX2T99"
-#else
 #endif
 
 #ifdef FORCE_TSV110
@@ -1413,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "tsv110"
 #define CORENAME  "TSV110"
-#else
 #endif
 
 #ifdef FORCE_EMAG8180
@@ -1448,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
 #define LIBNAME   "thunderx3t110"
 #define CORENAME  "THUNDERX3T110"
-#else
 #endif
 
 #ifdef FORCE_VORTEX
@@ -1480,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
 #define LIBNAME   "a64fx"
 #define CORENAME  "A64FX"
-#else
+#endif
+
+#ifdef FORCE_FT2000 /* Forced-target settings for the Phytium FT2000; L2_SIZE is 32 MiB (33554432), matching the value get_cpuconfig() prints in cpuid_arm64.c */
+#define ARMV8
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "FT2000"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DFT2000 " \
+       "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+       "-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
+#define LIBNAME   "ft2000"
+#define CORENAME  "FT2000"
 #endif
 
 #ifdef FORCE_ZARCH_GENERIC
diff --git a/param.h b/param.h
index f5cbe96..792c178 100644 (file)
--- a/param.h
+++ b/param.h
@@ -3130,7 +3130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #if defined(CORTEXA57) || \
     defined(CORTEXA72) || defined(CORTEXA73) || \
-    defined(FALKOR)    || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
+    defined(FALKOR)    || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
 
 #define SGEMM_DEFAULT_UNROLL_M  16
 #define SGEMM_DEFAULT_UNROLL_N  4
@@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
 #define CGEMM_DEFAULT_R 4096
 #define ZGEMM_DEFAULT_R 4096
 
-#elif defined(ARMV8SVE) || defined(A64FX)
+#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)
 
 /* When all BLAS3 routines are implemeted with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
 Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions seperated. */