endif
endif
+# FT2000 (Phytium) target: build for baseline armv8-a with cortex-a72 tuning.
+ifeq ($(CORE), FT2000)
+CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
+# The Fortran compiler receives the same flags unless F_COMPILER is NAG.
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
+endif
+endif
+
# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
endif
endif
+# CORTEXX1: applied only when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1.
+# GCC has no "armv9" -march value (the name is armv9-a, and only from GCC 12)
+# and spells the CPU "cortex-x1", so use an SVE-enabled armv8.4-a baseline
+# that every compiler admitted by this guard accepts.
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
+ifeq ($(CORE), CORTEXX1)
+CCOMMON_OPT += -march=armv8.4-a+sve -mtune=cortex-x1
+# The Fortran compiler receives the same flags unless F_COMPILER is NAG.
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.4-a+sve -mtune=cortex-x1
+endif
+endif
+endif
+
+# CORTEXX2: applied only when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1.
+# "armv9" and "cortexx2" are not valid GCC -march/-mtune values, and the
+# correct spellings (armv9-a, cortex-x2) are unknown to GCC 11, which this
+# guard admits. Use an SVE-enabled armv8.4-a baseline and leave tuning at the
+# compiler default so the flags are accepted everywhere the guard is true.
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
+ifeq ($(CORE), CORTEXX2)
+CCOMMON_OPT += -march=armv8.4-a+sve
+# The Fortran compiler receives the same flags unless F_COMPILER is NAG.
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.4-a+sve
+endif
+endif
+endif
+
+# CORTEXA510: applied only when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1.
+# Builds for armv8.4-a with SVE; no -mtune value is passed for this core.
+#ifeq (1, $(filter 1,$(ISCLANG)))
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
+ifeq ($(CORE), CORTEXA510)
+CCOMMON_OPT += -march=armv8.4-a+sve
+# The Fortran compiler receives the same flags unless F_COMPILER is NAG.
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.4-a+sve
+endif
+endif
+endif
+
+# CORTEXA710: applied only when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1.
+# "cortexa710" is not a valid -mtune value, and the correct spelling
+# (cortex-a710) is unknown to GCC 11, which this guard admits. Keep the
+# SVE-enabled -march and leave tuning at the compiler default.
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
+ifeq ($(CORE), CORTEXA710)
+CCOMMON_OPT += -march=armv8.2-a+sve
+# The Fortran compiler receives the same flags unless F_COMPILER is NAG.
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.2-a+sve
+endif
+endif
+endif
+
endif
endif
CORTEXA57
CORTEXA72
CORTEXA73
+CORTEXA510
+CORTEXA710
+CORTEXX1
+CORTEXX2
NEOVERSEN1
NEOVERSEV1
NEOVERSEN2
TSV110
THUNDERX3T110
VORTEX
+A64FX
+ARMV8SVE
+FT2000
9.System Z:
ZARCH_GENERIC
}
$cross = 1 if ($os ne $hostos);
+# An Android target built on a Linux host is not a cross build when
+# TERMUX_APP_PID is present in the environment (presumably exported by the
+# Termux app -- confirm). Use a definedness check plus string comparison:
+# "!=" would compare numerically, warn on the empty string and treat any
+# non-numeric value as 0.
+$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne ""));
$openmp = "" if $ENV{USE_OPENMP} != 1;
#define CPU_NEOVERSEN1 11
#define CPU_NEOVERSEV1 16
#define CPU_NEOVERSEN2 17
+#define CPU_CORTEXX1 18
+#define CPU_CORTEXX2 19
+#define CPU_CORTEXA510 20
+#define CPU_CORTEXA710 21
// Qualcomm
#define CPU_FALKOR 6
// Cavium
#define CPU_VORTEX 13
// Fujitsu
#define CPU_A64FX 15
+// Phytium
+#define CPU_FT2000 22
static char *cpuname[] = {
"UNKNOWN",
"TSV110",
"EMAG8180",
"NEOVERSEN1",
- "NEOVERSEV1"
- "NEOVERSEN2"
"THUNDERX3T110",
"VORTEX",
"CORTEXA55",
- "A64FX"
+ "A64FX",
+ "NEOVERSEV1",
+ "NEOVERSEN2",
+ "CORTEXX1",
+ "CORTEXX2",
+ "CORTEXA510",
+ "CORTEXA710",
+ "FT2000"
};
static char *cpuname_lower[] = {
"tsv110",
"emag8180",
"neoversen1",
- "neoversev1",
- "neoversen2",
"thunderx3t110",
"vortex",
"cortexa55",
- "a64fx"
+ "a64fx",
+ "neoversev1",
+ "neoversen2",
+ "cortexx1",
+ "cortexx2",
+ "cortexa510",
+ "cortexa710",
+ "ft2000"
};
int get_feature(char *search)
return CPU_NEOVERSEN2;
else if (strstr(cpu_part, "0xd05"))
return CPU_CORTEXA55;
+ else if (strstr(cpu_part, "0xd46"))
+ return CPU_CORTEXA510;
+ else if (strstr(cpu_part, "0xd47"))
+ return CPU_CORTEXA710;
+ else if (strstr(cpu_part, "0xd44"))
+ return CPU_CORTEXX1;
+ else if (strstr(cpu_part, "0xd4c"))
+ return CPU_CORTEXX2;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
// Fujitsu
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
return CPU_A64FX;
+ // Apple
+ else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
+ return CPU_VORTEX;
+ // Phytium
+ else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")
+ || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
+ return CPU_FT2000;
}
p = (char *) NULL ;
printf("#define DTB_DEFAULT_ENTRIES 48\n");
printf("#define DTB_SIZE 4096\n");
break;
-
+ /* These four Cortex cores share one description: emit the ARMV9 marker,
+    the core-name macro taken from cpuname[d], and fixed cache/TLB values. */
+ case CPU_CORTEXA510:
+ case CPU_CORTEXA710:
+ case CPU_CORTEXX1:
+ case CPU_CORTEXX2:
+   printf("#define ARMV9\n"
+          "#define %s\n"
+          "#define L1_CODE_SIZE 65536\n"
+          "#define L1_CODE_LINESIZE 64\n"
+          "#define L1_CODE_ASSOCIATIVE 4\n"
+          "#define L1_DATA_SIZE 65536\n"
+          "#define L1_DATA_LINESIZE 64\n"
+          "#define L1_DATA_ASSOCIATIVE 4\n"
+          "#define L2_SIZE 1048576\n"
+          "#define L2_LINESIZE 64\n"
+          "#define L2_ASSOCIATIVE 8\n"
+          "#define DTB_DEFAULT_ENTRIES 64\n"
+          "#define DTB_SIZE 4096\n",
+          cpuname[d]);
+   break;
case CPU_FALKOR:
printf("#define FALKOR\n");
printf("#define L1_CODE_SIZE 65536\n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
-#ifdef __APPLE__
case CPU_VORTEX:
printf("#define VORTEX \n");
+#ifdef __APPLE__
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
printf("#define L1_CODE_SIZE %lld \n",value64);
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
printf("#define L1_DATA_SIZE %lld \n",value64);
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
printf("#define L2_SIZE %lld \n",value64);
+#endif
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
-#endif
case CPU_A64FX:
printf("#define A64FX\n");
printf("#define L1_CODE_SIZE 65535\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
+ /* FT2000: emit the core macro followed by fixed L1/L2 cache and TLB values. */
+ case CPU_FT2000:
+   printf("#define FT2000\n"
+          "#define L1_CODE_SIZE 32768\n"
+          "#define L1_DATA_SIZE 32768\n"
+          "#define L1_DATA_LINESIZE 64\n"
+          "#define L2_SIZE 33554432\n"
+          "#define L2_LINESIZE 64\n"
+          "#define DTB_DEFAULT_ENTRIES 64\n"
+          "#define DTB_SIZE 4096\n");
+   break;
}
get_cpucount();
}
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa53"
#define CORENAME "CORTEXA53"
-#else
#endif
#ifdef FORCE_CORTEXA57
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa57"
#define CORENAME "CORTEXA57"
-#else
#endif
#ifdef FORCE_CORTEXA72
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa72"
#define CORENAME "CORTEXA72"
-#else
#endif
#ifdef FORCE_CORTEXA73
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa73"
#define CORENAME "CORTEXA73"
-#else
+#endif
+
+/* Forced-target settings used when FORCE_CORTEXX1 is defined: architecture
+   and sub-architecture strings, the source subdirectory, the library/core
+   names, and ARCHCONFIG, which carries the cache/TLB sizes and feature macros.
+   NOTE(review): ARCHCONFIG sets -DHAVE_SVE and -DARMV9 for this core; confirm
+   against the Cortex-X1 documentation that the core implements SVE. */
+#ifdef FORCE_CORTEXX1
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "CORTEXX1"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DCORTEXX1 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME "cortexx1"
+#define CORENAME "CORTEXX1"
+#endif
+
+/* Forced-target settings used when FORCE_CORTEXX2 is defined: architecture
+   and sub-architecture strings, the source subdirectory, the library/core
+   names, and ARCHCONFIG, which carries the cache/TLB sizes and the feature
+   macros (including HAVE_SVE, ARMV8 and ARMV9). */
+#ifdef FORCE_CORTEXX2
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "CORTEXX2"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DCORTEXX2 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME "cortexx2"
+#define CORENAME "CORTEXX2"
+#endif
+
+/* Forced-target settings used when FORCE_CORTEXA510 is defined: architecture
+   and sub-architecture strings, the source subdirectory, the library/core
+   names, and ARCHCONFIG, which carries the cache/TLB sizes and the feature
+   macros (including HAVE_SVE, ARMV8 and ARMV9). */
+#ifdef FORCE_CORTEXA510
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "CORTEXA510"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DCORTEXA510 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME "cortexa510"
+#define CORENAME "CORTEXA510"
+#endif
+
+/* Forced-target settings used when FORCE_CORTEXA710 is defined: architecture
+   and sub-architecture strings, the source subdirectory, the library/core
+   names, and ARCHCONFIG, which carries the cache/TLB sizes and the feature
+   macros (including HAVE_SVE, ARMV8 and ARMV9). */
+#ifdef FORCE_CORTEXA710
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "CORTEXA710"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DCORTEXA710 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
+#define LIBNAME "cortexa710"
+#define CORENAME "CORTEXA710"
#endif
#ifdef FORCE_NEOVERSEN1
"-march=armv8.2-a -mtune=neoverse-n1"
#define LIBNAME "neoversen1"
#define CORENAME "NEOVERSEN1"
-#else
#endif
#ifdef FORCE_NEOVERSEV1
"-march=armv8.4-a -mtune=neoverse-v1"
#define LIBNAME "neoversev1"
#define CORENAME "NEOVERSEV1"
-#else
#endif
"-march=armv8.5-a -mtune=neoverse-n2"
#define LIBNAME "neoversen2"
#define CORENAME "NEOVERSEN2"
-#else
#endif
#ifdef FORCE_CORTEXA55
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa55"
#define CORENAME "CORTEXA55"
-#else
#endif
#ifdef FORCE_FALKOR
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "falkor"
#define CORENAME "FALKOR"
-#else
#endif
#ifdef FORCE_THUNDERX
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx"
#define CORENAME "THUNDERX"
-#else
#endif
#ifdef FORCE_THUNDERX2T99
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx2t99"
#define CORENAME "THUNDERX2T99"
-#else
#endif
#ifdef FORCE_TSV110
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "tsv110"
#define CORENAME "TSV110"
-#else
#endif
#ifdef FORCE_EMAG8180
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx3t110"
#define CORENAME "THUNDERX3T110"
-#else
#endif
#ifdef FORCE_VORTEX
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
#define LIBNAME "a64fx"
#define CORENAME "A64FX"
-#else
+#endif
+
+/* Forced-target settings used when FORCE_FT2000 is defined: architecture and
+   sub-architecture strings, the source subdirectory, the library/core names,
+   and ARCHCONFIG with the cache/TLB sizes and feature macros.
+   L2_SIZE is 33554432 (32 MiB), the same value the CPU_FT2000 detection case
+   prints, and must be separated from -DL2_LINESIZE by a space so the two
+   options are not fused into a single malformed -D argument. */
+#ifdef FORCE_FT2000
+#define ARMV8
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "FT2000"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DFT2000 " \
+ "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
+#define LIBNAME "ft2000"
+#define CORENAME "FT2000"
#endif
#ifdef FORCE_ZARCH_GENERIC
#if defined(CORTEXA57) || \
defined(CORTEXA72) || defined(CORTEXA73) || \
- defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
+ defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
-#elif defined(ARMV8SVE) || defined(A64FX)
+#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */