FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
+# Compiler flags for CORE = THUNDERX3T110 (appended to both the C and Fortran option lists).
+ifeq ($(CORE), THUNDERX3T110)
+# GCCVERSIONGTEQ10 is presumably 1 when the compiler is GCC 10 or newer - confirm where it is set.
+ifeq ($(GCCVERSIONGTEQ10), 1)
+CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
+FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
+else
+# Otherwise fall back to the armv8.1-a / thunderx2t99 flags.
+CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
+FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
+endif
+endif
+
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110
DYNAMIC_CORE += EMAG8180
+DYNAMIC_CORE += THUNDERX3T110
endif
ifeq ($(ARCH), zarch)
THUNDERX
THUNDERX2T99
TSV110
+THUNDERX3T110
9.System Z:
ZARCH_GENERIC
if (DYNAMIC_ARCH)
if (ARM64)
- set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1)
+ set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
endif ()
if (POWER)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
+ elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
+ # THUNDERX3T110: append the core macro and its cache/DTB parameters to the
+ # generated target configuration, then set the GEMM unroll factors and SYMV_P.
+ # L1_DATA_SIZE is 32768 so that this path agrees with the values used for the
+ # same core by the runtime CPU detection output and the FORCE_THUNDERX3T110
+ # ARCHCONFIG string.
+ file(APPEND ${TARGET_CONF_TEMP}
+ "#define THUNDERX3T110\n"
+ "#define L1_CODE_SIZE\t65536\n"
+ "#define L1_CODE_LINESIZE\t64\n"
+ "#define L1_CODE_ASSOCIATIVE\t8\n"
+ "#define L1_DATA_SIZE\t32768\n"
+ "#define L1_DATA_LINESIZE\t64\n"
+ "#define L1_DATA_ASSOCIATIVE\t8\n"
+ "#define L2_SIZE\t524288\n"
+ "#define L2_LINESIZE\t64\n"
+ "#define L2_ASSOCIATIVE\t8\n"
+ "#define L3_SIZE\t94371840\n"
+ "#define L3_LINESIZE\t64\n"
+ "#define L3_ASSOCIATIVE\t32\n"
+ "#define DTB_DEFAULT_ENTRIES\t64\n"
+ "#define DTB_SIZE\t4096\n"
+ "#define ARMV8\n")
+ set(SGEMM_UNROLL_M 16)
+ set(SGEMM_UNROLL_N 4)
+ set(DGEMM_UNROLL_M 8)
+ set(DGEMM_UNROLL_N 4)
+ set(CGEMM_UNROLL_M 8)
+ set(CGEMM_UNROLL_N 4)
+ set(ZGEMM_UNROLL_M 4)
+ set(ZGEMM_UNROLL_N 4)
+ set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "TSV110")
file(APPEND ${TARGET_CONF_TEMP}
"#define ARMV8\n"
// Cavium
#define CPU_THUNDERX 7
#define CPU_THUNDERX2T99 8
+#define CPU_THUNDERX3T110 12
//Hisilicon
#define CPU_TSV110 9
// Ampere
"THUNDERX2T99",
"TSV110",
"EMAG8180",
- "NEOVERSEN1"
+ "NEOVERSEN1",
+ "THUNDERX3T110"
};
static char *cpuname_lower[] = {
"thunderx2t99",
"tsv110",
"emag8180",
- "neoversen1"
+ "neoversen1",
+ "thunderx3t110"
};
int get_feature(char *search)
return CPU_THUNDERX;
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
return CPU_THUNDERX2T99;
+ else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8"))
+ return CPU_THUNDERX3T110;
// HiSilicon
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
return CPU_TSV110;
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
+ break;
+ case CPU_THUNDERX3T110:
+ // Print the THUNDERX3T110 core macro followed by its L1/L2/L3 and DTB
+ // parameter macros as #define lines on stdout.
+ printf("#define THUNDERX3T110 \n");
+ printf("#define L1_CODE_SIZE 65536 \n");
+ printf("#define L1_CODE_LINESIZE 64 \n");
+ printf("#define L1_CODE_ASSOCIATIVE 8 \n");
+ printf("#define L1_DATA_SIZE 32768 \n");
+ printf("#define L1_DATA_LINESIZE 64 \n");
+ printf("#define L1_DATA_ASSOCIATIVE 8 \n");
+ printf("#define L2_SIZE 524288 \n");
+ printf("#define L2_LINESIZE 64 \n");
+ printf("#define L2_ASSOCIATIVE 8 \n");
+ printf("#define L3_SIZE 94371840 \n");
+ printf("#define L3_LINESIZE 64 \n");
+ printf("#define L3_ASSOCIATIVE 32 \n");
+ printf("#define DTB_DEFAULT_ENTRIES 64 \n");
+ printf("#define DTB_SIZE 4096 \n");
+ break;
}
get_cpucount();
}
extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1;
+extern gotoblas_t gotoblas_THUNDERX3T110;
extern void openblas_warning(int verbose, const char * msg);
-#define NUM_CORETYPES 11
+#define NUM_CORETYPES 12
/*
* In case asm/hwcap.h is outdated on the build system, make sure
"tsv110",
"emag8180",
"neoversen1",
+ "thunderx3t110",
"unknown"
};
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
+ if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11];
return corename[NUM_CORETYPES];
}
case 8: return (&gotoblas_TSV110);
case 9: return (&gotoblas_EMAG8180);
case 10: return (&gotoblas_NEOVERSEN1);
+ case 11: return (&gotoblas_THUNDERX3T110);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
return &gotoblas_THUNDERX;
case 0x0af: // ThunderX2
return &gotoblas_THUNDERX2T99;
+ case 0x0b8: // ThunderX3
+ return &gotoblas_THUNDERX3T110;
}
break;
case 0x48: // HiSilicon
#define CORENAME "EMAG8180"
#endif
+/*
+ * Forced target THUNDERX3T110: when FORCE_THUNDERX3T110 is defined, fix the
+ * architecture/sub-architecture names, the kernel sub-directory, the library
+ * and core names, and the ARCHCONFIG string of -D options (core macro,
+ * L1/L2/L3 and DTB parameters, VFP/NEON/ARMV8 feature macros).
+ */
+#ifdef FORCE_THUNDERX3T110
+#define ARMV8
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "THUNDERX3T110"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DTHUNDERX3T110 " \
+ "-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DL3_SIZE=94371840 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
+#define LIBNAME "thunderx3t110"
+#define CORENAME "THUNDERX3T110"
+#endif
+
#ifdef FORCE_ZARCH_GENERIC
#define FORCE
#define ARCHITECTURE "ZARCH"
#include "functable.h"
#endif
-#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
+#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110)
// Multithreaded swap gives performance benefits in ThunderX2T99
#else
// Disable multi-threading as it does not show any performance
#include "functable.h"
#endif
-#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
+#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110)
// Multithreaded swap gives performance benefits in ThunderX2T99
#else
// Disable multi-threading as it does not show any performance
--- /dev/null
+# AMIN, MAX, MIN and the index-returning (I*) AMIN/MAX/MIN kernels all point at
+# C sources under ../arm.
+SAMINKERNEL = ../arm/amin.c
+DAMINKERNEL = ../arm/amin.c
+CAMINKERNEL = ../arm/zamin.c
+ZAMINKERNEL = ../arm/zamin.c
+
+SMAXKERNEL = ../arm/max.c
+DMAXKERNEL = ../arm/max.c
+
+SMINKERNEL = ../arm/min.c
+DMINKERNEL = ../arm/min.c
+
+ISAMINKERNEL = ../arm/iamin.c
+IDAMINKERNEL = ../arm/iamin.c
+ICAMINKERNEL = ../arm/izamin.c
+IZAMINKERNEL = ../arm/izamin.c
+
+ISMAXKERNEL = ../arm/imax.c
+IDMAXKERNEL = ../arm/imax.c
+
+ISMINKERNEL = ../arm/imin.c
+IDMINKERNEL = ../arm/imin.c
+
+# TRSM kernels: all four precisions (S/D/C/Z) use the same C sources under
+# ../generic for each of the LN, LT, RN and RT variants.
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+# AMAX, AXPY, ROT, SCAL, GEMV-N and GEMV-T kernels use assembly (.S) sources
+# named without a directory prefix. DAXPY is the only entry in this group that
+# points at a thunderx2t99-named file.
+SAMAXKERNEL = amax.S
+DAMAXKERNEL = amax.S
+CAMAXKERNEL = zamax.S
+ZAMAXKERNEL = zamax.S
+
+SAXPYKERNEL = axpy.S
+DAXPYKERNEL = daxpy_thunderx2t99.S
+CAXPYKERNEL = zaxpy.S
+ZAXPYKERNEL = zaxpy.S
+
+SROTKERNEL = rot.S
+DROTKERNEL = rot.S
+CROTKERNEL = zrot.S
+ZROTKERNEL = zrot.S
+
+SSCALKERNEL = scal.S
+DSCALKERNEL = scal.S
+CSCALKERNEL = zscal.S
+ZSCALKERNEL = zscal.S
+
+SGEMVNKERNEL = gemv_n.S
+DGEMVNKERNEL = gemv_n.S
+CGEMVNKERNEL = zgemv_n.S
+ZGEMVNKERNEL = zgemv_n.S
+
+SGEMVTKERNEL = gemv_t.S
+DGEMVTKERNEL = gemv_t.S
+CGEMVTKERNEL = zgemv_t.S
+ZGEMVTKERNEL = zgemv_t.S
+
+# TRMM kernels and GEMM copy routines. File names are built from the
+# <X>GEMM_UNROLL_M / <X>GEMM_UNROLL_N values. The "inner" copy routines
+# (INCOPY/ITCOPY) are only defined when UNROLL_M differs from UNROLL_N;
+# the "outer" ones (ONCOPY/OTCOPY) are always defined.
+STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
+ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
+SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
+SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
+SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
+SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
+SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
+
+ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
+
+# DGEMM inner copy: assembly sources when UNROLL_M is 8, generic C otherwise.
+ifeq ($(DGEMM_UNROLL_M), 8)
+DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
+DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
+else
+DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
+DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
+endif
+
+DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+
+# DGEMM outer copy: assembly sources when UNROLL_N is 4, generic C otherwise.
+ifeq ($(DGEMM_UNROLL_N), 4)
+DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
+DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
+else
+DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
+DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
+endif
+
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+# CGEMM and ZGEMM both take their copy routines from the generic zgemm_* C sources.
+CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
+ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
+CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
+CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
+CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
+ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
+ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
+ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
+ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
+ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+# ASUM, COPY, SWAP, I*AMAX, NRM2 and DOT kernels reuse the sources that carry
+# the thunderx2t99 suffix; DSDOT uses dot.S.
+SASUMKERNEL = sasum_thunderx2t99.c
+DASUMKERNEL = dasum_thunderx2t99.c
+CASUMKERNEL = casum_thunderx2t99.c
+ZASUMKERNEL = zasum_thunderx2t99.c
+
+SCOPYKERNEL = copy_thunderx2t99.c
+DCOPYKERNEL = copy_thunderx2t99.c
+CCOPYKERNEL = copy_thunderx2t99.c
+ZCOPYKERNEL = copy_thunderx2t99.c
+
+SSWAPKERNEL = swap_thunderx2t99.S
+DSWAPKERNEL = swap_thunderx2t99.S
+CSWAPKERNEL = swap_thunderx2t99.S
+ZSWAPKERNEL = swap_thunderx2t99.S
+
+ISAMAXKERNEL = iamax_thunderx2t99.c
+IDAMAXKERNEL = iamax_thunderx2t99.c
+ICAMAXKERNEL = izamax_thunderx2t99.c
+IZAMAXKERNEL = izamax_thunderx2t99.c
+
+SNRM2KERNEL = scnrm2_thunderx2t99.c
+CNRM2KERNEL = scnrm2_thunderx2t99.c
+# The "_fast" DNRM2/ZNRM2 variants below are left disabled; the regular
+# dznrm2_thunderx2t99.c source is selected instead.
+#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
+#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
+DNRM2KERNEL = dznrm2_thunderx2t99.c
+ZNRM2KERNEL = dznrm2_thunderx2t99.c
+
+
+DDOTKERNEL = dot_thunderx2t99.c
+SDOTKERNEL = dot_thunderx2t99.c
+CDOTKERNEL = zdot_thunderx2t99.c
+ZDOTKERNEL = zdot_thunderx2t99.c
+DSDOTKERNEL = dot.S
+
+# GEMM kernels: each is assigned only for the single unroll combination listed
+# (D 8x4, S 16x4, C 8x4, Z 4x4); for any other combination this file does not
+# set the corresponding variable.
+ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
+DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
+endif
+
+ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4)
+SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S
+endif
+
+ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4)
+CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S
+endif
+
+ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4)
+ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S
+endif
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
+#elif defined(THUNDERX3T110)
+/*
+ * Default GEMM parameters selected when THUNDERX3T110 is defined: per-precision
+ * UNROLL_M/UNROLL_N factors followed by the P, Q and R values.
+ * NOTE(review): the P/Q/R values are presumably blocking sizes carried over
+ * from the ThunderX2 tuning - confirm against the THUNDERX2T99 branch.
+ */
+
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define SGEMM_DEFAULT_UNROLL_N 4
+
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 4
+
+#define CGEMM_DEFAULT_UNROLL_M 8
+#define CGEMM_DEFAULT_UNROLL_N 4
+
+#define ZGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_N 4
+
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 320
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
+
+#define SGEMM_DEFAULT_Q 352
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
+
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
#elif defined(NEOVERSEN1)
#define SGEMM_DEFAULT_UNROLL_M 16