CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
endif
+
+ifeq ($(CORE), THUNDERX2T99) # THUNDERX2T99 builds use the compiler's "vulcan" -mtune/-mcpu model
+CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
+FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
+endif
CORTEXA57
VULCAN
THUNDERX
+THUNDERX2T99
#define CPU_CORTEXA57 2
#define CPU_VULCAN 3
#define CPU_THUNDERX 4
+#define CPU_THUNDERX2T99 5 /* same position as "THUNDERX2T99" in cpuname[] */
static char *cpuname[] = {
"UNKNOWN",
"ARMV8" ,
"CORTEXA57",
"VULCAN",
- "THUNDERX"
+ "THUNDERX",
+ "THUNDERX2T99" /* entry 5, i.e. the value of CPU_THUNDERX2T99 */
};
static char *cpuname_lower[] = {
"armv8" ,
"cortexa57",
"vulcan",
- "thunderx"
+ "thunderx",
+ "thunderx2t99" /* lower-case spelling of the "THUNDERX2T99" entry in cpuname[] */
};
int get_feature(char *search)
return CPU_VULCAN;
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
return CPU_THUNDERX;
+ /* Cavium (implementer 0x43) ThunderX2 T99 identifies itself with CPU part
+  * 0x0af. The hex digits are written in lower case, like the 0x0a1 check
+  * above, because strstr() is case-sensitive. */
+ else if (strstr(cpu_part, "0x0af") && strstr(cpu_implementer, "0x43"))
+ return CPU_THUNDERX2T99;
}
p = (char *) NULL ;
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n");
break;
+
+ case CPU_THUNDERX2T99:
+ /* Emit the core's own macro rather than VULCAN so that an auto-detected
+  * build defines the same symbol as a forced one (FORCE_THUNDERX2T99
+  * passes -DTHUNDERX2T99). The cache and TLB values below mirror that
+  * forced configuration. */
+ printf("#define THUNDERX2T99 \n");
+ printf("#define HAVE_VFP \n");
+ printf("#define HAVE_VFPV3 \n");
+ printf("#define HAVE_NEON \n");
+ printf("#define HAVE_VFPV4 \n");
+ printf("#define L1_CODE_SIZE 32768 \n");
+ printf("#define L1_CODE_LINESIZE 64 \n");
+ printf("#define L1_CODE_ASSOCIATIVE 8 \n");
+ printf("#define L1_DATA_SIZE 32768 \n");
+ printf("#define L1_DATA_LINESIZE 64 \n");
+ printf("#define L1_DATA_ASSOCIATIVE 8 \n");
+ printf("#define L2_SIZE 262144 \n");
+ printf("#define L2_LINESIZE 64 \n");
+ printf("#define L2_ASSOCIATIVE 8 \n");
+ printf("#define L3_SIZE 33554432 \n");
+ printf("#define L3_LINESIZE 64 \n");
+ printf("#define L3_ASSOCIATIVE 32 \n");
+ printf("#define DTB_DEFAULT_ENTRIES 64 \n");
+ printf("#define DTB_SIZE 4096 \n");
+ break;
}
}
#if defined(ARCH_ARM64)
-#if defined(VULCAN)
-unsigned long vulcan_pre_a;
-unsigned long vulcan_pre_b;
-unsigned long vulcan_pre_c;
+#if defined(VULCAN) || defined(THUNDERX2T99)
+unsigned long dgemm_prefetch_size_a; /* assigned in blas_set_parameter(); the assembly kernel loads it into A_PRE_SIZE */
+unsigned long dgemm_prefetch_size_b; /* assigned in blas_set_parameter(); the assembly kernel loads it into B_PRE_SIZE */
+unsigned long dgemm_prefetch_size_c; /* assigned in blas_set_parameter(); the assembly kernel loads it into C_PRE_SIZE */
#endif
void blas_set_parameter(void)
{
-#if defined(VULCAN)
+#if defined(VULCAN) || defined(THUNDERX2T99) /* both targets receive the same values */
dgemm_p = 160;
dgemm_q = 128;
dgemm_r = 4096;
- vulcan_pre_a = 3584;
- vulcan_pre_b = 512;
- vulcan_pre_c = 128;
+ dgemm_prefetch_size_a = 3584; /* ends up in A_PRE_SIZE in the assembly kernel; presumably a byte distance - confirm */
+ dgemm_prefetch_size_b = 512; /* ends up in B_PRE_SIZE in the assembly kernel */
+ dgemm_prefetch_size_c = 128; /* ends up in C_PRE_SIZE in the assembly kernel */
#endif
}
#else
#endif
+#ifdef FORCE_THUNDERX2T99 /* fixed identification strings and cache/TLB settings for THUNDERX2T99 */
+#define FORCE /* once defined, the "#ifndef FORCE" section that follows is skipped */
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "THUNDERX2T99"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DTHUNDERX2T99 " \
+ "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME "thunderx2t99"
+#define CORENAME "THUNDERX2T99"
+#else
+#endif /* FORCE_THUNDERX2T99 */
+
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
--- /dev/null
+include $(KERNELDIR)/KERNEL.VULCAN # this new file adds no settings of its own; everything comes from the VULCAN kernel list
+
prfm PLDL1KEEP, [origPA]
- ldr A_PRE_SIZE, =vulcan_pre_a
+ ldr A_PRE_SIZE, =dgemm_prefetch_size_a // address of the global; the next instruction loads its value
ldr A_PRE_SIZE, [A_PRE_SIZE]
- ldr B_PRE_SIZE, =vulcan_pre_b
+ ldr B_PRE_SIZE, =dgemm_prefetch_size_b // address of the global; the next instruction loads its value
ldr B_PRE_SIZE, [B_PRE_SIZE]
- ldr C_PRE_SIZE, =vulcan_pre_c
+ ldr C_PRE_SIZE, =dgemm_prefetch_size_c // address of the global; the next instruction loads its value
ldr C_PRE_SIZE, [C_PRE_SIZE]
add A_PRE_SIZE_64, A_PRE_SIZE, #64
add B_PRE_SIZE_64, B_PRE_SIZE, #64
#define SYMV_P 16
#endif
+#if defined(THUNDERX2T99) /* GEMM unroll factors and P/Q/R defaults selected when THUNDERX2T99 is defined */
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define SGEMM_DEFAULT_UNROLL_N 4
+
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 4
+
+#define CGEMM_DEFAULT_UNROLL_M 8
+#define CGEMM_DEFAULT_UNROLL_N 4
+
+#define ZGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_N 4
+
+#define SGEMM_DEFAULT_P 512
+#define DGEMM_DEFAULT_P dgemm_p /* a variable, not a literal: blas_set_parameter() assigns 160 */
+#define CGEMM_DEFAULT_P 256
+#define ZGEMM_DEFAULT_P 128
+
+#define SGEMM_DEFAULT_Q 1024
+#define DGEMM_DEFAULT_Q dgemm_q /* a variable, not a literal: blas_set_parameter() assigns 128 */
+#define CGEMM_DEFAULT_Q 512
+#define ZGEMM_DEFAULT_Q 512
+
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R dgemm_r /* a variable, not a literal: blas_set_parameter() assigns 4096 */
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 2048
+
+#define SYMV_P 16
+#endif /* THUNDERX2T99 */
+
#if defined(ARMV5)
#define SNUMOPT 2
#define DNUMOPT 2