ARM64: Add the VULCAN Target
author Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Tue, 4 Oct 2016 08:50:20 +0000 (01:50 -0700)
committer Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Tue, 10 Jan 2017 09:31:17 +0000 (15:01 +0530)
Makefile.arm64
TargetList.txt
common_macro.h
cpuid_arm64.c
driver/others/memory.c
driver/others/parameter.c
getarch.c
kernel/arm64/KERNEL.VULCAN [new file with mode: 0644]
param.h

index b517016..33e76d0 100644 (file)
@@ -9,3 +9,8 @@ CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
 FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
 endif
 
+ifeq ($(CORE), VULCAN)
+CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
+FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
+endif
+
index 52a60b4..904dbe8 100644 (file)
@@ -80,4 +80,5 @@ ARMV5
 8.ARM 64-bit CPU:
 ARMV8
 CORTEXA57
+VULCAN
 
index 4976e76..15ba6f9 100644 (file)
 #endif
 
 #ifndef ASSEMBLER
-#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
+#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
 extern BLASLONG gemm_offset_a;
 extern BLASLONG gemm_offset_b;
 extern BLASLONG sgemm_p;
index 506c9d0..f074f9b 100644 (file)
 #define CPU_UNKNOWN            0
 #define CPU_ARMV8              1
 #define CPU_CORTEXA57          2
+#define CPU_VULCAN             3
 
 static char *cpuname[] = {
   "UNKNOWN",
   "ARMV8" ,
   "CORTEXA57"
+  "VULCAN"
 };
 
 static char *cpuname_lower[] = {
   "unknown",
   "armv8" ,
   "cortexa57"
+  "vulcan"
 };
 
 int get_feature(char *search)
@@ -85,25 +88,27 @@ int detect(void)
 #ifdef linux
 
        FILE *infile;
-       char buffer[512], *p;
+       char buffer[512], *p, cpu_part[32] = "", cpu_implementer[32] = ""; /* own copies: buffer is overwritten by every fgets */
        p = (char *) NULL ;
 
        infile = fopen("/proc/cpuinfo", "r");
-       while (fgets(buffer, sizeof(buffer), infile))
-       {
+       while (fgets(buffer, sizeof(buffer), infile)) {
 
                if (!strncmp("CPU part", buffer, 8))
-               {
-                       p = strchr(buffer, ':') + 2;
+                       snprintf(cpu_part, sizeof(cpu_part), "%s", strchr(buffer, ':') + 2);
+               else if (!strncmp("CPU implementer", buffer, 15))
+                       snprintf(cpu_implementer, sizeof(cpu_implementer), "%s", strchr(buffer, ':') + 2);
+               if (cpu_part[0] != '\0' && cpu_implementer[0] != '\0') { /* stop only once BOTH fields have been read */
                        break;
                }
        }
 
        fclose(infile);
-       if(p != NULL) {
-         if (strstr(p, "0xd07")) {
-           return CPU_CORTEXA57;
-         }
+       if (cpu_part[0] != '\0' && cpu_implementer[0] != '\0') {
+               if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41"))
+                       return CPU_CORTEXA57;
+               else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
+                       return CPU_VULCAN;
        }
 
        p = (char *) NULL ;
@@ -176,6 +181,28 @@ void get_cpuconfig(void)
                        printf("#define L2_ASSOCIATIVE 4\n");
                        break;
 
+               case CPU_VULCAN:
+                       printf("#define VULCAN                        \n");
+                       printf("#define HAVE_VFP                      \n");
+                       printf("#define HAVE_VFPV3                    \n");
+                       printf("#define HAVE_NEON                     \n");
+                       printf("#define HAVE_VFPV4                    \n");
+                       printf("#define L1_CODE_SIZE         32768    \n");
+                       printf("#define L1_CODE_LINESIZE     64       \n");
+                       printf("#define L1_CODE_ASSOCIATIVE  8        \n");
+                       printf("#define L1_DATA_SIZE         32768    \n");
+                       printf("#define L1_DATA_LINESIZE     64       \n");
+                       printf("#define L1_DATA_ASSOCIATIVE  8        \n");
+                       printf("#define L2_SIZE              262144   \n");
+                       printf("#define L2_LINESIZE          64       \n");
+                       printf("#define L2_ASSOCIATIVE       8        \n");
+                       printf("#define L3_SIZE              33554432 \n");
+                       printf("#define L3_LINESIZE          64       \n");
+                       printf("#define L3_ASSOCIATIVE       32       \n");
+                       printf("#define DTB_DEFAULT_ENTRIES  64       \n");
+                       printf("#define DTB_SIZE             4096     \n");
+                       break;
+
                case CPU_CORTEXA57:
                        printf("#define CORTEXA57\n");
                        printf("#define HAVE_VFP\n");
@@ -191,8 +218,8 @@ void get_cpuconfig(void)
                        printf("#define L2_SIZE 2097152\n");
                        printf("#define L2_LINESIZE 64\n");
                        printf("#define L2_ASSOCIATIVE 16\n");
-                       printf("#define DTB_DEFAULT_ENTRIES 64\n");
-                       printf("#define DTB_SIZE 4096\n");
+                       printf("#define DTB_DEFAULT_ENTRIES 64\n");
+                       printf("#define DTB_SIZE 4096\n");
                        break;
        }
 }
index b3c850a..0ac44f6 100644 (file)
@@ -995,7 +995,7 @@ void *blas_memory_alloc(int procpos){
     if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
 #endif
 
-#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
+#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
 #ifndef DYNAMIC_ARCH
     blas_set_parameter();
 #endif
index f22c6b6..60961b7 100644 (file)
@@ -727,3 +727,16 @@ void blas_set_parameter(void){
 
 }
 #endif
+
+#if defined(ARCH_ARM64)
+
+void blas_set_parameter(void) /* ARM64 variant: assigns the run-time dgemm_p/q/r variables */
+{
+#if defined(VULCAN) /* param.h maps DGEMM_DEFAULT_P/Q/R to these variables for VULCAN */
+  dgemm_p = 160;
+  dgemm_q = 128;
+  dgemm_r = 4096;
+#endif
+}
+
+#endif
index a91d42c..41c181e 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -897,6 +897,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 #endif
 
+#ifdef FORCE_VULCAN /* forced-target settings; cache/TLB values match get_cpuconfig()'s CPU_VULCAN case */
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "ARMV8"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DVULCAN " \
+       "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+       "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
+#define LIBNAME   "vulcan"
+#define CORENAME  "VULCAN"
+#else
+#endif /* FORCE_VULCAN */
+
 #ifndef FORCE
 
 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
diff --git a/kernel/arm64/KERNEL.VULCAN b/kernel/arm64/KERNEL.VULCAN
new file mode 100644 (file)
index 0000000..3372529
--- /dev/null
@@ -0,0 +1,2 @@
+include $(KERNELDIR)/KERNEL.CORTEXA57
+
diff --git a/param.h b/param.h
index d28c63a..3860f43 100644 (file)
--- a/param.h
+++ b/param.h
@@ -2306,6 +2306,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define SYMV_P 16
 #endif
 
+#if defined(VULCAN) /* GEMM/SYMV tuning constants for the VULCAN target */
+#define SNUMOPT                2
+#define DNUMOPT                2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M  16
+#define SGEMM_DEFAULT_UNROLL_N  4
+
+#define DGEMM_DEFAULT_UNROLL_M  8
+#define DGEMM_DEFAULT_UNROLL_N  4
+
+#define CGEMM_DEFAULT_UNROLL_M  8
+#define CGEMM_DEFAULT_UNROLL_N  4
+
+#define ZGEMM_DEFAULT_UNROLL_M  4
+#define ZGEMM_DEFAULT_UNROLL_N  4
+
+#define SGEMM_DEFAULT_P        512
+#define DGEMM_DEFAULT_P        dgemm_p /* run-time variable, assigned in blas_set_parameter() */
+#define CGEMM_DEFAULT_P 256
+#define ZGEMM_DEFAULT_P 128
+
+#define SGEMM_DEFAULT_Q 1024
+#define DGEMM_DEFAULT_Q dgemm_q /* run-time variable, assigned in blas_set_parameter() */
+#define CGEMM_DEFAULT_Q 512
+#define ZGEMM_DEFAULT_Q 512
+
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R dgemm_r /* run-time variable, assigned in blas_set_parameter() */
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 2048
+
+#define SYMV_P 16
+#endif /* VULCAN */
+
 
 #if defined(CORTEXA57)
 #define SNUMOPT                2