ARM64: Add THUNDERX3T110 Target
authorAshwin Sekhar T K <asekhar@marvell.com>
Thu, 11 Jun 2020 11:12:49 +0000 (04:12 -0700)
committerAshwin Sekhar T K <asekhar@marvell.com>
Mon, 27 Jul 2020 06:32:24 +0000 (23:32 -0700)
12 files changed:
Makefile.arm64
Makefile.system
TargetList.txt
cmake/arch.cmake
cmake/prebuild.cmake
cpuid_arm64.c
driver/others/dynamic_arm64.c
getarch.c
interface/swap.c
interface/zswap.c
kernel/arm64/KERNEL.THUNDERX3T110 [new file with mode: 0644]
param.h

index a7cd82e..1091edf 100644 (file)
@@ -56,6 +56,16 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
 FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
 endif
 
+ifeq ($(CORE), THUNDERX3T110)
+ifeq ($(GCCVERSIONGTEQ10), 1)
+CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
+FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
+else
+CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
+FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
+endif
+endif
+
 ifeq ($(GCCVERSIONGTEQ9), 1)
 ifeq ($(CORE), TSV110)
 CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
index db651ef..d7e71d0 100644 (file)
@@ -578,6 +578,7 @@ DYNAMIC_CORE += THUNDERX
 DYNAMIC_CORE += THUNDERX2T99
 DYNAMIC_CORE += TSV110
 DYNAMIC_CORE += EMAG8180
+DYNAMIC_CORE += THUNDERX3T110
 endif
 
 ifeq ($(ARCH), zarch)
index 4e54e30..8ea2df9 100644 (file)
@@ -96,6 +96,7 @@ FALKOR
 THUNDERX
 THUNDERX2T99
 TSV110
+THUNDERX3T110
 
 9.System Z:
 ZARCH_GENERIC
index d56ba99..5388156 100644 (file)
@@ -45,7 +45,7 @@ endif ()
 
 if (DYNAMIC_ARCH)
   if (ARM64)
-    set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1)
+    set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
   endif ()
   
   if (POWER)
index 3025687..e50483a 100644 (file)
@@ -338,6 +338,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
     set(ZGEMM_UNROLL_M 4)
     set(ZGEMM_UNROLL_N 4)
     set(SYMV_P 16)
+  elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
+    file(APPEND ${TARGET_CONF_TEMP}
+      "#define THUNDERX3T110\n"
+      "#define L1_CODE_SIZE\t65536\n"
+      "#define L1_CODE_LINESIZE\t64\n"
+      "#define L1_CODE_ASSOCIATIVE\t8\n"
+      "#define L1_DATA_SIZE\t65536\n"
+      "#define L1_DATA_LINESIZE\t64\n"
+      "#define L1_DATA_ASSOCIATIVE\t8\n"
+      "#define L2_SIZE\t524288\n"
+      "#define L2_LINESIZE\t64\n"
+      "#define L2_ASSOCIATIVE\t8\n"
+      "#define L3_SIZE\t94371840\n"
+      "#define L3_LINESIZE\t64\n"
+      "#define L3_ASSOCIATIVE\t32\n"
+      "#define DTB_DEFAULT_ENTRIES\t64\n"
+      "#define DTB_SIZE\t4096\n"
+      "#define ARMV8\n")
+    set(SGEMM_UNROLL_M 16)
+    set(SGEMM_UNROLL_N 4)
+    set(DGEMM_UNROLL_M 8)
+    set(DGEMM_UNROLL_N 4)
+    set(CGEMM_UNROLL_M 8)
+    set(CGEMM_UNROLL_N 4)
+    set(ZGEMM_UNROLL_M 4)
+    set(ZGEMM_UNROLL_N 4)
+    set(SYMV_P 16)
   elseif ("${TCORE}" STREQUAL "TSV110")
     file(APPEND ${TARGET_CONF_TEMP}
       "#define ARMV8\n"
index 4103216..6f41be6 100644 (file)
@@ -40,6 +40,7 @@
 // Cavium
 #define CPU_THUNDERX      7
 #define CPU_THUNDERX2T99  8
+#define CPU_THUNDERX3T110 12
 //Hisilicon
 #define CPU_TSV110        9
 // Ampere
@@ -57,7 +58,8 @@ static char *cpuname[] = {
   "THUNDERX2T99",
   "TSV110",
   "EMAG8180",
-  "NEOVERSEN1"
+  "NEOVERSEN1",
+  "THUNDERX3T110"
 };
 
 static char *cpuname_lower[] = {
@@ -72,7 +74,8 @@ static char *cpuname_lower[] = {
   "thunderx2t99",
   "tsv110",
   "emag8180",
-  "neoversen1"
+  "neoversen1",
+  "thunderx3t110"
 };
 
 int get_feature(char *search)
@@ -158,6 +161,8 @@ int detect(void)
                        return CPU_THUNDERX;
     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
                        return CPU_THUNDERX2T99;
+    else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8"))
+                       return CPU_THUNDERX3T110;
     // HiSilicon
     else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
                         return CPU_TSV110;
@@ -372,7 +377,25 @@ void get_cpuconfig(void)
        printf("#define L2_LINESIZE 64\n");
        printf("#define DTB_DEFAULT_ENTRIES 64\n");
        printf("#define DTB_SIZE 4096\n");
+                       break;
 
+               case CPU_THUNDERX3T110:
+                       printf("#define THUNDERX3T110                 \n");
+                       printf("#define L1_CODE_SIZE         65536    \n");
+                       printf("#define L1_CODE_LINESIZE     64       \n");
+                       printf("#define L1_CODE_ASSOCIATIVE  8        \n");
+                       printf("#define L1_DATA_SIZE         32768    \n");
+                       printf("#define L1_DATA_LINESIZE     64       \n");
+                       printf("#define L1_DATA_ASSOCIATIVE  8        \n");
+                       printf("#define L2_SIZE              524288   \n");
+                       printf("#define L2_LINESIZE          64       \n");
+                       printf("#define L2_ASSOCIATIVE       8        \n");
+                       printf("#define L3_SIZE              94371840 \n");
+                       printf("#define L3_LINESIZE          64       \n");
+                       printf("#define L3_ASSOCIATIVE       32       \n");
+                       printf("#define DTB_DEFAULT_ENTRIES  64       \n");
+                       printf("#define DTB_SIZE             4096     \n");
+                       break;
        }
        get_cpucount();
 }
index 11ef272..157b033 100644 (file)
@@ -53,10 +53,11 @@ extern gotoblas_t  gotoblas_THUNDERX2T99;
 extern gotoblas_t  gotoblas_TSV110;
 extern gotoblas_t  gotoblas_EMAG8180;
 extern gotoblas_t  gotoblas_NEOVERSEN1;
+extern gotoblas_t  gotoblas_THUNDERX3T110;
 
 extern void openblas_warning(int verbose, const char * msg);
 
-#define NUM_CORETYPES   11
+#define NUM_CORETYPES   12
 
 /*
  * In case asm/hwcap.h is outdated on the build system, make sure
@@ -82,6 +83,7 @@ static char *corename[] = {
   "tsv110",
   "emag8180",
   "neoversen1",
+  "thunderx3t110",
   "unknown"
 };
 
@@ -97,6 +99,7 @@ char *gotoblas_corename(void) {
   if (gotoblas == &gotoblas_TSV110)       return corename[ 8];
   if (gotoblas == &gotoblas_EMAG8180)     return corename[ 9];
   if (gotoblas == &gotoblas_NEOVERSEN1)   return corename[10];
+  if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11];
   return corename[NUM_CORETYPES];
 }
 
@@ -127,6 +130,7 @@ static gotoblas_t *force_coretype(char *coretype) {
     case  8: return (&gotoblas_TSV110);
     case  9: return (&gotoblas_EMAG8180);
     case 10: return (&gotoblas_NEOVERSEN1);
+    case 11: return (&gotoblas_THUNDERX3T110);
   }
   snprintf(message, 128, "Core not found: %s\n", coretype);
   openblas_warning(1, message);
@@ -190,6 +194,8 @@ static gotoblas_t *get_coretype(void) {
           return &gotoblas_THUNDERX;
         case 0x0af: // ThunderX2
           return &gotoblas_THUNDERX2T99;
+        case 0x0b8: // ThunderX3
+          return &gotoblas_THUNDERX3T110;
       }
       break;
     case 0x48: // HiSilicon
index 2cdf772..51c9a84 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -1174,6 +1174,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME  "EMAG8180"
 #endif
 
+#ifdef FORCE_THUNDERX3T110
+#define ARMV8
+#define FORCE
+#define ARCHITECTURE    "ARM64"
+#define SUBARCHITECTURE "THUNDERX3T110"
+#define SUBDIRNAME      "arm64"
+#define ARCHCONFIG   "-DTHUNDERX3T110 " \
+       "-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
+       "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+       "-DL3_SIZE=94371840 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
+#define LIBNAME   "thunderx3t110"
+#define CORENAME  "THUNDERX3T110"
+#else
+#endif
+
 #ifdef FORCE_ZARCH_GENERIC
 #define FORCE
 #define ARCHITECTURE    "ZARCH"
index 17a9868..ea40b1f 100644 (file)
@@ -42,7 +42,7 @@
 #include "functable.h"
 #endif
 
-#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
+#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110)
 // Multithreaded swap gives performance benefits in ThunderX2T99
 #else
 // Disable multi-threading as it does not show any performance
index 372b154..43971b7 100644 (file)
@@ -42,7 +42,7 @@
 #include "functable.h"
 #endif
 
-#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
+#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110)
 // Multithreaded swap gives performance benefits in ThunderX2T99
 #else
 // Disable multi-threading as it does not show any performance
diff --git a/kernel/arm64/KERNEL.THUNDERX3T110 b/kernel/arm64/KERNEL.THUNDERX3T110
new file mode 100644 (file)
index 0000000..a20d0d4
--- /dev/null
@@ -0,0 +1,184 @@
+SAMINKERNEL  = ../arm/amin.c
+DAMINKERNEL  = ../arm/amin.c
+CAMINKERNEL  = ../arm/zamin.c
+ZAMINKERNEL  = ../arm/zamin.c
+
+SMAXKERNEL   = ../arm/max.c
+DMAXKERNEL   = ../arm/max.c
+
+SMINKERNEL   = ../arm/min.c
+DMINKERNEL   = ../arm/min.c
+
+ISAMINKERNEL = ../arm/iamin.c
+IDAMINKERNEL = ../arm/iamin.c
+ICAMINKERNEL = ../arm/izamin.c
+IZAMINKERNEL = ../arm/izamin.c
+
+ISMAXKERNEL  = ../arm/imax.c
+IDMAXKERNEL  = ../arm/imax.c
+
+ISMINKERNEL  = ../arm/imin.c
+IDMINKERNEL  = ../arm/imin.c
+
+STRSMKERNEL_LN =  ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT =  ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN =  ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT =  ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+SAMAXKERNEL  = amax.S
+DAMAXKERNEL  = amax.S
+CAMAXKERNEL  = zamax.S
+ZAMAXKERNEL  = zamax.S
+
+SAXPYKERNEL  = axpy.S
+DAXPYKERNEL  = daxpy_thunderx2t99.S
+CAXPYKERNEL  = zaxpy.S
+ZAXPYKERNEL  = zaxpy.S
+
+SROTKERNEL   = rot.S
+DROTKERNEL   = rot.S
+CROTKERNEL   = zrot.S
+ZROTKERNEL   = zrot.S
+
+SSCALKERNEL  = scal.S
+DSCALKERNEL  = scal.S
+CSCALKERNEL  = zscal.S
+ZSCALKERNEL  = zscal.S
+
+SGEMVNKERNEL = gemv_n.S
+DGEMVNKERNEL = gemv_n.S
+CGEMVNKERNEL = zgemv_n.S
+ZGEMVNKERNEL = zgemv_n.S
+
+SGEMVTKERNEL = gemv_t.S
+DGEMVTKERNEL = gemv_t.S
+CGEMVTKERNEL = zgemv_t.S
+ZGEMVTKERNEL = zgemv_t.S
+
+STRMMKERNEL    =  strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
+ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
+SGEMMINCOPY    =  ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
+SGEMMITCOPY    =  ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
+SGEMMINCOPYOBJ =  sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ =  sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+SGEMMONCOPY    =  ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
+SGEMMOTCOPY    =  ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+DTRMMKERNEL    =  dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
+
+ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
+
+ifeq ($(DGEMM_UNROLL_M), 8)
+DGEMMINCOPY    =  dgemm_ncopy_$(DGEMM_UNROLL_M).S
+DGEMMITCOPY    =  dgemm_tcopy_$(DGEMM_UNROLL_M).S
+else
+DGEMMINCOPY    =  ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
+DGEMMITCOPY    =  ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
+endif
+
+DGEMMINCOPYOBJ =  dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ =  dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+
+ifeq ($(DGEMM_UNROLL_N), 4)
+DGEMMONCOPY    =  dgemm_ncopy_$(DGEMM_UNROLL_N).S
+DGEMMOTCOPY    =  dgemm_tcopy_$(DGEMM_UNROLL_N).S
+else
+DGEMMONCOPY    =  ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
+DGEMMOTCOPY    =  ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
+endif
+
+DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+CTRMMKERNEL    =  ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
+ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
+CGEMMINCOPY    =  ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
+CGEMMITCOPY    =  ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
+CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+CGEMMONCOPY    =  ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
+CGEMMOTCOPY    =  ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
+CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+ZTRMMKERNEL    =  ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
+ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
+ZGEMMINCOPY    =  ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
+ZGEMMITCOPY    =  ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
+ZGEMMINCOPYOBJ =  zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ =  zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+endif
+ZGEMMONCOPY    =  ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
+ZGEMMOTCOPY    =  ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
+ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+SASUMKERNEL    = sasum_thunderx2t99.c
+DASUMKERNEL    = dasum_thunderx2t99.c
+CASUMKERNEL    = casum_thunderx2t99.c
+ZASUMKERNEL    = zasum_thunderx2t99.c
+
+SCOPYKERNEL    = copy_thunderx2t99.c
+DCOPYKERNEL    = copy_thunderx2t99.c
+CCOPYKERNEL    = copy_thunderx2t99.c
+ZCOPYKERNEL    = copy_thunderx2t99.c
+
+SSWAPKERNEL    = swap_thunderx2t99.S
+DSWAPKERNEL    = swap_thunderx2t99.S
+CSWAPKERNEL    = swap_thunderx2t99.S
+ZSWAPKERNEL    = swap_thunderx2t99.S
+
+ISAMAXKERNEL   = iamax_thunderx2t99.c
+IDAMAXKERNEL   = iamax_thunderx2t99.c
+ICAMAXKERNEL   = izamax_thunderx2t99.c
+IZAMAXKERNEL   = izamax_thunderx2t99.c
+
+SNRM2KERNEL    = scnrm2_thunderx2t99.c
+CNRM2KERNEL    = scnrm2_thunderx2t99.c
+#DNRM2KERNEL    = dznrm2_thunderx2t99_fast.c
+#ZNRM2KERNEL    = dznrm2_thunderx2t99_fast.c
+DNRM2KERNEL    = dznrm2_thunderx2t99.c
+ZNRM2KERNEL    = dznrm2_thunderx2t99.c
+
+
+DDOTKERNEL     = dot_thunderx2t99.c
+SDOTKERNEL     = dot_thunderx2t99.c
+CDOTKERNEL     = zdot_thunderx2t99.c
+ZDOTKERNEL     = zdot_thunderx2t99.c
+DSDOTKERNEL    = dot.S
+
+ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
+DGEMMKERNEL    = dgemm_kernel_8x4_thunderx2t99.S
+endif
+
+ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4)
+SGEMMKERNEL    =  sgemm_kernel_16x4_thunderx2t99.S
+endif
+
+ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4)
+CGEMMKERNEL    =  cgemm_kernel_8x4_thunderx2t99.S
+endif
+
+ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4)
+ZGEMMKERNEL    =  zgemm_kernel_4x4_thunderx2t99.S
+endif
diff --git a/param.h b/param.h
index efe0e10..476f237 100644 (file)
--- a/param.h
+++ b/param.h
@@ -2779,6 +2779,35 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
 #define CGEMM_DEFAULT_R 4096
 #define ZGEMM_DEFAULT_R 4096
 
+#elif defined(THUNDERX3T110)
+
+#define SGEMM_DEFAULT_UNROLL_M  16
+#define SGEMM_DEFAULT_UNROLL_N  4
+
+#define DGEMM_DEFAULT_UNROLL_M  8
+#define DGEMM_DEFAULT_UNROLL_N  4
+
+#define CGEMM_DEFAULT_UNROLL_M  8
+#define CGEMM_DEFAULT_UNROLL_N  4
+
+#define ZGEMM_DEFAULT_UNROLL_M  4
+#define ZGEMM_DEFAULT_UNROLL_N  4
+
+#define SGEMM_DEFAULT_P        128
+#define DGEMM_DEFAULT_P        320
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
+
+#define SGEMM_DEFAULT_Q 352
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
+
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
 #elif defined(NEOVERSEN1)
 
 #define SGEMM_DEFAULT_UNROLL_M  16