added target processor STEAMROLLER
author Werner Saar <wernsaar@googlemail.com>
Sun, 28 Dec 2014 12:16:46 +0000 (20:16 +0800)
committer Werner Saar <wernsaar@googlemail.com>
Sun, 28 Dec 2014 12:16:46 +0000 (20:16 +0800)
19 files changed:
Makefile.rule
Makefile.system
README.md
TargetList.txt
common_x86.h
common_x86_64.h
cpuid.h
cpuid_x86.c
driver/others/dynamic.c
driver/others/parameter.c
getarch.c
kernel/setparam-ref.c
kernel/x86_64/KERNEL.STEAMROLLER [new file with mode: 0644]
kernel/x86_64/ddot.c
kernel/x86_64/sdot.c
kernel/x86_64/sgemv_n_4.c
kernel/x86_64/sgemv_t_4.c
kernel/x86_64/zgemv_t_4.c
param.h

index d3a2d1f..4bd1ab1 100644 (file)
@@ -15,7 +15,7 @@ VERSION = 0.2.13
 # TARGET = PENRYN
 
 # If you want to support multiple architecture in one binary
-DYNAMIC_ARCH = 1
+DYNAMIC_ARCH = 1
 
 # C compiler including binary type(32bit / 64bit). Default is gcc.
 # Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
index ec6339d..e3e2d52 100644 (file)
@@ -61,6 +61,9 @@ endif
 ifeq ($(TARGET), PILEDRIVER)
 GETARCH_FLAGS := -DFORCE_BARCELONA
 endif
+ifeq ($(TARGET), STEAMROLLER)
+GETARCH_FLAGS := -DFORCE_BARCELONA
+endif
 endif
 
 
@@ -85,6 +88,9 @@ endif
 ifeq ($(TARGET_CORE), PILEDRIVER)
 GETARCH_FLAGS := -DFORCE_BARCELONA
 endif
+ifeq ($(TARGET_CORE), STEAMROLLER)
+GETARCH_FLAGS := -DFORCE_BARCELONA
+endif
 endif
 
 
@@ -392,7 +398,7 @@ endif
 ifeq ($(ARCH), x86_64)
 DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
 ifneq ($(NO_AVX), 1)
-DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
+DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
 endif
 ifneq ($(NO_AVX2), 1)
 DYNAMIC_CORE += HASWELL
index f4c5477..cdacf98 100644 (file)
--- a/README.md
+++ b/README.md
@@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt
 - **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
 - **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
 - **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
+- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
 
 #### MIPS64:
 - **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
index 97661fd..c91401f 100644 (file)
@@ -32,6 +32,7 @@ ISTANBUL
 BOBCAT
 BULLDOZER
 PILEDRIVER
+STEAMROLLER
 
 c)VIA CPU:
 SSE_GENERIC
index f97fd34..9d82090 100644 (file)
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 #define MMXSTORE       movd
 #endif
 
-#if defined(PILEDRIVER) || defined(BULLDOZER)
+#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 //Enable some optimazation for barcelona.
 #define BARCELONA_OPTIMIZATION
 #endif
index 547614f..e0a6c4c 100644 (file)
@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 
 #ifdef ASSEMBLER
 
-#if defined(PILEDRIVER) || defined(BULLDOZER)
+#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 //Enable some optimazation for barcelona.
 #define BARCELONA_OPTIMIZATION
 #endif
diff --git a/cpuid.h b/cpuid.h
index cb4404c..ab6a3fb 100644 (file)
--- a/cpuid.h
+++ b/cpuid.h
 #define CORE_ATOM      18
 #define CORE_NANO      19
 #define CORE_SANDYBRIDGE 20
-#define CORE_BOBCAT     21
-#define CORE_BULLDOZER  22
+#define CORE_BOBCAT      21
+#define CORE_BULLDOZER   22
 #define CORE_PILEDRIVER  23
-#define CORE_HASWELL 24
+#define CORE_HASWELL     24
+#define CORE_STEAMROLLER 25
 
 #define HAVE_SSE      (1 <<  0)
 #define HAVE_SSE2     (1 <<  1)
@@ -200,6 +201,7 @@ typedef struct {
 #define CPUTYPE_BOBCAT                  45
 #define CPUTYPE_BULLDOZER               46
 #define CPUTYPE_PILEDRIVER              47
-#define CPUTYPE_HASWELL 48
+#define CPUTYPE_HASWELL                48
+#define CPUTYPE_STEAMROLLER            49
 
 #endif
index 44446e5..ef90b26 100644 (file)
@@ -1162,6 +1162,12 @@ int get_cpuname(void){
            return CPUTYPE_PILEDRIVER;
          else
            return CPUTYPE_BARCELONA; //OS don't support AVX.
+       case 0:
+         if(support_avx())
+           return CPUTYPE_STEAMROLLER;
+         else
+           return CPUTYPE_BARCELONA; //OS doesn't support AVX.
+
        }
        break;
       case  5:
@@ -1290,6 +1296,7 @@ static char *cpuname[] = {
   "BULLDOZER",
   "PILEDRIVER",
   "HASWELL",
+  "STEAMROLLER",
 };
 
 static char *lowercpuname[] = {
@@ -1341,6 +1348,7 @@ static char *lowercpuname[] = {
   "bulldozer",
   "piledriver",
   "haswell",
+  "steamroller",
 };
 
 static char *corename[] = {
@@ -1369,6 +1377,7 @@ static char *corename[] = {
   "BULLDOZER",
   "PILEDRIVER",
   "HASWELL",
+  "STEAMROLLER",
 };
 
 static char *corename_lower[] = {
@@ -1397,6 +1406,7 @@ static char *corename_lower[] = {
   "bulldozer",
   "piledriver",
   "haswell",
+  "steamroller",
 };
 
 
@@ -1562,7 +1572,15 @@ int get_coretype(void){
            return CORE_PILEDRIVER;
          else
            return CORE_BARCELONA; //OS don't support AVX.
+       
+       case 0:
+         if(support_avx())
+           return CORE_STEAMROLLER;
+         else
+           return CORE_BARCELONA; //OS doesn't support AVX.
        }
+
+
       }else return CORE_BARCELONA;
     }
   }
index 1235df2..6fd1d8c 100644 (file)
@@ -66,6 +66,7 @@ extern gotoblas_t  gotoblas_BOBCAT;
 extern gotoblas_t  gotoblas_SANDYBRIDGE;
 extern gotoblas_t  gotoblas_BULLDOZER;
 extern gotoblas_t  gotoblas_PILEDRIVER;
+extern gotoblas_t  gotoblas_STEAMROLLER;
 #ifdef NO_AVX2
 #define gotoblas_HASWELL gotoblas_SANDYBRIDGE
 #else
@@ -77,6 +78,7 @@ extern gotoblas_t  gotoblas_HASWELL;
 #define gotoblas_HASWELL gotoblas_NEHALEM
 #define gotoblas_BULLDOZER gotoblas_BARCELONA
 #define gotoblas_PILEDRIVER gotoblas_BARCELONA
+#define gotoblas_STEAMROLLER gotoblas_BARCELONA
 #endif
 
 
@@ -275,7 +277,17 @@ static gotoblas_t *get_coretype(void){
            openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
            return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
          }
+       }else if(model == 0){
+         //AMD STEAMROLLER
+         if(support_avx())
+           return &gotoblas_STEAMROLLER;
+         else{
+           openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
+           return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
+         }
        }
+
+
       } else {
        return &gotoblas_BARCELONA;
       }
@@ -315,6 +327,7 @@ static char *corename[] = {
     "Bulldozer",
     "Piledriver",
     "Haswell",
+    "Steamroller",
 };
 
 char *gotoblas_corename(void) {
@@ -339,6 +352,7 @@ char *gotoblas_corename(void) {
   if (gotoblas == &gotoblas_BULLDOZER)    return corename[18];
   if (gotoblas == &gotoblas_PILEDRIVER)   return corename[19];
   if (gotoblas == &gotoblas_HASWELL)      return corename[20];
+  if (gotoblas == &gotoblas_STEAMROLLER)  return corename[21];
 
   return corename[0];
 }
@@ -370,6 +384,7 @@ static gotoblas_t *force_coretype(char *coretype){
        switch (found)
        {
 
+               case 21: return (&gotoblas_STEAMROLLER);
                case 20: return (&gotoblas_HASWELL);
                case 19: return (&gotoblas_PILEDRIVER);
                case 18: return (&gotoblas_BULLDOZER);
index f0f889a..d741f2f 100644 (file)
@@ -166,7 +166,7 @@ int get_L2_size(void){
 #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
     defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
     defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
-    defined(PILEDRIVER) || defined(HASWELL)
+    defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER)
 
   cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
 
@@ -251,7 +251,7 @@ void blas_set_parameter(void){
 
   env_var_t p;
   int factor;
-#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL)
+#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
   int size = 16;
 #else
   int size = get_L2_size();
index 81ab9e3..f6a5ecb 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -432,6 +432,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME  "PILEDRIVER"
 #endif
 
+#if defined (FORCE_STEAMROLLER)
+#define FORCE
+#define FORCE_INTEL
+#define ARCHITECTURE    "X86"
+#define SUBARCHITECTURE "STEAMROLLER"
+#define ARCHCONFIG   "-DSTEAMROLLER " \
+                    "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
+                    "-DL2_SIZE=2097152 -DL2_LINESIZE=64  -DL3_SIZE=12582912 " \
+                    "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+                    "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
+                    "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
+                     "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
+#define LIBNAME   "steamroller"
+#define CORENAME  "STEAMROLLER"
+#endif
+
+
 #ifdef FORCE_SSE_GENERIC
 #define FORCE
 #define FORCE_INTEL
index 0d7bbd4..1fa7f79 100644 (file)
@@ -941,6 +941,23 @@ static void init_parameter(void) {
 #endif
 #endif
 
+#ifdef STEAMROLLER
+
+#ifdef DEBUG
+  fprintf(stderr, "Steamroller\n");
+#endif
+
+  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
+  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
+  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
+  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
+#ifdef EXPRECISION
+  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
+  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
+#endif
+#endif
+
+
 #ifdef NANO
 
 #ifdef DEBUG
diff --git a/kernel/x86_64/KERNEL.STEAMROLLER b/kernel/x86_64/KERNEL.STEAMROLLER
new file mode 100644 (file)
index 0000000..55285e3
--- /dev/null
@@ -0,0 +1,76 @@
+SGEMVNKERNEL = sgemv_n_4.c
+SGEMVTKERNEL = sgemv_t_4.c
+
+ZGEMVNKERNEL = zgemv_n_dup.S
+ZGEMVTKERNEL = zgemv_t_4.c
+
+DGEMVNKERNEL = dgemv_n_bulldozer.S
+DGEMVTKERNEL = dgemv_t_bulldozer.S
+
+DDOTKERNEL   = ddot_bulldozer.S
+DCOPYKERNEL  = dcopy_bulldozer.S
+
+SGEMMKERNEL    =  sgemm_kernel_16x2_piledriver.S
+SGEMMINCOPY    =  ../generic/gemm_ncopy_16.c
+SGEMMITCOPY    =  ../generic/gemm_tcopy_16.c
+SGEMMONCOPY    =  gemm_ncopy_2_bulldozer.S
+SGEMMOTCOPY    =  gemm_tcopy_2_bulldozer.S
+SGEMMINCOPYOBJ =  sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ =  sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+DGEMMKERNEL    =  dgemm_kernel_8x2_piledriver.S
+DGEMMINCOPY    =  dgemm_ncopy_8_bulldozer.S
+DGEMMITCOPY    =  dgemm_tcopy_8_bulldozer.S
+DGEMMONCOPY    =  gemm_ncopy_2_bulldozer.S
+DGEMMOTCOPY    =  gemm_tcopy_2_bulldozer.S
+DGEMMINCOPYOBJ =  dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ =  dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+CGEMMKERNEL    =  cgemm_kernel_4x2_piledriver.S
+CGEMMINCOPY    =  ../generic/zgemm_ncopy_4.c
+CGEMMITCOPY    =  ../generic/zgemm_tcopy_4.c
+CGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
+CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMKERNEL    =  zgemm_kernel_2x2_piledriver.S
+ZGEMMINCOPY    =
+ZGEMMITCOPY    =
+ZGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
+ZGEMMINCOPYOBJ =
+ZGEMMITCOPYOBJ =
+ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+CGEMM3MKERNEL    =  zgemm3m_kernel_8x4_barcelona.S
+ZGEMM3MKERNEL    =  zgemm3m_kernel_4x4_barcelona.S
+
+STRSMKERNEL_LN  =  ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT  =  ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN  =  ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT  =  ../generic/trsm_kernel_RT.c
+
+
+DTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT  = dtrsm_kernel_LT_8x2_bulldozer.S
+DTRSMKERNEL_RN  = dtrsm_kernel_RN_8x2_bulldozer.S
+DTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
+
+
index b3aad43..d501c2f 100644 (file)
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 
 
-#if defined(BULLDOZER) || defined(PILEDRIVER)
+#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 #include "ddot_microk_bulldozer-2.c"
 #elif defined(NEHALEM)
 #include "ddot_microk_nehalem-2.c"
index 632d168..6fec481 100644 (file)
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "common.h"
 
-#if defined(BULLDOZER) || defined(PILEDRIVER)
+#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 #include "sdot_microk_bulldozer-2.c"
 #elif defined(NEHALEM)
 #include "sdot_microk_nehalem-2.c"
index a840f8b..930dd26 100644 (file)
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 
 
-#if defined(BULLDOZER) || defined(PILEDRIVER)
+#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 #include "sgemv_n_microk_bulldozer-4.c"
 #elif defined(NEHALEM)
 #include "sgemv_n_microk_nehalem-4.c"
index cd13bb6..2bb5809 100644 (file)
@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #if defined(NEHALEM)
 #include "sgemv_t_microk_nehalem-4.c"
-#elif defined(BULLDOZER) || defined(PILEDRIVER)
+#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 #include "sgemv_t_microk_bulldozer-4.c"
 #elif defined(SANDYBRIDGE)
 #include "sgemv_t_microk_sandy-4.c"
index 84cf4e2..4abb2d5 100644 (file)
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 
 
-#if defined(BULLDOZER) || defined(PILEDRIVER)
+#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 #include "zgemv_t_microk_bulldozer-4.c"
 #elif defined(HASWELL)
 #include "zgemv_t_microk_haswell-4.c"
diff --git a/param.h b/param.h
index bce05c9..e3e535b 100644 (file)
--- a/param.h
+++ b/param.h
@@ -406,6 +406,99 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #endif
 
+#ifdef STEAMROLLER
+#define SNUMOPT         8
+#define DNUMOPT         4
+
+#define GEMM_DEFAULT_OFFSET_A  64
+#define GEMM_DEFAULT_OFFSET_B 832
+#define GEMM_DEFAULT_ALIGN 0x0fffUL
+
+
+
+#define QGEMM_DEFAULT_UNROLL_N 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+#define XGEMM_DEFAULT_UNROLL_N 1
+
+#ifdef ARCH_X86
+#define SGEMM_DEFAULT_UNROLL_N 4
+#define DGEMM_DEFAULT_UNROLL_N 4
+#define SGEMM_DEFAULT_UNROLL_M 4
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_M 1
+#define XGEMM_DEFAULT_UNROLL_M 1
+#else
+#define SGEMM_DEFAULT_UNROLL_N 2
+#define DGEMM_DEFAULT_UNROLL_N 2
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define XGEMM_DEFAULT_UNROLL_M 1
+#define CGEMM3M_DEFAULT_UNROLL_N 4
+#define CGEMM3M_DEFAULT_UNROLL_M 8
+#define ZGEMM3M_DEFAULT_UNROLL_N 4
+#define ZGEMM3M_DEFAULT_UNROLL_M 4
+#define GEMV_UNROLL 8
+#endif
+
+#if defined(ARCH_X86_64)
+#define SGEMM_DEFAULT_P 768
+#define DGEMM_DEFAULT_P 768
+#define ZGEMM_DEFAULT_P 384
+#define CGEMM_DEFAULT_P 768
+#else
+#define SGEMM_DEFAULT_P 448
+#define DGEMM_DEFAULT_P 480
+#define ZGEMM_DEFAULT_P 112
+#define CGEMM_DEFAULT_P 224
+#endif
+#define QGEMM_DEFAULT_P 112
+#define XGEMM_DEFAULT_P  56
+
+#if defined(ARCH_X86_64)
+#define SGEMM_DEFAULT_Q 192
+#define DGEMM_DEFAULT_Q 168
+#define ZGEMM_DEFAULT_Q 168
+#define CGEMM_DEFAULT_Q 168
+#else
+#define SGEMM_DEFAULT_Q 224
+#define DGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 224
+#define CGEMM_DEFAULT_Q 224
+#endif
+#define QGEMM_DEFAULT_Q 224
+#define XGEMM_DEFAULT_Q 224
+
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
+#define SGEMM_DEFAULT_R 12288
+#define QGEMM_DEFAULT_R qgemm_r
+#define DGEMM_DEFAULT_R 12288
+#define CGEMM_DEFAULT_R cgemm_r
+#define ZGEMM_DEFAULT_R zgemm_r
+#define XGEMM_DEFAULT_R xgemm_r
+
+#define SYMV_P  16
+#define HAVE_EXCLUSIVE_CACHE
+
+#define GEMM_THREAD gemm_thread_mn
+
+#endif
+
+
 #ifdef ATHLON
 
 #define SNUMOPT                4