Add AMD Excavator target.
author Zhang Xianyi <traits.zhang@gmail.com>
Wed, 13 May 2015 21:16:30 +0000 (16:16 -0500)
committer Zhang Xianyi <traits.zhang@gmail.com>
Wed, 13 May 2015 21:16:30 +0000 (16:16 -0500)
Makefile.system
TargetList.txt
common_x86.h
common_x86_64.h
cpuid.h
cpuid_x86.c
driver/others/dynamic.c
getarch.c
kernel/x86_64/KERNEL.EXCAVATOR [new file with mode: 0644]
param.h

index 65294b9..78eeb12 100644 (file)
@@ -65,6 +65,9 @@ endif
 ifeq ($(TARGET), STEAMROLLER)
 GETARCH_FLAGS := -DFORCE_BARCELONA
 endif
+ifeq ($(TARGET), EXCAVATOR)
+GETARCH_FLAGS := -DFORCE_BARCELONA
+endif
 endif
 
 
@@ -92,6 +95,9 @@ endif
 ifeq ($(TARGET_CORE), STEAMROLLER)
 GETARCH_FLAGS := -DFORCE_BARCELONA
 endif
+ifeq ($(TARGET_CORE), EXCAVATOR)
+GETARCH_FLAGS := -DFORCE_BARCELONA
+endif
 endif
 
 
@@ -409,7 +415,7 @@ endif
 ifeq ($(ARCH), x86_64)
 DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
 ifneq ($(NO_AVX), 1)
-DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
+DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
 endif
 ifneq ($(NO_AVX2), 1)
 DYNAMIC_CORE += HASWELL
index 1c98508..0a9d8b4 100644 (file)
@@ -33,6 +33,7 @@ BOBCAT
 BULLDOZER
 PILEDRIVER
 STEAMROLLER
+EXCAVATOR
 
 c)VIA CPU:
 SSE_GENERIC
index 9d82090..99a723f 100644 (file)
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 #define MMXSTORE       movd
 #endif
 
-#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
+#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
 //Enable some optimazation for barcelona.
 #define BARCELONA_OPTIMIZATION
 #endif
index e0a6c4c..efb9024 100644 (file)
@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 
 #ifdef ASSEMBLER
 
-#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
+#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
 //Enable some optimazation for barcelona.
 #define BARCELONA_OPTIMIZATION
 #endif
diff --git a/cpuid.h b/cpuid.h
index ab6a3fb..8a8cdf6 100644 (file)
--- a/cpuid.h
+++ b/cpuid.h
 #define CORE_PILEDRIVER  23
 #define CORE_HASWELL     24
 #define CORE_STEAMROLLER 25
+#define CORE_EXCAVATOR   26
 
 #define HAVE_SSE      (1 <<  0)
 #define HAVE_SSE2     (1 <<  1)
@@ -203,5 +204,6 @@ typedef struct {
 #define CPUTYPE_PILEDRIVER              47
 #define CPUTYPE_HASWELL                48
 #define CPUTYPE_STEAMROLLER            49
+#define CPUTYPE_EXCAVATOR              50
 
 #endif
index aece9d8..4f97cfb 100644 (file)
@@ -1198,11 +1198,20 @@ int get_cpuname(void){
          else
            return CPUTYPE_BARCELONA; //OS don't support AVX.
        case 0:
-         if(support_avx())
-           return CPUTYPE_STEAMROLLER;
-         else
-           return CPUTYPE_BARCELONA; //OS don't support AVX.
-
+         switch(exmodel){
+         case 3:
+           if(support_avx())
+             return CPUTYPE_STEAMROLLER;
+           else
+             return CPUTYPE_BARCELONA; //OS don't support AVX.
+
+         case 6:
+           if(support_avx())
+             return CPUTYPE_EXCAVATOR;
+           else
+             return CPUTYPE_BARCELONA; //OS don't support AVX.
+         }
+         break;
        }
        break;
       case  5:
@@ -1332,6 +1341,7 @@ static char *cpuname[] = {
   "PILEDRIVER",
   "HASWELL",
   "STEAMROLLER",
+  "EXCAVATOR",
 };
 
 static char *lowercpuname[] = {
@@ -1384,6 +1394,7 @@ static char *lowercpuname[] = {
   "piledriver",
   "haswell",
   "steamroller",
+  "excavator",
 };
 
 static char *corename[] = {
@@ -1413,6 +1424,7 @@ static char *corename[] = {
   "PILEDRIVER",
   "HASWELL",
   "STEAMROLLER",
+  "EXCAVATOR",
 };
 
 static char *corename_lower[] = {
@@ -1442,6 +1454,7 @@ static char *corename_lower[] = {
   "piledriver",
   "haswell",
   "steamroller",
+  "excavator",
 };
 
 
@@ -1644,10 +1657,20 @@ int get_coretype(void){
            return CORE_BARCELONA; //OS don't support AVX.
        
        case 0:
-         if(support_avx())
-           return CORE_STEAMROLLER;
-         else
-           return CORE_BARCELONA; //OS don't support AVX.
+         switch(exmodel){
+         case 3:
+           if(support_avx())
+             return CORE_STEAMROLLER;
+           else
+             return CORE_BARCELONA; //OS don't support AVX.
+
+         case 6:
+           if(support_avx())
+             return CORE_EXCAVATOR;
+           else
+             return CORE_BARCELONA; //OS don't support AVX.
+         }
+         break;
        }
 
 
index 6945c17..ff80504 100644 (file)
@@ -67,6 +67,7 @@ extern gotoblas_t  gotoblas_SANDYBRIDGE;
 extern gotoblas_t  gotoblas_BULLDOZER;
 extern gotoblas_t  gotoblas_PILEDRIVER;
 extern gotoblas_t  gotoblas_STEAMROLLER;
+extern gotoblas_t  gotoblas_EXCAVATOR;
 #ifdef NO_AVX2
 #define gotoblas_HASWELL gotoblas_SANDYBRIDGE
 #else
@@ -79,6 +80,7 @@ extern gotoblas_t  gotoblas_HASWELL;
 #define gotoblas_BULLDOZER gotoblas_BARCELONA
 #define gotoblas_PILEDRIVER gotoblas_BARCELONA
 #define gotoblas_STEAMROLLER gotoblas_BARCELONA
+#define gotoblas_EXCAVATOR gotoblas_BARCELONA
 #endif
 
 
@@ -307,12 +309,22 @@ static gotoblas_t *get_coretype(void){
            return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
          }
        }else if(model == 0){
-         //AMD STEAMROLLER
-         if(support_avx())
-           return &gotoblas_STEAMROLLER;
-         else{
-           openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
-           return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
+         if (exmodel == 3) {
+           //AMD STEAMROLLER
+           if(support_avx())
+             return &gotoblas_STEAMROLLER;
+           else{
+             openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
+             return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
+           }
+         }else if (exmodel == 6) {
+           if(support_avx())
+             return &gotoblas_EXCAVATOR;
+           else{
+             openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
+             return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
+           }
+
          }
        }
 
@@ -357,6 +369,7 @@ static char *corename[] = {
     "Piledriver",
     "Haswell",
     "Steamroller",
+    "Excavator",
 };
 
 char *gotoblas_corename(void) {
@@ -382,6 +395,7 @@ char *gotoblas_corename(void) {
   if (gotoblas == &gotoblas_PILEDRIVER)   return corename[19];
   if (gotoblas == &gotoblas_HASWELL)      return corename[20];
   if (gotoblas == &gotoblas_STEAMROLLER)  return corename[21];
+  if (gotoblas == &gotoblas_EXCAVATOR)    return corename[22];
 
   return corename[0];
 }
@@ -412,7 +426,7 @@ static gotoblas_t *force_coretype(char *coretype){
 
        switch (found)
        {
-
+               case 22: return (&gotoblas_EXCAVATOR);
                case 21: return (&gotoblas_STEAMROLLER);
                case 20: return (&gotoblas_HASWELL);
                case 19: return (&gotoblas_PILEDRIVER);
index ee5f55f..d6ecaeb 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -448,6 +448,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME  "STEAMROLLER"
 #endif
 
+#if defined (FORCE_EXCAVATOR) /* fixed identification strings and config flags used when FORCE_EXCAVATOR is defined */
+#define FORCE
+#define FORCE_INTEL /* NOTE(review): set here although the commit describes an AMD target -- confirm this is intended */
+#define ARCHITECTURE    "X86"
+#define SUBARCHITECTURE "EXCAVATOR"
+#define ARCHCONFIG   "-DEXCAVATOR " \
+                    "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
+                    "-DL2_SIZE=2097152 -DL2_LINESIZE=64  -DL3_SIZE=12582912 " \
+                    "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+                    "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
+                    "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
+                     "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
+#define LIBNAME   "excavator"
+#define CORENAME  "EXCAVATOR"
+#endif /* FORCE_EXCAVATOR */
+
 
 #ifdef FORCE_SSE_GENERIC
 #define FORCE
diff --git a/kernel/x86_64/KERNEL.EXCAVATOR b/kernel/x86_64/KERNEL.EXCAVATOR
new file mode 100644 (file)
index 0000000..dbdd1fe
--- /dev/null
@@ -0,0 +1,92 @@
+SAXPYKERNEL = saxpy.c
+DAXPYKERNEL = daxpy.c
+CAXPYKERNEL = caxpy.c
+ZAXPYKERNEL = zaxpy.c
+# AXPY sources are listed above; DOT sources follow.
+SDOTKERNEL     = sdot.c
+DDOTKERNEL     = ddot.c
+CDOTKERNEL     = cdot.c
+ZDOTKERNEL     = zdot.c
+
+# SYMV sources (_U and _L variants; only D and S are set in this file).
+DSYMV_U_KERNEL = dsymv_U.c
+DSYMV_L_KERNEL = dsymv_L.c
+SSYMV_U_KERNEL = ssymv_U.c
+SSYMV_L_KERNEL = ssymv_L.c
+# GEMV sources: S and D use the *_4.c files; Z pairs zgemv_n_dup.S with zgemv_t_4.c.
+SGEMVNKERNEL = sgemv_n_4.c
+SGEMVTKERNEL = sgemv_t_4.c
+
+DGEMVNKERNEL = dgemv_n_4.c
+DGEMVTKERNEL = dgemv_t_4.c
+
+ZGEMVNKERNEL = zgemv_n_dup.S
+ZGEMVTKERNEL = zgemv_t_4.c
+# Only the D copy kernel is set in this file.
+DCOPYKERNEL  = dcopy_bulldozer.S
+
+# SGEMM: compute kernel, copy-routine sources, and copy object names.
+SGEMMKERNEL    =  sgemm_kernel_16x2_piledriver.S
+SGEMMINCOPY    =  ../generic/gemm_ncopy_16.c
+SGEMMITCOPY    =  ../generic/gemm_tcopy_16.c
+SGEMMONCOPY    =  gemm_ncopy_2_bulldozer.S
+SGEMMOTCOPY    =  gemm_tcopy_2_bulldozer.S
+SGEMMINCOPYOBJ =  sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ =  sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+# DGEMM: same layout as the SGEMM group.
+DGEMMKERNEL    =  dgemm_kernel_8x2_piledriver.S
+DGEMMINCOPY    =  ../generic/gemm_ncopy_8.c
+DGEMMITCOPY    =  ../generic/gemm_tcopy_8.c
+DGEMMONCOPY    =  gemm_ncopy_2_bulldozer.S
+DGEMMOTCOPY    =  gemm_tcopy_2_bulldozer.S
+DGEMMINCOPYOBJ =  dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ =  dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+# CGEMM, then ZGEMM; the ZGEMM IN/IT copy sources and object names are left empty.
+CGEMMKERNEL    =  cgemm_kernel_4x2_piledriver.S
+CGEMMINCOPY    =  ../generic/zgemm_ncopy_4.c
+CGEMMITCOPY    =  ../generic/zgemm_tcopy_4.c
+CGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
+CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMKERNEL    =  zgemm_kernel_2x2_piledriver.S
+ZGEMMINCOPY    =
+ZGEMMITCOPY    =
+ZGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
+ZGEMMINCOPYOBJ =
+ZGEMMITCOPYOBJ =
+ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
+# GEMM3M kernels: both entries name zgemm3m_*_barcelona.S sources.
+CGEMM3MKERNEL    =  zgemm3m_kernel_8x4_barcelona.S
+ZGEMM3MKERNEL    =  zgemm3m_kernel_4x4_barcelona.S
+# TRSM kernels: generic C sources, except DTRSM LT/RN which name bulldozer .S files.
+STRSMKERNEL_LN  =  ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT  =  ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN  =  ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT  =  ../generic/trsm_kernel_RT.c
+
+
+DTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT  = dtrsm_kernel_LT_8x2_bulldozer.S
+DTRSMKERNEL_RN  = dtrsm_kernel_RN_8x2_bulldozer.S
+DTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
+
+
diff --git a/param.h b/param.h
index 18c711e..245b678 100644 (file)
--- a/param.h
+++ b/param.h
@@ -499,6 +499,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 
+#ifdef EXCAVATOR /* tuning defaults that apply only when EXCAVATOR is defined */
+#define SNUMOPT         8
+#define DNUMOPT         4
+/* GEMM offset and alignment defaults. */
+#define GEMM_DEFAULT_OFFSET_A  64
+#define GEMM_DEFAULT_OFFSET_B 832
+#define GEMM_DEFAULT_ALIGN 0x0fffUL
+
+/* Unroll factors. The four N values below apply to every build; the remaining */
+/* unroll values depend on ARCH_X86. Only the non-ARCH_X86 branch sets the GEMM3M unrolls and GEMV_UNROLL. */
+#define QGEMM_DEFAULT_UNROLL_N 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+#define XGEMM_DEFAULT_UNROLL_N 1
+
+#ifdef ARCH_X86
+#define SGEMM_DEFAULT_UNROLL_N 4
+#define DGEMM_DEFAULT_UNROLL_N 4
+#define SGEMM_DEFAULT_UNROLL_M 4
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_M 1
+#define XGEMM_DEFAULT_UNROLL_M 1
+#else /* !ARCH_X86 */
+#define SGEMM_DEFAULT_UNROLL_N 2
+#define DGEMM_DEFAULT_UNROLL_N 2
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define XGEMM_DEFAULT_UNROLL_M 1
+#define CGEMM3M_DEFAULT_UNROLL_N 4
+#define CGEMM3M_DEFAULT_UNROLL_M 8
+#define ZGEMM3M_DEFAULT_UNROLL_N 4
+#define ZGEMM3M_DEFAULT_UNROLL_M 4
+#define GEMV_UNROLL 8
+#endif /* ARCH_X86 */
+/* P defaults: S/D/Z/C differ between ARCH_X86_64 and other builds; Q and X are fixed. */
+#if defined(ARCH_X86_64)
+#define SGEMM_DEFAULT_P 768
+#define DGEMM_DEFAULT_P 576
+#define ZGEMM_DEFAULT_P 288
+#define CGEMM_DEFAULT_P 576
+#else /* !ARCH_X86_64 */
+#define SGEMM_DEFAULT_P 448
+#define DGEMM_DEFAULT_P 480
+#define ZGEMM_DEFAULT_P 112
+#define CGEMM_DEFAULT_P 224
+#endif /* ARCH_X86_64 */
+#define QGEMM_DEFAULT_P 112
+#define XGEMM_DEFAULT_P  56
+/* Q defaults, split on ARCH_X86_64 in the same way as the P defaults. */
+#if defined(ARCH_X86_64)
+#define SGEMM_DEFAULT_Q 192
+#define DGEMM_DEFAULT_Q 160
+#define ZGEMM_DEFAULT_Q 160
+#define CGEMM_DEFAULT_Q 160
+#else /* !ARCH_X86_64 */
+#define SGEMM_DEFAULT_Q 224
+#define DGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 224
+#define CGEMM_DEFAULT_Q 224
+#endif /* ARCH_X86_64 */
+#define QGEMM_DEFAULT_Q 224
+#define XGEMM_DEFAULT_Q 224
+/* GEMM3M P/Q/R defaults; these are the same for every architecture branch in this block. */
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+/* R defaults: S and D are constants; Q/C/Z/X expand to identifiers (qgemm_r etc.) presumably defined elsewhere -- TODO confirm. */
+#define SGEMM_DEFAULT_R 12288
+#define QGEMM_DEFAULT_R qgemm_r
+#define DGEMM_DEFAULT_R 12288
+#define CGEMM_DEFAULT_R cgemm_r
+#define ZGEMM_DEFAULT_R zgemm_r
+#define XGEMM_DEFAULT_R xgemm_r
+
+#define SYMV_P  16
+#define HAVE_EXCLUSIVE_CACHE
+
+#define GEMM_THREAD gemm_thread_mn
+
+#endif /* EXCAVATOR */
+
 #ifdef ATHLON
 
 #define SNUMOPT                4