From: Zhang Xianyi Date: Wed, 13 May 2015 21:16:30 +0000 (-0500) Subject: Add AMD Excavator target. X-Git-Tag: v0.2.15^2~52 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=51ff17d46e0584a414e4ef97600f348877410adc;p=platform%2Fupstream%2Fopenblas.git Add AMD Excavator target. --- diff --git a/Makefile.system b/Makefile.system index 65294b9..78eeb12 100644 --- a/Makefile.system +++ b/Makefile.system @@ -65,6 +65,9 @@ endif ifeq ($(TARGET), STEAMROLLER) GETARCH_FLAGS := -DFORCE_BARCELONA endif +ifeq ($(TARGET), EXCAVATOR) +GETARCH_FLAGS := -DFORCE_BARCELONA +endif endif @@ -92,6 +95,9 @@ endif ifeq ($(TARGET_CORE), STEAMROLLER) GETARCH_FLAGS := -DFORCE_BARCELONA endif +ifeq ($(TARGET_CORE), EXCAVATOR) +GETARCH_FLAGS := -DFORCE_BARCELONA +endif endif @@ -409,7 +415,7 @@ endif ifeq ($(ARCH), x86_64) DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER +DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR endif ifneq ($(NO_AVX2), 1) DYNAMIC_CORE += HASWELL diff --git a/TargetList.txt b/TargetList.txt index 1c98508..0a9d8b4 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -33,6 +33,7 @@ BOBCAT BULLDOZER PILEDRIVER STEAMROLLER +EXCAVATOR c)VIA CPU: SSE_GENERIC diff --git a/common_x86.h b/common_x86.h index 9d82090..99a723f 100644 --- a/common_x86.h +++ b/common_x86.h @@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #define MMXSTORE movd #endif -#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) +#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR) //Enable some optimazation for barcelona. 
#define BARCELONA_OPTIMIZATION #endif diff --git a/common_x86_64.h b/common_x86_64.h index e0a6c4c..efb9024 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #ifdef ASSEMBLER -#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) +#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR) //Enable some optimazation for barcelona. #define BARCELONA_OPTIMIZATION #endif diff --git a/cpuid.h b/cpuid.h index ab6a3fb..8a8cdf6 100644 --- a/cpuid.h +++ b/cpuid.h @@ -109,6 +109,7 @@ #define CORE_PILEDRIVER 23 #define CORE_HASWELL 24 #define CORE_STEAMROLLER 25 +#define CORE_EXCAVATOR 26 #define HAVE_SSE (1 << 0) #define HAVE_SSE2 (1 << 1) @@ -203,5 +204,6 @@ typedef struct { #define CPUTYPE_PILEDRIVER 47 #define CPUTYPE_HASWELL 48 #define CPUTYPE_STEAMROLLER 49 +#define CPUTYPE_EXCAVATOR 50 #endif diff --git a/cpuid_x86.c b/cpuid_x86.c index aece9d8..4f97cfb 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1198,11 +1198,20 @@ int get_cpuname(void){ else return CPUTYPE_BARCELONA; //OS don't support AVX. case 0: - if(support_avx()) - return CPUTYPE_STEAMROLLER; - else - return CPUTYPE_BARCELONA; //OS don't support AVX. - + switch(exmodel){ + case 3: + if(support_avx()) + return CPUTYPE_STEAMROLLER; + else + return CPUTYPE_BARCELONA; //OS don't support AVX. + + case 6: + if(support_avx()) + return CPUTYPE_EXCAVATOR; + else + return CPUTYPE_BARCELONA; //OS don't support AVX. 
+ } + break; } break; case 5: @@ -1332,6 +1341,7 @@ static char *cpuname[] = { "PILEDRIVER", "HASWELL", "STEAMROLLER", + "EXCAVATOR", }; static char *lowercpuname[] = { @@ -1384,6 +1394,7 @@ static char *lowercpuname[] = { "piledriver", "haswell", "steamroller", + "excavator", }; static char *corename[] = { @@ -1413,6 +1424,7 @@ static char *corename[] = { "PILEDRIVER", "HASWELL", "STEAMROLLER", + "EXCAVATOR", }; static char *corename_lower[] = { @@ -1442,6 +1454,7 @@ static char *corename_lower[] = { "piledriver", "haswell", "steamroller", + "excavator", }; @@ -1644,10 +1657,20 @@ int get_coretype(void){ return CORE_BARCELONA; //OS don't support AVX. case 0: - if(support_avx()) - return CORE_STEAMROLLER; - else - return CORE_BARCELONA; //OS don't support AVX. + switch(exmodel){ + case 3: + if(support_avx()) + return CORE_STEAMROLLER; + else + return CORE_BARCELONA; //OS don't support AVX. + + case 6: + if(support_avx()) + return CORE_EXCAVATOR; + else + return CORE_BARCELONA; //OS don't support AVX. + } + break; } diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 6945c17..ff80504 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -67,6 +67,7 @@ extern gotoblas_t gotoblas_SANDYBRIDGE; extern gotoblas_t gotoblas_BULLDOZER; extern gotoblas_t gotoblas_PILEDRIVER; extern gotoblas_t gotoblas_STEAMROLLER; +extern gotoblas_t gotoblas_EXCAVATOR; #ifdef NO_AVX2 #define gotoblas_HASWELL gotoblas_SANDYBRIDGE #else @@ -79,6 +80,7 @@ extern gotoblas_t gotoblas_HASWELL; #define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA #define gotoblas_STEAMROLLER gotoblas_BARCELONA +#define gotoblas_EXCAVATOR gotoblas_BARCELONA #endif @@ -307,12 +309,22 @@ static gotoblas_t *get_coretype(void){ return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. 
} }else if(model == 0){ - //AMD STEAMROLLER - if(support_avx()) - return &gotoblas_STEAMROLLER; - else{ - openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); - return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. + if (exmodel == 3) { + //AMD STEAMROLLER + if(support_avx()) + return &gotoblas_STEAMROLLER; + else{ + openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); + return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. + } + }else if (exmodel == 6) { + if(support_avx()) + return &gotoblas_EXCAVATOR; + else{ + openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); + return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. + } + } } @@ -357,6 +369,7 @@ static char *corename[] = { "Piledriver", "Haswell", "Steamroller", + "Excavator", }; char *gotoblas_corename(void) { @@ -382,6 +395,7 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; if (gotoblas == &gotoblas_HASWELL) return corename[20]; if (gotoblas == &gotoblas_STEAMROLLER) return corename[21]; + if (gotoblas == &gotoblas_EXCAVATOR) return corename[22]; return corename[0]; } @@ -412,7 +426,7 @@ static gotoblas_t *force_coretype(char *coretype){ switch (found) { - + case 22: return (&gotoblas_EXCAVATOR); case 21: return (&gotoblas_STEAMROLLER); case 20: return (&gotoblas_HASWELL); case 19: return (&gotoblas_PILEDRIVER); diff --git a/getarch.c b/getarch.c index ee5f55f..d6ecaeb 100644 --- a/getarch.c +++ b/getarch.c @@ -448,6 +448,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CORENAME "STEAMROLLER" #endif +#if defined (FORCE_EXCAVATOR) /* getarch description selected when FORCE_EXCAVATOR is defined: architecture names, ARCHCONFIG flag string, library and core names */ +#define FORCE +#define FORCE_INTEL /* NOTE(review): FORCE_INTEL is set for this AMD target; its effect is not visible in this hunk - confirm */ +#define ARCHITECTURE "X86" +#define SUBARCHITECTURE "EXCAVATOR" +#define ARCHCONFIG "-DEXCAVATOR " \ + "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \ + "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \ + "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3" /* ARCHCONFIG ends here: one string of -D flags (cache and DTB sizes, HAVE_* features) */ +#define LIBNAME "excavator" +#define CORENAME "EXCAVATOR" +#endif /* FORCE_EXCAVATOR */ + #ifdef FORCE_SSE_GENERIC #define FORCE diff --git a/kernel/x86_64/KERNEL.EXCAVATOR b/kernel/x86_64/KERNEL.EXCAVATOR new file mode 100644 index 0000000..dbdd1fe --- /dev/null +++ b/kernel/x86_64/KERNEL.EXCAVATOR @@ -0,0 +1,92 @@ +SAXPYKERNEL = saxpy.c +DAXPYKERNEL = daxpy.c +CAXPYKERNEL = caxpy.c +ZAXPYKERNEL = zaxpy.c + +SDOTKERNEL = sdot.c +DDOTKERNEL = ddot.c +CDOTKERNEL = cdot.c +ZDOTKERNEL = zdot.c + + +DSYMV_U_KERNEL = dsymv_U.c +DSYMV_L_KERNEL = dsymv_L.c +SSYMV_U_KERNEL = ssymv_U.c +SSYMV_L_KERNEL = ssymv_L.c + +SGEMVNKERNEL = sgemv_n_4.c +SGEMVTKERNEL = sgemv_t_4.c + +DGEMVNKERNEL = dgemv_n_4.c +DGEMVTKERNEL = dgemv_t_4.c + +ZGEMVNKERNEL = zgemv_n_dup.S +ZGEMVTKERNEL = zgemv_t_4.c + +DCOPYKERNEL = dcopy_bulldozer.S + + +SGEMMKERNEL = sgemm_kernel_16x2_piledriver.S +SGEMMINCOPY = ../generic/gemm_ncopy_16.c +SGEMMITCOPY = ../generic/gemm_tcopy_16.c +SGEMMONCOPY = gemm_ncopy_2_bulldozer.S +SGEMMOTCOPY = gemm_tcopy_2_bulldozer.S +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) + +DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S +DGEMMINCOPY = ../generic/gemm_ncopy_8.c +DGEMMITCOPY = ../generic/gemm_tcopy_8.c +DGEMMONCOPY = gemm_ncopy_2_bulldozer.S +DGEMMOTCOPY = 
gemm_tcopy_2_bulldozer.S +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) + +CGEMMKERNEL = cgemm_kernel_4x2_piledriver.S +CGEMMINCOPY = ../generic/zgemm_ncopy_4.c +CGEMMITCOPY = ../generic/zgemm_tcopy_4.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +ZGEMMKERNEL = zgemm_kernel_2x2_piledriver.S +ZGEMMINCOPY = +ZGEMMITCOPY = +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMINCOPYOBJ = +ZGEMMITCOPYOBJ = +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) + +CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S +ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S +DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + + diff --git a/param.h b/param.h index 18c711e..245b678 100644 --- a/param.h +++ b/param.h @@ -499,6 +499,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif +#ifdef EXCAVATOR /* parameter macros (offsets, unroll factors, *_DEFAULT_P/Q/R, SYMV_P, GEMM_THREAD) used when EXCAVATOR is defined */ +#define SNUMOPT 8 +#define DNUMOPT 4 + +#define GEMM_DEFAULT_OFFSET_A 64 +#define GEMM_DEFAULT_OFFSET_B 832 +#define GEMM_DEFAULT_ALIGN 0x0fffUL + + + +#define QGEMM_DEFAULT_UNROLL_N 2 +#define CGEMM_DEFAULT_UNROLL_N 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 +#define XGEMM_DEFAULT_UNROLL_N 1 + +#ifdef ARCH_X86 /* unroll factors for ARCH_X86 builds */ +#define SGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_N 4 +#define SGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define QGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_M 1 +#define XGEMM_DEFAULT_UNROLL_M 1 +#else /* !ARCH_X86: the CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL are defined only on this branch */ +#define SGEMM_DEFAULT_UNROLL_N 2 +#define DGEMM_DEFAULT_UNROLL_N 2 +#define SGEMM_DEFAULT_UNROLL_M 16 +#define DGEMM_DEFAULT_UNROLL_M 8 +#define QGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_M 4 +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define XGEMM_DEFAULT_UNROLL_M 1 +#define CGEMM3M_DEFAULT_UNROLL_N 4 +#define CGEMM3M_DEFAULT_UNROLL_M 8 +#define ZGEMM3M_DEFAULT_UNROLL_N 4 +#define ZGEMM3M_DEFAULT_UNROLL_M 4 +#define GEMV_UNROLL 8 +#endif /* ARCH_X86 */ + +#if defined(ARCH_X86_64) /* S/D/Z/C *_DEFAULT_P for ARCH_X86_64 */ +#define SGEMM_DEFAULT_P 768 +#define DGEMM_DEFAULT_P 576 +#define ZGEMM_DEFAULT_P 288 +#define CGEMM_DEFAULT_P 576 +#else /* !ARCH_X86_64 */ +#define SGEMM_DEFAULT_P 448 +#define DGEMM_DEFAULT_P 480 +#define ZGEMM_DEFAULT_P 112 +#define CGEMM_DEFAULT_P 224 +#endif /* ARCH_X86_64 */ +#define QGEMM_DEFAULT_P 112 +#define XGEMM_DEFAULT_P 56 + +#if defined(ARCH_X86_64) /* S/D/Z/C *_DEFAULT_Q for ARCH_X86_64 */ +#define SGEMM_DEFAULT_Q 192 +#define DGEMM_DEFAULT_Q 160 +#define ZGEMM_DEFAULT_Q 160 +#define CGEMM_DEFAULT_Q 160 +#else /* !ARCH_X86_64 */ +#define SGEMM_DEFAULT_Q 224 +#define DGEMM_DEFAULT_Q 224 +#define ZGEMM_DEFAULT_Q 224 +#define CGEMM_DEFAULT_Q 224 +#endif /* ARCH_X86_64 */ +#define QGEMM_DEFAULT_Q 224 +#define XGEMM_DEFAULT_Q 224 + +#define CGEMM3M_DEFAULT_P 448 +#define ZGEMM3M_DEFAULT_P 224 +#define XGEMM3M_DEFAULT_P 112 +#define CGEMM3M_DEFAULT_Q 224 +#define ZGEMM3M_DEFAULT_Q 224 +#define XGEMM3M_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_R 12288 +#define ZGEMM3M_DEFAULT_R 12288 +#define 
XGEMM3M_DEFAULT_R 12288 + +#define SGEMM_DEFAULT_R 12288 +#define QGEMM_DEFAULT_R qgemm_r +#define DGEMM_DEFAULT_R 12288 +#define CGEMM_DEFAULT_R cgemm_r +#define ZGEMM_DEFAULT_R zgemm_r +#define XGEMM_DEFAULT_R xgemm_r /* the Q/C/Z/X *_DEFAULT_R macros expand to identifiers that are not defined in this block */ + +#define SYMV_P 16 +#define HAVE_EXCLUSIVE_CACHE + +#define GEMM_THREAD gemm_thread_mn + +#endif /* EXCAVATOR */ + #ifdef ATHLON #define SNUMOPT 4