static inline unsigned int rpcc(void){
unsigned long ret;
-#if defined(LOONGSON3A)
+#if defined(LOONGSON3A)
unsigned long long tmp;
__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
ret=tmp;
+#elif defined(LOONGSON3B)
+ // Temporary implementation: on LOONGSON3B rpcc() just returns the constant 1.
+ return 1;
#else
__asm__ __volatile__(".set push \n"
".set mips32r2\n"
#define FIXED_PAGESIZE (16UL << 10)
#endif
+#if defined(LOONGSON3B) /* LOONGSON3B: PAGESIZE and FIXED_PAGESIZE are both 16UL << 10 = 16384 bytes */
+#define PAGESIZE (16UL << 10) /* defined ahead of the "#ifndef PAGESIZE" fallback that follows in this hunk */
+#define FIXED_PAGESIZE (16UL << 10)
+#endif /* LOONGSON3B */
+
#ifndef PAGESIZE
#define PAGESIZE (64UL << 10)
#endif
#define MAP_ANONYMOUS MAP_ANON
#endif
-#if defined(LOONGSON3A)
+#if defined(LOONGSON3A) || defined(LOONGSON3B)
#define PREFETCHD_(x) ld $0, x
#define PREFETCHD(x) PREFETCHD_(x)
#else
#define CPU_UNKNOWN 0 /* CPU_* values are the identifiers returned by detect() */
#define CPU_SICORTEX 1
#define CPU_LOONGSON3A 2
+#define CPU_LOONGSON3B 3
static char *cpuname[] = { /* names are listed in the same order as the CPU_* values above */
"UNKOWN",
"SICORTEX",
- "LOONGSON3A"
+ "LOONGSON3A",
+ "LOONGSON3B"
};
int detect(void){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
+ }else if(strstr(p, "Loongson-3B")){
+ return CPU_LOONGSON3B;
}else if (strstr(p, "Loongson-3")){
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)){
void get_subarchitecture(void){
if(detect()==CPU_LOONGSON3A) {
printf("LOONGSON3A");
+ }else if(detect()==CPU_LOONGSON3B){
+ printf("LOONGSON3B");
}else{
printf("SICORTEX");
}
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
+ }else if(detect()==CPU_LOONGSON3B){
+ printf("#define LOONGSON3B\n");
+ printf("#define L1_DATA_SIZE 65536\n");
+ printf("#define L1_DATA_LINESIZE 32\n");
+ printf("#define L2_SIZE 512488\n");
+ printf("#define L2_LINESIZE 32\n");
+ printf("#define DTB_DEFAULT_ENTRIES 64\n");
+ printf("#define DTB_SIZE 4096\n");
+ printf("#define L2_ASSOCIATIVE 4\n");
}else{
printf("#define SICORTEX\n");
printf("#define L1_DATA_SIZE 32768\n");
void get_libname(void){
if(detect()==CPU_LOONGSON3A) {
printf("loongson3a\n");
+ }else if(detect()==CPU_LOONGSON3B) {
+ printf("loongson3b\n");
}else{
#ifdef __mips64
printf("mips64\n");
#if defined(ARCH_MIPS64)
void blas_set_parameter(void){
-#if defined(LOONGSON3A)
+#if defined(LOONGSON3A) || defined(LOONGSON3B)
#ifdef SMP
if(blas_num_threads == 1){
#endif
/* #define FORCE_CELL */
/* #define FORCE_SICORTEX */
/* #define FORCE_LOONGSON3A */
+/* #define FORCE_LOONGSON3B */
/* #define FORCE_ITANIUM2 */
/* #define FORCE_GENERIC */
/* #define FORCE_SPARC */
#else
#endif
+#ifdef FORCE_LOONGSON3B /* when FORCE_LOONGSON3B is defined, hard-code the LOONGSON3B target description below */
+#define FORCE
+#define ARCHITECTURE "MIPS"
+#define SUBARCHITECTURE "LOONGSON3B"
+#define SUBDIRNAME "mips64"
+#define ARCHCONFIG "-DLOONGSON3B " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " /* NOTE(review): 512488 is not 512 KiB (524288); confirm the intended L2 size */ \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+#define LIBNAME "loongson3b"
+#define CORENAME "LOONGSON3B"
+#else /* intentionally empty: nothing is defined when FORCE_LOONGSON3B is not set */
+#endif /* FORCE_LOONGSON3B */
+
#ifdef FORCE_ITANIUM2
#define FORCE
#define ARCHITECTURE "IA64"
--- /dev/null
+SAXPYKERNEL=axpy_loongson3a.S
+DAXPYKERNEL=daxpy_loongson3a_simd.S
+# GEMV: every precision (S/D/C/Z) points at a *_loongson3a.c source.
+SGEMVNKERNEL = gemv_n_loongson3a.c
+SGEMVTKERNEL = gemv_t_loongson3a.c
+DGEMVNKERNEL = gemv_n_loongson3a.c
+DGEMVTKERNEL = gemv_t_loongson3a.c
+CGEMVNKERNEL = zgemv_n_loongson3a.c
+CGEMVTKERNEL = zgemv_t_loongson3a.c
+ZGEMVNKERNEL = zgemv_n_loongson3a.c
+ZGEMVTKERNEL = zgemv_t_loongson3a.c
+
+# GEMM: assembly kernels per precision; the *COPY routines all come from ../generic.
+SGEMMKERNEL = sgemm_kernel_8x4_ps.S
+SGEMMINCOPY = ../generic/gemm_ncopy_8.c
+SGEMMITCOPY = ../generic/gemm_tcopy_8.c
+SGEMMONCOPY = ../generic/gemm_ncopy_4.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
+SGEMMINCOPYOBJ = sgemm_incopy.o
+SGEMMITCOPYOBJ = sgemm_itcopy.o
+SGEMMONCOPYOBJ = sgemm_oncopy.o
+SGEMMOTCOPYOBJ = sgemm_otcopy.o
+
+DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S
+DGEMMONCOPY = ../generic/gemm_ncopy_4.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL = cgemm_kernel_loongson3a_4x2_ps.S
+CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
+CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMINCOPYOBJ = cgemm_incopy.o
+CGEMMITCOPYOBJ = cgemm_itcopy.o
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
+
+ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+# TRSM: all four precisions use the same C kernels from ../generic.
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
#define SYMV_P 16
#endif
+#ifdef LOONGSON3B /* default tuning parameters selected when LOONGSON3B is defined */
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL /* 0x3fff == 16384 - 1; presumably a 16 KiB alignment mask, confirm against its users */
+/* Per-precision GEMM unroll defaults (M and N). */
+#define SGEMM_DEFAULT_UNROLL_M 8
+#define SGEMM_DEFAULT_UNROLL_N 4
+
+#define DGEMM_DEFAULT_UNROLL_M 4
+#define DGEMM_DEFAULT_UNROLL_N 4
+
+#define CGEMM_DEFAULT_UNROLL_M 4
+#define CGEMM_DEFAULT_UNROLL_N 2
+
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+/* Per-precision GEMM P, Q and R defaults. */
+#define SGEMM_DEFAULT_P 64
+#define DGEMM_DEFAULT_P 44
+#define CGEMM_DEFAULT_P 64
+#define ZGEMM_DEFAULT_P 32
+
+#define SGEMM_DEFAULT_Q 192
+#define DGEMM_DEFAULT_Q 92
+#define CGEMM_DEFAULT_Q 128
+#define ZGEMM_DEFAULT_Q 80
+
+#define SGEMM_DEFAULT_R 1024
+#define DGEMM_DEFAULT_R dgemm_r /* not a literal: expands to the identifier dgemm_r, which is not defined in this block */
+#define CGEMM_DEFAULT_R 1024
+#define ZGEMM_DEFAULT_R 1024
+
+#define GEMM_OFFSET_A1 0x10000
+#define GEMM_OFFSET_B1 0x100000
+
+#define SYMV_P 16
+#endif /* LOONGSON3B */
+
#ifdef GENERIC
#define SNUMOPT 2