Support detecting ICT Loongson-3B CPU.
author Xianyi Zhang <traits.zhang@gmail.com>
Wed, 9 Nov 2011 19:28:22 +0000 (19:28 +0000)
committer Xianyi Zhang <traits.zhang@gmail.com>
Wed, 9 Nov 2011 19:29:50 +0000 (19:29 +0000)
common_mips64.h
cpuid_mips.c
driver/others/parameter.c
getarch.c
kernel/mips64/KERNEL.LOONGSON3B [new file with mode: 0644]
param.h

index 35d8265..15f947e 100644 (file)
@@ -101,10 +101,13 @@ static void INLINE blas_lock(volatile unsigned long *address){
 
 static inline unsigned int rpcc(void){
   unsigned long ret;
-#if defined(LOONGSON3A)
+#if defined(LOONGSON3A) 
   unsigned long long tmp;
   __asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
   ret=tmp;
+#elif defined(LOONGSON3B)
+  // Temporary implementation: returns the constant 1 on LOONGSON3B.
+  return 1;
 #else
   __asm__ __volatile__(".set   push    \n"                                     
           ".set   mips32r2\n"                                                  
@@ -234,6 +237,11 @@ REALNAME: ;\
 #define FIXED_PAGESIZE (16UL << 10)
 #endif
 
+#if defined(LOONGSON3B)
+#define PAGESIZE       (16UL << 10)
+#define FIXED_PAGESIZE (16UL << 10)
+#endif
+
 #ifndef PAGESIZE
 #define PAGESIZE       (64UL << 10)
 #endif
@@ -245,7 +253,7 @@ REALNAME: ;\
 #define MAP_ANONYMOUS MAP_ANON
 #endif
 
-#if defined(LOONGSON3A)
+#if defined(LOONGSON3A) || defined(LOONGSON3B)
 #define PREFETCHD_(x) ld $0, x
 #define PREFETCHD(x)  PREFETCHD_(x)  
 #else
index f50a4ec..217492d 100644 (file)
@@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CPU_UNKNOWN     0
 #define CPU_SICORTEX    1
 #define CPU_LOONGSON3A  2
+#define CPU_LOONGSON3B  3
 
 static char *cpuname[] = {
   "UNKOWN",
   "SICORTEX",
-  "LOONGSON3A"
+  "LOONGSON3A",
+  "LOONGSON3B"
 };
 
 int detect(void){
@@ -101,6 +103,8 @@ int detect(void){
 
   if (strstr(p, "Loongson-3A")){
     return CPU_LOONGSON3A;
+  }else if(strstr(p, "Loongson-3B")){
+    return CPU_LOONGSON3B;
   }else if (strstr(p, "Loongson-3")){
     infile = fopen("/proc/cpuinfo", "r");
     while (fgets(buffer, sizeof(buffer), infile)){
@@ -130,6 +134,8 @@ void get_architecture(void){
 void get_subarchitecture(void){
   if(detect()==CPU_LOONGSON3A) {
     printf("LOONGSON3A");
+  }else if(detect()==CPU_LOONGSON3B){
+    printf("LOONGSON3B");
   }else{
     printf("SICORTEX");
   }
@@ -149,6 +155,15 @@ void get_cpuconfig(void){
     printf("#define DTB_DEFAULT_ENTRIES 64\n");
     printf("#define DTB_SIZE 4096\n");
     printf("#define L2_ASSOCIATIVE 4\n");
+  }else if(detect()==CPU_LOONGSON3B){
+    printf("#define LOONGSON3B\n");
+    printf("#define L1_DATA_SIZE 65536\n");
+    printf("#define L1_DATA_LINESIZE 32\n");
+    printf("#define L2_SIZE 512488\n");
+    printf("#define L2_LINESIZE 32\n");
+    printf("#define DTB_DEFAULT_ENTRIES 64\n");
+    printf("#define DTB_SIZE 4096\n");
+    printf("#define L2_ASSOCIATIVE 4\n");
   }else{
     printf("#define SICORTEX\n");
     printf("#define L1_DATA_SIZE 32768\n");
@@ -164,6 +179,8 @@ void get_cpuconfig(void){
 void get_libname(void){
   if(detect()==CPU_LOONGSON3A) {
     printf("loongson3a\n");
+  }else if(detect()==CPU_LOONGSON3B) {
+    printf("loongson3b\n");
   }else{
 #ifdef __mips64
   printf("mips64\n");
index fc7f044..3e66022 100644 (file)
@@ -683,7 +683,7 @@ void blas_set_parameter(void){
 
 #if defined(ARCH_MIPS64) 
 void blas_set_parameter(void){
-#if defined(LOONGSON3A)
+#if defined(LOONGSON3A) || defined(LOONGSON3B) 
 #ifdef SMP
   if(blas_num_threads == 1){
 #endif
index df052df..5b61447 100644 (file)
--- a/getarch.c
+++ b/getarch.c
@@ -117,6 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /* #define FORCE_CELL          */
 /* #define FORCE_SICORTEX      */
 /* #define FORCE_LOONGSON3A      */
+/* #define FORCE_LOONGSON3B      */
 /* #define FORCE_ITANIUM2      */
 /* #define FORCE_GENERIC       */
 /* #define FORCE_SPARC         */
@@ -548,6 +549,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 #endif
 
+#ifdef FORCE_LOONGSON3B
+#define FORCE
+#define ARCHITECTURE    "MIPS"
+#define SUBARCHITECTURE "LOONGSON3B"
+#define SUBDIRNAME      "mips64"
+#define ARCHCONFIG   "-DLOONGSON3B " \
+       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+       "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+#define LIBNAME   "loongson3b"
+#define CORENAME  "LOONGSON3B"
+#else
+#endif
+
 #ifdef FORCE_ITANIUM2
 #define FORCE
 #define ARCHITECTURE    "IA64"
diff --git a/kernel/mips64/KERNEL.LOONGSON3B b/kernel/mips64/KERNEL.LOONGSON3B
new file mode 100644 (file)
index 0000000..fc247e4
--- /dev/null
@@ -0,0 +1,68 @@
+SAXPYKERNEL=axpy_loongson3a.S
+DAXPYKERNEL=daxpy_loongson3a_simd.S
+
+SGEMVNKERNEL = gemv_n_loongson3a.c
+SGEMVTKERNEL = gemv_t_loongson3a.c
+DGEMVNKERNEL = gemv_n_loongson3a.c
+DGEMVTKERNEL = gemv_t_loongson3a.c
+CGEMVNKERNEL = zgemv_n_loongson3a.c
+CGEMVTKERNEL = zgemv_t_loongson3a.c
+ZGEMVNKERNEL = zgemv_n_loongson3a.c
+ZGEMVTKERNEL = zgemv_t_loongson3a.c
+
+
+SGEMMKERNEL    =  sgemm_kernel_8x4_ps.S                
+SGEMMINCOPY    =  ../generic/gemm_ncopy_8.c
+SGEMMITCOPY    =  ../generic/gemm_tcopy_8.c
+SGEMMONCOPY    =  ../generic/gemm_ncopy_4.c
+SGEMMOTCOPY    =  ../generic/gemm_tcopy_4.c
+SGEMMINCOPYOBJ =  sgemm_incopy.o
+SGEMMITCOPYOBJ =  sgemm_itcopy.o
+SGEMMONCOPYOBJ =  sgemm_oncopy.o
+SGEMMOTCOPYOBJ =  sgemm_otcopy.o
+
+DGEMMKERNEL    =  dgemm_kernel_loongson3a_4x4.S
+DGEMMONCOPY    = ../generic/gemm_ncopy_4.c
+DGEMMOTCOPY    = ../generic/gemm_tcopy_4.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL    =  cgemm_kernel_loongson3a_4x2_ps.S
+CGEMMINCOPY    = ../generic/zgemm_ncopy_4.c
+CGEMMITCOPY    = ../generic/zgemm_tcopy_4.c
+CGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
+CGEMMINCOPYOBJ =  cgemm_incopy.o
+CGEMMITCOPYOBJ =  cgemm_itcopy.o
+CGEMMONCOPYOBJ =  cgemm_oncopy.o
+CGEMMOTCOPYOBJ =  cgemm_otcopy.o
+
+ZGEMMKERNEL    =  zgemm_kernel_loongson3a_2x2.S
+ZGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ =  zgemm_oncopy.o
+ZGEMMOTCOPYOBJ =  zgemm_otcopy.o
+
+STRSMKERNEL_LN =  ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT =  ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN =  ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT =  ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
diff --git a/param.h b/param.h
index 4ffe05c..39f0d99 100644 (file)
--- a/param.h
+++ b/param.h
@@ -1513,6 +1513,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define SYMV_P 16
 #endif
 
+#ifdef LOONGSON3B
+#define SNUMOPT                2
+#define DNUMOPT                2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M  8
+#define SGEMM_DEFAULT_UNROLL_N  4
+
+#define DGEMM_DEFAULT_UNROLL_M  4
+#define DGEMM_DEFAULT_UNROLL_N  4
+
+#define CGEMM_DEFAULT_UNROLL_M  4
+#define CGEMM_DEFAULT_UNROLL_N  2
+
+#define ZGEMM_DEFAULT_UNROLL_M  2
+#define ZGEMM_DEFAULT_UNROLL_N  2
+
+#define SGEMM_DEFAULT_P        64
+#define DGEMM_DEFAULT_P        44 
+#define CGEMM_DEFAULT_P 64
+#define ZGEMM_DEFAULT_P 32
+
+#define SGEMM_DEFAULT_Q 192
+#define DGEMM_DEFAULT_Q 92
+#define CGEMM_DEFAULT_Q 128
+#define ZGEMM_DEFAULT_Q 80
+
+#define SGEMM_DEFAULT_R 1024
+#define DGEMM_DEFAULT_R dgemm_r 
+#define CGEMM_DEFAULT_R 1024
+#define ZGEMM_DEFAULT_R 1024
+
+#define GEMM_OFFSET_A1 0x10000
+#define        GEMM_OFFSET_B1  0x100000
+
+#define SYMV_P 16
+#endif
+
 #ifdef GENERIC
 
 #define SNUMOPT                2