Enable thread affinity on Loongson 3B. Fix the bug in reading the cycle counter.
author Xianyi Zhang <traits.zhang@gmail.com>
Fri, 11 Nov 2011 17:49:41 +0000 (17:49 +0000)
committer Xianyi Zhang <traits.zhang@gmail.com>
Fri, 11 Nov 2011 17:49:41 +0000 (17:49 +0000)
On Loongson 3A and 3B, the CPU core increments the cycle counter once every 2 cycles by default.

Makefile.system
common_mips64.h

index 84f41a7..985f950 100644 (file)
@@ -591,9 +591,11 @@ endif
 
 ifneq ($(ARCH), x86_64)
 ifneq ($(ARCH), x86)
+ifneq ($(CORE), LOONGSON3B)
 NO_AFFINITY = 1
 endif
 endif
+endif
 
 ifdef NO_AFFINITY
 CCOMMON_OPT    += -DNO_AFFINITY
index 15f947e..5db96c4 100644 (file)
@@ -101,13 +101,15 @@ static void INLINE blas_lock(volatile unsigned long *address){
 
 static inline unsigned int rpcc(void){
   unsigned long ret;
-#if defined(LOONGSON3A) 
-  unsigned long long tmp;
-  __asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
-  ret=tmp;
-#elif defined(LOONGSON3B)
-  //Temp Implementation.
-  return 1;
+#if defined(LOONGSON3A) || defined(LOONGSON3B)
+  //  unsigned long long tmp;
+  //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
+  //ret=tmp;
+  __asm__ __volatile__(".set push \n"
+                       ".set mips32r2\n"
+                       "rdhwr %0, $2\n"
+                       ".set pop": "=r"(ret):: "memory");
+
 #else
   __asm__ __volatile__(".set   push    \n"                                     
           ".set   mips32r2\n"                                                  
@@ -117,6 +119,18 @@ static inline unsigned int rpcc(void){
   return ret;
 }
 
+//#if defined(LOONGSON3A) || defined(LOONGSON3B)
+static inline int WhereAmI(void){
+  int ret=0;
+  __asm__ __volatile__(".set push \n"
+                       ".set mips32r2\n"
+                       "rdhwr %0, $0\n"
+                       ".set pop": "=r"(ret):: "memory");
+  return ret;
+
+}
+//#endif
+
 static inline int blas_quickdivide(blasint x, blasint y){
   return x / y;
 }