LoongArch64: Add DYNAMIC_ARCH support
author gxw <guxiwei-hf@loongson.cn>
Thu, 28 Jul 2022 05:47:20 +0000 (13:47 +0800)
committer gxw <guxiwei-hf@loongson.cn>
Thu, 28 Jul 2022 06:28:45 +0000 (14:28 +0800)
Makefile.system
driver/others/Makefile
driver/others/dynamic_loongarch64.c [new file with mode: 0644]
kernel/loongarch64/KERNEL
kernel/loongarch64/KERNEL.LOONGSON3R5
kernel/loongarch64/KERNEL.generic
kernel/setparam-ref.c
param.h

index 5919be8..1b90dce 100644 (file)
@@ -679,6 +679,10 @@ ifeq ($(ARCH), mips64)
 DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
 endif
 
+ifeq ($(ARCH), loongarch64)
+DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
+endif
+
 ifeq ($(ARCH), zarch)
 DYNAMIC_CORE = ZARCH_GENERIC
 
index 4a421ef..e4e9ee1 100644 (file)
@@ -27,11 +27,15 @@ else
 ifeq ($(ARCH),mips64)
 COMMONOBJS += dynamic_mips64.$(SUFFIX)
 else
+ifeq ($(ARCH),loongarch64)
+COMMONOBJS += dynamic_loongarch64.$(SUFFIX)
+else
 COMMONOBJS     +=  dynamic.$(SUFFIX)
 endif
 endif
 endif
 endif
+endif
 else
 COMMONOBJS     +=  parameter.$(SUFFIX)
 endif
@@ -99,11 +103,15 @@ else
 ifeq ($(ARCH),mips64)
 HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX)
 else
+ifeq ($(ARCH),loongarch64)
+HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_loongarch64.$(SUFFIX)
+else
 HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
 endif
 endif
 endif
 endif
+endif
 else
 HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
 endif
diff --git a/driver/others/dynamic_loongarch64.c b/driver/others/dynamic_loongarch64.c
new file mode 100644 (file)
index 0000000..52f8bcb
--- /dev/null
@@ -0,0 +1,128 @@
+/*******************************************************************************
+Copyright (c) 2022, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "common.h"
+
+extern gotoblas_t  gotoblas_LOONGSON3R5;      /* one gotoblas_t instance per selectable core, */
+extern gotoblas_t  gotoblas_LOONGSON2K1000;   /* all defined outside this file                */
+extern gotoblas_t  gotoblas_LOONGSONGENERIC;
+/* Message sink defined outside this file; this file calls it with verbose levels 0, 1 and 2. */
+extern void openblas_warning(int verbose, const char * msg);
+
+#define NUM_CORETYPES    3   /* number of selectable cores; also the index of the "unknown" entry */
+/* Names matched case-insensitively by force_coretype(); indices 0..2 must line up with its switch. */
+static char *corename[] = {
+  "loongson3r5",
+  "loongson2k1000",
+  "loongsongeneric",
+  "unknown"
+};
+/* Name of the core whose table `gotoblas` currently points at, or "unknown" when it matches none. */
+char *gotoblas_corename(void) {
+  int slot = (gotoblas == &gotoblas_LOONGSON3R5)     ? 0
+           : (gotoblas == &gotoblas_LOONGSON2K1000)  ? 1
+           : (gotoblas == &gotoblas_LOONGSONGENERIC) ? 2 : NUM_CORETYPES;
+  return corename[slot];
+}
+
+static gotoblas_t *force_coretype(char *coretype) {
+  /*
+   * Resolve a user-supplied core name to its gotoblas_t table.
+   *
+   * coretype: NUL-terminated name; compared case-insensitively against
+   *           corename[0..NUM_CORETYPES-1], looking at no more than 20
+   *           characters.
+   * Returns the table of the first matching name, or NULL after sending a
+   * level-1 "Core not found" message to openblas_warning().
+   */
+  static gotoblas_t * const table_for[NUM_CORETYPES] = {   /* same order as corename[] */
+    &gotoblas_LOONGSON3R5, &gotoblas_LOONGSON2K1000, &gotoblas_LOONGSONGENERIC
+  };
+  char message[128];
+  int idx;
+
+  for (idx = 0; idx < NUM_CORETYPES; idx++) {
+    if (strncasecmp(coretype, corename[idx], 20) == 0) return table_for[idx];
+  }
+
+  snprintf(message, 128, "Core not found: %s\n", coretype);
+  openblas_warning(1, message);
+  return NULL;
+}
+
+#define LASX_MASK       (1<<7)  /* CPUCFG word 2, bit 7: LASX present; parenthesized so it is safe in any expression */
+#define LSX_MASK        (1<<6)  /* CPUCFG word 2, bit 6: LSX present */
+#define LOONGARCH_CFG2  0x02    /* index of the CPUCFG word that get_coretype() queries */
+
+static gotoblas_t *get_coretype(void) {
+  /* Choose a table from the value the cpucfg instruction returns for word     */
+  /* LOONGARCH_CFG2: the LASX_MASK bit is tested first, then the LSX_MASK bit, */
+  /* and with neither bit set the generic table is used.                       */
+  int cfg2 = 0;
+  __asm__ volatile (
+    "cpucfg %0, %1 \n\t"
+    : "+&r"(cfg2)
+    : "r"(LOONGARCH_CFG2)
+  );
+
+  if ((cfg2 & LASX_MASK) != 0) return &gotoblas_LOONGSON3R5;
+  if ((cfg2 & LSX_MASK) != 0)  return &gotoblas_LOONGSON2K1000;
+  return &gotoblas_LOONGSONGENERIC;
+}
+
+void gotoblas_dynamic_init(void) {
+  /*
+   * Select the gotoblas table once and run its init hook.
+   *
+   * A non-NULL `gotoblas` means selection already happened: return at once.
+   * Otherwise OPENBLAS_CORETYPE, when set, names the core to force; when it
+   * is unset the core is detected with get_coretype().  If no table was
+   * chosen (unknown forced name) or the table has no init hook, a level-0
+   * message is emitted and the process exits with status 1.
+   */
+  char coremsg[128];
+  char *p;
+
+  if (gotoblas) return;
+
+  p = getenv("OPENBLAS_CORETYPE");
+  gotoblas = p ? force_coretype(p) : get_coretype();
+
+  if (gotoblas && gotoblas->init) {
+    /* snprintf bounds the write and always NUL-terminates; "%.20s" keeps */
+    /* the reported name capped at 20 characters.                         */
+    snprintf(coremsg, sizeof(coremsg), "Core: %.20s\n", gotoblas_corename());
+    openblas_warning(2, coremsg);
+    gotoblas -> init();
+  } else {
+    openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
+    exit(1);
+  }
+}
+/* Forget the selected table, so the `if (gotoblas) return;` guard in gotoblas_dynamic_init() no longer short-circuits. */
+void gotoblas_dynamic_quit(void) {
+  gotoblas = NULL;
+}
index 1c11df9..e5d145a 100644 (file)
@@ -108,10 +108,10 @@ SGEMMINCOPY    = ../generic/gemm_ncopy_2.c
 SGEMMITCOPY    = ../generic/gemm_tcopy_2.c
 SGEMMONCOPY    = ../generic/gemm_ncopy_8.c
 SGEMMOTCOPY    = ../generic/gemm_tcopy_8.c
-SGEMMINCOPYOBJ =  sgemm_incopy.o
-SGEMMITCOPYOBJ =  sgemm_itcopy.o
-SGEMMONCOPYOBJ =  sgemm_oncopy.o
-SGEMMOTCOPYOBJ =  sgemm_otcopy.o
+SGEMMINCOPYOBJ =  sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ =  sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef DGEMMKERNEL
@@ -120,10 +120,10 @@ DGEMMINCOPY    = ../generic/gemm_ncopy_2.c
 DGEMMITCOPY    = ../generic/gemm_tcopy_2.c
 DGEMMONCOPY    = ../generic/gemm_ncopy_8.c
 DGEMMOTCOPY    = ../generic/gemm_tcopy_8.c
-DGEMMINCOPYOBJ =  dgemm_incopy.o
-DGEMMITCOPYOBJ =  dgemm_itcopy.o
-DGEMMONCOPYOBJ =  dgemm_oncopy.o
-DGEMMOTCOPYOBJ =  dgemm_otcopy.o
+DGEMMINCOPYOBJ =  dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ =  dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef CGEMMKERNEL
@@ -132,10 +132,10 @@ CGEMMINCOPY    = ../generic/zgemm_ncopy_1.c
 CGEMMITCOPY    = ../generic/zgemm_tcopy_1.c
 CGEMMONCOPY    = ../generic/zgemm_ncopy_4.c
 CGEMMOTCOPY    = ../generic/zgemm_tcopy_4.c
-CGEMMINCOPYOBJ =  cgemm_incopy.o
-CGEMMITCOPYOBJ =  cgemm_itcopy.o
-CGEMMONCOPYOBJ =  cgemm_oncopy.o
-CGEMMOTCOPYOBJ =  cgemm_otcopy.o
+CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef ZGEMMKERNEL
@@ -144,10 +144,10 @@ ZGEMMINCOPY    = ../generic/zgemm_ncopy_1.c
 ZGEMMITCOPY    = ../generic/zgemm_tcopy_1.c
 ZGEMMONCOPY    = ../generic/zgemm_ncopy_4.c
 ZGEMMOTCOPY    = ../generic/zgemm_tcopy_4.c
-ZGEMMINCOPYOBJ =  zgemm_incopy.o
-ZGEMMITCOPYOBJ =  zgemm_itcopy.o
-ZGEMMONCOPYOBJ =  zgemm_oncopy.o
-ZGEMMOTCOPYOBJ =  zgemm_otcopy.o
+ZGEMMINCOPYOBJ =  zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ =  zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 endif
 
 ifndef SGEMM_BETA
index bb0441a..cda3590 100644 (file)
@@ -3,10 +3,10 @@ DGEMMINCOPY    = dgemm_ncopy_16.S
 DGEMMITCOPY    = dgemm_tcopy_16.S
 DGEMMONCOPY    = dgemm_ncopy_4.S
 DGEMMOTCOPY    = dgemm_tcopy_4.S
-DGEMMINCOPYOBJ = dgemm_incopy.o
-DGEMMITCOPYOBJ = dgemm_itcopy.o
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 DTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
 DTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
index 105b2f6..b772a6f 100644 (file)
@@ -11,26 +11,26 @@ ZTRMMKERNEL    = ../generic/ztrmmkernel_2x2.c
 SGEMMKERNEL    =  ../generic/gemmkernel_2x2.c
 SGEMMONCOPY    =  ../generic/gemm_ncopy_2.c
 SGEMMOTCOPY    =  ../generic/gemm_tcopy_2.c
-SGEMMONCOPYOBJ =  sgemm_oncopy.o
-SGEMMOTCOPYOBJ =  sgemm_otcopy.o
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 DGEMMKERNEL    =  ../generic/gemmkernel_2x2.c
 DGEMMONCOPY    = ../generic/gemm_ncopy_2.c
 DGEMMOTCOPY    = ../generic/gemm_tcopy_2.c
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 CGEMMKERNEL    = ../generic/zgemmkernel_2x2.c
 CGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
 CGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
-CGEMMONCOPYOBJ =  cgemm_oncopy.o
-CGEMMOTCOPYOBJ =  cgemm_otcopy.o
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 ZGEMMKERNEL    = ../generic/zgemmkernel_2x2.c
 ZGEMMONCOPY    = ../generic/zgemm_ncopy_2.c
 ZGEMMOTCOPY    = ../generic/zgemm_tcopy_2.c
-ZGEMMONCOPYOBJ =  zgemm_oncopy.o
-ZGEMMOTCOPYOBJ =  zgemm_otcopy.o
+ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
 STRSMKERNEL_LN  =  ../generic/trsm_kernel_LN.c
 STRSMKERNEL_LT  =  ../generic/trsm_kernel_LT.c
index 9f5d34d..8bcd31e 100644 (file)
@@ -1046,6 +1046,34 @@ static void init_parameter(void) {
 #endif
 }
 #else // (ARCH_MIPS64)
+#if (ARCH_LOONGARCH64)
+static void init_parameter(void) {
+  /* LoongArch64: copy the compile-time *GEMM_DEFAULT_{P,Q,R} values into the matching */
+  /* TABLE_NAME fields, grouped per precision; the sbgemm fields are written only when */
+  /* BUILD_BFLOAT16 is defined.                                                        */
+#ifdef BUILD_BFLOAT16
+  TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
+  TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
+  TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
+#endif
+
+  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
+  TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
+  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
+
+  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
+  TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
+  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
+
+  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
+  TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
+  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
+
+  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
+  TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
+  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
+}
+#else // (ARCH_LOONGARCH64)
 #if (ARCH_POWER)
 static void init_parameter(void) {
 
@@ -1899,5 +1927,6 @@ static void init_parameter(void) {
 }
 #endif //POWER
 #endif //ZARCH
+#endif //(ARCH_LOONGARCH64)
 #endif //(ARCH_MIPS64)
 #endif //(ARCH_ARM64)
diff --git a/param.h b/param.h
index eb52ef9..dc02147 100644 (file)
--- a/param.h
+++ b/param.h
@@ -2857,26 +2857,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ZGEMM_DEFAULT_UNROLL_M 1
 #define XGEMM_DEFAULT_UNROLL_M 1
 
-#define SGEMM_DEFAULT_P sgemm_p
+#define SGEMM_DEFAULT_P 512
 #define DGEMM_DEFAULT_P 32
-#define QGEMM_DEFAULT_P qgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
-#define XGEMM_DEFAULT_P xgemm_p
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
 
-#define SGEMM_DEFAULT_R sgemm_r
+#define SGEMM_DEFAULT_R 12288
 #define DGEMM_DEFAULT_R 858
-#define QGEMM_DEFAULT_R qgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
-#define XGEMM_DEFAULT_R xgemm_r
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
 
 #define SGEMM_DEFAULT_Q 128
 #define DGEMM_DEFAULT_Q 152
-#define QGEMM_DEFAULT_Q 128
 #define CGEMM_DEFAULT_Q 128
 #define ZGEMM_DEFAULT_Q 128
-#define XGEMM_DEFAULT_Q 128
 
 #define SYMV_P  16
 #endif
@@ -3795,6 +3789,21 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
 #define DGEMM_DEFAULT_R  8192
 #define CGEMM_DEFAULT_R  4096
 #define ZGEMM_DEFAULT_R  4096
+#elif defined(ARCH_LOONGARCH64)
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
 #else
 #define SGEMM_DEFAULT_P sgemm_p
 #define DGEMM_DEFAULT_P dgemm_p