DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
endif
+# Core list assigned to DYNAMIC_CORE when ARCH is loongarch64.
+# NOTE(review): presumably each entry needs a matching gotoblas_<CORE> table
+# in the LoongArch64 dynamic dispatch source - confirm when adding a core.
+ifeq ($(ARCH), loongarch64)
+DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
+endif
+
ifeq ($(ARCH), zarch)
DYNAMIC_CORE = ZARCH_GENERIC
ifeq ($(ARCH),mips64)
COMMONOBJS += dynamic_mips64.$(SUFFIX)
else
+ifeq ($(ARCH),loongarch64)
+COMMONOBJS += dynamic_loongarch64.$(SUFFIX)
+else
COMMONOBJS += dynamic.$(SUFFIX)
endif
endif
endif
endif
+endif
else
COMMONOBJS += parameter.$(SUFFIX)
endif
ifeq ($(ARCH),mips64)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX)
else
+ifeq ($(ARCH),loongarch64)
+HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_loongarch64.$(SUFFIX)
+else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
endif
endif
endif
endif
+endif
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
endif
--- /dev/null
+/*******************************************************************************
+Copyright (c) 2022, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "common.h"
+
+/* Per-core kernel tables; defined outside this file. */
+extern gotoblas_t gotoblas_LOONGSON3R5;
+extern gotoblas_t gotoblas_LOONGSON2K1000;
+extern gotoblas_t gotoblas_LOONGSONGENERIC;
+
+extern void openblas_warning(int verbose, const char * msg);
+
+/* Number of selectable cores. It is also the index of the trailing
+ * "unknown" entry of corename[], which gotoblas_corename() returns when
+ * no table matches. */
+#define NUM_CORETYPES 3
+
+/* Core names. Indices 0..NUM_CORETYPES-1 must stay in the same order as the
+ * table choices in gotoblas_corename() and force_coretype(). */
+static char *corename[] = {
+ "loongson3r5",
+ "loongson2k1000",
+ "loongsongeneric",
+ "unknown"
+};
+
+/*
+ * Return the name of the core whose kernel table is currently installed in
+ * the global `gotoblas` pointer, or "unknown" when the pointer matches none
+ * of the LoongArch64 tables.
+ */
+char *gotoblas_corename(void) {
+  int idx = NUM_CORETYPES;  /* default: the trailing "unknown" entry */
+
+  if (gotoblas == &gotoblas_LOONGSON3R5) {
+    idx = 0;
+  } else if (gotoblas == &gotoblas_LOONGSON2K1000) {
+    idx = 1;
+  } else if (gotoblas == &gotoblas_LOONGSONGENERIC) {
+    idx = 2;
+  }
+
+  return corename[idx];
+}
+
+/*
+ * Map a user-supplied core name to its kernel table.
+ *
+ * coretype is compared case-insensitively (at most 20 characters) against
+ * the selectable entries of corename[]. Returns the matching table, or NULL
+ * after emitting a "Core not found" warning when no entry matches.
+ */
+static gotoblas_t *force_coretype(char *coretype) {
+  /* Kernel tables, in the same order as the names in corename[]. */
+  gotoblas_t *tables[NUM_CORETYPES] = {
+    &gotoblas_LOONGSON3R5,
+    &gotoblas_LOONGSON2K1000,
+    &gotoblas_LOONGSONGENERIC
+  };
+  char message[128];
+  int idx;
+
+  for (idx = 0; idx < NUM_CORETYPES; idx++) {
+    if (strncasecmp(coretype, corename[idx], 20) == 0) {
+      return tables[idx];
+    }
+  }
+
+  snprintf(message, 128, "Core not found: %s\n", coretype);
+  openblas_warning(1, message);
+  return NULL;
+}
+
+/* Bit masks tested against the value read with cpucfg in get_coretype().
+ * The expansions are parenthesized so they stay correct when used next to
+ * operators that bind tighter than `<<`. */
+#define LASX_MASK (1<<7)
+#define LSX_MASK (1<<6)
+/* Selector value passed to the cpucfg instruction in get_coretype(). */
+#define LOONGARCH_CFG2 0x02
+
+/*
+ * Choose a kernel table from what the CPU reports.
+ *
+ * Executes `cpucfg` with LOONGARCH_CFG2 as the selector and tests the result
+ * against LASX_MASK first, then LSX_MASK: a LASX hit selects LOONGSON3R5, an
+ * LSX-only hit selects LOONGSON2K1000, and neither selects LOONGSONGENERIC.
+ * Never returns NULL.
+ * NOTE(review): the mask names suggest these are the LASX/LSX SIMD feature
+ * bits of CPUCFG word 2 - confirm against the LoongArch reference manual.
+ */
+static gotoblas_t *get_coretype(void) {
+ int ret = 0;
+ __asm__ volatile (
+ "cpucfg %0, %1 \n\t"
+ : "+&r"(ret)
+ : "r"(LOONGARCH_CFG2)
+ );
+
+ /* Check the LASX bit before the LSX bit so LASX wins when both are set. */
+ if (ret & LASX_MASK)
+ return &gotoblas_LOONGSON3R5;
+ else if (ret & LSX_MASK)
+ return &gotoblas_LOONGSON2K1000;
+ else
+ return &gotoblas_LOONGSONGENERIC;
+}
+
+/*
+ * Select and initialize the kernel table for this process.
+ *
+ * Does nothing when `gotoblas` is already set. Otherwise, if the
+ * OPENBLAS_CORETYPE environment variable is present, the table it names is
+ * used (force_coretype() returns NULL for an unrecognized name); if it is
+ * absent, get_coretype() picks the table. On success the core name is
+ * reported through openblas_warning() and the table's init() hook is run.
+ * If no table was selected or the table has no init hook, a failure message
+ * is emitted and the process exits with status 1.
+ */
+void gotoblas_dynamic_init(void) {
+  char coremsg[128];
+  char *p;
+
+  if (gotoblas) return;
+
+  p = getenv("OPENBLAS_CORETYPE");
+  if (p) {
+    gotoblas = force_coretype(p);
+  } else {
+    gotoblas = get_coretype();
+  }
+
+  if (gotoblas && gotoblas->init) {
+    /* snprintf bounds the write and always NUL-terminates; "%.20s" caps the
+     * reported core name at 20 characters. */
+    snprintf(coremsg, sizeof(coremsg), "Core: %.20s\n", gotoblas_corename());
+    openblas_warning(2, coremsg);
+    gotoblas->init();
+  } else {
+    openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
+    exit(1);
+  }
+}
+
+/*
+ * Clear the global `gotoblas` pointer so that a later call to
+ * gotoblas_dynamic_init() performs core selection again.
+ */
+void gotoblas_dynamic_quit(void)
+{
+  gotoblas = NULL;
+}
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
-SGEMMINCOPYOBJ = sgemm_incopy.o
-SGEMMITCOPYOBJ = sgemm_itcopy.o
-SGEMMONCOPYOBJ = sgemm_oncopy.o
-SGEMMOTCOPYOBJ = sgemm_otcopy.o
+SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
ifndef DGEMMKERNEL
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
-DGEMMINCOPYOBJ = dgemm_incopy.o
-DGEMMITCOPYOBJ = dgemm_itcopy.o
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
ifndef CGEMMKERNEL
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
-CGEMMINCOPYOBJ = cgemm_incopy.o
-CGEMMITCOPYOBJ = cgemm_itcopy.o
-CGEMMONCOPYOBJ = cgemm_oncopy.o
-CGEMMOTCOPYOBJ = cgemm_otcopy.o
+CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
ifndef ZGEMMKERNEL
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
-ZGEMMINCOPYOBJ = zgemm_incopy.o
-ZGEMMITCOPYOBJ = zgemm_itcopy.o
-ZGEMMONCOPYOBJ = zgemm_oncopy.o
-ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
ifndef SGEMM_BETA
DGEMMITCOPY = dgemm_tcopy_16.S
DGEMMONCOPY = dgemm_ncopy_4.S
DGEMMOTCOPY = dgemm_tcopy_4.S
-DGEMMINCOPYOBJ = dgemm_incopy.o
-DGEMMITCOPYOBJ = dgemm_itcopy.o
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
-SGEMMONCOPYOBJ = sgemm_oncopy.o
-SGEMMOTCOPYOBJ = sgemm_otcopy.o
+SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
-DGEMMONCOPYOBJ = dgemm_oncopy.o
-DGEMMOTCOPYOBJ = dgemm_otcopy.o
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
-CGEMMONCOPYOBJ = cgemm_oncopy.o
-CGEMMOTCOPYOBJ = cgemm_otcopy.o
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
-ZGEMMONCOPYOBJ = zgemm_oncopy.o
-ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
#endif
}
#else // (ARCH_MIPS64)
+#if (ARCH_LOONGARCH64)
+/*
+ * Fill the GEMM p/q/r fields of TABLE_NAME from the compile-time
+ * *_DEFAULT_P, *_DEFAULT_Q and *_DEFAULT_R constants, one group per
+ * precision. The sbgemm fields are written only when BUILD_BFLOAT16 is
+ * defined.
+ */
+static void init_parameter(void) {
+
+#ifdef BUILD_BFLOAT16
+  /* bfloat16 */
+  TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
+  TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
+  TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
+#endif
+
+  /* single precision real */
+  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
+  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
+  TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
+
+  /* double precision real */
+  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
+  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
+  TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
+
+  /* single precision complex */
+  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
+  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
+  TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
+
+  /* double precision complex */
+  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
+  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
+  TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
+}
+#else // (ARCH_LOONGARCH64)
#if (ARCH_POWER)
static void init_parameter(void) {
}
#endif //POWER
#endif //ZARCH
+#endif //(ARCH_LOONGARCH64)
#endif //(ARCH_MIPS64)
#endif //(ARCH_ARM64)
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
-#define SGEMM_DEFAULT_P sgemm_p
+#define SGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_P 32
-#define QGEMM_DEFAULT_P qgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
-#define XGEMM_DEFAULT_P xgemm_p
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
-#define SGEMM_DEFAULT_R sgemm_r
+#define SGEMM_DEFAULT_R 12288
#define DGEMM_DEFAULT_R 858
-#define QGEMM_DEFAULT_R qgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
-#define XGEMM_DEFAULT_R xgemm_r
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
#define SGEMM_DEFAULT_Q 128
#define DGEMM_DEFAULT_Q 152
-#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 128
-#define XGEMM_DEFAULT_Q 128
#define SYMV_P 16
#endif
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
+#elif defined(ARCH_LOONGARCH64)
+/* Fixed S/D/C/ZGEMM P, Q and R defaults used when ARCH_LOONGARCH64 is
+ * defined; init_parameter() copies these into the kernel table.
+ * NOTE(review): presumably cache-blocking sizes - confirm the tuning source. */
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
#else
#define SGEMM_DEFAULT_P sgemm_p
#define DGEMM_DEFAULT_P dgemm_p