Merge branch 'openblas-open-910' of git://github.com/damonyu1989/OpenBLAS into damonyu1989-openblas-open-910
INCLUDED = 1
ifndef TOPDIR
-TOPDIR = .
+TOPDIR = .
endif
# If ARCH is not set, we use the host system's architecture for getarch compile options.
ifdef TARGET
GETARCH_FLAGS := -DFORCE_$(TARGET)
GETARCH_FLAGS += -DUSER_TARGET
+# With TARGET=GENERIC and DYNAMIC_ARCH=1, force NO_EXPRECISION on and export
+# it so that sub-makes see the same setting.
+ifeq ($(TARGET), GENERIC)
+ifeq ($(DYNAMIC_ARCH), 1)
+override NO_EXPRECISION=1
+export NO_EXPRECISION
+endif
+endif
endif
# Force fallbacks for 32bit
ifndef TARGET_CORE  # TARGET_CORE not defined: read Makefile.conf
include $(TOPDIR)/Makefile.conf
else  # TARGET_CORE defined: empty the HAVE_* variables, then read Makefile_kernel.conf (presumably so only that file decides them - confirm)
+HAVE_NEON=
+HAVE_VFP=
+HAVE_VFPV3=
+HAVE_VFPV4=
+HAVE_MMX=
+HAVE_SSE=
+HAVE_SSE2=
+HAVE_SSE3=
+HAVE_SSSE3=
+HAVE_SSE4_1=
+HAVE_SSE4_2=
+HAVE_SSE4A=
+HAVE_SSE5=
+HAVE_AVX=
+HAVE_AVX2=
+HAVE_FMA3=
include $(TOPDIR)/Makefile_kernel.conf
endif  # TARGET_CORE
else
GCCDUMPVERSION_PARAM := -dumpversion
endif
+GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1)
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
endif
ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9
DYNAMIC_CORE += POWER10
+CCOMMON_OPT += -DHAVE_P10_SUPPORT
endif
ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGT5), 1)
else
$(info OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
-ifeq ($(GCCVERSIONGTEQ11), 1)
+# LDVERSIONGTEQ35 is 1 when the second dot-separated field of the first line of
+# `ld --version` (with any "-suffix" removed) is >= 35. The operator is escaped
+# so the shell passes it to expr instead of treating it as a redirection.
+LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
+ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
+CCOMMON_OPT += -DHAVE_P10_SUPPORT
else ifeq ($(GCCVERSIONGTEQ10), 1)
-ifeq ($(GCCMINORVERSIONGTEQ2), 1)
+ifeq ($(GCCMINORVERSIONGTEQ2)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
+CCOMMON_OPT += -DHAVE_P10_SUPPORT
endif
else
$(info OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
endif
endif
-
+ ifeq ($(ARCH), riscv64)  # riscv64: set NO_BINARY_MODE and pre-define BINARY_DEFINED, which the ifndef BINARY_DEFINED test below checks
+ NO_BINARY_MODE = 1
+ BINARY_DEFINED = 1
+ endif
#
ifndef BINARY_DEFINED
ifneq ($(OSNAME), AIX)
ifdef BINARY64
+ifneq ($(ARCH), riscv64)
CCOMMON_OPT += -m64
+endif
else
CCOMMON_OPT += -m32
endif
FCOMMON_OPT += -Mrecursive -Kieee
ifeq ($(OSNAME), Linux)
ifeq ($(ARCH), x86_64)
-FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
+# First dot-separated field of the first line of the compiler's version banner;
+# compared against AOCC below. The pipeline runs directly in $(shell ...) so its
+# output is captured rather than executed as a command.
+FLANG_VENDOR := $(shell $(FC) --version|cut -f 1 -d "."|head -1)
ifeq ($(FLANG_VENDOR),AOCC)
FCOMMON_OPT += -fno-unroll-loops
endif
else
ifdef BINARY64
ifneq ($(OSNAME), AIX)
+ifneq ($(ARCH), riscv64)
FCOMMON_OPT += -m64
endif
+endif
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -fdefault-integer-8
export HAVE_SSE4A
export HAVE_SSE5
export HAVE_AVX
+export HAVE_AVX2
+export HAVE_FMA3
export HAVE_VFP
export HAVE_VFPV3
export HAVE_VFPV4
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the OpenBLAS project nor the names of
- its contributors may be used to endorse or promote products
- derived from this software without specific prior written
+ 3. Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#define MB __sync_synchronize()
#define WMB __sync_synchronize()
+ #define RMB __sync_synchronize()
#define INLINE inline
#define BUFFER_SIZE ( 32 << 20)
#define SEEK_ADDRESS
+ #if defined(C910V)
+ #include <riscv-vector.h>
+ #endif
+
#endif
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the OpenBLAS project nor the names of
- its contributors may be used to endorse or promote products
- derived from this software without specific prior written
+ 3. Neither the name of the OpenBLAS project nor the names of
+ its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/*********************************************************************/
#define CPU_UNKNOWN 0
+ #define CPU_C910V 1
static char *cpuname[] = {  /* presumably indexed by the CPU_* constants above (CPU_UNKNOWN 0, CPU_C910V 1) - confirm against callers */
"UNKOWN",  /* NOTE(review): spelling looks like a typo for "UNKNOWN"; it is a runtime string, so check consumers before changing it */
+ "C910V"
};
int detect(void){
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
- "-DFMA3"
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#endif
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
- "-DFMA3"
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#else
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
- "-DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
#define LIBNAME "skylakex"
#define CORENAME "SKYLAKEX"
#endif
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
- "-DFMA3"
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#else
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
- "-DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
#define LIBNAME "cooperlake"
#define CORENAME "COOPERLAKE"
#endif
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
- "-DHAVE_AVX -DHAVE_FMA3 -DFMA3"
+ "-DHAVE_AVX -DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "zen"
#define CORENAME "ZEN"
#endif
#else
#endif
+#ifdef FORCE_VORTEX  /* target description used when FORCE_VORTEX is defined */
+#define FORCE  /* tested by the later #ifndef FORCE / !defined(FORCE) checks */
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "VORTEX"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DVORTEX " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
+ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
+#define LIBNAME "vortex"
+#define CORENAME "VORTEX"
+#endif  /* FORCE_VORTEX */
+
#ifdef FORCE_ZARCH_GENERIC
#define FORCE
#define ARCHITECTURE "ZARCH"
#define CORENAME "Z14"
#endif
+ #ifdef FORCE_C910V  /* target description used when FORCE_C910V is defined */
+ #define FORCE  /* tested by the later #ifndef FORCE / !defined(FORCE) checks */
+ #define ARCHITECTURE "RISCV64"
+ #define SUBARCHITECTURE "C910V"
+ #define SUBDIRNAME "riscv64"
+ #define ARCHCONFIG "-DC910V " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
+ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+ #define LIBNAME "c910v"
+ #define CORENAME "C910V"
+ #else  /* NOTE(review): empty branch, nothing is defined when FORCE_C910V is absent */
+ #endif  /* FORCE_C910V */
+
+
#ifndef FORCE
#ifdef USER_TARGET
printf("NUM_CORES=%d\n", get_num_cores());
-#if defined(__arm__) && !defined(FORCE)
+#if defined(__arm__)  /* ARM-only: report feature flags either by probing or from ARCHCONFIG */
+#if !defined(FORCE)  /* no forced target: call get_features() */
+ fprintf(stderr,"get features!\n");
get_features();
+#else  /* forced target: derive the output from the ARCHCONFIG string instead */
+ fprintf(stderr,"split archconfig!\n");
+ sprintf(buffer, "%s", ARCHCONFIG);  /* NOTE(review): unbounded copy - assumes buffer can hold ARCHCONFIG; confirm its size */
+
+ p = &buffer[0];
+
+ while (*p) {  /* scan the copied text for "-D" options */
+ if ((*p == '-') && (*(p + 1) == 'D')) {
+ p += 2;
+ if (*p != 'H') {  /* names not starting with 'H' are skipped */
+ while( (*p != ' ') && (*p != '-') && (*p != '\0') && (*p != '\n')) {p++; }
+ if (*p == '-') continue;  /* stopped on a '-': let the outer loop re-examine it */
+ }
+ while ((*p != ' ') && (*p != '\0')) {  /* echo the rest of the token */
+
+ if (*p == '=') {  /* explicit value: print "=" and the value as written (no newline is printed on this path) */
+ printf("=");
+ p ++;
+ while ((*p != ' ') && (*p != '\0')) {
+ printf("%c", *p);
+ p ++;
+ }
+ } else {
+ printf("%c", *p);
+ p ++;
+ if ((*p == ' ') || (*p =='\0')) printf("=1\n");  /* name ended without a value: emit "=1" and end the line */
+ }
+ }
+ } else p ++;
+ }
+#endif
#endif
#define SBGEMM_DEFAULT_P 832
#define SBGEMM_DEFAULT_Q 1026
#define SBGEMM_DEFAULT_R 4096
+#undef DGEMM_DEFAULT_UNROLL_M
+#undef DGEMM_DEFAULT_UNROLL_N
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 8
#endif
#if defined(SPARC) && defined(V7)
#endif
+ /* Default GEMM parameter macros used when C910V is defined. */
+ #ifdef C910V
+ #define GEMM_DEFAULT_OFFSET_A 0
+ #define GEMM_DEFAULT_OFFSET_B 0
+ #define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+ #define SGEMM_DEFAULT_UNROLL_M 16
+ #define SGEMM_DEFAULT_UNROLL_N 4
+
+ #define DGEMM_DEFAULT_UNROLL_M 8
+ #define DGEMM_DEFAULT_UNROLL_N 4
+
+ #define CGEMM_DEFAULT_UNROLL_M 2
+ #define CGEMM_DEFAULT_UNROLL_N 2
+
+ #define ZGEMM_DEFAULT_UNROLL_M 2
+ #define ZGEMM_DEFAULT_UNROLL_N 2
+
+ #define SGEMM_DEFAULT_P 160
+ #define DGEMM_DEFAULT_P 160
+ #define CGEMM_DEFAULT_P 96
+ #define ZGEMM_DEFAULT_P 64
+
+ #define SGEMM_DEFAULT_Q 240
+ #define DGEMM_DEFAULT_Q 128
+ #define CGEMM_DEFAULT_Q 120
+ #define ZGEMM_DEFAULT_Q 120
+
+ #define SGEMM_DEFAULT_R 12288
+ #define DGEMM_DEFAULT_R 8192
+ #define CGEMM_DEFAULT_R 4096
+ #define ZGEMM_DEFAULT_R 4096
+
+ #define SYMV_P 16
+
+ #endif /* C910V */
+
#ifdef ARMV7
#define SNUMOPT 2
#define DNUMOPT 2