Disable CBLAS and LAPACK.
set(OpenBLAS_LIBNAME openblas)
#######
-option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON)
+option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
+option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON)
option(BUILD_DEBUG "Build Debug Version" OFF)
#######
if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1)
+set(NO_LAPACKE 1)
endif()
# Select the build type from the BUILD_DEBUG option: Debug when it is ON,
# Release otherwise. (The ON branch used to be empty, so the option had no
# effect on the build type.)
if(BUILD_DEBUG)
set(CMAKE_BUILD_TYPE Debug)
else()
set(CMAKE_BUILD_TYPE Release)
endif()
+
+if(BUILD_WITHOUT_CBLAS)
+set(NO_CBLAS 1)
+endif()
+
#######
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
- message ("error 1")
list(APPEND SUBDIRS lapack)
endif ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK)
- message ("error 2")
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
endif ()
+#Only generate .def for dll on MSVC
+if(MSVC)
+set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
+endif()
+
# add objects to the openblas lib
# OpenBLAS_DEF_FILE is only set for MSVC; on other toolchains it expands to
# nothing, so the library does not list a .def file that is never generated.
-add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
+add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
+
+include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
#only build shared library for MSVC
if(NOT MSVC)
--- /dev/null
+
+#Only generate .def for dll on MSVC
+if(MSVC)
+
+set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)
+
# Architecture string passed to gensymbol: x86_64 unless ARCH is defined.
+if (NOT DEFINED ARCH)
+ set(ARCH_IN "x86_64")
+else()
+ set(ARCH_IN ${ARCH})
+endif()
+
# The generic core overrides the architecture string. The expansion is quoted
# so that an unset or empty CORE compares as "" instead of disappearing from
# the if() argument list.
+if ("${CORE}" STREQUAL "generic")
+ set(ARCH_IN "GENERIC")
+endif ()
+
# For every optional build setting forwarded to gensymbol, create <name>_IN:
# the setting's own value when it is defined, 0 otherwise.
+foreach (export_setting EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
+ if (DEFINED ${export_setting})
+  set(${export_setting}_IN ${${export_setting}})
+ else()
+  set(${export_setting}_IN 0)
+ endif()
+endforeach ()
+
# Before the library target is linked, run the exports/gensymbol script with
# perl and redirect its output into openblas.def in the project binary dir.
# The *_IN variables carry the build settings (or their defaults) as
# positional arguments to the script.
+add_custom_command(
+ TARGET ${OpenBLAS_LIBNAME} PRE_LINK
+ COMMAND perl
+ ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
+ COMMENT "Create openblas.def file"
+ VERBATIM)
+
+endif()
\ No newline at end of file
include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()
+
+if (NOT NO_LAPACK)
enable_language(Fortran)
+endif()
if (NOT ONLY_CBLAS)
# N.B. f_check is not cross-platform, so instead try to use CMake variables
set(QGEMVTKERNEL gemv_t.S)
set(XGEMVNKERNEL zgemv_n.S)
set(XGEMVTKERNEL zgemv_t.S)
- set(SCABS_KERNEL cabs.S)
- set(DCABS_KERNEL cabs.S)
- set(QCABS_KERNEL cabs.S)
- set(LSAME_KERNEL lsame.S)
# Generic fallbacks: the double and extended precision cabs kernels use the
# same generic C source as the single precision one.
+ set(SCABS_KERNEL ../generic/cabs.c)
+ set(DCABS_KERNEL ../generic/cabs.c)
+ set(QCABS_KERNEL ../generic/cabs.c)
+ set(LSAME_KERNEL ../generic/lsame.c)
set(SAXPBYKERNEL ../arm/axpby.c)
set(DAXPBYKERNEL ../arm/axpby.c)
set(CAXPBYKERNEL ../arm/zaxpby.c)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro ()
+
# Default level-3 kernel sources: S and D geadd use generic/geadd.c, C and Z
# geadd use generic/zgeadd.c.
+macro(SetDefaultL3)
+ foreach (geadd_prefix S D)
+  set(${geadd_prefix}GEADD_KERNEL ../generic/geadd.c)
+ endforeach ()
+ foreach (geadd_prefix C Z)
+  set(${geadd_prefix}GEADD_KERNEL ../generic/zgeadd.c)
+ endforeach ()
+endmacro ()
\ No newline at end of file
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()
# MSVC builds currently force getarch to report the generic target.
+if (MSVC)
+list(APPEND GETARCH_FLAGS -DFORCE_GENERIC)
+endif()
+
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG
- COPY_FILE ${GETARCH_BIN}
+ COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
message(STATUS "Running getarch")
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG
- COPY_FILE ${GETARCH2_BIN}
+ COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
set(LIB_COMPONENTS CBLAS)
endif ()
+
+# For GEMM3M
# GEMM3M is switched on only when ARCH is one of the listed architectures and
# is switched off again for the generic core. Expansions are quoted so that an
# unset or empty CORE (or an empty ARCH) compares as "" instead of dropping
# out of the if() argument list.
+set(USE_GEMM3M 0)
+
+if (DEFINED ARCH)
+ if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
+ set(USE_GEMM3M 1)
+ endif ()
+
+ if ("${CORE}" STREQUAL "generic")
+ set(USE_GEMM3M 0)
+ endif ()
+endif ()
+
+
#export OSNAME
#export ARCH
#export CORE
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
+# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
# STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)
set(complex_only true)
elseif (${ARGV7} EQUAL 3)
set(mangle_complex_sources true)
+ elseif (${ARGV7} EQUAL 4)
+ set(mangle_complex_sources true)
+ set(complex_only true)
elseif (NOT ${ARGV7} EQUAL 0)
set(float_list ${ARGV7})
endif ()
#define COMPSIZE 2
#endif
-#if defined(C_PGI) || defined(C_SUN)
-#define CREAL(X) (*((FLOAT *)&X + 0))
-#define CIMAG(X) (*((FLOAT *)&X + 1))
-#else
-#define CREAL __real__
-#define CIMAG __imag__
-#endif
#define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16))
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
- (__GNUC__ >= 3 && !defined(__cplusplus)))
+ (__GNUC__ >= 3 && !defined(__cplusplus)) || \
+ _MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
+ #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
+ #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
+ #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
+ #define openblas_make_complex_float(real, imag) {(real), (imag)}
+ #define openblas_make_complex_double(real, imag) {(real), (imag)}
+ #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif
+
/* Map the precision-independent names onto the complex type and constructor
   macro for the selected precision: xdouble when XDOUBLE is defined, double
   when DOUBLE is defined, float otherwise. */
+#ifdef XDOUBLE
+#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
+#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
+#elif defined(DOUBLE)
+#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
+#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
+#else
+#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
+#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
+#endif
+
/* CREAL/CIMAG read the real and imaginary part of a complex value: through
   FLOAT pointer arithmetic for the PGI and Sun compilers, through the struct
   members when OPENBLAS_COMPLEX_STRUCT is defined, and through the
   __real__/__imag__ operators otherwise. */
+#if defined(C_PGI) || defined(C_SUN)
+#define CREAL(X) (*((FLOAT *)&X + 0))
+#define CIMAG(X) (*((FLOAT *)&X + 1))
+#else
+#ifdef OPENBLAS_COMPLEX_STRUCT
+#define CREAL(Z) ((Z).real)
+#define CIMAG(Z) ((Z).imag)
+#else
+#define CREAL __real__
+#define CIMAG __imag__
+#endif
+#endif
+
#endif // ASSEMBLER
#ifndef IFLUSH
#endif
#endif
+#if defined(C_MSVC)
+#define inline __inline
+#endif
+
#ifndef ASSEMBLER
#ifndef MIN
#ifndef ASSEMBLER
+#ifdef C_MSVC
+#include <intrin.h>
+#endif
+
#ifdef C_SUN
#define __asm__ __asm
#define __volatile__
static void __inline blas_lock(volatile BLASULONG *address){
- int ret;
+ BLASULONG ret;
do {
while (*address) {YIELDING;};
+#ifndef C_MSVC
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
-
+#else
+ ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
+#endif
} while (ret);
+
}
static __inline BLASULONG rpcc(void){
+#ifdef C_MSVC
+ return __rdtsc();
+#else
BLASULONG a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((BLASULONG)a + ((BLASULONG)d << 32));
+#endif
}
#define RPCC64BIT
+#ifndef C_MSVC
static __inline BLASULONG getstackaddr(void){
BLASULONG addr;
return addr;
}
+#endif
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
+#ifdef C_MSVC
+ int cpuinfo[4];
+ __cpuid(cpuinfo, op);
+ *eax=cpuinfo[0];
+ *ebx=cpuinfo[1];
+ *ecx=cpuinfo[2];
+ *edx=cpuinfo[3];
+#else
__asm__ __volatile__("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
+#endif
}
/*
#define WHEREAMI
*/
-static inline int WhereAmI(void){
+static __inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;
#define GET_IMAGE_CANCEL
#ifdef SMP
-#ifdef USE64BITINT
+#if defined(USE64BITINT)
static __inline blasint blas_quickdivide(blasint x, blasint y){
return x / y;
}
+#elif defined (C_MSVC)
+static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
+ return x / y;
+}
#else
extern unsigned int blas_quick_divide_table[];
tbmv_thread.c
)
+set(ULVM_COMPLEX_SOURCES
+ hbmv_k.c
+ hpmv_k.c
+ hpr_k.c
+ hpr2_k.c
+ her_k.c
+ her2_k.c
+)
+
# objects that need LOWER set
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
# gbmv uses a lowercase n and t
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
+# c/zgbmv
+GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
+GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
+GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
+GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
+GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
+GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
# special defines for complex
foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
endforeach ()
# Build every Hermitian level-2 driver four times from its z-prefixed source:
# _U with no extra define, _L with LOWER, _V with HEMVREV and _M with both.
+ foreach (herm_src ${ULVM_COMPLEX_SOURCES})
+ string(REGEX MATCH "[a-z0-9]+" op_name ${herm_src})
+ set(herm_file "z${herm_src}")
+ GenerateNamedObjects("${herm_file}" "" "${op_name}_U" false "" "" false ${float_type})
+ GenerateNamedObjects("${herm_file}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
+ GenerateNamedObjects("${herm_file}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
+ GenerateNamedObjects("${herm_file}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
+ endforeach ()
+
if (SMP)
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})
+ GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})
+
+ GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})
+
+ GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})
+
+ GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})
+
+ GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})
+
# Threaded her/her2 objects, built from syr_thread.c and syr2_thread.c.
# NOTE(review): the U/L variants define HER (resp. HER2) but the V/M variants
# define HEMVREV, the reversed define of the hemv family. Presumably the
# sources expect a HER-specific reversed define (and possibly HER rather than
# HER2 for her2) - TODO confirm against syr_thread.c / syr2_thread.c.
+ GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type})
+
+ GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type})
+ GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type})
+ GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type})
+ GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type})
+
foreach (nu_smp_src ${NU_SMP_SOURCES})
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
#ifndef COMPLEX
FLOAT result;
#else
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif
#ifndef COMPLEX
FLOAT result;
#else
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
a = (FLOAT *)args -> a;
#ifndef COMPLEX
FLOAT result;
#else
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
a = (FLOAT *)args -> a;
#ifndef COMPLEX
FLOAT result;
#else
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif
#ifndef COMPLEX
FLOAT result;
#else
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif
#ifndef COMPLEX
FLOAT result;
#else
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#endif
FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer;
#ifdef TRANS
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
if (incy != 1) {
FLOAT *bufferX = sbmvbuffer;
FLOAT temp[2];
+ OPENBLAS_COMPLEX_FLOAT result;
+
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
- FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
+ result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
- FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
+ result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
- FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
+ result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
- FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
+ result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
FLOAT *bufferX = gemvbuffer;
FLOAT temp[2];
+ OPENBLAS_COMPLEX_FLOAT result;
+
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
#ifndef HEMVREV
#ifndef LOWER
if (i > 0) {
- FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
+ result = DOTC_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
#else
if (m - i > 1) {
- FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
+ result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
#else
#ifndef LOWER
if (i > 0) {
- FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
+ result = DOTU_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
#else
if (m - i > 1) {
- FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
+ result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
FLOAT *bufferY = sbmvbuffer;
FLOAT *bufferX = sbmvbuffer;
+ OPENBLAS_COMPLEX_FLOAT result;
+
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
if (length > 0) {
- FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
+ result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
a, 1, Y + i * COMPSIZE, 1, NULL, 0);
if (length > 0) {
- FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
+ result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer;
- FLOAT _Complex result;
+
+ OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) {
Y = bufferY;
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;
FLOAT *B = b;
BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;
BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;
BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;
BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;
BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;
BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;
BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex temp;
+ OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2;
BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;
BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den;
include_directories(${CMAKE_SOURCE_DIR})
-set(USE_GEMM3M 0)
-
-if (DEFINED ARCH)
- if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
- set(USE_GEMM3M 1)
- endif ()
-endif ()
-
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
# loop through gemm.c defines
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})
+
+ #hemm
+ GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
+ GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})
+
+ #her2k
+ GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
+
# Extra objects for the threaded level-3 path (SMP without the simple
# threaded level-3 implementation).
# NOTE(review): the hemm calls add THREADED_LEVEL3 and a *_thread_* object
# name, but the four her2k calls below repeat the unconditional her2k calls
# verbatim (same defines, same object names). Presumably they were meant to
# produce threaded variants - TODO confirm the intended defines and names.
+ if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
+ #hemm
+ GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
+ GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
+ #her2k
+ GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
+ GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
+ endif()
+
# special gemm defines for complex
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
string(TOLOWER ${gemm_define} gemm_define_LC)
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
+ if(USE_GEMM3M)
+ GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
+ endif()
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
+ if(USE_GEMM3M)
+ GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
+ endif()
endif ()
endforeach ()
endif ()
xerbla.c
openblas_set_num_threads.c
openblas_error_handle.c
+ openblas_get_num_procs.c
+ openblas_get_num_threads.c
)
# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
include_directories(${CMAKE_SOURCE_DIR})
+
set(BLAS1_SOURCES
copy.c
- asum.c nrm2.c
+ nrm2.c
)
set(BLAS1_REAL_ONLY_SOURCES
rotm.c rotmg.c # N.B. these do not have complex counterparts
+ rot.c
+ asum.c
)
# these will have 'z' prepended for the complex version
axpy.c swap.c
scal.c
dot.c
- rot.c rotg.c
+ rotg.c
axpby.c
)
tpsv.c tpmv.c
)
+set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
+ hemv.c hbmv.c
+ her.c her2.c
+ hpmv.c hpr.c
+ hpr2.c
+)
+
# these do not have separate 'z' sources
set(BLAS3_SOURCES
gemm.c symm.c
set(BLAS3_MANGLED_SOURCES
omatcopy.c imatcopy.c
+ geadd.c
)
# generate the BLAS objs once with and once without cblas
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
+ GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
+ #sdsdot, dsdot
+ GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
+ GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
+
# trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
# complex-specific sources
foreach (float_type ${FLOAT_TYPES})
+
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
+ GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type})
+ GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type})
+
+ GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type})
+ GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type})
+ GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type})
+
+ if (USE_GEMM3M)
+ GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
+ endif()
endif ()
if (${float_type} STREQUAL "COMPLEX")
GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
+ GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX")
+ GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX")
+ GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX")
+ GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX")
endif ()
if (${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
+ GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX")
+ GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX")
+ GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX")
+ GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX")
endif ()
endforeach ()
#endif
-
-#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
long double da = *DA;
long double db = *DB;
#endif
- if (n <= 0) return;
-
FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1);
+ if (n <= 0) return;
+
FUNCTION_PROFILE_START();
if (incx < 0) x -= (n - 1) * incx * 2;
#ifdef RETURN_BY_STRUCT
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#elif defined RETURN_BY_STACK
-void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
+void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#else
-FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
+OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#endif
BLASLONG n = *N;
BLASLONG incx = *INCX;
BLASLONG incy = *INCY;
#ifndef RETURN_BY_STACK
- FLOAT _Complex ret;
+ OPENBLAS_COMPLEX_FLOAT ret;
#endif
#ifdef RETURN_BY_STRUCT
MYTYPE myret;
#endif
+#ifndef RETURN_BY_STRUCT
+ OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
+#endif
+
PRINT_DEBUG_NAME;
if (n <= 0) {
myret.i = 0.;
return myret;
#elif defined RETURN_BY_STACK
- *result = ZERO;
+ *result = zero;
return;
#else
- return ZERO;
+ return zero;
#endif
}
#else
#ifdef FORCE_USE_STACK
-void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){
+void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
#else
-FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
+OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
- FLOAT _Complex ret;
+ OPENBLAS_COMPLEX_FLOAT ret;
#endif
PRINT_DEBUG_CNAME;
if (n <= 0) {
#ifdef FORCE_USE_STACK
- *result = ZERO;
+ *result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
return;
#else
- return ZERO;
+ return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif
}
FLOAT *buffer;
#ifdef SMP
int nthreads;
+ int nthreads_max;
+ int nthreads_avail;
+ double MNK;
#endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
blasint lenx, leny;
blasint i;
- PRINT_DEBUG_NAME;
-
FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1);
+ PRINT_DEBUG_NAME;
+
TOUPPER(trans);
info = 0;
GEMV_O, GEMV_U, GEMV_S, GEMV_D,
};
- PRINT_DEBUG_CNAME;
-
FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1);
+ PRINT_DEBUG_CNAME;
+
trans = -1;
info = 0;
#ifdef SMP
- int nthreads_max = num_cpu_avail(2);
- int nthreads_avail = nthreads_max;
+ nthreads_max = num_cpu_avail(2);
+ nthreads_avail = nthreads_max;
- double MNK = (double) m * (double) n;
+ MNK = (double) m * (double) n;
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
nthreads_max = 1;
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
- PRINT_DEBUG_NAME;
-
- IDEBUG_START;
-
- FUNCTION_PROFILE_START();
-
-#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
long double da_r = *(DA + 0);
long double da_i = *(DA + 1);
long double ada = fabs(da_r) + fabs(da_i);
+ PRINT_DEBUG_NAME;
+
+ IDEBUG_START;
+
+ FUNCTION_PROFILE_START();
+
if (ada == ZERO) {
*C = ZERO;
*(S + 0) = ONE;
FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb;
+ PRINT_DEBUG_NAME;
+
+ IDEBUG_START;
+
+ FUNCTION_PROFILE_START();
+
if (ada == ZERO) {
*C = ZERO;
*(S + 0) = ONE;
SetDefaultL1()
SetDefaultL2()
+SetDefaultL3()
ParseMakefileVars("${KERNELDIR}/KERNEL")
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")
else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
endif ()
+
+ # Register the rot kernel source for the complex precisions: it is named
+ # srot_k when float_type is COMPLEX and drot_k when it is ZCOMPLEX.
+ # float_type is quoted so an empty value cannot leave STREQUAL without a
+ # left-hand operand.
+ if ("${float_type}" STREQUAL "COMPLEX")
+   GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type})
+ elseif ("${float_type}" STREQUAL "ZCOMPLEX")
+   GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type})
+ endif ()
+
endforeach ()
+#dsdot,sdsdot
+GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
+GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")
+
# Makefile.L2
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})
+
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type})
+
else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
endforeach ()
# Makefile.L3
-set(USE_GEMM3M false)
set(USE_TRMM false)
-if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
- set(USE_GEMM3M true)
-endif ()
-
-if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC")
+# Switch USE_TRMM on for the arm/arm64 architectures, the LONGSOON3B and
+# GENERIC targets, and the "generic" core. Every expansion is quoted so an
+# unset ARCH, TARGET or CORE compares as an empty string instead of producing
+# a malformed if() expression.
+if ("${ARCH}" STREQUAL "arm" OR "${ARCH}" STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic")
 set(USE_TRMM true)
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})
+
+ #hemm
+ GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
+ GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
+ GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
+ GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})
+
else ()
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
endif ()
endif ()
endif ()
- GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type})
- GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type})
- GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type})
- GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})
+
+ # The conjugating omatcopy kernels are only set up for the complex
+ # precisions (float_char "C" or "Z"). The comparison is quoted so an empty
+ # float_char cannot leave STREQUAL without a left-hand operand.
+ if ("${float_char}" STREQUAL "Z" OR "${float_char}" STREQUAL "C")
+   # Any variant whose kernel variable is not defined falls back to the
+   # matching source under ../arm (the path is appended to KERNELDIR below).
+   if (NOT DEFINED ${float_char}OMATCOPY_CNC)
+     set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c)
+   endif ()
+   if (NOT DEFINED ${float_char}OMATCOPY_RNC)
+     set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c)
+   endif ()
+   if (NOT DEFINED ${float_char}OMATCOPY_CTC)
+     set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c)
+   endif ()
+   if (NOT DEFINED ${float_char}OMATCOPY_RTC)
+     set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c)
+   endif ()
+
+   # All four variants are built with CONJ; the rnc/rtc ones also get ROWM.
+   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type})
+   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type})
+   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type})
+   GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type})
+ endif ()
+ GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()
# Makefile.LA
DGEADD_K = ../generic/geadd.c
endif
-$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
+$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
ifndef CGEADD_K
BLASLONG ix,iy;
FLOAT temp;
+ BLASLONG inc_x2;
+ BLASLONG inc_y2;
+
if ( n < 0 ) return(0);
ix = 0;
iy = 0;
- BLASLONG inc_x2 = 2 * inc_x;
- BLASLONG inc_y2 = 2 * inc_y;
+ inc_x2 = 2 * inc_x;
+ inc_y2 = 2 * inc_y;
if ( beta_r == 0.0 && beta_i == 0.0)
{
{
BLASLONG i=0;
BLASLONG ix,iy;
+ BLASLONG inc_x2;
+ BLASLONG inc_y2;
if ( n < 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
ix = 0;
iy = 0;
- BLASLONG inc_x2 = 2 * inc_x;
- BLASLONG inc_y2 = 2 * inc_y;
+ inc_x2 = 2 * inc_x;
+ inc_y2 = 2 * inc_y;
while(i < n)
{
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
+ BLASLONG inc_x2;
+ BLASLONG inc_y2;
if ( n < 0 ) return(0);
- BLASLONG inc_x2 = 2 * inc_x;
- BLASLONG inc_y2 = 2 * inc_y;
+ inc_x2 = 2 * inc_x;
+ inc_y2 = 2 * inc_y;
while(i < n)
{
#include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
-openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT dot[2];
- FLOAT _Complex result;
+ OPENBLAS_COMPLEX_FLOAT result;
+ BLASLONG inc_x2;
+ BLASLONG inc_y2;
dot[0]=0.0;
dot[1]=0.0;
- __real__ result = 0.0 ;
- __imag__ result = 0.0 ;
+ CREAL(result) = 0.0 ;
+ CIMAG(result) = 0.0 ;
if ( n < 1 ) return(result);
- BLASLONG inc_x2 = 2 * inc_x ;
- BLASLONG inc_y2 = 2 * inc_y ;
+ inc_x2 = 2 * inc_x ;
+ inc_y2 = 2 * inc_y ;
while(i < n)
{
i++ ;
}
- __real__ result = dot[0];
- __imag__ result = dot[1];
+ CREAL(result) = dot[0];
+ CIMAG(result) = dot[1];
return(result);
}
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp[2];
+ BLASLONG inc_x2;
+ BLASLONG inc_y2;
if ( n <= 0 ) return(0);
- BLASLONG inc_x2 = 2 * inc_x ;
- BLASLONG inc_y2 = 2 * inc_y ;
+ inc_x2 = 2 * inc_x ;
+ inc_y2 = 2 * inc_y ;
while(i < n)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp[2];
+ BLASLONG inc_x2;
+ BLASLONG inc_y2;
if ( n < 0 ) return(0);
- BLASLONG inc_x2 = 2 * inc_x;
- BLASLONG inc_y2 = 2 * inc_y;
+ inc_x2 = 2 * inc_x;
+ inc_y2 = 2 * inc_y;
while(i < n)
{
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
ZHEMV_L_KERNEL = ../generic/zhemv_k.c
+LSAME_KERNEL = ../generic/lsame.c
+SCABS_KERNEL = ../generic/cabs.c
+DCABS_KERNEL = ../generic/cabs.c
+QCABS_KERNEL = ../generic/cabs.c
+
+#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
- (__GNUC__ >= 3 && !defined(__cplusplus)))
+ (__GNUC__ >= 3 && !defined(__cplusplus)) || \
+ _MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>