set(ZSUMKERNEL zsum.S)
set(QSUMKERNEL sum.S)
set(XSUMKERNEL zsum.S)
+ set(SHAMINKERNEL ../arm/amin.c)
+ set(SHAMAXKERNEL amax.S)
+ set(SHMAXKERNEL ../arm/max.c)
+ set(SHMINKERNEL ../arm/min.c)
+ set(ISHAMAXKERNEL iamax.S)
+ set(ISHAMINKERNEL ../arm/iamin.c)
+ set(ISHMAXKERNEL ../arm/imax.c)
+ set(ISHMINKERNEL ../arm/imin.c)
+ set(SHASUMKERNEL asum.S)
+ set(SHAXPYKERNEL axpy.S)
+ set(SHAXPBYKERNEL ../arm/axpby.c)
+ set(SHCOPYKERNEL copy.S)
+ set(SHDOTKERNEL dot.S)
+ set(SHROTKERNEL rot.S)
+ set(SHSCALKERNEL scal.S)
+ set(SHNRM2KERNEL nrm2.S)
+ set(SHSUMKERNEL sum.S)
+ set(SHSWAPKERNEL swap.S)
endmacro ()
macro(SetDefaultL2)
- set(SGEMVNKERNEL gemv_n.S)
- set(SGEMVTKERNEL gemv_t.S)
+ set(SGEMVNKERNEL ../arm/gemv_n.c)
+ set(SGEMVTKERNEL ../arm/gemv_t.c)
set(DGEMVNKERNEL gemv_n.S)
set(DGEMVTKERNEL gemv_t.S)
set(CGEMVNKERNEL zgemv_n.S)
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
+ set(SHGEMVNKERNEL ../arm/gemv_n.c)
+ set(SHGEMVTKERNEL ../arm/gemv_t.c)
+ set(SHGERKERNEL ../generic/ger.c)
+
endmacro ()
macro(SetDefaultL3)
set(DGEADD_KERNEL ../generic/geadd.c)
set(CGEADD_KERNEL ../generic/zgeadd.c)
set(ZGEADD_KERNEL ../generic/zgeadd.c)
+ set(SHGEADD_KERNEL ../generic/geadd.c)
+ set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
+ set(SHGEMM_BETA ../generic/gemm_beta.c)
+ set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
+ set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
+ set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
+ set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
+ set(SHGEMMINCOPYOBJ shgemm_incopy.o)
+ set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
+ set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
+ set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
+
+
endmacro ()
#elif defined(HALF)
+#define AXPYU_K SAXPYU_K
+#define AXPYC_K SAXPYC_K
+#define SCAL_K SSCAL_K
+#define GEMV_N SGEMV_N
+#define GEMV_T SGEMV_T
+#define SYMV_U SSYMV_U
+#define SYMV_L SSYMV_L
+#define GERU_K SGERU_K
+#define GERC_K SGERC_K
+#define GERV_K SGERV_K
+#define GERD_K SGERD_K
+#define SYMV_THREAD_U SSYMV_THREAD_U
+#define SYMV_THREAD_L SSYMV_THREAD_L
#define GEMM_BETA SHGEMM_BETA
#define GEMM_KERNEL_N SHGEMM_KERNEL
#define GEMM_KERNEL_L SHGEMM_KERNEL
#define GEMM_OTCOPY SHGEMM_OTCOPY
#define GEMM_INCOPY SHGEMM_INCOPY
#define GEMM_ITCOPY SHGEMM_ITCOPY
+#define SYMM_THREAD_LU SSYMM_THREAD_LU
+#define SYMM_THREAD_LL SSYMM_THREAD_LL
+#define SYMM_THREAD_RU SSYMM_THREAD_RU
+#define SYMM_THREAD_RL SSYMM_THREAD_RL
+#define SYMM_LU SSYMM_LU
+#define SYMM_LL SSYMM_LL
+#define SYMM_RU SSYMM_RU
+#define SYMM_RL SSYMM_RL
+
+
+#define HEMM_THREAD_LU SHEMM_THREAD_LU
+#define HEMM_THREAD_LL SHEMM_THREAD_LL
+#define HEMM_THREAD_RU SHEMM_THREAD_RU
+#define HEMM_THREAD_RL SHEMM_THREAD_RL
#define GEMM_THREAD_NN SHGEMM_THREAD_NN
#define GEMM_THREAD_CN SHGEMM_THREAD_TN
#define GEMM_THREAD_RC SHGEMM_THREAD_NT
#define GEMM_THREAD_RR SHGEMM_THREAD_NN
+/* Only the TRMM/TRSM copy routines pick a different target depending on UNIT. */
+#ifdef UNIT
+
+#define TRMM_OUNCOPY STRMM_OUNUCOPY
+#define TRMM_OUTCOPY STRMM_OUTUCOPY
+#define TRMM_OLNCOPY STRMM_OLNUCOPY
+#define TRMM_OLTCOPY STRMM_OLTUCOPY
+#define TRSM_OUNCOPY STRSM_OUNUCOPY
+#define TRSM_OUTCOPY STRSM_OUTUCOPY
+#define TRSM_OLNCOPY STRSM_OLNUCOPY
+#define TRSM_OLTCOPY STRSM_OLTUCOPY
+
+#define TRMM_IUNCOPY STRMM_IUNUCOPY
+#define TRMM_IUTCOPY STRMM_IUTUCOPY
+#define TRMM_ILNCOPY STRMM_ILNUCOPY
+#define TRMM_ILTCOPY STRMM_ILTUCOPY
+#define TRSM_IUNCOPY STRSM_IUNUCOPY
+#define TRSM_IUTCOPY STRSM_IUTUCOPY
+#define TRSM_ILNCOPY STRSM_ILNUCOPY
+#define TRSM_ILTCOPY STRSM_ILTUCOPY
+
+#else
+
+#define TRMM_OUNCOPY STRMM_OUNNCOPY
+#define TRMM_OUTCOPY STRMM_OUTNCOPY
+#define TRMM_OLNCOPY STRMM_OLNNCOPY
+#define TRMM_OLTCOPY STRMM_OLTNCOPY
+#define TRSM_OUNCOPY STRSM_OUNNCOPY
+#define TRSM_OUTCOPY STRSM_OUTNCOPY
+#define TRSM_OLNCOPY STRSM_OLNNCOPY
+#define TRSM_OLTCOPY STRSM_OLTNCOPY
+
+#define TRMM_IUNCOPY STRMM_IUNNCOPY
+#define TRMM_IUTCOPY STRMM_IUTNCOPY
+#define TRMM_ILNCOPY STRMM_ILNNCOPY
+#define TRMM_ILTCOPY STRMM_ILTNCOPY
+#define TRSM_IUNCOPY STRSM_IUNNCOPY
+#define TRSM_IUTCOPY STRSM_IUTNCOPY
+#define TRSM_ILNCOPY STRSM_ILNNCOPY
+#define TRSM_ILTCOPY STRSM_ILTNCOPY
+
+#endif
+
+/* The macros below map to the same targets whether or not UNIT is defined, */
+/* so they must sit outside the UNIT conditional to be visible in both cases. */
+#define TRMM_KERNEL_LN STRMM_KERNEL_LN
+#define TRMM_KERNEL_LT STRMM_KERNEL_LT
+#define TRMM_KERNEL_LR STRMM_KERNEL_LN
+#define TRMM_KERNEL_LC STRMM_KERNEL_LT
+#define TRMM_KERNEL_RN STRMM_KERNEL_RN
+#define TRMM_KERNEL_RT STRMM_KERNEL_RT
+#define TRMM_KERNEL_RR STRMM_KERNEL_RN
+#define TRMM_KERNEL_RC STRMM_KERNEL_RT
+
+#define TRSM_KERNEL_LN STRSM_KERNEL_LN
+#define TRSM_KERNEL_LT STRSM_KERNEL_LT
+#define TRSM_KERNEL_LR STRSM_KERNEL_LN
+#define TRSM_KERNEL_LC STRSM_KERNEL_LT
+#define TRSM_KERNEL_RN STRSM_KERNEL_RN
+#define TRSM_KERNEL_RT STRSM_KERNEL_RT
+#define TRSM_KERNEL_RR STRSM_KERNEL_RN
+#define TRSM_KERNEL_RC STRSM_KERNEL_RT
+
+#define SYMM_IUTCOPY SSYMM_IUTCOPY
+#define SYMM_ILTCOPY SSYMM_ILTCOPY
+#define SYMM_OUTCOPY SSYMM_OUTCOPY
+#define SYMM_OLTCOPY SSYMM_OLTCOPY
+#define TRMM_LNUU STRMM_LNUU
+#define TRMM_LNUN STRMM_LNUN
+#define TRMM_LNLU STRMM_LNLU
+#define TRMM_LNLN STRMM_LNLN
+#define TRMM_LTUU STRMM_LTUU
+#define TRMM_LTUN STRMM_LTUN
+#define TRMM_LTLU STRMM_LTLU
+#define TRMM_LTLN STRMM_LTLN
+#define TRMM_LRUU STRMM_LNUU
+#define TRMM_LRUN STRMM_LNUN
+#define TRMM_LRLU STRMM_LNLU
+#define TRMM_LRLN STRMM_LNLN
+#define TRMM_LCUU STRMM_LTUU
+#define TRMM_LCUN STRMM_LTUN
+#define TRMM_LCLU STRMM_LTLU
+#define TRMM_LCLN STRMM_LTLN
+#define TRMM_RNUU STRMM_RNUU
+#define TRMM_RNUN STRMM_RNUN
+#define TRMM_RNLU STRMM_RNLU
+#define TRMM_RNLN STRMM_RNLN
+#define TRMM_RTUU STRMM_RTUU
+#define TRMM_RTUN STRMM_RTUN
+#define TRMM_RTLU STRMM_RTLU
+#define TRMM_RTLN STRMM_RTLN
+#define TRMM_RRUU STRMM_RNUU
+#define TRMM_RRUN STRMM_RNUN
+#define TRMM_RRLU STRMM_RNLU
+#define TRMM_RRLN STRMM_RNLN
+#define TRMM_RCUU STRMM_RTUU
+#define TRMM_RCUN STRMM_RTUN
+#define TRMM_RCLU STRMM_RTLU
+#define TRMM_RCLN STRMM_RTLN
+
+#define TRSM_LNUU STRSM_LNUU
+#define TRSM_LNUN STRSM_LNUN
+#define TRSM_LNLU STRSM_LNLU
+#define TRSM_LNLN STRSM_LNLN
+#define TRSM_LTUU STRSM_LTUU
+#define TRSM_LTUN STRSM_LTUN
+#define TRSM_LTLU STRSM_LTLU
+#define TRSM_LTLN STRSM_LTLN
+#define TRSM_LRUU STRSM_LNUU
+#define TRSM_LRUN STRSM_LNUN
+#define TRSM_LRLU STRSM_LNLU
+#define TRSM_LRLN STRSM_LNLN
+#define TRSM_LCUU STRSM_LTUU
+#define TRSM_LCUN STRSM_LTUN
+#define TRSM_LCLU STRSM_LTLU
+#define TRSM_LCLN STRSM_LTLN
+#define TRSM_RNUU STRSM_RNUU
+#define TRSM_RNUN STRSM_RNUN
+#define TRSM_RNLU STRSM_RNLU
+#define TRSM_RNLN STRSM_RNLN
+#define TRSM_RTUU STRSM_RTUU
+#define TRSM_RTUN STRSM_RTUN
+#define TRSM_RTLU STRSM_RTLU
+#define TRSM_RTLN STRSM_RTLN
+#define TRSM_RRUU STRSM_RNUU
+#define TRSM_RRUN STRSM_RNUN
+#define TRSM_RRLU STRSM_RNLU
+#define TRSM_RRLN STRSM_RNLN
+#define TRSM_RCUU STRSM_RTUU
+#define TRSM_RCUN STRSM_RTUN
+#define TRSM_RCLU STRSM_RTLU
+#define TRSM_RCLN STRSM_RTLN
+#define SYRK_UN SSYRK_UN
+#define SYRK_UT SSYRK_UT
+#define SYRK_LN SSYRK_LN
+#define SYRK_LT SSYRK_LT
+#define SYRK_UR SSYRK_UN
+#define SYRK_UC SSYRK_UT
+#define SYRK_LR SSYRK_LN
+#define SYRK_LC SSYRK_LT
+
+#define SYRK_KERNEL_U SSYRK_KERNEL_U
+#define SYRK_KERNEL_L SSYRK_KERNEL_L
+
+#define HERK_UN SSYRK_UN
+#define HERK_LN SSYRK_LN
+#define HERK_UC SSYRK_UT
+#define HERK_LC SSYRK_LT
+
+#define HER2K_UN SSYR2K_UN
+#define HER2K_LN SSYR2K_LN
+#define HER2K_UC SSYR2K_UT
+#define HER2K_LC SSYR2K_LT
+
+#define SYR2K_UN SSYR2K_UN
+#define SYR2K_UT SSYR2K_UT
+#define SYR2K_LN SSYR2K_LN
+#define SYR2K_LT SSYR2K_LT
+#define SYR2K_UR SSYR2K_UN
+#define SYR2K_UC SSYR2K_UT
+#define SYR2K_LR SSYR2K_LN
+#define SYR2K_LC SSYR2K_LT
+
+#define SYR2K_KERNEL_U SSYR2K_KERNEL_U
+#define SYR2K_KERNEL_L SSYR2K_KERNEL_L
+#define SYRK_THREAD_UN SSYRK_THREAD_UN
+#define SYRK_THREAD_UT SSYRK_THREAD_UT
+#define SYRK_THREAD_LN SSYRK_THREAD_LN
+#define SYRK_THREAD_LT SSYRK_THREAD_LT
+#define SYRK_THREAD_UR SSYRK_THREAD_UR
+#define SYRK_THREAD_UC SSYRK_THREAD_UC
+#define SYRK_THREAD_LR SSYRK_THREAD_LN
+#define SYRK_THREAD_LC SSYRK_THREAD_LT
+
+#define HERK_THREAD_UN SSYRK_THREAD_UN
+#define HERK_THREAD_UT SSYRK_THREAD_UT
+#define HERK_THREAD_LN SSYRK_THREAD_LN
+#define HERK_THREAD_LT SSYRK_THREAD_LT
+#define HERK_THREAD_UR SSYRK_THREAD_UR
+#define HERK_THREAD_UC SSYRK_THREAD_UC
+#define HERK_THREAD_LR SSYRK_THREAD_LN
+#define HERK_THREAD_LC SSYRK_THREAD_LT
+
#else
#define AMAX_K SAMAX_K
#define GEMV_S SGEMV_S
#define GEMV_D SGEMV_D
+
+#define SYMV_U SSYMV_U
+#define SYMV_L SSYMV_L
#define GERU_K SGERU_K
#define GERC_K SGERC_K
#define GERV_K SGERV_K
#define GERD_K SGERD_K
-#define SYMV_U SSYMV_U
-#define SYMV_L SSYMV_L
-
#define SYMV_THREAD_U SSYMV_THREAD_U
#define SYMV_THREAD_L SSYMV_THREAD_L
foreach (float_type ${FLOAT_TYPES})
# a bit of metaprogramming here to pull out the appropriate KERNEL var
string(SUBSTRING ${float_type} 0 1 float_char)
+ if (${float_type} STREQUAL "HALF")
+ set (float_char "SH")
+ endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
if (DEFINED ${float_char}MAXKERNEL)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char)
+ if (${float_type} STREQUAL "HALF")
+ set (float_char "SH")
+ endif ()
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type})
set(USE_TRMM true)
endif ()
- foreach (float_type SINGLE DOUBLE)
+ foreach (float_type SINGLE DOUBLE HALF)
string(SUBSTRING ${float_type} 0 1 float_char)
+ if (${float_type} STREQUAL "HALF")
+ set (float_char "SH")
+ endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
endforeach()
foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char)
+ if (${float_type} STREQUAL "HALF")
+ set (float_char "SH")
+ endif ()
if (${float_char}GEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()
+
# Makefile.LA
if(NOT NO_LAPACK)
foreach (float_type ${FLOAT_TYPES})
+ # Derive the kernel-variable prefix from the current type. foreach() does not
+ # open a new variable scope, so without this line every non-HALF iteration
+ # would reuse whatever float_char an earlier loop last assigned.
+ string(SUBSTRING ${float_type} 0 1 float_char)
+ if (${float_type} STREQUAL "HALF")
+ set (float_char "SH")
+ endif ()
if (NOT DEFINED ${float_char}NEG_TCOPY)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
set(${float_char}NEG_TCOPY ../generic/zneg_tcopy.c)
foreach (float_type ${FLOAT_TYPES})
# a bit of metaprogramming here to pull out the appropriate KERNEL var
string(SUBSTRING ${float_type} 0 1 float_char)
+ if (${float_type} STREQUAL "HALF")
+ set (float_char "SH")
+ endif ()
GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type})
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
endforeach ()