From 22bb50fb8115909ab8ba4a977913cd6adc1b3290 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 17 Apr 2020 13:35:17 -0500 Subject: [PATCH] cmake fixes --- CMakeLists.txt | 6 ++ cmake/kernel.cmake | 39 ++++++++- cmake/utils.cmake | 7 ++ common_macro.h | 213 +++++++++++++++++++++++++++++++++++++++++++++++++- ctest/CMakeLists.txt | 3 + kernel/CMakeLists.txt | 21 ++++- lapack/CMakeLists.txt | 4 + 7 files changed, 287 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9512717..20cf741 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ endif () # set which float types we want to build for if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) # if none are defined, build for all + set(BUILD_HALF true) set(BUILD_SINGLE true) set(BUILD_DOUBLE true) set(BUILD_COMPLEX true) @@ -120,6 +121,11 @@ if (BUILD_COMPLEX16) list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE endif () +if (BUILD_SINGLE OR BUILD_HALF) + message(STATUS "Building Half Precision") + list(APPEND FLOAT_TYPES "HALF") # defines nothing +endif () + if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") endif () diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index 9b238f0..7b64a03 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -113,11 +113,29 @@ macro(SetDefaultL1) set(ZSUMKERNEL zsum.S) set(QSUMKERNEL sum.S) set(XSUMKERNEL zsum.S) + set(SHAMINKERNEL ../arm/amin.c) + set(SHAMAXKERNEL amax.S) + set(SHMAXKERNEL ../arm/max.c) + set(SHMINKERNEL ../arm/min.c) + set(ISHAMAXKERNEL iamax.S) + set(ISHAMINKERNEL ../arm/iamin.c) + set(ISHMAXKERNEL ../arm/imax.c) + set(ISHMINKERNEL ../arm/imin.c) + set(SHASUMKERNEL asum.S) + set(SHAXPYKERNEL axpy.S) + set(SHAXPBYKERNEL ../arm/axpby.c) + set(SHCOPYKERNEL copy.S) + set(SHDOTKERNEL dot.S) + set(SHROTKERNEL rot.S) + set(SHSCALKERNEL scal.S) + set(SHNRM2KERNEL nrm2.S) + set(SHSUMKERNEL sum.S) + set(SHSWAPKERNEL swap.S) endmacro () macro(SetDefaultL2) - set(SGEMVNKERNEL gemv_n.S) - set(SGEMVTKERNEL gemv_t.S) + set(SGEMVNKERNEL ../arm/gemv_n.c) + set(SGEMVTKERNEL ../arm/gemv_t.c) set(DGEMVNKERNEL gemv_n.S) set(DGEMVTKERNEL gemv_t.S) set(CGEMVNKERNEL zgemv_n.S) @@ -161,6 +179,10 @@ macro(SetDefaultL2) set(XHEMV_L_KERNEL ../generic/zhemv_k.c) set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c) + set(SHGEMVNKERNEL ../arm/gemv_n.c) + set(SHGEMVTKERNEL ../arm/gemv_t.c) + set(SHGERKERNEL ../generic/ger.c) + endmacro () macro(SetDefaultL3) @@ -168,4 +190,17 @@ macro(SetDefaultL3) set(DGEADD_KERNEL ../generic/geadd.c) set(CGEADD_KERNEL ../generic/zgeadd.c) set(ZGEADD_KERNEL ../generic/zgeadd.c) + set(SHGEADD_KERNEL ../generic/geadd.c) + set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c) + set(SHGEMM_BETA ../generic/gemm_beta.c) + set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c) + set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c) + set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c) + set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c) + set(SHGEMMINCOPYOBJ shgemm_incopy.o) + set(SHGEMMITCOPYOBJ shgemm_itcopy.o) + set(SHGEMMONCOPYOBJ shgemm_oncopy.o) + set(SHGEMMOTCOPYOBJ shgemm_otcopy.o) + + endmacro () diff --git a/cmake/utils.cmake b/cmake/utils.cmake index fd93f8a..831ddff 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -163,6 +163,7 @@ function(GenerateNamedObjects sources_in) if (complex_only) list(REMOVE_ITEM float_list "SINGLE") list(REMOVE_ITEM float_list "DOUBLE") + list(REMOVE_ITEM float_list "HALF") elseif (real_only) list(REMOVE_ITEM float_list "COMPLEX") list(REMOVE_ITEM float_list "ZCOMPLEX") @@ -176,6 +177,9 @@ function(GenerateNamedObjects sources_in) if (NOT no_float_type) string(SUBSTRING ${float_type} 0 1 float_char) string(TOLOWER ${float_char} float_char) + if (${float_type} STREQUAL "HALF") + set (float_char "sh") + endif () endif () if (NOT name_in) @@ -210,6 +214,9 @@ function(GenerateNamedObjects sources_in) if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX") list(APPEND obj_defines "DOUBLE") endif () + if (${float_type} STREQUAL "HALF") + list(APPEND obj_defines "HALF") + endif () if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") list(APPEND obj_defines "COMPLEX") if (mangle_complex_sources) diff --git a/common_macro.h b/common_macro.h index b438c83..2166e62 100644 --- a/common_macro.h +++ b/common_macro.h @@ -646,6 +646,19 @@ #elif defined(HALF) +#define AXPYU_K SAXPYU_K +#define AXPYC_K SAXPYC_K +#define SCAL_K SSCAL_K +#define GEMV_N SGEMV_N +#define GEMV_T SGEMV_T +#define SYMV_U SSYMV_U +#define SYMV_L SSYMV_L +#define GERU_K SGERU_K +#define GERC_K SGERC_K +#define GERV_K SGERV_K +#define GERD_K SGERD_K +#define SYMV_THREAD_U SSYMV_THREAD_U +#define SYMV_THREAD_L SSYMV_THREAD_L #define GEMM_BETA SHGEMM_BETA #define GEMM_KERNEL_N SHGEMM_KERNEL #define GEMM_KERNEL_L SHGEMM_KERNEL @@ -672,6 +685,20 @@ #define GEMM_OTCOPY SHGEMM_OTCOPY #define GEMM_INCOPY SHGEMM_INCOPY #define GEMM_ITCOPY SHGEMM_ITCOPY +#define SYMM_THREAD_LU SSYMM_THREAD_LU +#define SYMM_THREAD_LL SSYMM_THREAD_LL +#define SYMM_THREAD_RU SSYMM_THREAD_RU +#define SYMM_THREAD_RL SSYMM_THREAD_RL +#define SYMM_LU SSYMM_LU +#define SYMM_LL SSYMM_LL +#define SYMM_RU SSYMM_RU +#define SYMM_RL SSYMM_RL + + +#define HEMM_THREAD_LU SHEMM_THREAD_LU +#define HEMM_THREAD_LL SHEMM_THREAD_LL +#define HEMM_THREAD_RU SHEMM_THREAD_RU +#define HEMM_THREAD_RL SHEMM_THREAD_RL #define GEMM_THREAD_NN SHGEMM_THREAD_NN #define GEMM_THREAD_CN SHGEMM_THREAD_TN @@ -690,6 +717,186 @@ #define GEMM_THREAD_RC SHGEMM_THREAD_NT #define GEMM_THREAD_RR SHGEMM_THREAD_NN +#ifdef UNIT + +#define TRMM_OUNCOPY STRMM_OUNUCOPY +#define TRMM_OUTCOPY STRMM_OUTUCOPY +#define TRMM_OLNCOPY STRMM_OLNUCOPY +#define TRMM_OLTCOPY STRMM_OLTUCOPY +#define TRSM_OUNCOPY STRSM_OUNUCOPY +#define TRSM_OUTCOPY STRSM_OUTUCOPY +#define TRSM_OLNCOPY STRSM_OLNUCOPY +#define TRSM_OLTCOPY STRSM_OLTUCOPY + +#define TRMM_IUNCOPY STRMM_IUNUCOPY +#define TRMM_IUTCOPY STRMM_IUTUCOPY +#define TRMM_ILNCOPY STRMM_ILNUCOPY +#define TRMM_ILTCOPY STRMM_ILTUCOPY +#define TRSM_IUNCOPY STRSM_IUNUCOPY +#define TRSM_IUTCOPY STRSM_IUTUCOPY +#define TRSM_ILNCOPY STRSM_ILNUCOPY +#define TRSM_ILTCOPY STRSM_ILTUCOPY + +#else + +#define TRMM_OUNCOPY STRMM_OUNNCOPY +#define TRMM_OUTCOPY STRMM_OUTNCOPY +#define TRMM_OLNCOPY STRMM_OLNNCOPY +#define TRMM_OLTCOPY STRMM_OLTNCOPY +#define TRSM_OUNCOPY STRSM_OUNNCOPY +#define TRSM_OUTCOPY STRSM_OUTNCOPY +#define TRSM_OLNCOPY STRSM_OLNNCOPY +#define TRSM_OLTCOPY STRSM_OLTNCOPY + +#define TRMM_IUNCOPY STRMM_IUNNCOPY +#define TRMM_IUTCOPY STRMM_IUTNCOPY +#define TRMM_ILNCOPY STRMM_ILNNCOPY +#define TRMM_ILTCOPY STRMM_ILTNCOPY +#define TRSM_IUNCOPY STRSM_IUNNCOPY +#define TRSM_IUTCOPY STRSM_IUTNCOPY +#define TRSM_ILNCOPY STRSM_ILNNCOPY +#define TRSM_ILTCOPY STRSM_ILTNCOPY + +#define TRMM_KERNEL_LN STRMM_KERNEL_LN +#define TRMM_KERNEL_LT STRMM_KERNEL_LT +#define TRMM_KERNEL_LR STRMM_KERNEL_LN +#define TRMM_KERNEL_LC STRMM_KERNEL_LT +#define TRMM_KERNEL_RN STRMM_KERNEL_RN +#define TRMM_KERNEL_RT STRMM_KERNEL_RT +#define TRMM_KERNEL_RR STRMM_KERNEL_RN +#define TRMM_KERNEL_RC STRMM_KERNEL_RT + +#define TRSM_KERNEL_LN STRSM_KERNEL_LN +#define TRSM_KERNEL_LT STRSM_KERNEL_LT +#define TRSM_KERNEL_LR STRSM_KERNEL_LN +#define TRSM_KERNEL_LC STRSM_KERNEL_LT +#define TRSM_KERNEL_RN STRSM_KERNEL_RN +#define TRSM_KERNEL_RT STRSM_KERNEL_RT +#define TRSM_KERNEL_RR STRSM_KERNEL_RN +#define TRSM_KERNEL_RC STRSM_KERNEL_RT + +#define SYMM_IUTCOPY SSYMM_IUTCOPY +#define SYMM_ILTCOPY SSYMM_ILTCOPY +#define SYMM_OUTCOPY SSYMM_OUTCOPY +#define SYMM_OLTCOPY SSYMM_OLTCOPY +#define TRMM_LNUU STRMM_LNUU +#define TRMM_LNUN STRMM_LNUN +#define TRMM_LNLU STRMM_LNLU +#define TRMM_LNLN STRMM_LNLN +#define TRMM_LTUU STRMM_LTUU +#define TRMM_LTUN STRMM_LTUN +#define TRMM_LTLU STRMM_LTLU +#define TRMM_LTLN STRMM_LTLN +#define TRMM_LRUU STRMM_LNUU +#define TRMM_LRUN STRMM_LNUN +#define TRMM_LRLU STRMM_LNLU +#define TRMM_LRLN STRMM_LNLN +#define TRMM_LCUU STRMM_LTUU +#define TRMM_LCUN STRMM_LTUN +#define TRMM_LCLU STRMM_LTLU +#define TRMM_LCLN STRMM_LTLN +#define TRMM_RNUU STRMM_RNUU +#define TRMM_RNUN STRMM_RNUN +#define TRMM_RNLU STRMM_RNLU +#define TRMM_RNLN STRMM_RNLN +#define TRMM_RTUU STRMM_RTUU +#define TRMM_RTUN STRMM_RTUN +#define TRMM_RTLU STRMM_RTLU +#define TRMM_RTLN STRMM_RTLN +#define TRMM_RRUU STRMM_RNUU +#define TRMM_RRUN STRMM_RNUN +#define TRMM_RRLU STRMM_RNLU +#define TRMM_RRLN STRMM_RNLN +#define TRMM_RCUU STRMM_RTUU +#define TRMM_RCUN STRMM_RTUN +#define TRMM_RCLU STRMM_RTLU +#define TRMM_RCLN STRMM_RTLN + +#define TRSM_LNUU STRSM_LNUU +#define TRSM_LNUN STRSM_LNUN +#define TRSM_LNLU STRSM_LNLU +#define TRSM_LNLN STRSM_LNLN +#define TRSM_LTUU STRSM_LTUU +#define TRSM_LTUN STRSM_LTUN +#define TRSM_LTLU STRSM_LTLU +#define TRSM_LTLN STRSM_LTLN +#define TRSM_LRUU STRSM_LNUU +#define TRSM_LRUN STRSM_LNUN +#define TRSM_LRLU STRSM_LNLU +#define TRSM_LRLN STRSM_LNLN +#define TRSM_LCUU STRSM_LTUU +#define TRSM_LCUN STRSM_LTUN +#define TRSM_LCLU STRSM_LTLU +#define TRSM_LCLN STRSM_LTLN +#define TRSM_RNUU STRSM_RNUU +#define TRSM_RNUN STRSM_RNUN +#define TRSM_RNLU STRSM_RNLU +#define TRSM_RNLN STRSM_RNLN +#define TRSM_RTUU STRSM_RTUU +#define TRSM_RTUN STRSM_RTUN +#define TRSM_RTLU STRSM_RTLU +#define TRSM_RTLN STRSM_RTLN +#define TRSM_RRUU STRSM_RNUU +#define TRSM_RRUN STRSM_RNUN +#define TRSM_RRLU STRSM_RNLU +#define TRSM_RRLN STRSM_RNLN +#define TRSM_RCUU STRSM_RTUU +#define TRSM_RCUN STRSM_RTUN +#define TRSM_RCLU STRSM_RTLU +#define TRSM_RCLN STRSM_RTLN +#define SYRK_UN SSYRK_UN +#define SYRK_UT SSYRK_UT +#define SYRK_LN SSYRK_LN +#define SYRK_LT SSYRK_LT +#define SYRK_UR SSYRK_UN +#define SYRK_UC SSYRK_UT +#define SYRK_LR SSYRK_LN +#define SYRK_LC SSYRK_LT + +#define SYRK_KERNEL_U SSYRK_KERNEL_U +#define SYRK_KERNEL_L SSYRK_KERNEL_L + +#define HERK_UN SSYRK_UN +#define HERK_LN SSYRK_LN +#define HERK_UC SSYRK_UT +#define HERK_LC SSYRK_LT + +#define HER2K_UN SSYR2K_UN +#define HER2K_LN SSYR2K_LN +#define HER2K_UC SSYR2K_UT +#define HER2K_LC SSYR2K_LT + +#define SYR2K_UN SSYR2K_UN +#define SYR2K_UT SSYR2K_UT +#define SYR2K_LN SSYR2K_LN +#define SYR2K_LT SSYR2K_LT +#define SYR2K_UR SSYR2K_UN +#define SYR2K_UC SSYR2K_UT +#define SYR2K_LR SSYR2K_LN +#define SYR2K_LC SSYR2K_LT + +#define SYR2K_KERNEL_U SSYR2K_KERNEL_U +#define SYR2K_KERNEL_L SSYR2K_KERNEL_L +#define SYRK_THREAD_UN SSYRK_THREAD_UN +#define SYRK_THREAD_UT SSYRK_THREAD_UT +#define SYRK_THREAD_LN SSYRK_THREAD_LN +#define SYRK_THREAD_LT SSYRK_THREAD_LT +#define SYRK_THREAD_UR SSYRK_THREAD_UR +#define SYRK_THREAD_UC SSYRK_THREAD_UC +#define SYRK_THREAD_LR SSYRK_THREAD_LN +#define SYRK_THREAD_LC SSYRK_THREAD_LT + +#define HERK_THREAD_UN SSYRK_THREAD_UN +#define HERK_THREAD_UT SSYRK_THREAD_UT +#define HERK_THREAD_LN SSYRK_THREAD_LN +#define HERK_THREAD_LT SSYRK_THREAD_LT +#define HERK_THREAD_UR SSYRK_THREAD_UR +#define HERK_THREAD_UC SSYRK_THREAD_UC +#define HERK_THREAD_LR SSYRK_THREAD_LN +#define HERK_THREAD_LC SSYRK_THREAD_LT + +#endif + #else #define AMAX_K SAMAX_K @@ -721,14 +928,14 @@ #define GEMV_S SGEMV_S #define GEMV_D SGEMV_D + +#define SYMV_U SSYMV_U +#define SYMV_L SSYMV_L #define GERU_K SGERU_K #define GERC_K SGERC_K #define GERV_K SGERV_K #define GERD_K SGERD_K -#define SYMV_U SSYMV_U -#define SYMV_L SSYMV_L - #define SYMV_THREAD_U SSYMV_THREAD_U #define SYMV_THREAD_L SSYMV_THREAD_L diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt index 14c9d19..8d301c2 100644 --- a/ctest/CMakeLists.txt +++ b/ctest/CMakeLists.txt @@ -12,6 +12,9 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh foreach(float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char_upper) string(TOLOWER ${float_char_upper} float_char) + if (${float_char} STREQUAL "h") + continue() + endif() #level1 add_executable(x${float_char}cblat1 c_${float_char}blat1.f diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 35e0fff..4113a16 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -41,6 +41,9 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) foreach (float_type ${FLOAT_TYPES}) # a bit of metaprogramming here to pull out the appropriate KERNEL var string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "HALF") + set (float_char "SH") + endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) if (DEFINED ${float_char}MAXKERNEL) @@ -93,6 +96,9 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "HALF") + set (float_char "SH") + endif () if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) @@ -128,13 +134,19 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) set(USE_TRMM true) endif () - foreach (float_type SINGLE DOUBLE) + foreach (float_type SINGLE DOUBLE HALF) string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "HALF") + set (float_char "SH") + endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) endforeach() foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "HALF") + set (float_char "SH") + endif () if (${float_char}GEMMINCOPY) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) endif () @@ -470,9 +482,13 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) endforeach () + # Makefile.LA if(NOT NO_LAPACK) foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "HALF") + set (float_char "SH") + endif () if (NOT DEFINED ${float_char}NEG_TCOPY) if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") set(${float_char}NEG_TCOPY ../generic/zneg_tcopy.c) @@ -516,6 +532,9 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) foreach (float_type ${FLOAT_TYPES}) # a bit of metaprogramming here to pull out the appropriate KERNEL var string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "HALF") + set (float_char "SH") + endif () GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type}) GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) endforeach () diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index e21a9aa..778e6f8 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -2,6 +2,7 @@ include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_BINARY_DIR}) +list (REMOVE_ITEM FLOAT_TYPES "HALF") set(LAPACK_SOURCES potrf/potrf_U_single.c @@ -45,6 +46,9 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) foreach (float_type ${FLOAT_TYPES}) +if (${float_type} STREQUAL "HALF") + continue() +endif() GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type}) endforeach () -- 2.7.4