From f8c2697701dfbcc3cba307245aab06134c86f53f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 14 Jul 2020 18:11:19 +0200 Subject: [PATCH] Use POWER6 GEMM, TRMM and DTRSM on 32bit POWER8 --- kernel/power/KERNEL.POWER8 | 84 ++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 16 deletions(-) diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index 7fba5b4d..dc6646d5 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -1,3 +1,51 @@ +ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) +$(info baue power6) +SGEMMKERNEL = gemm_kernel_power6.S +SGEMMINCOPY = +SGEMMITCOPY = +SGEMMONCOPY = gemm_ncopy_4.S +SGEMMOTCOPY = gemm_tcopy_4.S +SGEMMINCOPYOBJ = +SGEMMITCOPYOBJ = +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) +DGEMMKERNEL = gemm_kernel_power6.S +DGEMMINCOPY = +DGEMMITCOPY = +DGEMMONCOPY = gemm_ncopy_4.S +DGEMMOTCOPY = gemm_tcopy_4.S +DGEMMINCOPYOBJ = +DGEMMITCOPYOBJ = +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) +CGEMMKERNEL = zgemm_kernel_power6.S +CGEMMINCOPY = ../generic/zgemm_ncopy_2.c +CGEMMITCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_4.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +ZGEMMKERNEL = zgemm_kernel_power6.S +ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) +DTRSMKERNEL_LN = trsm_kernel_power6_LN.S +DTRSMKERNEL_LT = trsm_kernel_power6_LT.S +DTRSMKERNEL_RN = trsm_kernel_power6_LT.S +DTRSMKERNEL_RT = trsm_kernel_power6_RT.S + +CAXPYKERNEL = zaxpy.S + +else + +$(info baue power8) #SGEMM_BETA = ../generic/gemm_beta.c #DGEMM_BETA = ../generic/gemm_beta.c #CGEMM_BETA = ../generic/zgemm_beta.c @@ -47,16 +95,21 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c -DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S -DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +#DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +#DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S +#DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +#DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c @@ -153,15 +206,15 @@ ZASUMKERNEL = zasum.c # SAXPYKERNEL = saxpy.c DAXPYKERNEL = daxpy.c -ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) -ifneq ($(GCCVERSIONGTEQ9),1) -CAXPYKERNEL = caxpy_power8.S -else -CAXPYKERNEL = caxpy.c -endif -else -CAXPYKERNEL = caxpy.c -endif +#ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) +#ifneq ($(GCCVERSIONGTEQ9),1) +#CAXPYKERNEL = caxpy_power8.S +#else +#CAXPYKERNEL = caxpy.c +#endif +#else +#CAXPYKERNEL = caxpy.c +#endif # ZAXPYKERNEL = zaxpy.c # @@ -173,7 +226,7 @@ ZCOPYKERNEL = zcopy.c SDOTKERNEL = sdot.c DDOTKERNEL = ddot.c DSDOTKERNEL = sdot.c -CDOTKERNEL = cdot.c +CDOTKERNEL = ../arm/zdot.c ZDOTKERNEL = zdot.c # SNRM2KERNEL = ../arm/nrm2.c @@ -183,7 +236,7 @@ ZNRM2KERNEL = ../arm/znrm2.c # SROTKERNEL = srot.c DROTKERNEL = drot.c -CROTKERNEL = crot.c +#CROTKERNEL = crot.c ZROTKERNEL = zrot.c # SSCALKERNEL = sscal.c @@ -239,4 +292,3 @@ IDAMINKERNEL = ../arm/iamin.c IZAMAXKERNEL = ../arm/izamax.c IZAMINKERNEL = ../arm/izamin.c endif - -- 2.34.1