Use POWER6 GEMM, TRMM and DTRSM on 32bit POWER8
author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Tue, 14 Jul 2020 16:11:19 +0000 (18:11 +0200)
committer: GitHub <noreply@github.com>
Tue, 14 Jul 2020 16:11:19 +0000 (18:11 +0200)
kernel/power/KERNEL.POWER8

index 7fba5b4d6c9b3c95b12b31d65fdba8fcbb724a93..dc6646d509a1647d29158113cecf35ab505a9568 100644 (file)
@@ -1,3 +1,51 @@
+ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
+$(info baue power6)
+SGEMMKERNEL    =  gemm_kernel_power6.S
+SGEMMINCOPY    =
+SGEMMITCOPY    =
+SGEMMONCOPY    =  gemm_ncopy_4.S
+SGEMMOTCOPY    =  gemm_tcopy_4.S
+SGEMMINCOPYOBJ =
+SGEMMITCOPYOBJ =
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+DGEMMKERNEL    =  gemm_kernel_power6.S
+DGEMMINCOPY    =
+DGEMMITCOPY    =
+DGEMMONCOPY    =  gemm_ncopy_4.S
+DGEMMOTCOPY    =  gemm_tcopy_4.S
+DGEMMINCOPYOBJ =
+DGEMMITCOPYOBJ =
+DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+CGEMMKERNEL    =  zgemm_kernel_power6.S
+CGEMMINCOPY    =  ../generic/zgemm_ncopy_2.c
+CGEMMITCOPY    =  ../generic/zgemm_tcopy_2.c
+CGEMMONCOPY    =  ../generic/zgemm_ncopy_4.c
+CGEMMOTCOPY    =  ../generic/zgemm_tcopy_4.c
+CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMKERNEL    =  zgemm_kernel_power6.S
+ZGEMMINCOPY    =  ../generic/zgemm_ncopy_2.c
+ZGEMMITCOPY    =  ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPY    =  ../generic/zgemm_ncopy_4.c
+ZGEMMOTCOPY    =  ../generic/zgemm_tcopy_4.c
+ZGEMMINCOPYOBJ =  zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ =  zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
+DTRSMKERNEL_LN  =  trsm_kernel_power6_LN.S
+DTRSMKERNEL_LT  =  trsm_kernel_power6_LT.S
+DTRSMKERNEL_RN  =  trsm_kernel_power6_LT.S
+DTRSMKERNEL_RT  =  trsm_kernel_power6_RT.S
+
+CAXPYKERNEL  = zaxpy.S
+
+else
+
+$(info baue power8)
 #SGEMM_BETA = ../generic/gemm_beta.c
 #DGEMM_BETA = ../generic/gemm_beta.c
 #CGEMM_BETA = ../generic/zgemm_beta.c
@@ -47,16 +95,21 @@ ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
 ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 ZGEMMINCOPYOBJ =  zgemm_incopy$(TSUFFIX).$(SUFFIX)
 ZGEMMITCOPYOBJ =  zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+endif
 
 STRSMKERNEL_LN =  ../generic/trsm_kernel_LN.c
 STRSMKERNEL_LT =  ../generic/trsm_kernel_LT.c
 STRSMKERNEL_RN =  ../generic/trsm_kernel_RN.c
 STRSMKERNEL_RT =  ../generic/trsm_kernel_RT.c
 
-DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
-DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+#DTRSMKERNEL_LN        = ../generic/trsm_kernel_LN.c
+#DTRSMKERNEL_LT        = dtrsm_kernel_LT_16x4_power8.S
+#DTRSMKERNEL_RN        = ../generic/trsm_kernel_RN.c
+#DTRSMKERNEL_RT        = ../generic/trsm_kernel_RT.c
 
 CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
 CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
@@ -153,15 +206,15 @@ ZASUMKERNEL  = zasum.c
 #
 SAXPYKERNEL  = saxpy.c
 DAXPYKERNEL  = daxpy.c
-ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
-ifneq ($(GCCVERSIONGTEQ9),1)
-CAXPYKERNEL  = caxpy_power8.S
-else
-CAXPYKERNEL  = caxpy.c
-endif
-else
-CAXPYKERNEL  = caxpy.c
-endif
+#ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
+#ifneq ($(GCCVERSIONGTEQ9),1)
+#CAXPYKERNEL  = caxpy_power8.S
+#else
+#CAXPYKERNEL  = caxpy.c
+#endif
+#else
+#CAXPYKERNEL  = caxpy.c
+#endif
 #
 ZAXPYKERNEL  = zaxpy.c
 #
@@ -173,7 +226,7 @@ ZCOPYKERNEL  = zcopy.c
 SDOTKERNEL   =  sdot.c
 DDOTKERNEL   =  ddot.c
 DSDOTKERNEL  =  sdot.c
-CDOTKERNEL   =  cdot.c
+CDOTKERNEL   =  ../arm/zdot.c
 ZDOTKERNEL   =  zdot.c
 #
 SNRM2KERNEL  = ../arm/nrm2.c
@@ -183,7 +236,7 @@ ZNRM2KERNEL  = ../arm/znrm2.c
 #
 SROTKERNEL   = srot.c
 DROTKERNEL   = drot.c
-CROTKERNEL   = crot.c
+#CROTKERNEL   = crot.c
 ZROTKERNEL   = zrot.c
 #
 SSCALKERNEL  = sscal.c
@@ -239,4 +292,3 @@ IDAMINKERNEL  = ../arm/iamin.c
 IZAMAXKERNEL  = ../arm/izamax.c
 IZAMINKERNEL  = ../arm/izamin.c
 endif
-