Define alternate kernels for big-endian PPC970
authorMartin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Sun, 17 Nov 2019 14:19:39 +0000 (15:19 +0100)
committerGitHub <noreply@github.com>
Sun, 17 Nov 2019 14:19:39 +0000 (15:19 +0100)
The altivec versions of SGEMM and CGEMM fail most test in LAPACK-TESTING when compiled for big endian, STRSM/CTRSM even cause segfaults. The rot kernels either fail the corresponding utest or lead to failures in LAPACK-TESTING.

kernel/power/KERNEL.PPC970

index 7431a77..de30977 100644 (file)
@@ -1,3 +1,14 @@
+ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
+SGEMMKERNEL    =  gemm_kernel.S
+SGEMMINCOPY    =  
+SGEMMITCOPY    =  
+SGEMMONCOPY    =  ../generic/gemm_ncopy_4.c
+SGEMMOTCOPY    =  ../generic/gemm_tcopy_4.c
+SGEMMINCOPYOBJ =  
+SGEMMITCOPYOBJ =  
+SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+else
 SGEMMKERNEL    =  gemm_kernel_altivec.S
 SGEMMINCOPY    =  ../generic/gemm_ncopy_16.c
 SGEMMITCOPY    =  ../generic/gemm_tcopy_16.c
@@ -7,6 +18,8 @@ SGEMMINCOPYOBJ =  sgemm_incopy$(TSUFFIX).$(SUFFIX)
 SGEMMITCOPYOBJ =  sgemm_itcopy$(TSUFFIX).$(SUFFIX)
 SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
 SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+endif
+
 DGEMMKERNEL    =  gemm_kernel.S
 DGEMMINCOPY    =
 DGEMMITCOPY    =
@@ -16,6 +29,18 @@ DGEMMINCOPYOBJ =
 DGEMMITCOPYOBJ =
 DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
 DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
+CGEMMKERNEL    =  zgemm_kernel.S
+CGEMMINCOPY    =
+CGEMMITCOPY    =
+CGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
+CGEMMINCOPYOBJ =
+CGEMMITCOPYOBJ =
+CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+else
 CGEMMKERNEL    =  zgemm_kernel_altivec.S
 CGEMMINCOPY    =  ../generic/zgemm_ncopy_8.c
 CGEMMITCOPY    =  ../generic/zgemm_tcopy_8.c
@@ -25,6 +50,8 @@ CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
 CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
 CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
 CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+endif
+
 ZGEMMKERNEL    =  zgemm_kernel.S
 ZGEMMINCOPY    =
 ZGEMMITCOPY    =
@@ -35,22 +62,30 @@ ZGEMMITCOPYOBJ =
 ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
 ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
-#STRSMKERNEL_LN        =  trsm_kernel_LN.S
-#STRSMKERNEL_LT        =  trsm_kernel_LT.S
-#STRSMKERNEL_RN        =  trsm_kernel_LT.S
-#STRSMKERNEL_RT        =  trsm_kernel_RT.S
-
 DTRSMKERNEL_LN =  trsm_kernel_LN.S
 DTRSMKERNEL_LT =  trsm_kernel_LT.S
 DTRSMKERNEL_RN =  trsm_kernel_LT.S
 DTRSMKERNEL_RT =  trsm_kernel_RT.S
 
-#CTRSMKERNEL_LN        =  ztrsm_kernel_LN.S
-#CTRSMKERNEL_LT        =  ztrsm_kernel_LT.S
-#CTRSMKERNEL_RN        =  ztrsm_kernel_LT.S
-#CTRSMKERNEL_RT        =  ztrsm_kernel_RT.S
-
 ZTRSMKERNEL_LN =  ztrsm_kernel_LN.S
 ZTRSMKERNEL_LT =  ztrsm_kernel_LT.S
 ZTRSMKERNEL_RN =  ztrsm_kernel_LT.S
 ZTRSMKERNEL_RT =  ztrsm_kernel_RT.S
+
+ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
+STRSMKERNEL_LN =  trsm_kernel_LN.S
+STRSMKERNEL_LT =  trsm_kernel_LT.S
+STRSMKERNEL_RN =  trsm_kernel_LT.S
+STRSMKERNEL_RT =  trsm_kernel_RT.S
+
+CTRSMKERNEL_LN =  ztrsm_kernel_LN.S
+CTRSMKERNEL_LT =  ztrsm_kernel_LT.S
+CTRSMKERNEL_RN =  ztrsm_kernel_LT.S
+CTRSMKERNEL_RT =  ztrsm_kernel_RT.S
+
+
+SROTKERNEL   = ../arm/rot.c
+DROTKERNEL   = ../arm/rot.c
+CROTKERNEL   = ../arm/zrot.c
+ZROTKERNEL   = ../arm/zrot.c
+endif