Use "generic" S/CGEMM unroll M on big-endian PPC970
author Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Sun, 17 Nov 2019 14:10:26 +0000 (15:10 +0100)
committer GitHub <noreply@github.com>
Sun, 17 Nov 2019 14:10:26 +0000 (15:10 +0100)
This is needed because the respective PPC970 "altivec" kernels give wrong results when compiled for big-endian targets.

param.h

diff --git a/param.h b/param.h
index 1cf4137..9dc94c4 100644 (file)
--- a/param.h
+++ b/param.h
@@ -1990,11 +1990,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GEMM_DEFAULT_OFFSET_B 3072
 #define GEMM_DEFAULT_ALIGN 0x03fffUL
 
+#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define SGEMM_DEFAULT_UNROLL_M 4
+#else
 #define SGEMM_DEFAULT_UNROLL_M 16
+#endif
 #define SGEMM_DEFAULT_UNROLL_N 4
 #define DGEMM_DEFAULT_UNROLL_M 4
 #define DGEMM_DEFAULT_UNROLL_N 4
+#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define CGEMM_DEFAULT_UNROLL_M 2
+#else
 #define CGEMM_DEFAULT_UNROLL_M 8
+#endif
 #define CGEMM_DEFAULT_UNROLL_N 2
 #define ZGEMM_DEFAULT_UNROLL_M 2
 #define ZGEMM_DEFAULT_UNROLL_N 2