From 6082e556cd990fc4d13e89d83db403b79d771e52 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Sun, 17 Nov 2019 15:10:26 +0100
Subject: [PATCH] Use "generic" S/CGEMM unroll M on big-endian PPC970 as the respective PPC970 "altivec" kernels give wrong results when compiled for big endian

---
Reviewer notes (free text between the "---" separator and the diffstat; it is
commentary only and is not part of the change itself):

 * In the hunk below, SGEMM_DEFAULT_UNROLL_M and CGEMM_DEFAULT_UNROLL_M each
   become conditional on the compiler-provided __BYTE_ORDER__ macro: they are
   4 and 2 when it equals __ORDER_BIG_ENDIAN__, otherwise they keep the
   previous values 16 and 8. No other macro in the hunk is changed.
 * Because of the defined(__BYTE_ORDER__) guard, a compiler that does not
   provide that macro takes the #else branch and keeps 16 / 8, whatever the
   real byte order of the target is.
 * The hunk itself contains no PPC970 test; that this region of param.h is
   the PPC970 section is taken from the subject line only -- TODO confirm
   against the enclosing #if in param.h.
 * NOTE(review): the hunk header declares 11 old / 19 new lines. That count
   only works with one empty context line, assumed here to sit between
   GEMM_DEFAULT_ALIGN and SGEMM_DEFAULT_UNROLL_M -- confirm against param.h.
 * NOTE(review): runs of spaces inside the context lines may have been
   collapsed in this copy; if the patch does not apply cleanly, compare the
   context lines with param.h or apply with whitespace tolerance.

 param.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/param.h b/param.h
index 1cf4137..9dc94c4 100644
--- a/param.h
+++ b/param.h
@@ -1990,11 +1990,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GEMM_DEFAULT_OFFSET_B 3072
 #define GEMM_DEFAULT_ALIGN 0x03fffUL
 
+#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define SGEMM_DEFAULT_UNROLL_M 4
+#else
 #define SGEMM_DEFAULT_UNROLL_M 16
+#endif
 #define SGEMM_DEFAULT_UNROLL_N 4
 #define DGEMM_DEFAULT_UNROLL_M 4
 #define DGEMM_DEFAULT_UNROLL_N 4
+#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define CGEMM_DEFAULT_UNROLL_M 2
+#else
 #define CGEMM_DEFAULT_UNROLL_M 8
+#endif
 #define CGEMM_DEFAULT_UNROLL_N 2
 #define ZGEMM_DEFAULT_UNROLL_M 2
 #define ZGEMM_DEFAULT_UNROLL_N 2
-- 
2.7.4