#ifndef PARAM_H
#define PARAM_H
+#include "common.h"
+
+ #define SBGEMM_DEFAULT_UNROLL_N 4
+ #define SBGEMM_DEFAULT_UNROLL_M 8
+ #define SBGEMM_DEFAULT_UNROLL_MN 32
+ #define SBGEMM_DEFAULT_P 256
+ #define SBGEMM_DEFAULT_R 256
+ #define SBGEMM_DEFAULT_Q 256
#ifdef OPTERON
#define SNUMOPT 4
#define GEMM_DEFAULT_OFFSET_A 2688
#define GEMM_DEFAULT_OFFSET_B 3072
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
+ #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #define SGEMM_DEFAULT_UNROLL_M 4
+ #else
#define SGEMM_DEFAULT_UNROLL_M 16
+ #endif
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 65536
- #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
-#define GEMM_DEFAULT_ALIGN 0x0ffffUL
+
++#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
+ #if defined(__32BIT__)
+ #warning using BINARY32==POWER6
+ #define SGEMM_DEFAULT_UNROLL_M 4
+ #define SGEMM_DEFAULT_UNROLL_N 4
+ #define DGEMM_DEFAULT_UNROLL_M 4
+ #define DGEMM_DEFAULT_UNROLL_N 4
+ #define CGEMM_DEFAULT_UNROLL_M 2
+ #define CGEMM_DEFAULT_UNROLL_N 4
+ #define ZGEMM_DEFAULT_UNROLL_M 2
+ #define ZGEMM_DEFAULT_UNROLL_N 4
+ #else
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 16
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
+ #ifdef HAVE_MSA
+ #define SGEMM_DEFAULT_UNROLL_M 8
+ #define SGEMM_DEFAULT_UNROLL_N 8
+
+ #define DGEMM_DEFAULT_UNROLL_M 8
+ #define DGEMM_DEFAULT_UNROLL_N 4
+
+ #define CGEMM_DEFAULT_UNROLL_M 8
+ #define CGEMM_DEFAULT_UNROLL_N 4
+
+ #define ZGEMM_DEFAULT_UNROLL_M 4
+ #define ZGEMM_DEFAULT_UNROLL_N 4
+ #else
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_N 4
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
- #define SGEMM_DEFAULT_UNROLL_M 2
- #define SGEMM_DEFAULT_UNROLL_N 2
+ #define SGEMM_DEFAULT_UNROLL_M 8
+ #define SGEMM_DEFAULT_UNROLL_N 4
- #define DGEMM_DEFAULT_UNROLL_M 2
- #define DGEMM_DEFAULT_UNROLL_N 2
+ #define DGEMM_DEFAULT_UNROLL_M 4
+ #define DGEMM_DEFAULT_UNROLL_N 4
- #define CGEMM_DEFAULT_UNROLL_M 2
+ #define CGEMM_DEFAULT_UNROLL_M 4
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define SYMV_P 16
#endif
- #ifdef ARMV7
- #define SNUMOPT 2
- #define DNUMOPT 2
-
+ #ifdef RISCV64_GENERIC
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
- #define SGEMM_DEFAULT_UNROLL_M 4
- #define SGEMM_DEFAULT_UNROLL_N 4
+ #define SGEMM_DEFAULT_UNROLL_M 2
+ #define SGEMM_DEFAULT_UNROLL_N 2
- #define DGEMM_DEFAULT_UNROLL_M 4
- #define DGEMM_DEFAULT_UNROLL_N 4
+ #define DGEMM_DEFAULT_UNROLL_M 2
+ #define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 4
- #define SGEMM_DEFAULT_UNROLL_N 2
+ #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
- #define DGEMM_DEFAULT_UNROLL_N 2
+ #define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
- #define SYMV_P 16
-
- // Darwin / Cross
- #if defined(OS_DARWIN) && defined(CROSS)
-
- #define SGEMM_DEFAULT_UNROLL_M 2
+ #define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 2
- #define DGEMM_DEFAULT_UNROLL_M 2
+ #define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2