#define SYMV_P 8
-#define SWITCH_RATIO 4
+#define SWITCH_RATIO 16
#ifdef ARCH_X86
#define SYMV_P 8
#define SWITCH_RATIO 32
+#define GEMM_PREFERED_SIZE 16
#ifdef ARCH_X86
#define SYMV_P 8
#define SWITCH_RATIO 32
+#define GEMM_PREFERED_SIZE 32
+#define USE_SGEMM_KERNEL_DIRECT 1
#ifdef ARCH_X86
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
-#ifdef OS_LINUX
+#if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
#if L2_SIZE == 1024976
#define SGEMM_DEFAULT_P 320
#define DGEMM_DEFAULT_P 256
#endif
+#if defined(POWER9) // GEMM offset/alignment, per-type unroll factors, P/Q defaults and SYMV_P used when POWER9 is defined
+
+#define SNUMOPT 16
+#define DNUMOPT 8
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 65536
+#define GEMM_DEFAULT_ALIGN 0x0ffffUL
+
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define SGEMM_DEFAULT_UNROLL_N 8
+#define DGEMM_DEFAULT_UNROLL_M 16
+#define DGEMM_DEFAULT_UNROLL_N 4
+#define CGEMM_DEFAULT_UNROLL_M 8
+#define CGEMM_DEFAULT_UNROLL_N 4
+#define ZGEMM_DEFAULT_UNROLL_M 8
+#define ZGEMM_DEFAULT_UNROLL_N 2
+
+#define SGEMM_DEFAULT_P 832
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 512
+#define ZGEMM_DEFAULT_P 256
+
+#define SGEMM_DEFAULT_Q 1026
+#define DGEMM_DEFAULT_Q 384
+#define CGEMM_DEFAULT_Q 1026
+#define ZGEMM_DEFAULT_Q 1026
+
+#define SYMV_P 8
+
+#endif // POWER9
#if defined(SPARC) && defined(V7)
#define SYMV_P 16
#endif
+// Common ARMv8 parameters
+#if defined(ARMV8)
-#if defined(CORTEXA57)
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
-#define SGEMM_DEFAULT_UNROLL_M 16
-#define SGEMM_DEFAULT_UNROLL_N 4
-
-#define DGEMM_DEFAULT_UNROLL_M 8
-#define DGEMM_DEFAULT_UNROLL_N 4
-
-#define CGEMM_DEFAULT_UNROLL_M 8
-#define CGEMM_DEFAULT_UNROLL_N 4
-
-#define ZGEMM_DEFAULT_UNROLL_M 4
-#define ZGEMM_DEFAULT_UNROLL_N 4
-
-#define SGEMM_DEFAULT_P 512
-#define DGEMM_DEFAULT_P 256
-#define CGEMM_DEFAULT_P 256
-#define ZGEMM_DEFAULT_P 128
-
-#define SGEMM_DEFAULT_Q 1024
-#define DGEMM_DEFAULT_Q 512
-#define CGEMM_DEFAULT_Q 512
-#define ZGEMM_DEFAULT_Q 512
-
-#define SGEMM_DEFAULT_R 4096
-#define DGEMM_DEFAULT_R 4096
-#define CGEMM_DEFAULT_R 4096
-#define ZGEMM_DEFAULT_R 2048
-
-
#define SYMV_P 16
-#endif
-
-#if defined(ARMV8)
+// Darwin / Cross
#if defined(OS_DARWIN) && defined(CROSS)
-#define SNUMOPT 2
-#define DNUMOPT 2
-
-#define GEMM_DEFAULT_OFFSET_A 0
-#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
-#define SYMV_P 16
-#else
+#else // Linux / Native
-#define SNUMOPT 2
-#define DNUMOPT 2
-
-#define GEMM_DEFAULT_OFFSET_A 0
-#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#if defined(CORTEXA53) || defined(CORTEXA57) || \
+ defined(CORTEXA72) || defined(CORTEXA73) || \
+ defined(FALKOR) || defined(TSV110)
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
-#define SGEMM_DEFAULT_P sgemm_p
-#define DGEMM_DEFAULT_P dgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
-
-#define SGEMM_DEFAULT_Q sgemm_q
-#define DGEMM_DEFAULT_Q dgemm_q
-#define CGEMM_DEFAULT_Q cgemm_q
-#define ZGEMM_DEFAULT_Q zgemm_q
-
-#define SGEMM_DEFAULT_R sgemm_r
-#define DGEMM_DEFAULT_R dgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
-
-#define SYMV_P 16
-#endif
+#define SGEMM_DEFAULT_P 512
+#define DGEMM_DEFAULT_P 256
+#define CGEMM_DEFAULT_P 256
+#define ZGEMM_DEFAULT_P 128
-#endif
+#define SGEMM_DEFAULT_Q 1024
+#define DGEMM_DEFAULT_Q 512
+#define CGEMM_DEFAULT_Q 512
+#define ZGEMM_DEFAULT_Q 512
-#if defined(THUNDERX)
-#define SNUMOPT 2
-#define DNUMOPT 2
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 2048
-#define GEMM_DEFAULT_OFFSET_A 0
-#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#elif defined(THUNDERX)
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
+#elif defined(THUNDERX2T99)
-#define SYMV_P 16
-#endif
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define SGEMM_DEFAULT_UNROLL_N 4
-#if defined(THUNDERX2T99) || defined(VULCAN)
-#define SNUMOPT 2
-#define DNUMOPT 2
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 4
-#define GEMM_DEFAULT_OFFSET_A 0
-#define GEMM_DEFAULT_OFFSET_B 0
-#define GEMM_DEFAULT_ALIGN 0x03fffUL
+#define CGEMM_DEFAULT_UNROLL_M 8
+#define CGEMM_DEFAULT_UNROLL_N 4
+
+#define ZGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_N 4
+
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 160
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
+
+#define SGEMM_DEFAULT_Q 352
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
+
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+#else // Other/undetected ARMv8 cores
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
-#define SGEMM_DEFAULT_P sgemm_p
-#define DGEMM_DEFAULT_P dgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 160
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
-#define SGEMM_DEFAULT_Q sgemm_q
-#define DGEMM_DEFAULT_Q dgemm_q
-#define CGEMM_DEFAULT_Q cgemm_q
-#define ZGEMM_DEFAULT_Q zgemm_q
+#define SGEMM_DEFAULT_Q 352
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
-#define SGEMM_DEFAULT_R sgemm_r
-#define DGEMM_DEFAULT_R dgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
-#define SYMV_P 16
-#endif
+#endif // Cores
+
+#endif // Darwin / Cross vs. Linux / Native
+
+#endif // ARMv8
#if defined(ARMV5)
#define SNUMOPT 2
#endif
+#if defined(Z14) // GEMM offset/alignment, per-type unroll factors, P/Q/R defaults and SYMV_P used when Z14 is defined
+#define SNUMOPT 2
+#define DNUMOPT 2
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M 8
+#define SGEMM_DEFAULT_UNROLL_N 4
+
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 4
+
+#define CGEMM_DEFAULT_UNROLL_M 4
+#define CGEMM_DEFAULT_UNROLL_N 4
+
+#define ZGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_N 4
+
+#define SGEMM_DEFAULT_P 456
+#define DGEMM_DEFAULT_P 320
+#define CGEMM_DEFAULT_P 480
+#define ZGEMM_DEFAULT_P 224
+
+#define SGEMM_DEFAULT_Q 488
+#define DGEMM_DEFAULT_Q 384
+#define CGEMM_DEFAULT_Q 128
+#define ZGEMM_DEFAULT_Q 352
+
+#define SGEMM_DEFAULT_R 8192
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 2048
+
+
+#define SYMV_P 16
+#endif // Z14
+
+
#ifdef GENERIC