ifeq ($(TARGET), COOPERLAKE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
+ifeq ($(TARGET), SAPPHIRERAPIDS)
+GETARCH_FLAGS := -DFORCE_NEHALEM
+endif
ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), COOPERLAKE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
+ifeq ($(TARGET_CORE), SAPPHIRERAPIDS)
+GETARCH_FLAGS := -DFORCE_NEHALEM
+endif
ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
endif
endif
+ifeq ($(CORE), SAPPHIRERAPIDS)
+ifndef NO_AVX512
+ifeq ($(C_COMPILER), GCC)
+# sapphire rapids support was added in 11
+ifeq ($(GCCVERSIONGTEQ11), 1)
+CCOMMON_OPT += -march=sapphirerapids
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=sapphirerapids
+endif
+endif
+endif
+ifeq ($(OSNAME), CYGWIN_NT)
+CCOMMON_OPT += -fno-asynchronous-unwind-tables
+FCOMMON_OPT += -fno-asynchronous-unwind-tables
+endif
+ifeq ($(OSNAME), WINNT)
+ifeq ($(C_COMPILER), GCC)
+CCOMMON_OPT += -fno-asynchronous-unwind-tables
+FCOMMON_OPT += -fno-asynchronous-unwind-tables
+endif
+endif
+endif
+endif
+
ifdef HAVE_AVX2
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
SKYLAKEX
ATOM
COOPERLAKE
+SAPPHIRERAPIDS
b)AMD CPU:
ATHLON
endif ()
endif ()
+if (${CORE} STREQUAL SAPPHIRERAPIDS)
+ if (NOT DYNAMIC_ARCH)
+ if (NOT NO_AVX512)
+ execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+ if (${GCC_VERSION} VERSION_GREATER 11.0 OR ${GCC_VERSION} VERSION_EQUAL 11.0)
+ set (CCOMMON_OPT "${CCOMMON_OPT} -march=sapphirerapids")
+ else ()
+ set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
+ endif()
+ endif ()
+ endif ()
+endif ()
+
if (NOT DYNAMIC_ARCH)
if (HAVE_AVX2)
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
message(STATUS "Compiling a ${BINARY}-bit binary.")
set(NO_AVX 1)
- if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
+ if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE" OR ${TARGET} STREQUAL "SAPPHIRERAPIDS")
set(TARGET "NEHALEM")
endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
endif()
endif()
endif()
+ if (${TARGET} STREQUAL SAPPHIRERAPIDS AND NOT NO_AVX512)
+ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
+ execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+ if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 11.0)
+ set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids")
+ else()
+ set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
+ endif()
+ elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
+ if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.0)
+ set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids")
+ else()
+ set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
+ endif()
+ endif()
+ endif()
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
#define CORE_SKYLAKEX 28
#define CORE_DHYANA 29
#define CORE_COOPERLAKE 30
+#define CORE_SAPPHIRERAPIDS 31
#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
#define HAVE_AVX512VL (1 << 21)
#define HAVE_AVX2 (1 << 22)
#define HAVE_AVX512BF16 (1 << 23)
+#define HAVE_AMXBF16 (1 << 24)
#define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2
#define CPUTYPE_SKYLAKEX 52
#define CPUTYPE_DHYANA 53
#define CPUTYPE_COOPERLAKE 54
+#define CPUTYPE_SAPPHIRERAPIDS 55
#define CPUTYPE_HYGON_UNKNOWN 99
#endif
}
+#define BIT_AMX_TILE 0x01000000
+#define BIT_AMX_BF16 0x00400000
+#define BIT_AMX_ENBD 0x00060000
+
+int support_amx_bf16() {
+#if !defined(NO_AVX) && !defined(NO_AVX512)
+ int eax, ebx, ecx, edx;
+ int ret=0;
+
+ if (!support_avx512())
+ return 0;
+ // CPUID.7.0:EDX indicates AMX support
+ cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+ if ((edx & BIT_AMX_TILE) && (edx & BIT_AMX_BF16)) {
+ // CPUID.D.0:EAX[17:18] indicates AMX enabled
+ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ if ((eax & BIT_AMX_ENBD) == BIT_AMX_ENBD)
+ ret = 1;
+ }
+ return ret;
+#else
+ return 0;
+#endif
+}
+
int get_vendor(void){
int eax, ebx, ecx, edx;
char vendor[13];
if (support_avx2()) feature |= HAVE_AVX2;
if (support_avx512()) feature |= HAVE_AVX512VL;
if (support_avx512_bf16()) feature |= HAVE_AVX512BF16;
+ if (support_amx_bf16()) feature |= HAVE_AMXBF16;
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
#endif
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n");
+ if (features & HAVE_AMXBF16 ) printf("#define HAVE_AMXBF16\n");
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n");
+ if (features & HAVE_AMXBF16 ) printf("HAVE_AMXBF16=1\n");
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
/* Split local region of B into parts */
for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){
min_jj = MIN(n_to, js + div_n) - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = 0; jjs < ls - js; jjs += min_jj){
min_jj = ls - js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = 0; jjs < min_l; jjs += min_jj){
min_jj = min_l - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = 0; jjs < min_l; jjs += min_jj){
min_jj = min_l - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){
min_jj = js - ls - min_l - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || \
- defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE)
+ defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || \
defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || \
- defined(SKYLAKEX) || defined(COOPERLAKE)
+ defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
int size = 16;
#else
int size = get_L2_size();
#endif
#endif
+#ifdef FORCE_SAPPHIRERAPIDS
+#ifdef NO_AVX512
+#define FORCE
+#define FORCE_INTEL
+#define ARCHITECTURE "X86"
+#define SUBARCHITECTURE "HASWELL"
+#define ARCHCONFIG "-DHASWELL " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
+#define LIBNAME "haswell"
+#define CORENAME "HASWELL"
+#else
+#define FORCE
+#define FORCE_INTEL
+#define ARCHITECTURE "X86"
+#define SUBARCHITECTURE "SAPPHIRERAPIDS"
+#define ARCHCONFIG "-DSAPPHIRERAPIDS " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
+ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
+ "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=sapphirerapids"
+#define LIBNAME "sapphirerapids"
+#define CORENAME "SAPPHIRERAPIDS"
+#endif
+#endif
+
#ifdef FORCE_ATOM
#define FORCE
#define FORCE_INTEL
# Makefile.L3
set(USE_TRMM false)
string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE)
- if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE))
+ if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE) OR (UC_TARGET_CORE MATCHES SAPPHIRERAPIDS))
set(USE_TRMM true)
endif ()
if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10))
endif
ifdef TARGET_CORE
-ifeq ($(TARGET_CORE), COOPERLAKE)
+ifeq ($(TARGET_CORE), SAPPHIRERAPIDS)
+ override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
+ ifeq ($(GCCVERSIONGTEQ10), 1)
+ override CFLAGS += -march=sapphirerapids
+ else
+ override CFLAGS += -march=skylake-avx512 -mavx512f
+ endif
+ ifeq ($(OSNAME), CYGWIN_NT)
+ override CFLAGS += -fno-asynchronous-unwind-tables
+ endif
+ ifeq ($(OSNAME), WINNT)
+ ifeq ($(C_COMPILER), GCC)
+ override CFLAGS += -fno-asynchronous-unwind-tables
+ endif
+ endif
+else ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
ifeq ($(GCCVERSIONGTEQ10), 1)
override CFLAGS += -march=cooperlake
USE_TRMM = 1
endif
+ifeq ($(CORE), SAPPHIRERAPIDS)
+USE_TRMM = 1
+endif
+
ifeq ($(CORE), ZEN)
USE_TRMM = 1
endif
#endif
#endif
-#if defined(SKYLAKEX) || defined(COOPERLAKE)
+#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#ifdef DEBUG
fprintf(stderr, "SkylakeX\n");
#define PREFETCHSIZE (8 * 21 + 4)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
#define PREFETCHSIZE (8 * 21 + 4)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
#define PREFETCHSIZE (8 * 21 + 4)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
#define PREFETCHSIZE (8 * 21 + 4)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
#define PREFETCHSIZE (8 * 21 + 4)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
#define PREFETCHSIZE (8 * 21 + 4)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif
#define PREFETCHSIZE 84
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
#define PREFETCHSIZE 84
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
#define PREFETCHSIZE 84
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
#define PREFETCHSIZE 84
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
#define PREFETCHSIZE 84
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif
--- /dev/null
+include $(KERNELDIR)/KERNEL.COOPERLAKE
#include "caxpy_microk_steamroller-2.c"
#elif defined(BULLDOZER)
#include "caxpy_microk_bulldozer-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "caxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "caxpy_microk_sandy-2.c"
#include "cdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "cdot_microk_steamroller-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "cdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "cdot_microk_sandy-2.c"
#include <stdio.h>
#include "common.h"
-#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "cgemv_n_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_n_microk_bulldozer-4.c"
#include "common.h"
-#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "cgemv_t_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_t_microk_bulldozer-4.c"
#include "common.h"
-#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "cscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "cscal_microk_bulldozer-2.c"
#include "daxpy_microk_piledriver-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "daxpy_microk_haswell-2.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "daxpy_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE)
#include "daxpy_microk_sandy-2.c"
#include "ddot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "ddot_microk_haswell-2.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "ddot_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE)
#include "ddot_microk_sandy-2.c"
#include "dgemv_n_microk_nehalem-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dgemv_n_microk_haswell-4.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "dgemv_n_microk_skylakex-4.c"
#endif
#include "common.h"
-#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "dgemv_t_microk_haswell-4.c"
#endif
#include "dscal_microk_sandy-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "dscal_microk_haswell-2.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "dscal_microk_skylakex-2.c"
#endif
#include "dsymv_L_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "dsymv_L_microk_haswell-2.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "dsymv_L_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_L_microk_sandy-2.c"
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_U_microk_bulldozer-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "dsymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_U_microk_sandy-2.c"
#include "saxpy_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "saxpy_microk_haswell-2.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "saxpy_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE)
#include "saxpy_microk_sandy-2.c"
#include "common.h"
-#if defined(COOPERLAKE)
+#if defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "sbdot_microk_cooperlake.c"
#endif
#include "common.h"
-#if defined (COOPERLAKE)
+#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "sbgemv_n_microk_cooperlake.c"
#endif
#include "common.h"
-#if defined (COOPERLAKE)
+#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "sbgemv_t_microk_cooperlake.c"
#endif
#include "sdot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "sdot_microk_haswell-2.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "sdot_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE)
#include "sdot_microk_sandy-2.c"
/* the direct sgemm code written by Arjan van der Ven */
#include "common.h"
-#if defined(SKYLAKEX) || defined (COOPERLAKE)
+#if defined(SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include <immintrin.h>
#include "sgemv_n_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN)
#include "sgemv_n_microk_haswell-4.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "sgemv_n_microk_haswell-4.c"
#include "sgemv_n_microk_skylakex-8.c"
#endif
#include "sgemv_t_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN)
#include "sgemv_t_microk_haswell-4.c"
-#elif defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "sgemv_t_microk_haswell-4.c"
#include "sgemv_t_microk_skylakex.c"
#endif
#include "ssymv_L_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_L_microk_nehalem-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "ssymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_L_microk_sandy-2.c"
#include "ssymv_U_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_U_microk_nehalem-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "ssymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_U_microk_sandy-2.c"
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
#else
#endif
-#if defined(COOPERLAKE)
+#if defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#if defined(DOUBLE)
#include "dtobf16_microk_cooperlake.c"
#elif defined(SINGLE)
#include "zaxpy_microk_bulldozer-2.c"
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zaxpy_microk_steamroller-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "zaxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zaxpy_microk_sandy-2.c"
#include "zdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "zdot_microk_steamroller-2.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "zdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zdot_microk_sandy-2.c"
#include "common.h"
-#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "zgemv_n_microk_haswell-4.c"
#elif defined(SANDYBRIDGE)
#include "zgemv_n_microk_sandy-4.c"
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zgemv_t_microk_bulldozer-4.c"
-#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "zgemv_t_microk_haswell-4.c"
#endif
#include "common.h"
-#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#include "zscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "zscal_microk_bulldozer-2.c"
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
#endif
+#ifdef SAPPHIRERAPIDS
+
+#define SNUMOPT 16
+#define DNUMOPT 8
+
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SYMV_P 8
+
+#if defined(XDOUBLE) || defined(DOUBLE)
+#define SWITCH_RATIO 8
+#define GEMM_PREFERED_SIZE 8
+#else
+#define SWITCH_RATIO 16
+#define GEMM_PREFERED_SIZE 16
+#endif
+#define USE_SGEMM_KERNEL_DIRECT 1
+
+#ifdef ARCH_X86
+
+#define SGEMM_DEFAULT_UNROLL_M 4
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_M 1
+#define XGEMM_DEFAULT_UNROLL_M 1
+
+#define SGEMM_DEFAULT_UNROLL_N 4
+#define DGEMM_DEFAULT_UNROLL_N 4
+#define QGEMM_DEFAULT_UNROLL_N 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+#define XGEMM_DEFAULT_UNROLL_N 1
+
+#else
+
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define DGEMM_DEFAULT_UNROLL_M 16
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 8
+#define ZGEMM_DEFAULT_UNROLL_M 4
+#define XGEMM_DEFAULT_UNROLL_M 1
+
+#define SGEMM_DEFAULT_UNROLL_N 4
+#define DGEMM_DEFAULT_UNROLL_N 2
+#define QGEMM_DEFAULT_UNROLL_N 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+#define XGEMM_DEFAULT_UNROLL_N 1
+
+#define SGEMM_DEFAULT_UNROLL_MN 32
+#define DGEMM_DEFAULT_UNROLL_MN 32
+#endif
+
+#ifdef ARCH_X86
+
+#define SGEMM_DEFAULT_P 512
+#define SGEMM_DEFAULT_R sgemm_r
+#define DGEMM_DEFAULT_P 512
+#define DGEMM_DEFAULT_R dgemm_r
+#define QGEMM_DEFAULT_P 504
+#define QGEMM_DEFAULT_R qgemm_r
+#define CGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_R 1024
+#define ZGEMM_DEFAULT_P 512
+#define ZGEMM_DEFAULT_R zgemm_r
+#define XGEMM_DEFAULT_P 252
+#define XGEMM_DEFAULT_R xgemm_r
+#define SGEMM_DEFAULT_Q 256
+#define DGEMM_DEFAULT_Q 256
+#define QGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 256
+#define ZGEMM_DEFAULT_Q 192
+#define XGEMM_DEFAULT_Q 128
+
+#else
+
+#define SGEMM_DEFAULT_P 640
+#define DGEMM_DEFAULT_P 192
+#define CGEMM_DEFAULT_P 384
+#define ZGEMM_DEFAULT_P 256
+
+#define SGEMM_DEFAULT_Q 320
+#define DGEMM_DEFAULT_Q 384
+#define CGEMM_DEFAULT_Q 192
+#define ZGEMM_DEFAULT_Q 128
+
+#define SGEMM_DEFAULT_R sgemm_r
+#define DGEMM_DEFAULT_R 8640
+#define CGEMM_DEFAULT_R cgemm_r
+#define ZGEMM_DEFAULT_R zgemm_r
+
+#define QGEMM_DEFAULT_Q 128
+#define QGEMM_DEFAULT_P 504
+#define QGEMM_DEFAULT_R qgemm_r
+#define XGEMM_DEFAULT_P 252
+#define XGEMM_DEFAULT_R xgemm_r
+#define XGEMM_DEFAULT_Q 128
+
+#define CGEMM3M_DEFAULT_UNROLL_N 4
+#define CGEMM3M_DEFAULT_UNROLL_M 8
+#define ZGEMM3M_DEFAULT_UNROLL_N 4
+#define ZGEMM3M_DEFAULT_UNROLL_M 4
+
+#define CGEMM3M_DEFAULT_P 320
+#define ZGEMM3M_DEFAULT_P 256
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 320
+#define ZGEMM3M_DEFAULT_Q 256
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
+#endif
+#endif
+
#ifdef COOPERLAKE
#define SNUMOPT 16