/* Split local region of B into parts */
for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){
min_jj = MIN(n_to, js + div_n) - jjs;
+#ifdef SKYLAKEX
+ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
+ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
+#else
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
else
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+#endif
/* Copy part of local region of B into workspace */
START_RPCC();
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,