/* NOTE(review): lines prefixed with '-' / '+' look like patch markers
 * (removed / added); the loop body is not closed in this excerpt.
 * Iterates jjs over [js, js + min_j) in steps of min_jj. */
for(jjs = js; jjs < js + min_j; jjs += min_jj){
/* Start from the number of columns still left in this range. */
min_jj = min_j + js - jjs;
/* Old rule: cap the step at GEMM_UNROLL_N.
 * New rule: cap at 3 * GEMM_UNROLL_N when more than that remains; otherwise
 * still cap at GEMM_UNROLL_N, so a remainder in (GEMM_UNROLL_N, 3 * GEMM_UNROLL_N]
 * is consumed GEMM_UNROLL_N at a time. */
- if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
+ if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ else
+ if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
/* Source is b at element offset (ls + jjs * ldb); destination is sb at
 * min_l * (jjs - js); both offsets are scaled by COMPSIZE.
 * Presumably packs a min_l x min_jj panel into sb -- confirm against GEMM_ONCOPY. */
GEMM_ONCOPY(min_l, min_jj, b + (ls + jjs * ldb) * COMPSIZE, ldb, sb + min_l * (jjs - js) * COMPSIZE);
/* NOTE(review): lines prefixed with '-' / '+' look like patch markers
 * (removed / added); the loop body is not closed in this excerpt.
 * Iterates jjs over [js, js + min_j) in steps of min_jj. */
for(jjs = js; jjs < js + min_j; jjs += min_jj){
/* Start from the number of columns still left in this range. */
min_jj = min_j + js - jjs;
/* Old rule: cap the step at GEMM_UNROLL_N.
 * New rule: cap at 3 * GEMM_UNROLL_N when more than that remains; otherwise
 * still cap at GEMM_UNROLL_N when more than GEMM_UNROLL_N remains. */
- if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
+ if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ else
+ if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
/* Source is b at element offset (ls - min_l + jjs * ldb), i.e. the row offset
 * is shifted back by min_l from ls; destination is sb at min_l * (jjs - js).
 * Both offsets are scaled by COMPSIZE. */
GEMM_ONCOPY(min_l, min_jj, b + (ls - min_l + jjs * ldb) * COMPSIZE, ldb, sb + min_l * (jjs - js) * COMPSIZE);
/* NOTE(review): lines prefixed with '-' / '+' look like patch markers
 * (removed / added); the loop body and the #ifndef are not closed in this excerpt.
 * Iterates jjs over [js, js + min_j) in steps of min_jj. */
for(jjs = js; jjs < js + min_j; jjs += min_jj){
/* Start from the number of columns still left in this range. */
min_jj = min_j + js - jjs;
/* Old rule: cap the step at GEMM_UNROLL_N.
 * New rule: cap at 3 * GEMM_UNROLL_N when more than that remains; otherwise
 * still cap at GEMM_UNROLL_N when more than GEMM_UNROLL_N remains. */
- if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
+ if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ else
+ if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
/* Branch taken when TRANSA is not defined; the alternative branch is not
 * visible here. */
#ifndef TRANSA
/* Source is a at element offset (ls + jjs * lda); destination is sb at
 * min_l * (jjs - js); both offsets are scaled by COMPSIZE. */
GEMM_ONCOPY(min_l, min_jj, a + (ls + jjs * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
/* NOTE(review): lines prefixed with '-' / '+' look like patch markers
 * (removed / added); the loop body, the #ifndef and the final call are all
 * cut off in this excerpt.
 * Here jjs is a zero-based offset running up to (min_j - min_l - ls + js). */
for(jjs = 0; jjs < min_j - min_l - ls + js; jjs += min_jj){
/* Start from the number of columns still left before the loop bound. */
min_jj = min_j - min_l - ls + js - jjs;
/* Old rule: cap the step at GEMM_UNROLL_N.
 * New rule: cap at 3 * GEMM_UNROLL_N when more than that remains; otherwise
 * still cap at GEMM_UNROLL_N when more than GEMM_UNROLL_N remains. */
- if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
+ if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ else
+ if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
/* Branch taken when TRANSA is not defined; the alternative branch is not
 * visible here. */
#ifndef TRANSA
/* Source is a at row offset ls and column (ls + min_l + jjs), scaled by
 * COMPSIZE; the destination argument is truncated in this excerpt. */
GEMM_ONCOPY (min_l, min_jj, a + (ls + (ls + min_l + jjs) * lda) * COMPSIZE, lda,
/* NOTE(review): lines prefixed with '-' / '+' look like patch markers
 * (removed / added); the loop body and the #ifndef are not closed in this excerpt.
 * Iterates jjs over [js, js + min_j) in steps of min_jj. */
for(jjs = js; jjs < js + min_j; jjs += min_jj){
/* Start from the number of columns still left in this range. */
min_jj = min_j + js - jjs;
/* Old rule: cap the step at GEMM_UNROLL_N.
 * New rule: cap at 3 * GEMM_UNROLL_N when more than that remains; otherwise
 * still cap at GEMM_UNROLL_N when more than GEMM_UNROLL_N remains. */
- if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
+ if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ else
+ if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
/* Branch taken when TRANSA is not defined; the alternative branch is not
 * visible here. */
#ifndef TRANSA
/* Source is a at row offset ls and column (jjs - min_j), scaled by COMPSIZE;
 * destination is sb at min_l * (jjs - js), also scaled by COMPSIZE. */
GEMM_ONCOPY(min_l, min_jj, a + (ls + (jjs - min_j) * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
/* NOTE(review): lines prefixed with '-' / '+' look like patch markers
 * (removed / added); the loop body, the #ifndef and the final call are all
 * cut off in this excerpt.
 * Here jjs is a zero-based offset running up to (min_j - js + ls). */
for(jjs = 0; jjs < min_j - js + ls; jjs += min_jj){
/* Start from the number of columns still left before the loop bound. */
min_jj = min_j - js + ls - jjs;
/* Old rule: cap the step at GEMM_UNROLL_N.
 * New rule: cap at 3 * GEMM_UNROLL_N when more than that remains; otherwise
 * still cap at GEMM_UNROLL_N when more than GEMM_UNROLL_N remains. */
- if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
+ if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+ else
+ if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
/* Branch taken when TRANSA is not defined; the alternative branch is not
 * visible here. */
#ifndef TRANSA
/* Source is a at row offset ls and column (js - min_j + jjs), scaled by
 * COMPSIZE; the destination argument is truncated in this excerpt. */
GEMM_ONCOPY (min_l, min_jj, a + (ls + (js - min_j + jjs) * lda) * COMPSIZE, lda,