for (i = 1; i < DIVIDE_RATE; i++) {
- buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE;
+ buffer[i] = buffer[i - 1] + GEMM_Q * (((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N) * COMPSIZE; /* stride: div_n rounded up to a multiple of GEMM_UNROLL_N */
}
for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) {
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
+ min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
}
ICOPY_OPERATION(k, min_i, a, lda, 0, is, sa);
mn = MIN(m, n);
- init_bk = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ init_bk = ((mn / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (init_bk > GEMM_Q) init_bk = GEMM_Q;
if (init_bk <= GEMM_UNROLL_N) {
while (is < mn) {
- width = (FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ width = ((FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (width > mn - is - bk) width = mn - is - bk;
if (width < bk) {
- next_bk = (FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N) & ~(GEMM_UNROLL_N - 1);
+ next_bk = ((FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (next_bk > bk) next_bk = bk;
bk = mn - is;
if (bk > next_bk) bk = next_bk;
- width = (FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ width = ((FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (width > mn - is - bk) width = mn - is - bk;
if (width < bk) {
- next_bk = (FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N) & ~(GEMM_UNROLL_N - 1);
+ next_bk = ((FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (next_bk > bk) next_bk = bk;
}
mn = MIN(m, n);
- init_bk = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ init_bk = ((mn / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (init_bk > GEMM_Q) init_bk = GEMM_Q;
if (init_bk <= GEMM_UNROLL_N) {
}
width = FORMULA1(m, n, 0, init_bk, args -> nthreads);
- width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ width = ((width + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (width > n - init_bk) width = n - init_bk;
if (width < init_bk) {
BLASLONG temp;
temp = FORMULA2(m, n, 0, init_bk, args -> nthreads);
- temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ temp = ((temp + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (temp < GEMM_UNROLL_N) temp = GEMM_UNROLL_N;
if (temp < init_bk) init_bk = temp;
while (is < mn) {
width = FORMULA1(m, n, is, bk, args -> nthreads);
- width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ width = ((width + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (width < bk) {
next_bk = FORMULA2(m, n, is, bk, args -> nthreads);
- next_bk = (next_bk + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ next_bk = ((next_bk + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (next_bk > bk) next_bk = bk;
#if 0
if (bk > next_bk) bk = next_bk;
width = FORMULA1(m, n, is, bk, args -> nthreads);
- width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ width = ((width + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (width < bk) {
next_bk = FORMULA2(m, n, is, bk, args -> nthreads);
- next_bk = (next_bk + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ next_bk = ((next_bk + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (next_bk > bk) next_bk = bk;
#if 0