if (min_l > GEMM3M_Q) {
min_l = (min_l + 1) / 2;
#ifdef UNROLL_X
- min_l = (min_l + UNROLL_X - 1) & ~(UNROLL_X - 1);
+ min_l = ((min_l + UNROLL_X - 1)/UNROLL_X) * UNROLL_X;
#endif
}
}
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
}
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
START_RPCC();
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
}
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
START_RPCC();
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
}
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
START_RPCC();
min_l = GEMM_Q;
} else {
if (min_l > GEMM_Q) {
- min_l = (min_l / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
+ min_l = ((min_l / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
}
- gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1));
+ gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
}
min_i = GEMM_P;
} else {
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} else {
l1stride = 0;
}
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
}
START_RPCC();
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
- buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1));
+ buffer[i] = buffer[i - 1] + GEMM3M_Q * (((div_n + GEMM3M_UNROLL_N - 1)/GEMM3M_UNROLL_N) * GEMM3M_UNROLL_N);
}
for(ls = 0; ls < k; ls += min_l){
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
- min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
}
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
- min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
START_RPCC();
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
- min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
START_RPCC();
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
- min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
+ min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
}
START_RPCC();
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
#ifndef LOWER
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
if (m_start >= js) {
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
aa = sb + min_l * (is - js) * COMPSIZE;
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
aa = sb + min_l * (m_start - js) * COMPSIZE;
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
aa = sb + min_l * (is - js) * COMPSIZE;
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
#ifndef LOWER
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
aa = sb + min_l * (is - js) * COMPSIZE;
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
START_RPCC();
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
if (is < js + min_j) {
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
START_RPCC();
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to);
#endif
- div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
- + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
min_i = GEMM_P;
} else {
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
}
STOP_RPCC(copy_A);
- div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
- + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
while (current >= 0) {
#endif
- div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
- + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
-
+ div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
START_RPCC();
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ min_i = (((min_i + 1) / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
}
START_RPCC();
do {
- div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
- + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
+ div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
double di = (double)i;
- width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
+ width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) );
- if (num_cpu == 0) width = n - ((n - width) & ~mask);
+ if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) );
if ((width > n - i) || (width < mask)) width = n - i;
double di = (double)i;
- width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
+ width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
if ((width > n - i) || (width < mask)) width = n - i;
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
- buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE;
+ buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE;
}
min_i = GEMM_P;
} else {
if (min_i > GEMM_P) {
- min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
+ min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} else {
if (args -> nthreads == 1) l1stride = 0;
}
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
- min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
+ min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
}
START_RPCC();
int mm, nn;
- mm = (loop & ~(GEMM_UNROLL_MN - 1));
+ mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER
int mm, nn;
- mm = (loop & ~(GEMM_UNROLL_MN - 1));
+ mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER
int mm, nn;
- mm = (loop & ~(GEMM_UNROLL_MN - 1));
+ mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER