/* Make sure if no one is using workspace */
START_RPCC();
for (i = 0; i < args -> nthreads; i++)
- while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
+ while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
STOP_RPCC(waiting1);
#if defined(FUSED_GEMM) && !defined(TIMING)
/* Wait until other region of B is initialized */
START_RPCC();
- while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
+ while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;MB;};
STOP_RPCC(waiting2);
/* Apply kernel with local region of A and part of other region of B */
/* Clear synchronization flag if this thread is done with other region of B */
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
+ WMB;
}
}
} while (current != mypos);
START_RPCC();
for (i = 0; i < args -> nthreads; i++) {
for (js = 0; js < DIVIDE_RATE; js++) {
- while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;};
+ while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;MB;};
}
}
STOP_RPCC(waiting3);