\r
SAVE4x12\r
\r
+ /* Prefetch the next source block of B here. */
+ /* The increment should be proportional to the ratio GEMM_Q/GEMM_P; */
+ /* an increment of 128 bytes is currently suitable. */
salq $3, K /* K *= 8, so (B, K, 8) below addresses B + 64 * (unshifted K) */
prefetcht2 32(B) /* prefetch hint at B + 32 */
prefetcht2 32(B, K, 8) /* prefetch hint at B + 8 * K + 32, with K as shifted above */
prefetcht2 96(B) /* prefetch hint at B + 96 */
prefetcht2 96(B, K, 8) /* prefetch hint at B + 8 * K + 96, with K as shifted above */
- addq $128, B
+ addq $128, B /* increment */
sarq $3, K /* arithmetic shift back by 3; restores K provided the left shift lost no high bits */
\r
decq I # i --\r
/**************************************************************************\r
* Rest of M \r
***************************************************************************/\r
+ /* recover the original value of pointer B */\r
movq M, I\r
sarq $2, I\r
salq $7, I\r
\r
SAVE4x12\r
\r
+ /* Prefetch the next source block of B here. */
+ /* The increment should be proportional to the ratio GEMM_Q/GEMM_P; */
+ /* an increment of 128 bytes is currently suitable. */
salq $3, K\r
prefetcht2 (B)\r
prefetcht2 (B, K, 8)\r
/**************************************************************************\r
* Rest of M \r
***************************************************************************/\r
-\r
+ /* recover the original value of pointer B */\r
movq M, I\r
sarq $2, I\r
salq $7, I\r