#ifndef TRANSA
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
- if (range_m[num_cpu] > m) range_m[num_cpu] = m;
+ if (range_m[num_cpu] > m * num_cpu) range_m[num_cpu] = m * num_cpu;
#else
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
- if (range_m[num_cpu] > n) range_m[num_cpu] = n;
+ if (range_m[num_cpu] > n * num_cpu) range_m[num_cpu] = n * num_cpu;
#endif
queue[num_cpu].mode = mode;
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
- if (range_n[num_cpu] > n) range_n[num_cpu] = n;
+ if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
- if (range_n[num_cpu] > n) range_n[num_cpu] = n;
+ if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
- if (range_n[num_cpu] > n) range_n[num_cpu] = n;
+ if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
- if (range_n[num_cpu] > m) range_n[num_cpu] = m;
+ if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
- if (range_n[num_cpu] > m) range_n[num_cpu] = m;
+ if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
- if (range_n[num_cpu] > m) range_n[num_cpu] = m;
+ if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
- if (range_n[num_cpu] > m) range_n[num_cpu] = m;
+ if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = symv_kernel;
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
- if (range_n[num_cpu] > n) range_n[num_cpu] = n;
+ if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
- if (range_n[num_cpu] > n) range_n[num_cpu] = n;
+ if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
- if (range_n[num_cpu] > n) range_n[num_cpu] = n;
+ if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
- if (range_n[num_cpu] > m) range_n[num_cpu] = m;
+ if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
- if (range_n[num_cpu] > m) range_n[num_cpu] = m;
+ if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;