Fixing a performance bug in trsm_[LR].c.
author fossum <fossum@us.ibm.com>
Mon, 14 Sep 2020 18:10:48 +0000 (13:10 -0500)
committer fossum <fossum@us.ibm.com>
Mon, 14 Sep 2020 18:10:48 +0000 (13:10 -0500)
driver/level3/trsm_L.c
driver/level3/trsm_R.c

index d8130ee7e1065057b5340b3630b7d470c4c0d6cc..d842efa9301c1c7dc00c5db45a80479d6f75c5b6 100644 (file)
@@ -131,7 +131,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
 
       for(jjs = js; jjs < js + min_j; jjs += min_jj){
        min_jj = min_j + js - jjs;
-       if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+       if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
        else
          if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
 
@@ -197,7 +197,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
 
       for(jjs = js; jjs < js + min_j; jjs += min_jj){
        min_jj = min_j + js - jjs;
-       if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+       if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
        else
          if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
 
index f6a57f93fd5415150b0faf6bce1fac7288c8815e..f76a8f7f34d96cc98a78f028dc9cdb1c17ecb8ed 100644 (file)
@@ -126,7 +126,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
 
       for(jjs = js; jjs < js + min_j; jjs += min_jj){
        min_jj = min_j + js - jjs;
-       if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+       if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
        else
          if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
 
@@ -182,7 +182,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
 
       for(jjs = 0; jjs < min_j - min_l - ls + js; jjs += min_jj){
        min_jj = min_j - min_l - ls + js - jjs;
-       if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+       if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
        else
          if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
 
@@ -243,7 +243,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
 
       for(jjs = js; jjs < js + min_j; jjs += min_jj){
        min_jj = min_j + js - jjs;
-       if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+       if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
        else
          if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
 
@@ -304,7 +304,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
 
       for(jjs = 0; jjs < min_j - js + ls; jjs += min_jj){
        min_jj = min_j - js + ls - jjs;
-       if (min_jj > GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
+       if (min_jj >= GEMM_UNROLL_N*3) min_jj = GEMM_UNROLL_N*3;
        else
          if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;