From: Werner Saar Date: Mon, 9 Jan 2017 11:57:26 +0000 (+0100) Subject: prepared lapack/getrf functions for UNROLL values, that are not a power of two X-Git-Tag: upstream/0.2.20^2~83^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3e1bbd6b5f01a1c404e2c9e4703c02cf671384eb;p=platform%2Fupstream%2Fopenblas.git prepared lapack/getrf functions for UNROLL values, that are not a power of two --- diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index 8fdf769..4f8febf 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -239,7 +239,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG * for (i = 1; i < DIVIDE_RATE; i++) { - buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE; + buffer[i] = buffer[i - 1] + GEMM_Q * (((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N) * COMPSIZE; } for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) { @@ -303,7 +303,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG * min_i = GEMM_P; } else if (min_i > GEMM_P) { - min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); + min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; } ICOPY_OPERATION(k, min_i, a, lda, 0, is, sa); @@ -420,7 +420,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, mn = MIN(m, n); - init_bk = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + init_bk = ((mn / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (init_bk > GEMM_Q) init_bk = GEMM_Q; if (init_bk <= GEMM_UNROLL_N) { @@ -459,11 +459,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, while (is < mn) { - width = (FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + width = ((FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 
1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (width > mn - is - bk) width = mn - is - bk; if (width < bk) { - next_bk = (FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N) & ~(GEMM_UNROLL_N - 1); + next_bk = ((FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (next_bk > bk) next_bk = bk; @@ -594,11 +594,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, bk = mn - is; if (bk > next_bk) bk = next_bk; - width = (FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + width = ((FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (width > mn - is - bk) width = mn - is - bk; if (width < bk) { - next_bk = (FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N) & ~(GEMM_UNROLL_N - 1); + next_bk = ((FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (next_bk > bk) next_bk = bk; } @@ -676,7 +676,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, mn = MIN(m, n); - init_bk = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + init_bk = ((mn / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (init_bk > GEMM_Q) init_bk = GEMM_Q; if (init_bk <= GEMM_UNROLL_N) { @@ -685,14 +685,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, } width = FORMULA1(m, n, 0, init_bk, args -> nthreads); - width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + width = ((width + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (width > n - init_bk) width = n - init_bk; if (width < init_bk) { BLASLONG temp; temp = FORMULA2(m, n, 0, init_bk, args -> nthreads); - temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + temp = ((temp + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (temp < GEMM_UNROLL_N) temp = GEMM_UNROLL_N; if (temp < init_bk) init_bk = temp; @@ -717,12 +717,12 @@ blasint CNAME(blas_arg_t 
*args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, while (is < mn) { width = FORMULA1(m, n, is, bk, args -> nthreads); - width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + width = ((width + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (width < bk) { next_bk = FORMULA2(m, n, is, bk, args -> nthreads); - next_bk = (next_bk + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + next_bk = ((next_bk + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (next_bk > bk) next_bk = bk; #if 0 @@ -852,11 +852,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, if (bk > next_bk) bk = next_bk; width = FORMULA1(m, n, is, bk, args -> nthreads); - width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + width = ((width + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (width < bk) { next_bk = FORMULA2(m, n, is, bk, args -> nthreads); - next_bk = (next_bk + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + next_bk = ((next_bk + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (next_bk > bk) next_bk = bk; #if 0 diff --git a/lapack/getrf/getrf_parallel_omp.c b/lapack/getrf/getrf_parallel_omp.c index 6b8cbda..79d6f51 100644 --- a/lapack/getrf/getrf_parallel_omp.c +++ b/lapack/getrf/getrf_parallel_omp.c @@ -170,7 +170,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, mn = MIN(m, n); - blocking = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + blocking = ((mn / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N; if (blocking > GEMM_Q) blocking = GEMM_Q; #ifdef POWER8 diff --git a/lapack/getrf/getrf_single.c b/lapack/getrf/getrf_single.c index 9f0f36b..581feeb 100644 --- a/lapack/getrf/getrf_single.c +++ b/lapack/getrf/getrf_single.c @@ -74,7 +74,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, mn = MIN(m, n); - blocking = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); + blocking = ((mn / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * 
GEMM_UNROLL_N; if (blocking > GEMM_Q) blocking = GEMM_Q; #ifdef POWER8