From dd6212e6849cfb4f0f4847c9f0a72b542bf46d59 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Tue, 10 Jan 2017 14:05:07 +0100 Subject: [PATCH] updated some level1 functions that are not thread safe --- interface/asum.c | 116 ------------------------------------------------------- interface/copy.c | 50 ------------------------ interface/dot.c | 97 ---------------------------------------------- interface/rot.c | 41 -------------------- 4 files changed, 304 deletions(-) diff --git a/interface/asum.c b/interface/asum.c index e6fcf4d..1393989 100644 --- a/interface/asum.c +++ b/interface/asum.c @@ -42,24 +42,6 @@ #include "functable.h" #endif -#ifdef SMP -static int asum_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha, - float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz) -{ -#ifndef CBLAS - FLOATRET ret; - ret = (FLOATRET)ASUM_K(m, x, incx); - *((double *)z) = (double)ret; -#else - FLOAT ret; - ret = ASUM_K(m, x, incx); - *((double *)z) = (double)ret; -#endif - - return 0; -} -#endif - #ifndef CBLAS FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ @@ -70,62 +52,14 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ PRINT_DEBUG_NAME; -#ifdef SMP - int i; - int mode, nthreads; - double mid_result= 0.0; - FLOAT dummyalpha[2] = {ZERO, ZERO}; - double *buffer = (double*)blas_memory_alloc(0); -#endif - if (n <= 0) return 0; IDEBUG_START; FUNCTION_PROFILE_START(); -#ifdef SMP - nthreads = num_cpu_avail(1); - - //Temporarily work-around the low performance issue with small imput size & - //multithreads.
- if (n <= 100000) - nthreads = 1; - - if (nthreads == 1) { -#endif - ret = (FLOATRET)ASUM_K(n, x, incx); -#ifdef SMP - } else { - -#ifndef DOUBLE -#ifndef COMPLEX - mode = BLAS_SINGLE | BLAS_REAL; -#else - mode = BLAS_SINGLE | BLAS_COMPLEX; -#endif -#else -#ifndef COMPLEX - mode = BLAS_DOUBLE | BLAS_REAL; -#else - mode = BLAS_DOUBLE | BLAS_COMPLEX; -#endif -#endif - - blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha, - x, incx, NULL, 0, buffer, 0, (void *)asum_threads, nthreads); - - for(i = 0; i < nthreads; i++) - mid_result += buffer[2*i]; - - ret = (FLOATRET)mid_result; - } - - blas_memory_free(buffer); -#endif - FUNCTION_PROFILE_END(COMPSIZE, n, n); IDEBUG_END; @@ -141,68 +75,18 @@ FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ PRINT_DEBUG_CNAME; -#ifdef SMP - int i; - int mode, nthreads; - double mid_result= 0.0; - FLOAT dummyalpha[2] = {ZERO, ZERO}; - - double *buffer = (double*)blas_memory_alloc(0); -#endif - if (n <= 0) return 0; IDEBUG_START; FUNCTION_PROFILE_START(); -#ifdef SMP - nthreads = num_cpu_avail(1); - - //Temporarily work-around the low performance issue with small imput size & - //multithreads. 
- if (n <= 100000) - nthreads = 1; - - if (nthreads == 1) { -#endif - ret = ASUM_K(n, x, incx); -#ifdef SMP - } else { - -#ifndef DOUBLE -#ifndef COMPLEX - mode = BLAS_SINGLE | BLAS_REAL; -#else - mode = BLAS_SINGLE | BLAS_COMPLEX; -#endif -#else -#ifndef COMPLEX - mode = BLAS_DOUBLE | BLAS_REAL; -#else - mode = BLAS_DOUBLE | BLAS_COMPLEX; -#endif -#endif - - blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha, - x, incx, NULL, 0, buffer, 0, (void *)asum_threads, nthreads); - - for(i = 0; i < nthreads; i++) - mid_result += buffer[2*i]; - - ret = (FLOAT)mid_result; - } - - blas_memory_free(buffer); -#endif - FUNCTION_PROFILE_END(COMPSIZE, n, n); IDEBUG_END; - return ret; } diff --git a/interface/copy.c b/interface/copy.c index 7452c58..3fb2182 100644 --- a/interface/copy.c +++ b/interface/copy.c @@ -42,17 +42,6 @@ #include "functable.h" #endif -#ifdef SMP - -static int copy_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha, - float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz) -{ - COPY_K(m, x, incx, y, incy); - return 0; -} - -#endif - #ifndef CBLAS void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ @@ -71,11 +60,6 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ #endif -#ifdef SMP - int mode, nthreads; - FLOAT dummyalpha[2] = {ZERO, ZERO}; -#endif - if (n <= 0) return; IDEBUG_START; @@ -85,42 +69,8 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ if (incx < 0) x -= (n - 1) * incx * COMPSIZE; if (incy < 0) y -= (n - 1) * incy * COMPSIZE; -#ifdef SMP - nthreads = num_cpu_avail(1); - - //Temporarily work-around the low performance issue with small imput size & - //multithreads. 
- if (n <= 100000) - nthreads = 1; - - if (nthreads == 1) { -#endif - COPY_K(n, x, incx, y, incy); -#ifdef SMP - } else { - -#ifndef DOUBLE -#ifndef COMPLEX - mode = BLAS_SINGLE | BLAS_REAL; -#else - mode = BLAS_SINGLE | BLAS_COMPLEX; -#endif -#else -#ifndef COMPLEX - mode = BLAS_DOUBLE | BLAS_REAL; -#else - mode = BLAS_DOUBLE | BLAS_COMPLEX; -#endif -#endif - - blas_level1_thread(mode, n, 0, 0, dummyalpha, - x, incx, y, incy, NULL, 0, (void *)copy_threads, nthreads); - - } -#endif - FUNCTION_PROFILE_END(COMPSIZE, COMPSIZE * n, 0); IDEBUG_END; diff --git a/interface/dot.c b/interface/dot.c index 1ef9b34..3a91840 100644 --- a/interface/dot.c +++ b/interface/dot.c @@ -42,24 +42,6 @@ #include "functable.h" #endif -#ifdef SMP -static int dot_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha, - float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz) -{ -#ifndef CBLAS - FLOATRET ret; - ret = (FLOATRET)DOTU_K(m, x, incx, y, incy); - *((double *)z) = (double)ret; -#else - FLOAT ret; - ret = DOTU_K(n, x, incx, y, incy); - *((double *)z) = (double)ret; -#endif - - return 0; -} -#endif - #ifndef CBLAS FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ @@ -71,14 +53,6 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ PRINT_DEBUG_NAME; -#ifdef SMP - int i; - int mode, nthreads; - double mid_result= 0.0; - FLOAT dummyalpha[2] = {ZERO, ZERO}; - double *buffer = (double*)blas_memory_alloc(0); -#endif - if (n <= 0) return 0.; IDEBUG_START; @@ -88,40 +62,8 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; -#ifdef SMP - nthreads = num_cpu_avail(1); - - //Temporarily work-around the low performance issue with small imput size & - //multithreads. 
- if (n <= 100000) - nthreads = 1; - - if (nthreads == 1) { -#endif - ret = (FLOATRET)DOTU_K(n, x, incx, y, incy); -#ifdef SMP - } else { - -#ifndef DOUBLE - mode = BLAS_SINGLE | BLAS_REAL; -#else - mode = BLAS_DOUBLE | BLAS_REAL; -#endif - - blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha, - x, incx, y, incy, buffer, 0, (void *)dot_threads, nthreads); - - for(i = 0; i < nthreads; i++) - mid_result += buffer[2*i]; - - ret = (FLOATRET)mid_result; - } - - blas_memory_free(buffer); -#endif - FUNCTION_PROFILE_END(1, 2 * n, 2 * n); IDEBUG_END; @@ -137,14 +79,6 @@ FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ PRINT_DEBUG_CNAME; -#ifdef SMP - int i; - int mode, nthreads; - double mid_result= 0.0; - FLOAT dummyalpha[2] = {ZERO, ZERO}; - - double *buffer = (double*)blas_memory_alloc(0); -#endif if (n <= 0) return 0.; IDEBUG_START; @@ -154,39 +88,8 @@ FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; -#ifdef SMP - nthreads = num_cpu_avail(1); - - //Temporarily work-around the low performance issue with small imput size & - //multithreads. 
- if (n <= 100000) - nthreads = 1; - - if (nthreads == 1) { -#endif ret = DOTU_K(n, x, incx, y, incy); -#ifdef SMP - } else { - -#ifndef DOUBLE - mode = BLAS_SINGLE | BLAS_REAL; -#else - mode = BLAS_DOUBLE | BLAS_REAL; -#endif - - blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha, - x, incx, y, incy, buffer, 0, (void *)dot_threads, nthreads); - - for(i = 0; i < nthreads; i++) - mid_result += buffer[2*i]; - - ret = (FLOAT)mid_result; - } - - blas_memory_free(buffer); -#endif - FUNCTION_PROFILE_END(1, 2 * n, 2 * n); IDEBUG_END; diff --git a/interface/rot.c b/interface/rot.c index ae6b49c..125275a 100644 --- a/interface/rot.c +++ b/interface/rot.c @@ -42,16 +42,6 @@ #include "functable.h" #endif -#ifdef SMP -static int rot_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha, - float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz) -{ - ROT_K(m, x, incx, y, incy, n, k); - return 0; -} - -#endif - #ifndef CBLAS void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *C, FLOAT *S){ @@ -72,11 +62,6 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT c, F #endif -#ifdef SMP - int mode, nthreads; - FLOAT dummyalpha[2] = {ZERO, ZERO}; -#endif - if (n <= 0) return; IDEBUG_START; @@ -86,34 +71,8 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT c, F if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; -#ifdef SMP - nthreads = num_cpu_avail(1); - - //Temporarily work-around the low performance issue with small imput size & - //multithreads. - if (n <= 100000) - nthreads = 1; - - if (nthreads == 1) { -#endif - ROT_K(n, x, incx, y, incy, c, s); -#ifdef SMP - } else { - -#ifndef DOUBLE - mode = BLAS_SINGLE | BLAS_REAL; -#else - mode = BLAS_DOUBLE | BLAS_REAL; -#endif - - blas_level1_thread(mode, n, c, s, dummyalpha, - x, incx, y, incy, NULL, 0, (void *)rot_threads, nthreads); - - } -#endif - FUNCTION_PROFILE_END(1, n, n); IDEBUG_END; -- 2.7.4