From 125610d23b35980524aad77696b3703804b3b810 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Thu, 24 Jul 2014 18:43:31 +0200 Subject: [PATCH] allow to set custom value for ?GEMM_DEFAULT_UNROLL_MN, optimizations for syrk --- common_param.h | 16 ++++++++++++++++ driver/level3/level3_syrk_threaded.c | 8 ++++---- driver/level3/syrk_thread.c | 8 ++++---- kernel/setparam-ref.c | 30 ++++++++++++++++++++++++++---- param.h | 1 + 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/common_param.h b/common_param.h index 8632164..1c362e8 100644 --- a/common_param.h +++ b/common_param.h @@ -919,14 +919,22 @@ extern gotoblas_t *gotoblas; #define SGEMM_R SGEMM_DEFAULT_R #define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M #define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N +#ifdef SGEMM_DEFAULT_UNROLL_MN +#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN +#else #define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N)) +#endif #define DGEMM_P DGEMM_DEFAULT_P #define DGEMM_Q DGEMM_DEFAULT_Q #define DGEMM_R DGEMM_DEFAULT_R #define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M #define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N +#ifdef DGEMM_DEFAULT_UNROLL_MN +#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN +#else #define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N)) +#endif #define QGEMM_P QGEMM_DEFAULT_P #define QGEMM_Q QGEMM_DEFAULT_Q @@ -940,14 +948,22 @@ extern gotoblas_t *gotoblas; #define CGEMM_R CGEMM_DEFAULT_R #define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M #define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N +#ifdef CGEMM_DEFAULT_UNROLL_MN +#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN +#else #define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N)) +#endif #define ZGEMM_P ZGEMM_DEFAULT_P #define ZGEMM_Q ZGEMM_DEFAULT_Q #define ZGEMM_R ZGEMM_DEFAULT_R #define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M #define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N +#ifdef ZGEMM_DEFAULT_UNROLL_MN +#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN +#else #define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) +#endif #define XGEMM_P XGEMM_DEFAULT_P #define XGEMM_Q XGEMM_DEFAULT_Q diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c index 01c7b23..5119baa 100644 --- a/driver/level3/level3_syrk_threaded.c +++ b/driver/level3/level3_syrk_threaded.c @@ -538,10 +538,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_REAL; - mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; + mask = DGEMM_UNROLL_MN - 1; #else mode = BLAS_SINGLE | BLAS_REAL; - mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; + mask = SGEMM_UNROLL_MN - 1; #endif #else #ifdef XDOUBLE @@ -549,10 +549,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_COMPLEX; - mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; + mask = ZGEMM_UNROLL_MN - 1; #else mode = BLAS_SINGLE | BLAS_COMPLEX; - mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; + mask = CGEMM_UNROLL_MN - 1; #endif #endif diff --git a/driver/level3/syrk_thread.c b/driver/level3/syrk_thread.c index 0d9bdf2..94274be 100644 --- a/driver/level3/syrk_thread.c +++ b/driver/level3/syrk_thread.c @@ -57,10 +57,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( switch (mode & BLAS_PREC) { case BLAS_SINGLE: - mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; + mask = SGEMM_UNROLL_MN - 1; break; case BLAS_DOUBLE: - mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; + mask = DGEMM_UNROLL_MN - 1; break; #ifdef EXPRECISION case BLAS_XDOUBLE: @@ -71,10 +71,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( } else { switch (mode & BLAS_PREC) { case BLAS_SINGLE: - mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; + mask = CGEMM_UNROLL_MN - 1; break; case BLAS_DOUBLE: - mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; + mask = ZGEMM_UNROLL_MN - 1; break; #ifdef EXPRECISION case BLAS_XDOUBLE: diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 5086420..b1beeae 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -54,7 +54,14 @@ gotoblas_t TABLE_NAME = { GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, 0, 0, 0, - SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), + SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, +#ifdef SGEMM_DEFAULT_UNROLL_MN + SGEMM_DEFAULT_UNROLL_MN, +#else + MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), +#endif + + #ifdef HAVE_EXCLUSIVE_CACHE 1, #else @@ -110,7 +117,12 @@ gotoblas_t TABLE_NAME = { #endif 0, 0, 0, - DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), + DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, +#ifdef DGEMM_DEFAULT_UNROLL_MN + DGEMM_DEFAULT_UNROLL_MN, +#else + MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), +#endif damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, @@ -214,7 +226,12 @@ gotoblas_t TABLE_NAME = { #endif 0, 0, 0, - CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), + CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, +#ifdef CGEMM_DEFAULT_UNROLL_MN + CGEMM_DEFAULT_UNROLL_MN, +#else + MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), +#endif camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, cnrm2_kTS, casum_kTS, ccopy_kTS, @@ -307,7 +324,12 @@ gotoblas_t TABLE_NAME = { #endif 0, 0, 0, - ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), + ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, +#ifdef ZGEMM_DEFAULT_UNROLL_MN + ZGEMM_DEFAULT_UNROLL_MN, +#else + MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), +#endif zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS, znrm2_kTS, zasum_kTS, zcopy_kTS, diff --git a/param.h b/param.h index 880219b..863e83c 100644 --- a/param.h +++ b/param.h @@ -1206,6 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_UNROLL_N 2 #define XGEMM_DEFAULT_UNROLL_N 1 +#define DGEMM_DEFAULT_UNROLL_MN 16 #endif #ifdef ARCH_X86 -- 2.7.4