From: wernsaar Date: Wed, 18 Jun 2014 13:04:11 +0000 (+0200) Subject: Ref #103: enhancement for small matrix dimensions X-Git-Tag: v0.2.10.rc1^2~5^2~15 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a19d20900575b3f61e1155ceed3ebfef46a2fcfc;p=platform%2Fupstream%2Fopenblas.git Ref #103: enhancement for small matrix dimensions --- diff --git a/interface/gemm.c b/interface/gemm.c index 587175e..9ce7fe5 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -72,7 +72,7 @@ #endif #ifndef GEMM_MULTITHREAD_THRESHOLD -# define GEMM_MULTITHREAD_THRESHOLD 4 +#define GEMM_MULTITHREAD_THRESHOLD 4 #endif static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { @@ -400,14 +400,63 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT); - args.common = NULL; + int nthreads_max = num_cpu_avail(3); + int nthreads_avail = nthreads_max; - if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD - || args.k <=GEMM_MULTITHREAD_THRESHOLD){ - args.nthreads = 1; - }else{ - args.nthreads = num_cpu_avail(3); +#ifndef COMPLEX + double MNK = (double) args.m * (double) args.n * (double) args.k; + if ( MNK <= (1024.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) + nthreads_max = 1; + else + { + if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) + { + nthreads_max = 4; + if ( args.m < 16 * GEMM_MULTITHREAD_THRESHOLD ) + { + nthreads_max = 2; + if ( args.m < 3 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1; + if ( args.n < 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1; + if ( args.k < 3 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1; + } + else + { + if ( args.n <= 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 2; + } + } } +#else + double MNK = (double) args.m * (double) args.n * (double) args.k; + if ( MNK <= (256.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) + nthreads_max = 1; + else + { + if ( MNK <= (16384.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) + { + nthreads_max = 4; + if ( args.m < 3 * GEMM_MULTITHREAD_THRESHOLD ) + { + nthreads_max = 2; + if ( args.m <= 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1; + if ( args.n < 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1; + if ( args.k < 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1; + } + else + { + if ( args.n < 2 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 2; + } + } + } + +#endif + args.common = NULL; + + if ( nthreads_max > nthreads_avail ) + args.nthreads = nthreads_avail; + else + args.nthreads = nthreads_max; + + if (args.nthreads == 1) { #endif