From cda81cfae0e3dc18b1c2e9d05d6e0f8e1bec3917 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Jan 2019 00:10:01 +0100 Subject: [PATCH] Shift transition to multithreading towards larger matrix sizes See #1886 and JuliaRobotics issue 500. trsm benchmarks on Haswell and Zen showed that with these values performance is roughly doubled for matrix sizes between 8x8 and 14x14, and still 10 to 20 percent better near the new cutoff at 32x32. --- interface/trsm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/interface/trsm.c b/interface/trsm.c index 5c2750e..faec03a 100644 --- a/interface/trsm.c +++ b/interface/trsm.c @@ -81,6 +81,12 @@ #endif #endif +#ifndef COMPLEX +#define SMP_FACTOR 8 +#else +#define SMP_FACTOR 4 +#endif + static int (*trsm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { #ifndef TRMM TRSM_LNUU, TRSM_LNUN, TRSM_LNLU, TRSM_LNLN, @@ -366,10 +372,10 @@ void CNAME(enum CBLAS_ORDER order, mode |= (trans << BLAS_TRANSA_SHIFT); mode |= (side << BLAS_RSIDE_SHIFT); - if ( args.m < 2*GEMM_MULTITHREAD_THRESHOLD ) + if ( args.m < SMP_FACTOR * GEMM_MULTITHREAD_THRESHOLD ) args.nthreads = 1; else - if ( args.n < 2*GEMM_MULTITHREAD_THRESHOLD ) + if ( args.n < SMP_FACTOR * GEMM_MULTITHREAD_THRESHOLD ) args.nthreads = 1; else args.nthreads = num_cpu_avail(3); -- 2.7.4