From: jiahaipeng Date: Sun, 11 Dec 2016 09:02:18 +0000 (+0000) Subject: modify the blas_l1_thread.c for support multi-threded for L1 fuction with return... X-Git-Tag: upstream/0.2.20^2~81 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1aa1e6cb54ced625915bc7b8be59ae3f55f369e6;p=platform%2Fupstream%2Fopenblas.git modify the blas_l1_thread.c for support multi-threded for L1 fuction with return value --- diff --git a/driver/others/blas_l1_thread.c b/driver/others/blas_l1_thread.c index 83fc2688..e405c746 100644 --- a/driver/others/blas_l1_thread.c +++ b/driver/others/blas_l1_thread.c @@ -110,3 +110,74 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha return 0; } + +int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, + void *a, BLASLONG lda, + void *b, BLASLONG ldb, + void *c, BLASLONG ldc, int (*function)(), int nthreads){ + + blas_queue_t queue[MAX_CPU_NUMBER]; + blas_arg_t args [MAX_CPU_NUMBER]; + + BLASLONG i, width, astride, bstride; + int num_cpu, calc_type; + + calc_type = (mode & BLAS_PREC) + ((mode & BLAS_COMPLEX) != 0) + 2; + + mode |= BLAS_LEGACY; + + for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]); + + num_cpu = 0; + i = m; + + while (i > 0){ + + /* Adjust Parameters */ + width = blas_quickdivide(i + nthreads - num_cpu - 1, + nthreads - num_cpu); + + i -= width; + if (i < 0) width = width + i; + + astride = width * lda; + + if (!(mode & BLAS_TRANSB_T)) { + bstride = width * ldb; + } else { + bstride = width; + } + + astride <<= calc_type; + bstride <<= calc_type; + + args[num_cpu].m = width; + args[num_cpu].n = n; + args[num_cpu].k = k; + args[num_cpu].a = (void *)a; + args[num_cpu].b = (void *)b; + args[num_cpu].c = (void *)((char *)c + num_cpu * sizeof(double)*2); + args[num_cpu].lda = lda; + args[num_cpu].ldb = ldb; + args[num_cpu].ldc = ldc; + args[num_cpu].alpha = alpha; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = function; + queue[num_cpu].args = &args[num_cpu]; + queue[num_cpu].next = &queue[num_cpu + 1]; + + a = (void *)((BLASULONG)a + astride); + b = (void *)((BLASULONG)b + bstride); + + num_cpu ++; + } + + if (num_cpu) { + queue[num_cpu - 1].next = NULL; + + exec_blas(num_cpu, queue); + } + + return 0; +}