1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/*
 * Declaration only; the definition is not in this header.
 *
 * nthreads: thread count handed to the library.
 *
 * num_cpu_avail() in this header calls it with the OpenMP maximum thread
 * count whenever that value differs from blas_cpu_number.
 * NOTE(review): presumably this updates blas_cpu_number -- confirm against
 * the definition.
 */
extern void goto_set_num_threads(int nthreads);
47 /* Basic Thread Debugging */
50 /* Thread Timing Debugging */
53 /* Global Parameter */
/*
 * Library-wide threading state, defined in another translation unit.
 *
 * blas_cpu_number  - value returned by num_cpu_avail() as the usable thread
 *                    count; num_cpu_avail() compares it against 1 and
 *                    against the OpenMP maximum thread count.
 * blas_num_threads - not referenced by the code visible in this header.
 * blas_omp_linked  - not referenced by the code visible in this header.
 *                    NOTE(review): presumably non-zero when the library is
 *                    linked against an OpenMP runtime -- confirm.
 */
extern int blas_cpu_number;
extern int blas_num_threads;
extern int blas_omp_linked;
/*
 * Single-bit flags in the top three bits of a 16-bit mode word.  None of
 * them overlaps the precision, transpose, side or uplo fields defined in
 * this header.
 * NOTE(review): the meaning of each flag is inferred from its name only --
 * confirm against the code that tests them.
 */
#define BLAS_LEGACY	0x8000U
#define BLAS_PTHREAD	0x4000U
#define BLAS_NODE	0x2000U

/*
 * Precision field: BLAS_PREC masks the low four bits and every code below
 * fits inside that mask.  Codes 0x0-0x4 name a single element type; codes
 * 0x8-0xB (bit 3 set) name a pair of types.
 * NOTE(review): the paired codes presumably select conversions between
 * single/double and bfloat16 -- confirm against their users.
 */
#define BLAS_PREC	0x000FU
#define BLAS_INT8	0x0000U
#define BLAS_BFLOAT16	0x0001U
#define BLAS_SINGLE	0x0002U
#define BLAS_DOUBLE	0x0003U
#define BLAS_XDOUBLE	0x0004U
#define BLAS_STOBF16	0x0008U
#define BLAS_DTOBF16	0x0009U
#define BLAS_BF16TOS	0x000AU
#define BLAS_BF16TOD	0x000BU

/* Real/complex selector: one bit (0x1000); BLAS_REAL is that bit cleared. */
#define BLAS_REAL	0x0000U
#define BLAS_COMPLEX	0x1000U
/*
 * Transpose selector for operand A: a 2-bit field at bits 4-5.
 * BLAS_TRANSA is the field mask and BLAS_TRANSA_SHIFT its bit position, so
 * (x & BLAS_TRANSA) >> BLAS_TRANSA_SHIFT yields a value in 0..3.
 * NOTE(review): N/T/R/C presumably stand for no-transpose, transpose,
 * conjugate and conjugate-transpose -- confirm against the kernels.
 */
#define BLAS_TRANSA	0x0030U	/* 2bit */
#define BLAS_TRANSA_N	0x0000U
#define BLAS_TRANSA_T	0x0010U
#define BLAS_TRANSA_R	0x0020U
#define BLAS_TRANSA_C	0x0030U
#define BLAS_TRANSA_SHIFT     4

/*
 * Transpose selector for operand B: same encoding as the A field, placed
 * at bits 8-9.
 */
#define BLAS_TRANSB	0x0300U	/* 2bit */
#define BLAS_TRANSB_N	0x0000U
#define BLAS_TRANSB_T	0x0100U
#define BLAS_TRANSB_R	0x0200U
#define BLAS_TRANSB_C	0x0300U
#define BLAS_TRANSB_SHIFT     8
/*
 * Single-bit selectors.  Each *_SHIFT is the bit position of the matching
 * mask, i.e. mask == 1U << shift.
 * NOTE(review): BLAS_RSIDE presumably marks a right-side operation and
 * BLAS_UPLO the upper/lower triangle choice -- confirm against callers.
 */
#define BLAS_RSIDE	0x0400U
#define BLAS_RSIDE_SHIFT     10
#define BLAS_UPLO	0x0800U
#define BLAS_UPLO_SHIFT      11
/*
 * Status codes.  The non-zero codes are distinct single bits (1, 2, 4),
 * which is why BLAS_STATUS_FINISHED is 4 rather than 3.
 * NOTE(review): presumably stored in a queue entry's status member, which
 * is not visible here -- confirm.
 */
#define BLAS_STATUS_NOTYET	0
#define BLAS_STATUS_QUEUED	1
#define BLAS_STATUS_RUNNING	2
#define BLAS_STATUS_FINISHED	4
100 typedef struct blas_queue {
111 struct blas_queue *next;
113 #if defined( __WIN32__) || defined(__CYGWIN32__) || defined(_WIN32) || defined(__CYGWIN__)
114 CRITICAL_SECTION lock;
117 pthread_mutex_t lock;
118 pthread_cond_t finished;
123 #ifdef CONSISTENT_FPCSR
124 unsigned int sse_mode, x87_mode;
/*
 * Defined in another translation unit; not referenced by the code visible
 * in this header.
 * NOTE(review): presumably non-zero once the worker-thread server has been
 * started -- confirm against the definition.
 */
extern int blas_server_avail;
139 static __inline int num_cpu_avail(int level) {
142 int openmp_nthreads=omp_get_max_threads();
146 if (blas_cpu_number == 1
149 if (openmp_nthreads == 1 || omp_in_parallel()
154 if (blas_cpu_number != openmp_nthreads) {
155 goto_set_num_threads(openmp_nthreads);
159 return blas_cpu_number;
163 static __inline void blas_queue_init(blas_queue_t *queue){
170 int blas_thread_init(void);
171 int BLASFUNC(blas_thread_shutdown)(void);
172 int exec_blas(BLASLONG, blas_queue_t *);
173 int exec_blas_async(BLASLONG, blas_queue_t *);
174 int exec_blas_async_wait(BLASLONG, blas_queue_t *);
177 int exec_blas_async(BLASLONG num_cpu, blas_param_t *param, pthread_t *);
178 int exec_blas_async_wait(BLASLONG num_cpu, pthread_t *blas_threads);
179 int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
184 int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
185 void *a, BLASLONG lda,
186 void *b, BLASLONG ldb,
187 void *c, BLASLONG ldc, int (*function)(), int threads);
189 int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
191 int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
193 int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
195 int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
197 int trsm_thread(int mode, BLASLONG m, BLASLONG n,
198 double alpha_r, double alpha_i,
199 void *a, BLASLONG lda,
200 void *c, BLASLONG ldc, int (*function)(), void *buffer);
202 int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
204 int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
205 void *offsetA, BLASLONG lda,
206 void *offsetB, BLASLONG jb,
207 void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
209 #endif /* ENDIF ASSEMBLER */