1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
42 #ifdef FUNCTION_PROFILE
43 #include "functable.h"
/* ERROR_NAME: routine-name string that this file hands to xerbla, together
   with the info code and sizeof(ERROR_NAME).  The definitions below are
   alternatives; the preprocessor conditionals that pick one of them are not
   visible in this excerpt (presumably precision, real/complex, HEMM and 3M
   build flags -- TODO confirm against the full file). */
48 #define ERROR_NAME "QSYMM "
50 #define ERROR_NAME "DSYMM "
52 #define ERROR_NAME "SSYMM "
58 #define ERROR_NAME "XSYMM "
60 #define ERROR_NAME "ZSYMM "
62 #define ERROR_NAME "CSYMM "
66 #define ERROR_NAME "XHEMM "
68 #define ERROR_NAME "ZHEMM "
70 #define ERROR_NAME "CHEMM "
76 #define ERROR_NAME "XSYMM3M "
78 #define ERROR_NAME "ZSYMM3M "
80 #define ERROR_NAME "CSYMM3M "
84 #define ERROR_NAME "XHEMM3M "
86 #define ERROR_NAME "ZHEMM3M "
88 #define ERROR_NAME "CHEMM3M "
/* MODE: flag word formed by OR-ing one of BLAS_XDOUBLE / BLAS_DOUBLE /
   BLAS_SINGLE with BLAS_REAL or BLAS_COMPLEX.  It is passed as the first
   argument to gemm_thread_mn and GEMM_THREAD further down in this file.
   Only one definition is meant to be active; apart from the
   "#elif defined(DOUBLE)" line, the selecting conditionals are not visible
   in this excerpt. */
98 #define MODE (BLAS_XDOUBLE | BLAS_REAL)
100 #define MODE (BLAS_DOUBLE | BLAS_REAL)
102 #define MODE (BLAS_SINGLE | BLAS_REAL)
106 #define MODE (BLAS_XDOUBLE | BLAS_COMPLEX)
107 #elif defined(DOUBLE)
108 #define MODE (BLAS_DOUBLE | BLAS_COMPLEX)
110 #define MODE (BLAS_SINGLE | BLAS_COMPLEX)
/* Dispatch table of compute drivers, all sharing the signature
   int (*)(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG).

   Entries 0..3 are selected with (side << 1) | uplo.  The interface code sets
   side = 0 for left / 1 for right and uplo = 0 for upper / 1 for lower, which
   gives the order LU, LL, RU, RL.  When SMP is defined and
   USE_SIMPLE_THREADED_LEVEL3 is not, four *_THREAD_* entries follow and are
   selected with 4 | (side << 1) | uplo.

   The SYMM / HEMM / SYMM3M / HEMM3M rows are alternatives; the conditionals
   choosing among them are not visible in this excerpt (TODO confirm against
   the full file). */
115 static int (*symm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
118 SYMM_LU, SYMM_LL, SYMM_RU, SYMM_RL,
119 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
120 SYMM_THREAD_LU, SYMM_THREAD_LL, SYMM_THREAD_RU, SYMM_THREAD_RL,
123 HEMM_LU, HEMM_LL, HEMM_RU, HEMM_RL,
124 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
125 HEMM_THREAD_LU, HEMM_THREAD_LL, HEMM_THREAD_RU, HEMM_THREAD_RL,
130 SYMM3M_LU, SYMM3M_LL, SYMM3M_RU, SYMM3M_RL,
131 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
132 SYMM3M_THREAD_LU, SYMM3M_THREAD_LL, SYMM3M_THREAD_RU, SYMM3M_THREAD_RL,
135 HEMM3M_LU, HEMM3M_LL, HEMM3M_RU, HEMM3M_RL,
136 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
137 HEMM3M_THREAD_LU, HEMM3M_THREAD_LL, HEMM3M_THREAD_RU, HEMM3M_THREAD_RL,
145 void NAME(char *SIDE, char *UPLO,
146 blasint *M, blasint *N,
147 FLOAT *alpha, FLOAT *a, blasint *ldA,
148 FLOAT *b, blasint *ldB,
149 FLOAT *beta, FLOAT *c, blasint *ldC){
151 char side_arg = *SIDE;
152 char uplo_arg = *UPLO;
159 #if defined(SMP) && !defined(NO_AFFINITY)
169 args.alpha = (void *)alpha;
170 args.beta = (void *)beta;
178 if (side_arg == 'L') side = 0;
179 if (side_arg == 'R') side = 1;
181 if (uplo_arg == 'U') uplo = 0;
182 if (uplo_arg == 'L') uplo = 1;
192 if (args.ldc < MAX(1, args.m)) info = 12;
201 if (args.ldb < MAX(1, args.m)) info = 9;
202 if (args.lda < MAX(1, args.m)) info = 7;
211 if (args.lda < MAX(1, args.m)) info = 9;
212 if (args.ldb < MAX(1, args.n)) info = 7;
215 if (args.n < 0) info = 4;
216 if (args.m < 0) info = 3;
217 if (uplo < 0) info = 2;
218 if (side < 0) info = 1;
221 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
227 void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
228 blasint m, blasint n,
231 FLOAT *a, blasint lda,
232 FLOAT *b, blasint ldb,
234 FLOAT *c, blasint ldc) {
237 void *va, blasint lda,
238 void *vb, blasint ldb,
240 void *vc, blasint ldc) {
241 FLOAT *alpha = (FLOAT*) valpha;
242 FLOAT *beta = (FLOAT*) vbeta;
243 FLOAT *a = (FLOAT*) va;
244 FLOAT *b = (FLOAT*) vb;
245 FLOAT *c = (FLOAT*) vc;
255 #if defined(SMP) && !defined(NO_AFFINITY)
262 args.alpha = (void *)&alpha;
263 args.beta = (void *)&beta;
265 args.alpha = (void *)alpha;
266 args.beta = (void *)beta;
276 if (order == CblasColMajor) {
277 if (Side == CblasLeft) side = 0;
278 if (Side == CblasRight) side = 1;
280 if (Uplo == CblasUpper) uplo = 0;
281 if (Uplo == CblasLower) uplo = 1;
288 if (args.ldc < MAX(1, args.m)) info = 12;
297 if (args.ldb < MAX(1, args.m)) info = 9;
298 if (args.lda < MAX(1, args.m)) info = 7;
307 if (args.lda < MAX(1, args.m)) info = 9;
308 if (args.ldb < MAX(1, args.n)) info = 7;
311 if (args.n < 0) info = 4;
312 if (args.m < 0) info = 3;
313 if (uplo < 0) info = 2;
314 if (side < 0) info = 1;
317 if (order == CblasRowMajor) {
318 if (Side == CblasLeft) side = 1;
319 if (Side == CblasRight) side = 0;
321 if (Uplo == CblasUpper) uplo = 1;
322 if (Uplo == CblasLower) uplo = 0;
329 if (args.ldc < MAX(1, args.m)) info = 12;
338 if (args.ldb < MAX(1, args.m)) info = 9;
339 if (args.lda < MAX(1, args.m)) info = 7;
348 if (args.lda < MAX(1, args.m)) info = 9;
349 if (args.ldb < MAX(1, args.n)) info = 7;
352 if (args.n < 0) info = 4;
353 if (args.m < 0) info = 3;
354 if (uplo < 0) info = 2;
355 if (side < 0) info = 1;
359 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
365 if (args.m == 0 || args.n == 0) return;
369 FUNCTION_PROFILE_START();
371 buffer = (FLOAT *)blas_memory_alloc(0);
373 sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
374 sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
378 args.nthreads = num_cpu_avail(3);
380 if (args.nthreads == 1) {
383 (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);
390 nodes = get_num_nodes();
394 args.nthreads /= nodes;
396 gemm_thread_mn(MODE, &args, NULL, NULL,
397 symm[4 | (side << 1) | uplo ], sa, sb, nodes);
402 #ifndef USE_SIMPLE_THREADED_LEVEL3
404 (symm[4 | (side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);
408 GEMM_THREAD(MODE, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads);
419 blas_memory_free(buffer);
421 FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
422 (!side)? args.m * (args.m / 2 + args.n) : args.n * (args.m + args.n / 2),
423 (!side)? 2 * args.m * args.m * args.n : 2 * args.m * args.n * args.n);