1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
42 #ifdef FUNCTION_PROFILE
43 #include "functable.h"
/* ERROR_NAME is the routine-name string handed to xerbla, together with the
 * offending argument index, by the entry points below. There is one
 * definition per routine name (SYRK and HERK variants).
 * NOTE(review): the preprocessor conditionals that select exactly one of
 * these definitions are not visible in this excerpt. */
48 #define ERROR_NAME "QSYRK "
50 #define ERROR_NAME "DSYRK "
52 #define ERROR_NAME "SSYRK "
57 #define ERROR_NAME "XSYRK "
59 #define ERROR_NAME "ZSYRK "
61 #define ERROR_NAME "CSYRK "
65 #define ERROR_NAME "XHERK "
67 #define ERROR_NAME "ZHERK "
69 #define ERROR_NAME "CHERK "
/* Table of kernel entry points used by the driver. The driver selects an
 * entry with (uplo << 1) | trans, and adds 4 to reach the *_THREAD_* entries.
 * Those threaded entries are compiled in only when SMP is defined and
 * USE_SIMPLE_THREADED_LEVEL3 is not.
 * NOTE(review): the conditionals choosing between the SYRK_* and HERK_*
 * initializer lists, the matching #endif lines and the closing "};" are not
 * visible in this excerpt. */
74 static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
/* Slots 0..3: single-call SYRK kernels. */
76 SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
77 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
/* Slots 4..7: threaded SYRK kernels. */
78 SYRK_THREAD_UN, SYRK_THREAD_UC, SYRK_THREAD_LN, SYRK_THREAD_LC,
/* Slots 0..3: single-call HERK kernels (alternative initializer list). */
81 HERK_UN, HERK_UC, HERK_LN, HERK_LC,
82 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
/* Slots 4..7: threaded HERK kernels. */
83 HERK_THREAD_UN, HERK_THREAD_UC, HERK_THREAD_LN, HERK_THREAD_LC,
/* Entry point that receives every argument by pointer: the UPLO and TRANS
 * option characters, the sizes N and K, the scalars alpha and beta, and the
 * matrices a and c with their leading dimensions ldA and ldC.
 * NOTE(review): local declarations, the args.n/k/lda/ldc setup and several
 * #if/#else/#endif lines are not visible in this excerpt. */
90 void NAME(char *UPLO, char *TRANS,
91 blasint *N, blasint *K,
92 FLOAT *alpha, FLOAT *a, blasint *ldA,
93 FLOAT *beta, FLOAT *c, blasint *ldC){
/* Local copies of the option characters. */
95 char uplo_arg = *UPLO;
96 char trans_arg = *TRANS;
/* With USE_SIMPLE_THREADED_LEVEL3, build a mode word from a precision flag
 * (XDOUBLE / DOUBLE / SINGLE) and a REAL or COMPLEX flag. Only one of the
 * initializations below is meant to survive preprocessing. */
104 #ifdef USE_SIMPLE_THREADED_LEVEL3
107 int mode = BLAS_XDOUBLE | BLAS_REAL;
108 #elif defined(DOUBLE)
109 int mode = BLAS_DOUBLE | BLAS_REAL;
111 int mode = BLAS_SINGLE | BLAS_REAL;
115 int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
116 #elif defined(DOUBLE)
117 int mode = BLAS_DOUBLE | BLAS_COMPLEX;
119 int mode = BLAS_SINGLE | BLAS_COMPLEX;
/* The scalars are already pointers here, so they are stored as-is. */
141 args.alpha = (void *)alpha;
142 args.beta = (void *)beta;
/* Map the UPLO character to a table index: 'U' -> 0, 'L' -> 1. */
150 if (uplo_arg == 'U') uplo = 0;
151 if (uplo_arg == 'L') uplo = 1;
/* Map the TRANS character to 0 or 1. The three groups below accept
 * different letter sets ('N','T','C' / 'N','C' / 'N','T').
 * NOTE(review): they presumably sit in separate preprocessor branches whose
 * directives are not visible in this excerpt. */
155 if (trans_arg == 'N') trans = 0;
156 if (trans_arg == 'T') trans = 1;
157 if (trans_arg == 'C') trans = 1;
160 if (trans_arg == 'N') trans = 0;
161 if (trans_arg == 'C') trans = 1;
163 if (trans_arg == 'N') trans = 0;
164 if (trans_arg == 'T') trans = 1;
/* When the trans bit is set, the row count of A used in the lda check is k. */
170 if (trans & 1) nrowa = args.k;
/* The checks run from the highest argument position to the lowest, so later
 * assignments overwrite earlier ones and info ends up holding the
 * lowest-numbered failing argument. */
174 if (args.ldc < MAX(1,args.n)) info = 10;
175 if (args.lda < MAX(1,nrowa)) info = 7;
176 if (args.k < 0) info = 4;
177 if (args.n < 0) info = 3;
178 if (trans < 0) info = 2;
179 if (uplo < 0) info = 1;
/* Report the routine name and argument index through xerbla. */
182 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
/* CBLAS-style entry point: takes the storage order plus Uplo/Trans enums,
 * with the sizes n and k and the leading dimensions lda and ldc by value.
 * Depending on COMPLEX/HEMM, the scalars and matrices arrive either as typed
 * values/pointers or as void pointers (valpha, vbeta, va, vc) that are cast
 * to FLOAT* below.
 * NOTE(review): several parameter lines, local declarations, the
 * args.n/k/lda/ldc setup and the #else/#endif partners of the conditionals
 * are not visible in this excerpt. */
188 void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
189 blasint n, blasint k,
190 #if !defined(COMPLEX) || defined(HEMM)
195 #if !defined(COMPLEX)
196 FLOAT *a, blasint lda,
198 void *va, blasint lda,
200 #if !defined(COMPLEX) || defined(HEMM)
205 #if !defined(COMPLEX)
206 FLOAT *c, blasint ldc) {
208 void *vc, blasint ldc) {
/* Typed views of the void-pointer arguments. */
213 FLOAT* alpha = (FLOAT*) valpha;
214 FLOAT* beta = (FLOAT*) vbeta;
216 FLOAT* a = (FLOAT*) va;
217 FLOAT* c = (FLOAT*) vc;
/* With USE_SIMPLE_THREADED_LEVEL3, build a mode word from a precision flag
 * (XDOUBLE / DOUBLE / SINGLE) and a REAL or COMPLEX flag. Only one of the
 * initializations below is meant to survive preprocessing. */
228 #ifdef USE_SIMPLE_THREADED_LEVEL3
231 int mode = BLAS_XDOUBLE | BLAS_REAL;
232 #elif defined(DOUBLE)
233 int mode = BLAS_DOUBLE | BLAS_REAL;
235 int mode = BLAS_SINGLE | BLAS_REAL;
239 int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
240 #elif defined(DOUBLE)
241 int mode = BLAS_DOUBLE | BLAS_COMPLEX;
243 int mode = BLAS_SINGLE | BLAS_COMPLEX;
/* Real (or HEMM) builds receive alpha/beta as scalars, so their addresses
 * are stored. The text previously read "(void *)" followed by a Greek
 * letter: the "&alpha;" / "&beta;" sequences had been decoded as HTML
 * entities. The address-of operator and semicolon are restored here. */
260 #if !defined(COMPLEX) || defined(HEMM)
261 args.alpha = (void *)&alpha;
262 args.beta = (void *)&beta;
/* Otherwise alpha/beta are already pointers and are stored as-is. */
264 args.alpha = (void *)alpha;
265 args.beta = (void *)beta;
/* Column-major: Upper -> 0, Lower -> 1; (Conj)NoTrans -> 0, (Conj)Trans -> 1.
 * NOTE(review): the Trans mappings are split across preprocessor branches
 * whose directives are not visible in this excerpt. */
272 if (order == CblasColMajor) {
273 if (Uplo == CblasUpper) uplo = 0;
274 if (Uplo == CblasLower) uplo = 1;
276 if (Trans == CblasNoTrans) trans = 0;
278 if (Trans == CblasTrans) trans = 1;
279 if (Trans == CblasConjNoTrans) trans = 0;
280 if (Trans == CblasConjTrans) trans = 1;
282 if (Trans == CblasTrans) trans = 1;
284 if (Trans == CblasConjTrans) trans = 1;
/* When the trans bit is set, the row count of A used in the lda check is k. */
290 if (trans & 1) nrowa = args.k;
/* Checks run from the highest argument position to the lowest, so info ends
 * up holding the lowest-numbered failing argument. */
292 if (args.ldc < MAX(1,args.n)) info = 10;
293 if (args.lda < MAX(1,nrowa)) info = 7;
294 if (args.k < 0) info = 4;
295 if (args.n < 0) info = 3;
296 if (trans < 0) info = 2;
297 if (uplo < 0) info = 1;
/* Row-major: both mappings are inverted relative to the column-major case
 * (Upper -> 1, Lower -> 0; NoTrans -> 1, Trans -> 0). */
300 if (order == CblasRowMajor) {
301 if (Uplo == CblasUpper) uplo = 1;
302 if (Uplo == CblasLower) uplo = 0;
304 if (Trans == CblasNoTrans) trans = 1;
306 if (Trans == CblasTrans) trans = 0;
307 if (Trans == CblasConjNoTrans) trans = 1;
308 if (Trans == CblasConjTrans) trans = 0;
310 if (Trans == CblasTrans) trans = 0;
312 if (Trans == CblasConjTrans) trans = 0;
/* Same nrowa selection and argument checks as in the column-major case. */
318 if (trans & 1) nrowa = args.k;
320 if (args.ldc < MAX(1,args.n)) info = 10;
321 if (args.lda < MAX(1,nrowa)) info = 7;
322 if (args.k < 0) info = 4;
323 if (args.n < 0) info = 3;
324 if (trans < 0) info = 2;
325 if (uplo < 0) info = 1;
/* Report the routine name and argument index through xerbla. */
329 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
/* Driver tail: set up the work buffers, dispatch to a kernel from the syrk
 * table, then release the buffer.
 * NOTE(review): the #else/#endif partners of the conditionals and the
 * closing braces are not visible in this excerpt. */
/* Nothing to do for an empty matrix. */
335 if (args.n == 0) return;
339 FUNCTION_PROFILE_START();
/* Obtain the work buffer. NOTE(review): no check of the returned pointer is
 * visible here; confirm how blas_memory_alloc reports failure. */
341 buffer = (FLOAT *)blas_memory_alloc(0);
/* sa starts GEMM_OFFSET_A bytes into the buffer. sb follows sa by
 * GEMM_P * GEMM_Q * COMPSIZE * SIZE bytes, adjusted with GEMM_ALIGN
 * (presumably an alignment mask; confirm), plus GEMM_OFFSET_B. */
343 sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
344 sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
/* With USE_SIMPLE_THREADED_LEVEL3, add transpose flags and the uplo value
 * (shifted by BLAS_UPLO_SHIFT) to mode. NOTE(review): the condition choosing
 * between the two transpose-flag lines is not visible in this excerpt. */
347 #ifdef USE_SIMPLE_THREADED_LEVEL3
349 mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
351 mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
353 mode |= (uplo << BLAS_UPLO_SHIFT);
/* Thread count as reported by num_cpu_avail(3). */
357 args.nthreads = num_cpu_avail(3);
/* One thread: call the kernel at table index (uplo << 1) | trans directly. */
359 if (args.nthreads == 1) {
362 (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
/* Without USE_SIMPLE_THREADED_LEVEL3, call the table entry at offset 4 (the
 * *_THREAD_* kernels). The other line hands the table entry at
 * (uplo << 1) | trans to syrk_thread along with mode and the thread count. */
368 #ifndef USE_SIMPLE_THREADED_LEVEL3
370 (syrk[4 | (uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
374 syrk_thread(mode, &args, NULL, NULL, syrk[(uplo << 1) | trans ], sa, sb, args.nthreads);
/* Release the work buffer obtained above. */
381 blas_memory_free(buffer);
383 FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.k + args.n * args.n / 2, args.n * args.n * args.k);