1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
42 #ifdef FUNCTION_PROFILE
43 #include "functable.h"
/*
 * ERROR_NAME is the routine name handed to xerbla: it is the first argument
 * of the BLASFUNC(xerbla) calls in this file, and sizeof(ERROR_NAME) is
 * passed as the length argument.
 * NOTE(review): the preprocessor conditionals that presumably select exactly
 * one of these definitions per build are not visible in this excerpt -
 * confirm against the complete file.
 */
/* SYR2K names. */
48 #define ERROR_NAME "QSYR2K"
50 #define ERROR_NAME "DSYR2K"
52 #define ERROR_NAME "SSYR2K"
57 #define ERROR_NAME "XSYR2K"
59 #define ERROR_NAME "ZSYR2K"
61 #define ERROR_NAME "CSYR2K"
/* HER2K names. */
65 #define ERROR_NAME "XHER2K"
67 #define ERROR_NAME "ZHER2K"
69 #define ERROR_NAME "CHER2K"
/*
 * Table of rank-2k kernel entry points. The call sites in this file index it
 * with (uplo << 1) | trans, so the entries are ordered
 *   [0] upper / no-trans, [1] upper / trans,
 *   [2] lower / no-trans, [3] lower / trans
 * (uplo is set to 0 for 'U' and 1 for 'L'; trans to 0 for 'N' and 1 for
 * 'T' or 'C').
 * NOTE(review): the SYR2K_* and HER2K_* rows look like alternatives chosen by
 * a preprocessor conditional that is not visible here, and the closing "};"
 * is also missing from this excerpt.
 */
74 static int (*syr2k[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
76 SYR2K_UN, SYR2K_UC, SYR2K_LN, SYR2K_LC,
78 HER2K_UN, HER2K_UC, HER2K_LN, HER2K_LC,
/*
 * Fortran-style entry point: every argument is passed by pointer.
 *
 * Steps visible in this excerpt: read the UPLO/TRANS characters, pick the
 * BLAS_* mode flags, store alpha/beta in args, translate UPLO/TRANS into the
 * integer selectors uplo/trans, run the argument checks, and call xerbla with
 * ERROR_NAME and info.
 *
 * NOTE(review): lines are missing from this excerpt (local declarations,
 * preprocessor branches, the remaining args.* assignments, the guard around
 * the xerbla call, the closing brace). The comments below describe only what
 * is shown.
 */
84 void NAME(char *UPLO, char *TRANS,
85 blasint *N, blasint *K,
86 FLOAT *alpha, FLOAT *a, blasint *ldA,
87 FLOAT *b, blasint *ldB,
88 FLOAT *beta, FLOAT *c, blasint *ldC){
90 char uplo_arg = *UPLO;
91 char trans_arg = *TRANS;
/*
 * Alternative initialisers for mode: a precision flag (BLAS_XDOUBLE,
 * BLAS_DOUBLE or BLAS_SINGLE) OR'ed with BLAS_REAL ...
 * NOTE(review): only the "#elif defined(DOUBLE)" directives survive here;
 * the surrounding #if/#else/#endif lines are presumably missing.
 */
101 int mode = BLAS_XDOUBLE | BLAS_REAL;
102 #elif defined(DOUBLE)
103 int mode = BLAS_DOUBLE | BLAS_REAL;
105 int mode = BLAS_SINGLE | BLAS_REAL;
/* ... or with BLAS_COMPLEX. */
109 int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
110 #elif defined(DOUBLE)
111 int mode = BLAS_DOUBLE | BLAS_COMPLEX;
113 int mode = BLAS_SINGLE | BLAS_COMPLEX;
/* alpha and beta arrive as pointers and are stored as-is. */
136 args.alpha = (void *)alpha;
137 args.beta = (void *)beta;
/* UPLO selector: 'U' -> 0, 'L' -> 1. */
145 if (uplo_arg == 'U') uplo = 0;
146 if (uplo_arg == 'L') uplo = 1;
/*
 * TRANS selector. Three mappings follow: {N,T,C}, {N,C} and {N,T}.
 * NOTE(review): presumably one per preprocessor branch (not visible).
 */
149 if (trans_arg == 'N') trans = 0;
150 if (trans_arg == 'T') trans = 1;
151 if (trans_arg == 'C') trans = 1;
154 if (trans_arg == 'N') trans = 0;
155 if (trans_arg == 'C') trans = 1;
157 if (trans_arg == 'N') trans = 0;
158 if (trans_arg == 'T') trans = 1;
/*
 * When bit 0 of trans is set, nrowa becomes k.
 * NOTE(review): the default assignment of nrowa is not visible - confirm.
 */
165 if (trans & 1) nrowa = args.k;
/*
 * Argument checks run from the highest argument position (ldC = 12) down to
 * the lowest (UPLO = 1). Each later assignment overwrites info, so the value
 * left in info belongs to the lowest-numbered check that fired. The numbers
 * match the positions of ldC, ldB, ldA, K, N, TRANS and UPLO in this
 * function's parameter list.
 */
169 if (args.ldc < MAX(1,args.n)) info = 12;
170 if (args.ldb < MAX(1,nrowa)) info = 9;
171 if (args.lda < MAX(1,nrowa)) info = 7;
172 if (args.k < 0) info = 4;
173 if (args.n < 0) info = 3;
174 if (trans < 0) info = 2;
175 if (uplo < 0) info = 1;
/* sizeof on the string literal counts the terminating NUL as well. */
178 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
/*
 * CBLAS-style entry point taking the storage order and the Uplo/Trans
 * enumerators, followed by the dispatch code visible at the end of this
 * excerpt.
 *
 * Steps visible here: cast the void* operands to FLOAT*, pick the BLAS_*
 * mode flags, store alpha/beta in args, translate (order, Uplo, Trans) into
 * the integer selectors uplo/trans, run the argument checks, call xerbla,
 * return early when n == 0, carve the sa/sb work areas out of a buffer from
 * blas_memory_alloc, invoke the kernel from the syr2k table (directly or
 * through syrk_thread) and release the buffer.
 *
 * NOTE(review): lines are missing from this excerpt (parts of the parameter
 * list, local declarations, preprocessor branches, else-branches, closing
 * braces). The comments below describe only what is shown.
 */
184 void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
185 blasint n, blasint k,
188 FLOAT *a, blasint lda,
189 FLOAT *b, blasint ldb,
192 void *va, blasint lda,
193 void *vb, blasint ldb,
195 #if !defined(COMPLEX) || defined(HEMM)
/* Typed views of the void* arguments. */
208 FLOAT* alpha = (FLOAT*) valpha;
210 FLOAT* beta = (FLOAT*) vbeta;
212 FLOAT* a = (FLOAT*) va;
213 FLOAT* b = (FLOAT*) vb;
214 FLOAT* c = (FLOAT*) vc;
/*
 * Alternative initialisers for mode: a precision flag OR'ed with BLAS_REAL
 * or BLAS_COMPLEX.
 * NOTE(review): only the "#elif defined(DOUBLE)" directives survive here.
 */
231 int mode = BLAS_XDOUBLE | BLAS_REAL;
232 #elif defined(DOUBLE)
233 int mode = BLAS_DOUBLE | BLAS_REAL;
235 int mode = BLAS_SINGLE | BLAS_REAL;
239 int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
240 #elif defined(DOUBLE)
241 int mode = BLAS_DOUBLE | BLAS_COMPLEX;
243 int mode = BLAS_SINGLE | BLAS_COMPLEX;
/*
 * Two alternatives for each scalar: its address, or the pointer itself.
 * The address form had been corrupted into a Greek letter by HTML-entity
 * decoding ("&alpha;" / "&beta;"), which also swallowed the semicolon.
 * NOTE(review): presumably the address form belongs to the variant that
 * receives the scalar by value - confirm against the parameter list.
 */
262 args.alpha = (void *)&alpha;
264 args.alpha = (void *)alpha;
267 #if !defined(COMPLEX) || defined(HEMM)
268 args.beta = (void *)&beta;
270 args.beta = (void *)beta;
/* Column-major: Upper -> 0, Lower -> 1; NoTrans/ConjNoTrans -> 0,
 * Trans/ConjTrans -> 1. */
277 if (order == CblasColMajor) {
278 if (Uplo == CblasUpper) uplo = 0;
279 if (Uplo == CblasLower) uplo = 1;
281 if (Trans == CblasNoTrans) trans = 0;
283 if (Trans == CblasTrans) trans = 1;
284 if (Trans == CblasConjNoTrans) trans = 0;
285 if (Trans == CblasConjTrans) trans = 1;
287 if (Trans == CblasTrans) trans = 1;
289 if (Trans == CblasConjTrans) trans = 1;
/* When bit 0 of trans is set, nrowa becomes k. */
295 if (trans & 1) nrowa = args.k;
/*
 * Checks run from the highest info code down to the lowest; later
 * assignments overwrite info, so the lowest-numbered failing check wins.
 * The codes are the same ones used by NAME.
 */
297 if (args.ldc < MAX(1,args.n)) info = 12;
298 if (args.ldb < MAX(1,nrowa)) info = 9;
299 if (args.lda < MAX(1,nrowa)) info = 7;
300 if (args.k < 0) info = 4;
301 if (args.n < 0) info = 3;
302 if (trans < 0) info = 2;
303 if (uplo < 0) info = 1;
/*
 * Row-major: both selectors are swapped relative to the column-major
 * mapping (Upper -> 1, Lower -> 0; NoTrans -> 1, Trans -> 0).
 */
306 if (order == CblasRowMajor) {
/*
 * CAlpha is alpha with the sign of its second component flipped.
 * NOTE(review): presumably the complex conjugate for the HER2K variant;
 * the guarding conditional is not visible.
 */
309 CAlpha[0] = alpha[0];
310 CAlpha[1] = -alpha[1];
312 args.alpha = (void *)CAlpha;
315 if (Uplo == CblasUpper) uplo = 1;
316 if (Uplo == CblasLower) uplo = 0;
318 if (Trans == CblasNoTrans) trans = 1;
320 if (Trans == CblasTrans) trans = 0;
321 if (Trans == CblasConjNoTrans) trans = 1;
322 if (Trans == CblasConjTrans) trans = 0;
324 if (Trans == CblasTrans) trans = 0;
326 if (Trans == CblasConjTrans) trans = 0;
332 if (trans & 1) nrowa = args.k;
334 if (args.ldc < MAX(1,args.n)) info = 12;
335 if (args.ldb < MAX(1,nrowa)) info = 9;
336 if (args.lda < MAX(1,nrowa)) info = 7;
337 if (args.k < 0) info = 4;
338 if (args.n < 0) info = 3;
339 if (trans < 0) info = 2;
340 if (uplo < 0) info = 1;
/* sizeof on the string literal counts the terminating NUL as well. */
344 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
/* Nothing to do for an empty matrix. */
350 if (args.n == 0) return;
354 FUNCTION_PROFILE_START();
/*
 * One buffer is obtained and split in two: sa starts GEMM_OFFSET_A bytes
 * into it; sb starts after sa by GEMM_P * GEMM_Q * COMPSIZE * SIZE bytes
 * rounded with the GEMM_ALIGN mask, plus GEMM_OFFSET_B.
 */
356 buffer = (FLOAT *)blas_memory_alloc(0);
358 sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
359 sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
/*
 * Transposition flags and the uplo bit are folded into mode.
 * NOTE(review): the condition choosing between the two flag sets is not
 * visible; presumably it depends on trans.
 */
363 mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
365 mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
368 mode |= (uplo << BLAS_UPLO_SHIFT);
371 args.nthreads = num_cpu_avail(3);
/* Single thread: call the selected kernel directly. */
373 if (args.nthreads == 1) {
376 (syr2k[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
/* NOTE(review): presumably the else-branch - hand the same kernel to
 * syrk_thread together with the thread count. */
382 syrk_thread(mode, &args, NULL, NULL, syr2k[(uplo << 1) | trans ], sa, sb, args.nthreads);
387 blas_memory_free(buffer);
389 FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, 2 * args.n * args.k + args.n * args.n, 2 * args.n * args.n * args.k);