Merge pull request #2193 from martin-frbg/makeutest
[platform/upstream/openblas.git] / kernel / setparam-ref.c
1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include <string.h>
41 #include "common.h"
42
43 #ifdef BUILD_KERNEL
44 #include "kernelTS.h"
45 #endif
46
47 #undef DEBUG
48
/* Forward declaration: the table below stores the address of init_parameter()
 * before any of its (architecture-specific) definitions appear in this file. */
49 static void init_parameter(void);
50
/*
 * Per-target parameter / kernel table.
 *
 * TABLE_NAME is a macro supplied from outside this file; it names the
 * gotoblas_t object being defined here.
 *
 * The initializer is positional (no designated initializers), so every
 * entry has to stay in exactly the order of the members of gotoblas_t,
 * which is declared outside this file.  Wherever an #if/#else pair appears,
 * both branches supply the same number of entries (real symbols on one side,
 * alternative symbols or 0/NULL on the other) so that later entries do not
 * shift.  The EXPRECISION sections have no #else branch and are left out
 * entirely when the macro is undefined.
 *
 * Recurring pattern: "#if xGEMM_DEFAULT_UNROLL_M != xGEMM_DEFAULT_UNROLL_N"
 * selects the i*-named copy routines for the first group of slots when the
 * two unroll factors differ; when they are equal the o*-named routines are
 * listed for both groups.
 *
 * NOTE(review): the "TS"-suffixed identifiers are presumably mapped to
 * target-specific symbol names via kernelTS.h / the build system -- confirm.
 * NOTE(review): each "0, 0, 0" triple presumably fills the <x>gemm_p/q/r
 * members that init_parameter() assigns at run time -- confirm against the
 * gotoblas_t declaration.
 */
51 gotoblas_t TABLE_NAME = {
52   DTB_DEFAULT_ENTRIES ,
53
54   GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
55
  /* ---- single precision real (s*) entries ---- */
56   0, 0, 0,
57   SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
58 #ifdef SGEMM_DEFAULT_UNROLL_MN
59  SGEMM_DEFAULT_UNROLL_MN,
60 #else
61  MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
62 #endif
63
64
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
65 #ifdef HAVE_EXCLUSIVE_CACHE
66   1,
67 #else
68   0,
69 #endif
70
71   samax_kTS,  samin_kTS,  smax_kTS,  smin_kTS,
72   isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
73   snrm2_kTS,  sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
74   dsdot_kTS,
75   srot_kTS,   saxpy_kTS,  sscal_kTS, sswap_kTS,
76   sgemv_nTS,  sgemv_tTS, sger_kTS,
77   ssymv_LTS, ssymv_UTS,
78
79   sgemm_kernelTS, sgemm_betaTS,
80 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
81   sgemm_incopyTS, sgemm_itcopyTS,
82 #else
83   sgemm_oncopyTS, sgemm_otcopyTS,
84 #endif
85   sgemm_oncopyTS, sgemm_otcopyTS,
86   strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
87 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
88   strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
89   strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
90 #else
91   strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
92   strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
93 #endif
94   strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
95   strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
96   strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
97 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
98   strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
99   strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
100 #else
101   strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
102   strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
103 #endif
104   strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
105   strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
106 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
107   ssymm_iutcopyTS, ssymm_iltcopyTS,
108 #else
109   ssymm_outcopyTS, ssymm_oltcopyTS,
110 #endif
111   ssymm_outcopyTS, ssymm_oltcopyTS,
112
  /* Two LAPACK helper slots; NULL placeholders keep the layout when NO_LAPACK is set. */
113 #ifndef NO_LAPACK
114   sneg_tcopyTS, slaswp_ncopyTS,
115 #else
116   NULL,NULL,
117 #endif
118
  /* ---- double precision real (d*) entries ---- */
119   0, 0, 0,
120   DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
121 #ifdef DGEMM_DEFAULT_UNROLL_MN
122  DGEMM_DEFAULT_UNROLL_MN,
123 #else
124  MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
125 #endif
126
127   damax_kTS,  damin_kTS,  dmax_kTS,  dmin_kTS,
128   idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
129   dnrm2_kTS,  dasum_kTS,  dsum_kTS, dcopy_kTS, ddot_kTS,
130   drot_kTS,   daxpy_kTS,  dscal_kTS, dswap_kTS,
131   dgemv_nTS,  dgemv_tTS,  dger_kTS,
132   dsymv_LTS,  dsymv_UTS,
133
134   dgemm_kernelTS, dgemm_betaTS,
135 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
136   dgemm_incopyTS, dgemm_itcopyTS,
137 #else
138   dgemm_oncopyTS, dgemm_otcopyTS,
139 #endif
140   dgemm_oncopyTS, dgemm_otcopyTS,
141   dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
142 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
143   dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
144   dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
145 #else
146   dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
147   dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
148 #endif
149   dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
150   dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
151   dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
152 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
153   dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
154   dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
155 #else
156   dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
157   dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
158 #endif
159   dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
160   dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
161 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
162   dsymm_iutcopyTS, dsymm_iltcopyTS,
163 #else
164   dsymm_outcopyTS, dsymm_oltcopyTS,
165 #endif
166   dsymm_outcopyTS, dsymm_oltcopyTS,
167
168 #ifndef NO_LAPACK
169   dneg_tcopyTS, dlaswp_ncopyTS,
170 #else
171   NULL, NULL,
172 #endif
173
  /* ---- q* real entries, present only when EXPRECISION is defined ---- */
174 #ifdef EXPRECISION
175
176   0, 0, 0,
177   QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
178
179   qamax_kTS,  qamin_kTS,  qmax_kTS,  qmin_kTS,
180   iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
181   qnrm2_kTS,  qasum_kTS,  qsum_kTS, qcopy_kTS, qdot_kTS,
182   qrot_kTS,   qaxpy_kTS,  qscal_kTS, qswap_kTS,
183   qgemv_nTS,  qgemv_tTS,  qger_kTS,
184   qsymv_LTS,  qsymv_UTS,
185
186   qgemm_kernelTS, qgemm_betaTS,
187 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
188   qgemm_incopyTS, qgemm_itcopyTS,
189 #else
190   qgemm_oncopyTS, qgemm_otcopyTS,
191 #endif
192   qgemm_oncopyTS, qgemm_otcopyTS,
193   qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
194 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
195   qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
196   qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
197 #else
198   qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
199   qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
200 #endif
201   qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
202   qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
203   qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
204 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
205   qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
206   qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
207 #else
208   qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
209   qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
210 #endif
211   qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
212   qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
213 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
214   qsymm_iutcopyTS, qsymm_iltcopyTS,
215 #else
216   qsymm_outcopyTS, qsymm_oltcopyTS,
217 #endif
218   qsymm_outcopyTS, qsymm_oltcopyTS,
219
220 #ifndef NO_LAPACK
221   qneg_tcopyTS, qlaswp_ncopyTS,
222 #else
223   NULL, NULL,
224 #endif
225
226 #endif
227
  /* ---- single precision complex (c*) entries ---- */
228   0, 0, 0,
229   CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
230 #ifdef CGEMM_DEFAULT_UNROLL_MN
231  CGEMM_DEFAULT_UNROLL_MN,
232 #else
233  MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
234 #endif
235
236   camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
237   cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
238   cdotu_kTS, cdotc_kTS, csrot_kTS,
239   caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
240
241   cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
242   cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
243   cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
244   csymv_LTS, csymv_UTS,
245   chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
246
247   cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
248   cgemm_betaTS,
249
250 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
251   cgemm_incopyTS, cgemm_itcopyTS,
252 #else
253   cgemm_oncopyTS, cgemm_otcopyTS,
254 #endif
255   cgemm_oncopyTS, cgemm_otcopyTS,
256
257   ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
258   ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
259
260 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
261   ctrsm_iunucopyTS,  ctrsm_iunncopyTS,  ctrsm_iutucopyTS,  ctrsm_iutncopyTS,
262   ctrsm_ilnucopyTS,  ctrsm_ilnncopyTS,  ctrsm_iltucopyTS,  ctrsm_iltncopyTS,
263 #else
264   ctrsm_ounucopyTS,  ctrsm_ounncopyTS,  ctrsm_outucopyTS,  ctrsm_outncopyTS,
265   ctrsm_olnucopyTS,  ctrsm_olnncopyTS,  ctrsm_oltucopyTS,  ctrsm_oltncopyTS,
266 #endif
267   ctrsm_ounucopyTS,  ctrsm_ounncopyTS,  ctrsm_outucopyTS,  ctrsm_outncopyTS,
268   ctrsm_olnucopyTS,  ctrsm_olnncopyTS,  ctrsm_oltucopyTS,  ctrsm_oltncopyTS,
269
270   ctrmm_kernel_RNTS,  ctrmm_kernel_RTTS,  ctrmm_kernel_RRTS,  ctrmm_kernel_RCTS,
271   ctrmm_kernel_LNTS,  ctrmm_kernel_LTTS,  ctrmm_kernel_LRTS,  ctrmm_kernel_LCTS,
272
273 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
274   ctrmm_iunucopyTS,  ctrmm_iunncopyTS,  ctrmm_iutucopyTS,  ctrmm_iutncopyTS,
275   ctrmm_ilnucopyTS,  ctrmm_ilnncopyTS,  ctrmm_iltucopyTS,  ctrmm_iltncopyTS,
276 #else
277   ctrmm_ounucopyTS,  ctrmm_ounncopyTS,  ctrmm_outucopyTS,  ctrmm_outncopyTS,
278   ctrmm_olnucopyTS,  ctrmm_olnncopyTS,  ctrmm_oltucopyTS,  ctrmm_oltncopyTS,
279 #endif
280   ctrmm_ounucopyTS,  ctrmm_ounncopyTS,  ctrmm_outucopyTS,  ctrmm_outncopyTS,
281   ctrmm_olnucopyTS,  ctrmm_olnncopyTS,  ctrmm_oltucopyTS,  ctrmm_oltncopyTS,
282
283 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
284   csymm_iutcopyTS,  csymm_iltcopyTS,
285 #else
286   csymm_outcopyTS,  csymm_oltcopyTS,
287 #endif
288   csymm_outcopyTS,  csymm_oltcopyTS,
289 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
290   chemm_iutcopyTS,  chemm_iltcopyTS,
291 #else
292   chemm_outcopyTS,  chemm_oltcopyTS,
293 #endif
294   chemm_outcopyTS,  chemm_oltcopyTS,
295
  /* ---- cgemm3m entries: real symbols with USE_GEMM3M, otherwise the same
   *      number of 0/NULL placeholders ---- */
296   0, 0, 0,
297
298 #if defined(USE_GEMM3M)
299 #ifdef CGEMM3M_DEFAULT_UNROLL_M
300   CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
301 #else
302   SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
303 #endif
304
305
306   cgemm3m_kernelTS,
307
308   cgemm3m_incopybTS,  cgemm3m_incopyrTS,
309   cgemm3m_incopyiTS,  cgemm3m_itcopybTS,
310   cgemm3m_itcopyrTS,  cgemm3m_itcopyiTS,
311   cgemm3m_oncopybTS,  cgemm3m_oncopyrTS,
312   cgemm3m_oncopyiTS,  cgemm3m_otcopybTS,
313   cgemm3m_otcopyrTS,  cgemm3m_otcopyiTS,
314
315   csymm3m_iucopybTS,  csymm3m_ilcopybTS,
316   csymm3m_iucopyrTS,  csymm3m_ilcopyrTS,
317   csymm3m_iucopyiTS,  csymm3m_ilcopyiTS,
318   csymm3m_oucopybTS,  csymm3m_olcopybTS,
319   csymm3m_oucopyrTS,  csymm3m_olcopyrTS,
320   csymm3m_oucopyiTS,  csymm3m_olcopyiTS,
321
322   chemm3m_iucopybTS,  chemm3m_ilcopybTS,
323   chemm3m_iucopyrTS,  chemm3m_ilcopyrTS,
324   chemm3m_iucopyiTS,  chemm3m_ilcopyiTS,
325
326   chemm3m_oucopybTS,  chemm3m_olcopybTS,
327   chemm3m_oucopyrTS,  chemm3m_olcopyrTS,
328   chemm3m_oucopyiTS,  chemm3m_olcopyiTS,
329 #else
330   0, 0, 0,
331
332   NULL,
333
334   NULL, NULL,
335   NULL, NULL,
336   NULL, NULL,
337   NULL, NULL,
338   NULL, NULL,
339   NULL, NULL,
340
341   NULL, NULL,
342   NULL, NULL,
343   NULL, NULL,
344   NULL, NULL,
345   NULL, NULL,
346   NULL, NULL,
347
348   NULL, NULL,
349   NULL, NULL,
350   NULL, NULL,
351
352   NULL, NULL,
353   NULL, NULL,
354   NULL, NULL,
355 #endif
356
357 #ifndef NO_LAPACK
358   cneg_tcopyTS, claswp_ncopyTS,
359 #else
360   NULL, NULL,
361 #endif
362
  /* ---- double precision complex (z*) entries ---- */
363   0, 0, 0,
364   ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
365 #ifdef ZGEMM_DEFAULT_UNROLL_MN
366  ZGEMM_DEFAULT_UNROLL_MN,
367 #else
368  MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
369 #endif
370
371   zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
372   znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
373   zdotu_kTS, zdotc_kTS, zdrot_kTS,
374   zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
375
376   zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
377   zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
378   zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
379   zsymv_LTS, zsymv_UTS,
380   zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
381
382   zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
383   zgemm_betaTS,
384
385 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
386   zgemm_incopyTS, zgemm_itcopyTS,
387 #else
388   zgemm_oncopyTS, zgemm_otcopyTS,
389 #endif
390   zgemm_oncopyTS, zgemm_otcopyTS,
391
392   ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
393   ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
394
395 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
396   ztrsm_iunucopyTS,  ztrsm_iunncopyTS,  ztrsm_iutucopyTS,  ztrsm_iutncopyTS,
397   ztrsm_ilnucopyTS,  ztrsm_ilnncopyTS,  ztrsm_iltucopyTS,  ztrsm_iltncopyTS,
398 #else
399   ztrsm_ounucopyTS,  ztrsm_ounncopyTS,  ztrsm_outucopyTS,  ztrsm_outncopyTS,
400   ztrsm_olnucopyTS,  ztrsm_olnncopyTS,  ztrsm_oltucopyTS,  ztrsm_oltncopyTS,
401 #endif
402   ztrsm_ounucopyTS,  ztrsm_ounncopyTS,  ztrsm_outucopyTS,  ztrsm_outncopyTS,
403   ztrsm_olnucopyTS,  ztrsm_olnncopyTS,  ztrsm_oltucopyTS,  ztrsm_oltncopyTS,
404
405   ztrmm_kernel_RNTS,  ztrmm_kernel_RTTS,  ztrmm_kernel_RRTS,  ztrmm_kernel_RCTS,
406   ztrmm_kernel_LNTS,  ztrmm_kernel_LTTS,  ztrmm_kernel_LRTS,  ztrmm_kernel_LCTS,
407
408 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
409   ztrmm_iunucopyTS,  ztrmm_iunncopyTS,  ztrmm_iutucopyTS,  ztrmm_iutncopyTS,
410   ztrmm_ilnucopyTS,  ztrmm_ilnncopyTS,  ztrmm_iltucopyTS,  ztrmm_iltncopyTS,
411 #else
412   ztrmm_ounucopyTS,  ztrmm_ounncopyTS,  ztrmm_outucopyTS,  ztrmm_outncopyTS,
413   ztrmm_olnucopyTS,  ztrmm_olnncopyTS,  ztrmm_oltucopyTS,  ztrmm_oltncopyTS,
414 #endif
415   ztrmm_ounucopyTS,  ztrmm_ounncopyTS,  ztrmm_outucopyTS,  ztrmm_outncopyTS,
416   ztrmm_olnucopyTS,  ztrmm_olnncopyTS,  ztrmm_oltucopyTS,  ztrmm_oltncopyTS,
417
418 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
419   zsymm_iutcopyTS,  zsymm_iltcopyTS,
420 #else
421   zsymm_outcopyTS,  zsymm_oltcopyTS,
422 #endif
423   zsymm_outcopyTS,  zsymm_oltcopyTS,
424 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
425   zhemm_iutcopyTS,  zhemm_iltcopyTS,
426 #else
427   zhemm_outcopyTS,  zhemm_oltcopyTS,
428 #endif
429   zhemm_outcopyTS,  zhemm_oltcopyTS,
430
  /* ---- zgemm3m entries: real symbols with USE_GEMM3M, otherwise the same
   *      number of 0/NULL placeholders ---- */
431   0, 0, 0,
432 #if defined(USE_GEMM3M)
433 #ifdef ZGEMM3M_DEFAULT_UNROLL_M
434   ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
435 #else
436   DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
437 #endif
438
439
440   zgemm3m_kernelTS,
441
442   zgemm3m_incopybTS,  zgemm3m_incopyrTS,
443   zgemm3m_incopyiTS,  zgemm3m_itcopybTS,
444   zgemm3m_itcopyrTS,  zgemm3m_itcopyiTS,
445   zgemm3m_oncopybTS,  zgemm3m_oncopyrTS,
446   zgemm3m_oncopyiTS,  zgemm3m_otcopybTS,
447   zgemm3m_otcopyrTS,  zgemm3m_otcopyiTS,
448
449   zsymm3m_iucopybTS,  zsymm3m_ilcopybTS,
450   zsymm3m_iucopyrTS,  zsymm3m_ilcopyrTS,
451   zsymm3m_iucopyiTS,  zsymm3m_ilcopyiTS,
452   zsymm3m_oucopybTS,  zsymm3m_olcopybTS,
453   zsymm3m_oucopyrTS,  zsymm3m_olcopyrTS,
454   zsymm3m_oucopyiTS,  zsymm3m_olcopyiTS,
455
456   zhemm3m_iucopybTS,  zhemm3m_ilcopybTS,
457   zhemm3m_iucopyrTS,  zhemm3m_ilcopyrTS,
458   zhemm3m_iucopyiTS,  zhemm3m_ilcopyiTS,
459
460   zhemm3m_oucopybTS,  zhemm3m_olcopybTS,
461   zhemm3m_oucopyrTS,  zhemm3m_olcopyrTS,
462   zhemm3m_oucopyiTS,  zhemm3m_olcopyiTS,
463 #else
464   0, 0, 0,
465
466   NULL,
467
468   NULL, NULL,
469   NULL, NULL,
470   NULL, NULL,
471   NULL, NULL,
472   NULL, NULL,
473   NULL, NULL,
474
475   NULL, NULL,
476   NULL, NULL,
477   NULL, NULL,
478   NULL, NULL,
479   NULL, NULL,
480   NULL, NULL,
481
482   NULL, NULL,
483   NULL, NULL,
484   NULL, NULL,
485
486   NULL, NULL,
487   NULL, NULL,
488   NULL, NULL,
489 #endif
490
491 #ifndef NO_LAPACK
492   zneg_tcopyTS, zlaswp_ncopyTS,
493 #else
494   NULL, NULL,
495 #endif
496
  /* ---- x* complex entries (incl. xgemm3m), present only when EXPRECISION is defined ---- */
497 #ifdef EXPRECISION
498
499   0, 0, 0,
500   XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
501
502   xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
503   xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
504   xdotu_kTS, xdotc_kTS, xqrot_kTS,
505   xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
506
507   xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
508   xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
509   xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
510   xsymv_LTS, xsymv_UTS,
511   xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
512
513   xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
514   xgemm_betaTS,
515
516 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
517   xgemm_incopyTS, xgemm_itcopyTS,
518 #else
519   xgemm_oncopyTS, xgemm_otcopyTS,
520 #endif
521   xgemm_oncopyTS, xgemm_otcopyTS,
522
523   xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
524   xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
525
526 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
527   xtrsm_iunucopyTS,  xtrsm_iunncopyTS,  xtrsm_iutucopyTS,  xtrsm_iutncopyTS,
528   xtrsm_ilnucopyTS,  xtrsm_ilnncopyTS,  xtrsm_iltucopyTS,  xtrsm_iltncopyTS,
529 #else
530   xtrsm_ounucopyTS,  xtrsm_ounncopyTS,  xtrsm_outucopyTS,  xtrsm_outncopyTS,
531   xtrsm_olnucopyTS,  xtrsm_olnncopyTS,  xtrsm_oltucopyTS,  xtrsm_oltncopyTS,
532 #endif
533   xtrsm_ounucopyTS,  xtrsm_ounncopyTS,  xtrsm_outucopyTS,  xtrsm_outncopyTS,
534   xtrsm_olnucopyTS,  xtrsm_olnncopyTS,  xtrsm_oltucopyTS,  xtrsm_oltncopyTS,
535
536   xtrmm_kernel_RNTS,  xtrmm_kernel_RTTS,  xtrmm_kernel_RRTS,  xtrmm_kernel_RCTS,
537   xtrmm_kernel_LNTS,  xtrmm_kernel_LTTS,  xtrmm_kernel_LRTS,  xtrmm_kernel_LCTS,
538
539 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
540   xtrmm_iunucopyTS,  xtrmm_iunncopyTS,  xtrmm_iutucopyTS,  xtrmm_iutncopyTS,
541   xtrmm_ilnucopyTS,  xtrmm_ilnncopyTS,  xtrmm_iltucopyTS,  xtrmm_iltncopyTS,
542 #else
543   xtrmm_ounucopyTS,  xtrmm_ounncopyTS,  xtrmm_outucopyTS,  xtrmm_outncopyTS,
544   xtrmm_olnucopyTS,  xtrmm_olnncopyTS,  xtrmm_oltucopyTS,  xtrmm_oltncopyTS,
545 #endif
546   xtrmm_ounucopyTS,  xtrmm_ounncopyTS,  xtrmm_outucopyTS,  xtrmm_outncopyTS,
547   xtrmm_olnucopyTS,  xtrmm_olnncopyTS,  xtrmm_oltucopyTS,  xtrmm_oltncopyTS,
548
549 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
550   xsymm_iutcopyTS,  xsymm_iltcopyTS,
551 #else
552   xsymm_outcopyTS,  xsymm_oltcopyTS,
553 #endif
554   xsymm_outcopyTS,  xsymm_oltcopyTS,
555 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
556   xhemm_iutcopyTS,  xhemm_iltcopyTS,
557 #else
558   xhemm_outcopyTS,  xhemm_oltcopyTS,
559 #endif
560   xhemm_outcopyTS,  xhemm_oltcopyTS,
561
562   0, 0, 0,
563 #if defined(USE_GEMM3M)
564   QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
565
566   xgemm3m_kernelTS,
567
568   xgemm3m_incopybTS,  xgemm3m_incopyrTS,
569   xgemm3m_incopyiTS,  xgemm3m_itcopybTS,
570   xgemm3m_itcopyrTS,  xgemm3m_itcopyiTS,
571   xgemm3m_oncopybTS,  xgemm3m_oncopyrTS,
572   xgemm3m_oncopyiTS,  xgemm3m_otcopybTS,
573   xgemm3m_otcopyrTS,  xgemm3m_otcopyiTS,
574
575   xsymm3m_iucopybTS,  xsymm3m_ilcopybTS,
576   xsymm3m_iucopyrTS,  xsymm3m_ilcopyrTS,
577   xsymm3m_iucopyiTS,  xsymm3m_ilcopyiTS,
578   xsymm3m_oucopybTS,  xsymm3m_olcopybTS,
579   xsymm3m_oucopyrTS,  xsymm3m_olcopyrTS,
580   xsymm3m_oucopyiTS,  xsymm3m_olcopyiTS,
581
582   xhemm3m_iucopybTS,  xhemm3m_ilcopybTS,
583   xhemm3m_iucopyrTS,  xhemm3m_ilcopyrTS,
584   xhemm3m_iucopyiTS,  xhemm3m_ilcopyiTS,
585
586   xhemm3m_oucopybTS,  xhemm3m_olcopybTS,
587   xhemm3m_oucopyrTS,  xhemm3m_olcopyrTS,
588   xhemm3m_oucopyiTS,  xhemm3m_olcopyiTS,
589 #else
590   0, 0, 0,
591
592   NULL,
593
594   NULL, NULL,
595   NULL, NULL,
596   NULL, NULL,
597   NULL, NULL,
598   NULL, NULL,
599   NULL, NULL,
600
601   NULL, NULL,
602   NULL, NULL,
603   NULL, NULL,
604   NULL, NULL,
605   NULL, NULL,
606   NULL, NULL,
607
608   NULL, NULL,
609   NULL, NULL,
610   NULL, NULL,
611
612   NULL, NULL,
613   NULL, NULL,
614   NULL, NULL,
615 #endif
616
617 #ifndef NO_LAPACK
618   xneg_tcopyTS, xlaswp_ncopyTS,
619 #else
620   NULL, NULL,
621 #endif
622
623 #endif
624
  /* ---- trailing entries: the parameter-setup hook, three *NUMOPT values,
   *      then the axpby, omatcopy, imatcopy and geadd kernels ---- */
625   init_parameter,
626
627   SNUMOPT, DNUMOPT, QNUMOPT,
628
629   saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
630
631   somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
632   domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
633   comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
634   comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
635   zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
636   zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
637
638   simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
639   dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
640   cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
641   cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
642   zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
643   zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
644
645   sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
646
647 };
648
649 #if defined(ARCH_ARM64)
650 static void init_parameter(void) {
651   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
652   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
653   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
654   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
655
656   TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
657   TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
658   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
659   TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
660
661   TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
662   TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
663   TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
664   TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
665
666 #ifdef EXPRECISION
667   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
668   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
669   TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
670   TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
671   TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
672   TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
673 #endif
674
675 #if defined(USE_GEMM3M)
676 #ifdef CGEMM3M_DEFAULT_P
677   TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
678 #else
679   TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
680 #endif
681
682 #ifdef ZGEMM3M_DEFAULT_P
683   TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
684 #else
685   TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
686 #endif
687
688 #ifdef CGEMM3M_DEFAULT_Q
689   TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
690 #else
691   TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
692 #endif
693
694 #ifdef ZGEMM3M_DEFAULT_Q
695   TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
696 #else
697   TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
698 #endif
699
700 #ifdef CGEMM3M_DEFAULT_R
701   TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
702 #else
703   TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
704 #endif
705
706 #ifdef ZGEMM3M_DEFAULT_R
707   TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
708 #else
709   TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
710 #endif
711
712 #ifdef EXPRECISION
713   TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
714   TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
715   TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
716 #endif
717 #endif
718
719 }
720 #else // defined(ARCH_ARM64)
721 #if defined(ARCH_POWER)
722 static void init_parameter(void) {
723
724   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
725   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
726   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
727   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
728
729   TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
730   TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
731   TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
732   TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
733
734
735   TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
736   TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
737   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
738   TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
739 }
740 #else //POWER
741
742 #ifdef ARCH_X86
/*
 * Legacy L2 cache size lookup based on the descriptor bytes of CPUID leaf 2.
 *
 * Executes cpuid(2) and splits eax/ebx/ecx/edx into 15 descriptor bytes.
 * The lowest byte of eax is deliberately skipped (extraction starts at bit
 * 8); NOTE(review): per the CPUID documentation that byte is a call count,
 * not a descriptor -- confirm.
 *
 * Returns the size mapped to the first recognised descriptor.  The value is
 * presumably in kilobytes, matching the "256k" wording of the fallback
 * warning.  If no byte matches, a warning is printed to stderr and 256 is
 * returned.
 */
static int get_l2_size_old(void){
  int i, eax, ebx, ecx, edx;
  int info[15];

  cpuid(2, &eax, &ebx, &ecx, &edx);

  info[ 0] = BITMASK(eax,  8, 0xff);
  info[ 1] = BITMASK(eax, 16, 0xff);
  info[ 2] = BITMASK(eax, 24, 0xff);

  info[ 3] = BITMASK(ebx,  0, 0xff);
  info[ 4] = BITMASK(ebx,  8, 0xff);
  info[ 5] = BITMASK(ebx, 16, 0xff);
  info[ 6] = BITMASK(ebx, 24, 0xff);

  info[ 7] = BITMASK(ecx,  0, 0xff);
  info[ 8] = BITMASK(ecx,  8, 0xff);
  info[ 9] = BITMASK(ecx, 16, 0xff);
  info[10] = BITMASK(ecx, 24, 0xff);

  info[11] = BITMASK(edx,  0, 0xff);
  info[12] = BITMASK(edx,  8, 0xff);
  info[13] = BITMASK(edx, 16, 0xff);
  info[14] = BITMASK(edx, 24, 0xff);

  for (i = 0; i < 15; i++){

    switch (info[i]){

      /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

    case 0x1a :
      return 96;

    case 0x39 :
    case 0x3b :
    case 0x41 :
    case 0x79 :
    case 0x81 :
      return 128;

    case 0x3a :
      return 192;

    case 0x21 :
    case 0x3c :
    case 0x42 :
    case 0x7a :
    case 0x7e :
    case 0x82 :
      return 256;

    case 0x3d :
      return 384;

    case 0x3e :
    case 0x43 :
    case 0x7b :
    case 0x7f :
    case 0x83 :
    case 0x86 :
      return 512;

    case 0x44 :
    case 0x78 :
    case 0x7c :
    case 0x84 :
    case 0x87 :
      return 1024;

    case 0x45 :
    case 0x7d :
    case 0x85 :
      return 2048;

    case 0x48 :
      /* Descriptor 0x48 is a 3 MB L2 cache: 3 * 1024 = 3072 (not 3184). */
      return 3072;

    case 0x49 :
      return 4096;

    case 0x4e :
      return 6144;

    default :
      /* Not an L2 descriptor known to this table; look at the next byte. */
      break;
    }
  }

  /* No descriptor matched: warn and fall back to a conservative default. */
  fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  return 256;
}
832 #endif
833
/*
 * Reports the L2 cache size taken from CPUID leaf 0x80000006.
 *
 * The value is the 16-bit field that BITMASK extracts from ecx with shift 16
 * and mask 0xffff; it is presumably in kilobytes, matching the "256k"
 * wording of the fallback warning.
 *
 * When the field is not positive:
 *   - ARCH_X86 builds defer to get_l2_size_old();
 *   - all other builds print a warning to stderr and return 256.
 */
static __inline__ int get_l2_size(void){

  int reg_a, reg_b, reg_c, reg_d;
  int size;

  cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

  size = BITMASK(reg_c, 16, 0xffff);

#ifdef ARCH_X86

  /* 32-bit x86: an unusable field falls back to the descriptor lookup. */
  if (size <= 0) return get_l2_size_old();

  return size;

#else

  if (size > 0) return size;

  fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  return 256;

#endif
}
856
/*
 * Reports the L3 cache size derived from CPUID leaf 0x80000006.
 *
 * Takes the 14-bit field that BITMASK extracts from edx (shift 18, mask
 * 0x3fff) and multiplies it by 512.
 * NOTE(review): the field is presumably a count of 512 KB units, which would
 * make the result kilobytes -- confirm against the CPUID documentation.
 */
static __inline__ int get_l3_size(void){

  int reg_a, reg_b, reg_c, reg_d;
  int units;

  cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

  units = BITMASK(reg_d, 18, 0x3fff);

  return units * 512;
}
865
866
867 static void init_parameter(void) {
868
869   int l2 = get_l2_size();
870
871   (void) l2; /* dirty trick to suppress unused variable warning for targets */
872              /* where the GEMM unrolling parameters do not depend on l2 */
873   
874   TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
875   TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
876   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
877   TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
878
879 #ifdef CGEMM3M_DEFAULT_Q
880   TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
881 #else
882   TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
883 #endif
884
885 #ifdef ZGEMM3M_DEFAULT_Q
886   TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
887 #else
888   TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
889 #endif
890
891 #ifdef EXPRECISION
892   TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
893   TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
894   TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
895 #endif
896
897 #if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
898
899 #ifdef DEBUG
900   fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
901 #endif
902
903   TABLE_NAME.sgemm_p =  64 * (l2 >> 7);
904   TABLE_NAME.dgemm_p =  32 * (l2 >> 7);
905   TABLE_NAME.cgemm_p =  32 * (l2 >> 7);
906   TABLE_NAME.zgemm_p =  16 * (l2 >> 7);
907 #ifdef EXPRECISION
908   TABLE_NAME.qgemm_p =  16 * (l2 >> 7);
909   TABLE_NAME.xgemm_p =   8 * (l2 >> 7);
910 #endif
911 #endif
912
913 #ifdef CORE_NORTHWOOD
914
915 #ifdef DEBUG
916   fprintf(stderr, "Northwood\n");
917 #endif
918
919   TABLE_NAME.sgemm_p =  96 * (l2 >> 7);
920   TABLE_NAME.dgemm_p =  48 * (l2 >> 7);
921   TABLE_NAME.cgemm_p =  48 * (l2 >> 7);
922   TABLE_NAME.zgemm_p =  24 * (l2 >> 7);
923 #ifdef EXPRECISION
924   TABLE_NAME.qgemm_p =  24 * (l2 >> 7);
925   TABLE_NAME.xgemm_p =  12 * (l2 >> 7);
926 #endif
927 #endif
928
929 #ifdef ATOM
930
931 #ifdef DEBUG
932   fprintf(stderr, "Atom\n");
933 #endif
934
935   TABLE_NAME.sgemm_p = 256;
936   TABLE_NAME.dgemm_p = 128;
937   TABLE_NAME.cgemm_p = 128;
938   TABLE_NAME.zgemm_p =  64;
939 #ifdef EXPRECISION
940   TABLE_NAME.qgemm_p =  64;
941   TABLE_NAME.xgemm_p =  32;
942 #endif
943 #endif
944
945 #ifdef CORE_PRESCOTT
946
947 #ifdef DEBUG
948   fprintf(stderr, "Prescott\n");
949 #endif
950
951   TABLE_NAME.sgemm_p =  56 * (l2 >> 7);
952   TABLE_NAME.dgemm_p =  28 * (l2 >> 7);
953   TABLE_NAME.cgemm_p =  28 * (l2 >> 7);
954   TABLE_NAME.zgemm_p =  14 * (l2 >> 7);
955 #ifdef EXPRECISION
956   TABLE_NAME.qgemm_p =  14 * (l2 >> 7);
957   TABLE_NAME.xgemm_p =   7 * (l2 >> 7);
958 #endif
959 #endif
960
961 #ifdef CORE2
962
963 #ifdef DEBUG
964   fprintf(stderr, "Core2\n");
965 #endif
966
967   TABLE_NAME.sgemm_p =  92 * (l2 >> 9) + 8;
968   TABLE_NAME.dgemm_p =  46 * (l2 >> 9) + 8;
969   TABLE_NAME.cgemm_p =  46 * (l2 >> 9) + 4;
970   TABLE_NAME.zgemm_p =  23 * (l2 >> 9) + 4;
971 #ifdef EXPRECISION
972   TABLE_NAME.qgemm_p =  92 * (l2 >> 9) + 8;
973   TABLE_NAME.xgemm_p =  46 * (l2 >> 9) + 4;
974 #endif
975 #endif
976
977 #ifdef PENRYN
978
979 #ifdef DEBUG
980   fprintf(stderr, "Penryn\n");
981 #endif
982
983   TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
984   TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
985   TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
986   TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
987 #ifdef EXPRECISION
988   TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
989   TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
990 #endif
991 #endif
992
993 #ifdef DUNNINGTON
994
995 #ifdef DEBUG
996   fprintf(stderr, "Dunnington\n");
997 #endif
998
999   TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
1000   TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
1001   TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
1002   TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
1003 #ifdef EXPRECISION
1004   TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
1005   TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
1006 #endif
1007 #endif
1008
1009
1010 #ifdef NEHALEM
1011
1012 #ifdef DEBUG
1013   fprintf(stderr, "Nehalem\n");
1014 #endif
1015
1016   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1017   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1018   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1019   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1020 #ifdef EXPRECISION
1021   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1022   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1023 #endif
1024 #endif
1025
1026 #ifdef SANDYBRIDGE
1027
1028 #ifdef DEBUG
1029   fprintf(stderr, "Sandybridge\n");
1030 #endif
1031
1032   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1033   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1034   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1035   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1036 #ifdef EXPRECISION
1037   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1038   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1039 #endif
1040 #endif
1041
1042 #ifdef HASWELL
1043
1044 #ifdef DEBUG
1045   fprintf(stderr, "Haswell\n");
1046 #endif
1047
1048   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1049   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1050   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1051   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1052 #ifdef EXPRECISION
1053   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1054   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1055 #endif
1056 #endif
1057
1058 #ifdef SKYLAKEX
1059
1060 #ifdef DEBUG
1061   fprintf(stderr, "SkylakeX\n");
1062 #endif
1063
1064   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1065   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1066   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1067   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1068 #ifdef EXPRECISION
1069   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1070   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1071 #endif
1072 #endif
1073
1074
1075 #ifdef OPTERON
1076
1077 #ifdef DEBUG
1078   fprintf(stderr, "Opteron\n");
1079 #endif
1080
1081   TABLE_NAME.sgemm_p = 224 +  56 * (l2 >> 7);
1082   TABLE_NAME.dgemm_p = 112 +  28 * (l2 >> 7);
1083   TABLE_NAME.cgemm_p = 112 +  28 * (l2 >> 7);
1084   TABLE_NAME.zgemm_p =  56 +  14 * (l2 >> 7);
1085 #ifdef EXPRECISION
1086   TABLE_NAME.qgemm_p =  56 +  14 * (l2 >> 7);
1087   TABLE_NAME.xgemm_p =  28 +   7 * (l2 >> 7);
1088 #endif
1089 #endif
1090
1091 #ifdef BARCELONA
1092
1093 #ifdef DEBUG
1094   fprintf(stderr, "Barcelona\n");
1095 #endif
1096
1097   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1098   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1099   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1100   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1101 #ifdef EXPRECISION
1102   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1103   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1104 #endif
1105 #endif
1106
1107 #ifdef BOBCAT
1108
1109 #ifdef DEBUG
1110   fprintf(stderr, "Bobcate\n");
1111 #endif
1112
1113   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1114   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1115   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1116   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1117 #ifdef EXPRECISION
1118   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1119   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1120 #endif
1121 #endif
1122
1123 #ifdef BULLDOZER
1124
1125 #ifdef DEBUG
1126   fprintf(stderr, "Bulldozer\n");
1127 #endif
1128
1129   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1130   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1131   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1132   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1133 #ifdef EXPRECISION
1134   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1135   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1136 #endif
1137 #endif
1138
1139 #ifdef EXCAVATOR
1140
1141 #ifdef DEBUG
1142   fprintf(stderr, "Excavator\n");
1143 #endif
1144
1145   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1146   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1147   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1148   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1149 #ifdef EXPRECISION
1150   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1151   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1152 #endif
1153 #endif
1154
1155
1156 #ifdef PILEDRIVER
1157
1158 #ifdef DEBUG
1159   fprintf(stderr, "Piledriver\n");
1160 #endif
1161
1162   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1163   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1164   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1165   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1166 #ifdef EXPRECISION
1167   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1168   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1169 #endif
1170 #endif
1171
1172 #ifdef STEAMROLLER
1173
1174 #ifdef DEBUG
1175   fprintf(stderr, "Steamroller\n");
1176 #endif
1177
1178   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1179   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1180   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1181   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1182 #ifdef EXPRECISION
1183   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1184   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1185 #endif
1186 #endif
1187
1188 #ifdef ZEN
1189
1190 #ifdef DEBUG
1191   fprintf(stderr, "Zen\n");
1192 #endif
1193
1194   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1195   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1196   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1197   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1198 #ifdef EXPRECISION
1199   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1200   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1201 #endif
1202 #endif
1203
1204
1205 #ifdef NANO
1206
1207 #ifdef DEBUG
1208   fprintf(stderr, "NANO\n");
1209 #endif
1210
1211   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1212   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1213   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1214   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1215
1216
1217
1218 #ifdef EXPRECISION
1219   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1220   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1221 #endif
1222
1223 #endif
1224
1225
1226 #ifdef CGEMM3M_DEFAULT_P
1227   TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
1228 #else
1229   TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
1230 #endif
1231
1232 #ifdef ZGEMM3M_DEFAULT_P
1233   TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
1234 #else
1235   TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
1236 #endif
1237
1238 #ifdef EXPRECISION
1239   TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
1240 #endif
1241
1242
1243
1244   TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
1245   TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
1246   TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
1247   TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
1248
1249 #ifdef CGEMM3M_DEFAULT_UNROLL_M
1250   TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
1251 #else
1252   TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
1253 #endif
1254
1255 #ifdef ZGEMM3M_DEFAULT_UNROLL_M
1256   TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
1257 #else
1258   TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
1259 #endif
1260
1261 #ifdef QUAD_PRECISION
1262   TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
1263   TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
1264   TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
1265 #endif
1266
1267 #ifdef DEBUG
1268   fprintf(stderr, "L2 = %8d DGEMM_P  .. %d\n", l2, TABLE_NAME.dgemm_p);
1269 #endif
1270
1271   TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
1272                                ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q *  4 + TABLE_NAME.offsetA
1273                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1274                                ) / (TABLE_NAME.sgemm_q *  4) - 15) & ~15);
1275
1276   TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
1277                                ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q *  8 + TABLE_NAME.offsetA
1278                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1279                                ) / (TABLE_NAME.dgemm_q *  8) - 15) & ~15);
1280
1281 #ifdef EXPRECISION
1282   TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
1283                                ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
1284                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1285                                ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
1286 #endif
1287
1288   TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
1289                                ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q *  8 + TABLE_NAME.offsetA
1290                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1291                                ) / (TABLE_NAME.cgemm_q *  8) - 15) & ~15);
1292
1293   TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
1294                                ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
1295                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1296                                ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
1297
1298   TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
1299                                ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q *  8 + TABLE_NAME.offsetA
1300                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1301                                ) / (TABLE_NAME.cgemm3m_q *  8) - 15) & ~15);
1302
1303   TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
1304                                ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
1305                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1306                                ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
1307
1308
1309
1310
1311 #ifdef EXPRECISION
1312   TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
1313                                ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
1314                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1315                        ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
1316
1317   TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
1318                                ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
1319                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1320                        ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
1321
1322 #endif
1323
1324
1325
1326 }
1327 #endif //POWER
1328 #endif //defined(ARCH_ARM64)