Merge branch 'develop' of https://github.com/quickwritereader/OpenBLAS into develop
[platform/upstream/openblas.git] / kernel / setparam-ref.c
1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include <string.h>
41 #include "common.h"
42
43 #ifdef BUILD_KERNEL
44 #include "kernelTS.h"
45 #endif
46
47 #undef DEBUG
48
49 static void init_parameter(void);
50
51 gotoblas_t TABLE_NAME = {
52   DTB_DEFAULT_ENTRIES ,
53
54   GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
55
56   0, 0, 0,
57   SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
58 #ifdef SGEMM_DEFAULT_UNROLL_MN
59  SGEMM_DEFAULT_UNROLL_MN,
60 #else
61  MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
62 #endif
63
64
65 #ifdef HAVE_EXCLUSIVE_CACHE
66   1,
67 #else
68   0,
69 #endif
70
71   samax_kTS,  samin_kTS,  smax_kTS,  smin_kTS,
72   isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
73   snrm2_kTS,  sasum_kTS,  scopy_kTS, sdot_kTS,
74   dsdot_kTS,
75   srot_kTS,   saxpy_kTS,  sscal_kTS, sswap_kTS,
76   sgemv_nTS,  sgemv_tTS, sger_kTS,
77   ssymv_LTS, ssymv_UTS,
78
79   sgemm_kernelTS, sgemm_betaTS,
80 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
81   sgemm_incopyTS, sgemm_itcopyTS,
82 #else
83   sgemm_oncopyTS, sgemm_otcopyTS,
84 #endif
85   sgemm_oncopyTS, sgemm_otcopyTS,
86   strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
87 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
88   strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
89   strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
90 #else
91   strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
92   strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
93 #endif
94   strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
95   strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
96   strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
97 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
98   strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
99   strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
100 #else
101   strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
102   strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
103 #endif
104   strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
105   strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
106 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
107   ssymm_iutcopyTS, ssymm_iltcopyTS,
108 #else
109   ssymm_outcopyTS, ssymm_oltcopyTS,
110 #endif
111   ssymm_outcopyTS, ssymm_oltcopyTS,
112
113 #ifndef NO_LAPACK
114   sneg_tcopyTS, slaswp_ncopyTS,
115 #else
116   NULL,NULL,
117 #endif
118
119   0, 0, 0,
120   DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
121 #ifdef DGEMM_DEFAULT_UNROLL_MN
122  DGEMM_DEFAULT_UNROLL_MN,
123 #else
124  MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
125 #endif
126
127   damax_kTS,  damin_kTS,  dmax_kTS,  dmin_kTS,
128   idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
129   dnrm2_kTS,  dasum_kTS,  dcopy_kTS, ddot_kTS,
130   drot_kTS,   daxpy_kTS,  dscal_kTS, dswap_kTS,
131   dgemv_nTS,  dgemv_tTS,  dger_kTS,
132   dsymv_LTS,  dsymv_UTS,
133
134   dgemm_kernelTS, dgemm_betaTS,
135 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
136   dgemm_incopyTS, dgemm_itcopyTS,
137 #else
138   dgemm_oncopyTS, dgemm_otcopyTS,
139 #endif
140   dgemm_oncopyTS, dgemm_otcopyTS,
141   dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
142 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
143   dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
144   dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
145 #else
146   dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
147   dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
148 #endif
149   dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
150   dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
151   dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
152 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
153   dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
154   dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
155 #else
156   dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
157   dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
158 #endif
159   dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
160   dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
161 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
162   dsymm_iutcopyTS, dsymm_iltcopyTS,
163 #else
164   dsymm_outcopyTS, dsymm_oltcopyTS,
165 #endif
166   dsymm_outcopyTS, dsymm_oltcopyTS,
167
168 #ifndef NO_LAPACK
169   dneg_tcopyTS, dlaswp_ncopyTS,
170 #else
171   NULL, NULL,
172 #endif
173
174 #ifdef EXPRECISION
175
176   0, 0, 0,
177   QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
178
179   qamax_kTS,  qamin_kTS,  qmax_kTS,  qmin_kTS,
180   iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
181   qnrm2_kTS,  qasum_kTS,  qcopy_kTS, qdot_kTS,
182   qrot_kTS,   qaxpy_kTS,  qscal_kTS, qswap_kTS,
183   qgemv_nTS,  qgemv_tTS,  qger_kTS,
184   qsymv_LTS,  qsymv_UTS,
185
186   qgemm_kernelTS, qgemm_betaTS,
187 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
188   qgemm_incopyTS, qgemm_itcopyTS,
189 #else
190   qgemm_oncopyTS, qgemm_otcopyTS,
191 #endif
192   qgemm_oncopyTS, qgemm_otcopyTS,
193   qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
194 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
195   qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
196   qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
197 #else
198   qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
199   qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
200 #endif
201   qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
202   qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
203   qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
204 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
205   qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
206   qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
207 #else
208   qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
209   qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
210 #endif
211   qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
212   qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
213 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
214   qsymm_iutcopyTS, qsymm_iltcopyTS,
215 #else
216   qsymm_outcopyTS, qsymm_oltcopyTS,
217 #endif
218   qsymm_outcopyTS, qsymm_oltcopyTS,
219
220 #ifndef NO_LAPACK
221   qneg_tcopyTS, qlaswp_ncopyTS,
222 #else
223   NULL, NULL,
224 #endif
225
226 #endif
227
228   0, 0, 0,
229   CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
230 #ifdef CGEMM_DEFAULT_UNROLL_MN
231  CGEMM_DEFAULT_UNROLL_MN,
232 #else
233  MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
234 #endif
235
236   camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
237   cnrm2_kTS, casum_kTS, ccopy_kTS,
238   cdotu_kTS, cdotc_kTS, csrot_kTS,
239   caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
240
241   cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
242   cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
243   cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
244   csymv_LTS, csymv_UTS,
245   chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
246
247   cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
248   cgemm_betaTS,
249
250 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
251   cgemm_incopyTS, cgemm_itcopyTS,
252 #else
253   cgemm_oncopyTS, cgemm_otcopyTS,
254 #endif
255   cgemm_oncopyTS, cgemm_otcopyTS,
256
257   ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
258   ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
259
260 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
261   ctrsm_iunucopyTS,  ctrsm_iunncopyTS,  ctrsm_iutucopyTS,  ctrsm_iutncopyTS,
262   ctrsm_ilnucopyTS,  ctrsm_ilnncopyTS,  ctrsm_iltucopyTS,  ctrsm_iltncopyTS,
263 #else
264   ctrsm_ounucopyTS,  ctrsm_ounncopyTS,  ctrsm_outucopyTS,  ctrsm_outncopyTS,
265   ctrsm_olnucopyTS,  ctrsm_olnncopyTS,  ctrsm_oltucopyTS,  ctrsm_oltncopyTS,
266 #endif
267   ctrsm_ounucopyTS,  ctrsm_ounncopyTS,  ctrsm_outucopyTS,  ctrsm_outncopyTS,
268   ctrsm_olnucopyTS,  ctrsm_olnncopyTS,  ctrsm_oltucopyTS,  ctrsm_oltncopyTS,
269
270   ctrmm_kernel_RNTS,  ctrmm_kernel_RTTS,  ctrmm_kernel_RRTS,  ctrmm_kernel_RCTS,
271   ctrmm_kernel_LNTS,  ctrmm_kernel_LTTS,  ctrmm_kernel_LRTS,  ctrmm_kernel_LCTS,
272
273 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
274   ctrmm_iunucopyTS,  ctrmm_iunncopyTS,  ctrmm_iutucopyTS,  ctrmm_iutncopyTS,
275   ctrmm_ilnucopyTS,  ctrmm_ilnncopyTS,  ctrmm_iltucopyTS,  ctrmm_iltncopyTS,
276 #else
277   ctrmm_ounucopyTS,  ctrmm_ounncopyTS,  ctrmm_outucopyTS,  ctrmm_outncopyTS,
278   ctrmm_olnucopyTS,  ctrmm_olnncopyTS,  ctrmm_oltucopyTS,  ctrmm_oltncopyTS,
279 #endif
280   ctrmm_ounucopyTS,  ctrmm_ounncopyTS,  ctrmm_outucopyTS,  ctrmm_outncopyTS,
281   ctrmm_olnucopyTS,  ctrmm_olnncopyTS,  ctrmm_oltucopyTS,  ctrmm_oltncopyTS,
282
283 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
284   csymm_iutcopyTS,  csymm_iltcopyTS,
285 #else
286   csymm_outcopyTS,  csymm_oltcopyTS,
287 #endif
288   csymm_outcopyTS,  csymm_oltcopyTS,
289 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
290   chemm_iutcopyTS,  chemm_iltcopyTS,
291 #else
292   chemm_outcopyTS,  chemm_oltcopyTS,
293 #endif
294   chemm_outcopyTS,  chemm_oltcopyTS,
295
296   0, 0, 0,
297
298 #if defined(USE_GEMM3M)
299 #ifdef CGEMM3M_DEFAULT_UNROLL_M
300   CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
301 #else
302   SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
303 #endif
304
305
306   cgemm3m_kernelTS,
307
308   cgemm3m_incopybTS,  cgemm3m_incopyrTS,
309   cgemm3m_incopyiTS,  cgemm3m_itcopybTS,
310   cgemm3m_itcopyrTS,  cgemm3m_itcopyiTS,
311   cgemm3m_oncopybTS,  cgemm3m_oncopyrTS,
312   cgemm3m_oncopyiTS,  cgemm3m_otcopybTS,
313   cgemm3m_otcopyrTS,  cgemm3m_otcopyiTS,
314
315   csymm3m_iucopybTS,  csymm3m_ilcopybTS,
316   csymm3m_iucopyrTS,  csymm3m_ilcopyrTS,
317   csymm3m_iucopyiTS,  csymm3m_ilcopyiTS,
318   csymm3m_oucopybTS,  csymm3m_olcopybTS,
319   csymm3m_oucopyrTS,  csymm3m_olcopyrTS,
320   csymm3m_oucopyiTS,  csymm3m_olcopyiTS,
321
322   chemm3m_iucopybTS,  chemm3m_ilcopybTS,
323   chemm3m_iucopyrTS,  chemm3m_ilcopyrTS,
324   chemm3m_iucopyiTS,  chemm3m_ilcopyiTS,
325
326   chemm3m_oucopybTS,  chemm3m_olcopybTS,
327   chemm3m_oucopyrTS,  chemm3m_olcopyrTS,
328   chemm3m_oucopyiTS,  chemm3m_olcopyiTS,
329 #else
330   0, 0, 0,
331
332   NULL,
333
334   NULL, NULL,
335   NULL, NULL,
336   NULL, NULL,
337   NULL, NULL,
338   NULL, NULL,
339   NULL, NULL,
340
341   NULL, NULL,
342   NULL, NULL,
343   NULL, NULL,
344   NULL, NULL,
345   NULL, NULL,
346   NULL, NULL,
347
348   NULL, NULL,
349   NULL, NULL,
350   NULL, NULL,
351
352   NULL, NULL,
353   NULL, NULL,
354   NULL, NULL,
355 #endif
356
357 #ifndef NO_LAPACK
358   cneg_tcopyTS, claswp_ncopyTS,
359 #else
360   NULL, NULL,
361 #endif
362
363   0, 0, 0,
364   ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
365 #ifdef ZGEMM_DEFAULT_UNROLL_MN
366  ZGEMM_DEFAULT_UNROLL_MN,
367 #else
368  MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
369 #endif
370
371   zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
372   znrm2_kTS, zasum_kTS, zcopy_kTS,
373   zdotu_kTS, zdotc_kTS, zdrot_kTS,
374   zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
375
376   zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
377   zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
378   zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
379   zsymv_LTS, zsymv_UTS,
380   zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
381
382   zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
383   zgemm_betaTS,
384
385 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
386   zgemm_incopyTS, zgemm_itcopyTS,
387 #else
388   zgemm_oncopyTS, zgemm_otcopyTS,
389 #endif
390   zgemm_oncopyTS, zgemm_otcopyTS,
391
392   ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
393   ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
394
395 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
396   ztrsm_iunucopyTS,  ztrsm_iunncopyTS,  ztrsm_iutucopyTS,  ztrsm_iutncopyTS,
397   ztrsm_ilnucopyTS,  ztrsm_ilnncopyTS,  ztrsm_iltucopyTS,  ztrsm_iltncopyTS,
398 #else
399   ztrsm_ounucopyTS,  ztrsm_ounncopyTS,  ztrsm_outucopyTS,  ztrsm_outncopyTS,
400   ztrsm_olnucopyTS,  ztrsm_olnncopyTS,  ztrsm_oltucopyTS,  ztrsm_oltncopyTS,
401 #endif
402   ztrsm_ounucopyTS,  ztrsm_ounncopyTS,  ztrsm_outucopyTS,  ztrsm_outncopyTS,
403   ztrsm_olnucopyTS,  ztrsm_olnncopyTS,  ztrsm_oltucopyTS,  ztrsm_oltncopyTS,
404
405   ztrmm_kernel_RNTS,  ztrmm_kernel_RTTS,  ztrmm_kernel_RRTS,  ztrmm_kernel_RCTS,
406   ztrmm_kernel_LNTS,  ztrmm_kernel_LTTS,  ztrmm_kernel_LRTS,  ztrmm_kernel_LCTS,
407
408 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
409   ztrmm_iunucopyTS,  ztrmm_iunncopyTS,  ztrmm_iutucopyTS,  ztrmm_iutncopyTS,
410   ztrmm_ilnucopyTS,  ztrmm_ilnncopyTS,  ztrmm_iltucopyTS,  ztrmm_iltncopyTS,
411 #else
412   ztrmm_ounucopyTS,  ztrmm_ounncopyTS,  ztrmm_outucopyTS,  ztrmm_outncopyTS,
413   ztrmm_olnucopyTS,  ztrmm_olnncopyTS,  ztrmm_oltucopyTS,  ztrmm_oltncopyTS,
414 #endif
415   ztrmm_ounucopyTS,  ztrmm_ounncopyTS,  ztrmm_outucopyTS,  ztrmm_outncopyTS,
416   ztrmm_olnucopyTS,  ztrmm_olnncopyTS,  ztrmm_oltucopyTS,  ztrmm_oltncopyTS,
417
418 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
419   zsymm_iutcopyTS,  zsymm_iltcopyTS,
420 #else
421   zsymm_outcopyTS,  zsymm_oltcopyTS,
422 #endif
423   zsymm_outcopyTS,  zsymm_oltcopyTS,
424 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
425   zhemm_iutcopyTS,  zhemm_iltcopyTS,
426 #else
427   zhemm_outcopyTS,  zhemm_oltcopyTS,
428 #endif
429   zhemm_outcopyTS,  zhemm_oltcopyTS,
430
431   0, 0, 0,
432 #if defined(USE_GEMM3M)
433 #ifdef ZGEMM3M_DEFAULT_UNROLL_M
434   ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
435 #else
436   DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
437 #endif
438
439
440   zgemm3m_kernelTS,
441
442   zgemm3m_incopybTS,  zgemm3m_incopyrTS,
443   zgemm3m_incopyiTS,  zgemm3m_itcopybTS,
444   zgemm3m_itcopyrTS,  zgemm3m_itcopyiTS,
445   zgemm3m_oncopybTS,  zgemm3m_oncopyrTS,
446   zgemm3m_oncopyiTS,  zgemm3m_otcopybTS,
447   zgemm3m_otcopyrTS,  zgemm3m_otcopyiTS,
448
449   zsymm3m_iucopybTS,  zsymm3m_ilcopybTS,
450   zsymm3m_iucopyrTS,  zsymm3m_ilcopyrTS,
451   zsymm3m_iucopyiTS,  zsymm3m_ilcopyiTS,
452   zsymm3m_oucopybTS,  zsymm3m_olcopybTS,
453   zsymm3m_oucopyrTS,  zsymm3m_olcopyrTS,
454   zsymm3m_oucopyiTS,  zsymm3m_olcopyiTS,
455
456   zhemm3m_iucopybTS,  zhemm3m_ilcopybTS,
457   zhemm3m_iucopyrTS,  zhemm3m_ilcopyrTS,
458   zhemm3m_iucopyiTS,  zhemm3m_ilcopyiTS,
459
460   zhemm3m_oucopybTS,  zhemm3m_olcopybTS,
461   zhemm3m_oucopyrTS,  zhemm3m_olcopyrTS,
462   zhemm3m_oucopyiTS,  zhemm3m_olcopyiTS,
463 #else
464   0, 0, 0,
465
466   NULL,
467
468   NULL, NULL,
469   NULL, NULL,
470   NULL, NULL,
471   NULL, NULL,
472   NULL, NULL,
473   NULL, NULL,
474
475   NULL, NULL,
476   NULL, NULL,
477   NULL, NULL,
478   NULL, NULL,
479   NULL, NULL,
480   NULL, NULL,
481
482   NULL, NULL,
483   NULL, NULL,
484   NULL, NULL,
485
486   NULL, NULL,
487   NULL, NULL,
488   NULL, NULL,
489 #endif
490
491 #ifndef NO_LAPACK
492   zneg_tcopyTS, zlaswp_ncopyTS,
493 #else
494   NULL, NULL,
495 #endif
496
497 #ifdef EXPRECISION
498
499   0, 0, 0,
500   XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
501
502   xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
503   xnrm2_kTS, xasum_kTS, xcopy_kTS,
504   xdotu_kTS, xdotc_kTS, xqrot_kTS,
505   xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
506
507   xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
508   xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
509   xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
510   xsymv_LTS, xsymv_UTS,
511   xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
512
513   xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
514   xgemm_betaTS,
515
516 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
517   xgemm_incopyTS, xgemm_itcopyTS,
518 #else
519   xgemm_oncopyTS, xgemm_otcopyTS,
520 #endif
521   xgemm_oncopyTS, xgemm_otcopyTS,
522
523   xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
524   xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
525
526 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
527   xtrsm_iunucopyTS,  xtrsm_iunncopyTS,  xtrsm_iutucopyTS,  xtrsm_iutncopyTS,
528   xtrsm_ilnucopyTS,  xtrsm_ilnncopyTS,  xtrsm_iltucopyTS,  xtrsm_iltncopyTS,
529 #else
530   xtrsm_ounucopyTS,  xtrsm_ounncopyTS,  xtrsm_outucopyTS,  xtrsm_outncopyTS,
531   xtrsm_olnucopyTS,  xtrsm_olnncopyTS,  xtrsm_oltucopyTS,  xtrsm_oltncopyTS,
532 #endif
533   xtrsm_ounucopyTS,  xtrsm_ounncopyTS,  xtrsm_outucopyTS,  xtrsm_outncopyTS,
534   xtrsm_olnucopyTS,  xtrsm_olnncopyTS,  xtrsm_oltucopyTS,  xtrsm_oltncopyTS,
535
536   xtrmm_kernel_RNTS,  xtrmm_kernel_RTTS,  xtrmm_kernel_RRTS,  xtrmm_kernel_RCTS,
537   xtrmm_kernel_LNTS,  xtrmm_kernel_LTTS,  xtrmm_kernel_LRTS,  xtrmm_kernel_LCTS,
538
539 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
540   xtrmm_iunucopyTS,  xtrmm_iunncopyTS,  xtrmm_iutucopyTS,  xtrmm_iutncopyTS,
541   xtrmm_ilnucopyTS,  xtrmm_ilnncopyTS,  xtrmm_iltucopyTS,  xtrmm_iltncopyTS,
542 #else
543   xtrmm_ounucopyTS,  xtrmm_ounncopyTS,  xtrmm_outucopyTS,  xtrmm_outncopyTS,
544   xtrmm_olnucopyTS,  xtrmm_olnncopyTS,  xtrmm_oltucopyTS,  xtrmm_oltncopyTS,
545 #endif
546   xtrmm_ounucopyTS,  xtrmm_ounncopyTS,  xtrmm_outucopyTS,  xtrmm_outncopyTS,
547   xtrmm_olnucopyTS,  xtrmm_olnncopyTS,  xtrmm_oltucopyTS,  xtrmm_oltncopyTS,
548
549 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
550   xsymm_iutcopyTS,  xsymm_iltcopyTS,
551 #else
552   xsymm_outcopyTS,  xsymm_oltcopyTS,
553 #endif
554   xsymm_outcopyTS,  xsymm_oltcopyTS,
555 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
556   xhemm_iutcopyTS,  xhemm_iltcopyTS,
557 #else
558   xhemm_outcopyTS,  xhemm_oltcopyTS,
559 #endif
560   xhemm_outcopyTS,  xhemm_oltcopyTS,
561
562   0, 0, 0,
563 #if defined(USE_GEMM3M)
564   QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
565
566   xgemm3m_kernelTS,
567
568   xgemm3m_incopybTS,  xgemm3m_incopyrTS,
569   xgemm3m_incopyiTS,  xgemm3m_itcopybTS,
570   xgemm3m_itcopyrTS,  xgemm3m_itcopyiTS,
571   xgemm3m_oncopybTS,  xgemm3m_oncopyrTS,
572   xgemm3m_oncopyiTS,  xgemm3m_otcopybTS,
573   xgemm3m_otcopyrTS,  xgemm3m_otcopyiTS,
574
575   xsymm3m_iucopybTS,  xsymm3m_ilcopybTS,
576   xsymm3m_iucopyrTS,  xsymm3m_ilcopyrTS,
577   xsymm3m_iucopyiTS,  xsymm3m_ilcopyiTS,
578   xsymm3m_oucopybTS,  xsymm3m_olcopybTS,
579   xsymm3m_oucopyrTS,  xsymm3m_olcopyrTS,
580   xsymm3m_oucopyiTS,  xsymm3m_olcopyiTS,
581
582   xhemm3m_iucopybTS,  xhemm3m_ilcopybTS,
583   xhemm3m_iucopyrTS,  xhemm3m_ilcopyrTS,
584   xhemm3m_iucopyiTS,  xhemm3m_ilcopyiTS,
585
586   xhemm3m_oucopybTS,  xhemm3m_olcopybTS,
587   xhemm3m_oucopyrTS,  xhemm3m_olcopyrTS,
588   xhemm3m_oucopyiTS,  xhemm3m_olcopyiTS,
589 #else
590   0, 0, 0,
591
592   NULL,
593
594   NULL, NULL,
595   NULL, NULL,
596   NULL, NULL,
597   NULL, NULL,
598   NULL, NULL,
599   NULL, NULL,
600
601   NULL, NULL,
602   NULL, NULL,
603   NULL, NULL,
604   NULL, NULL,
605   NULL, NULL,
606   NULL, NULL,
607
608   NULL, NULL,
609   NULL, NULL,
610   NULL, NULL,
611
612   NULL, NULL,
613   NULL, NULL,
614   NULL, NULL,
615 #endif
616
617 #ifndef NO_LAPACK
618   xneg_tcopyTS, xlaswp_ncopyTS,
619 #else
620   NULL, NULL,
621 #endif
622
623 #endif
624
625   init_parameter,
626
627   SNUMOPT, DNUMOPT, QNUMOPT,
628
629   saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
630
631   somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
632   domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
633   comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
634   comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
635   zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
636   zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
637
638   simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
639   dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
640   cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
641   cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
642   zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
643   zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
644
645   sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
646
647 };
648
649 #if defined(ARCH_ARM64)
650 static void init_parameter(void) {
651   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
652   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
653   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
654   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
655
656   TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
657   TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
658   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
659   TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
660
661   TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
662   TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
663   TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
664   TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
665
666 #ifdef EXPRECISION
667   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
668   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
669   TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
670   TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
671   TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
672   TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
673 #endif
674
675 #if defined(USE_GEMM3M)
676 #ifdef CGEMM3M_DEFAULT_P
677   TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
678 #else
679   TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
680 #endif
681
682 #ifdef ZGEMM3M_DEFAULT_P
683   TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
684 #else
685   TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
686 #endif
687
688 #ifdef CGEMM3M_DEFAULT_Q
689   TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
690 #else
691   TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
692 #endif
693
694 #ifdef ZGEMM3M_DEFAULT_Q
695   TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
696 #else
697   TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
698 #endif
699
700 #ifdef CGEMM3M_DEFAULT_R
701   TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
702 #else
703   TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
704 #endif
705
706 #ifdef ZGEMM3M_DEFAULT_R
707   TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
708 #else
709   TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
710 #endif
711
712 #ifdef EXPRECISION
713   TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
714   TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
715   TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
716 #endif
717 #endif
718
719 }
720 #else // defined(ARCH_ARM64)
721 #ifdef ARCH_X86
/*
 * Legacy L2 cache size lookup, compiled only for ARCH_X86.
 *
 * Issues CPUID with function 2 and scans the cache descriptor bytes
 * returned in EAX, EBX, ECX and EDX (in that order, low byte first).
 * The first byte that matches a known L2 descriptor decides the result.
 *
 * Returns the L2 size in KB.  If no descriptor matches, a warning is
 * written to stderr and 256 is returned.
 *
 * cpuid() and BITMASK() are provided elsewhere in the project; BITMASK
 * is presumably ((value >> shift) & mask) -- confirm against its
 * definition.
 */
static int get_l2_size_old(void){
  int reg[4];   /* eax, ebx, ecx, edx as filled in by cpuid() */
  int r, b, shift;

  cpuid(2, &reg[0], &reg[1], &reg[2], &reg[3]);

  for (r = 0; r < 4; r++){

    /* Per the Intel CPUID specification, a register whose bit 31 is set
       carries no valid descriptors, so its bytes must not be decoded. */
    if ((unsigned int)reg[r] & 0x80000000U) continue;

    /* The low byte of EAX is not a descriptor, so EAX starts at byte 1. */
    for (b = (r == 0) ? 1 : 0; b < 4; b++){

      shift = 8 * b;

      switch (BITMASK(reg[r], shift, 0xff)){

	/* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
	return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
	return 128;

      case 0x3a :
	return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
	return 256;

      case 0x3d :
	return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
	return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
	return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
	return 2048;

      case 0x48 :
	/* 3 MB cache = 3 * 1024 KB (the previous value 3184 was a typo). */
	return 3072;

      case 0x49 :
	return 4096;

      case 0x4e :
	return 6144;

      default :
	/* Not an L2 descriptor known to this table; keep scanning. */
	break;
      }
    }
  }

  fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  return 256;
}
811 #endif
812
/*
 * Report the L2 cache size in KB.
 *
 * Reads CPUID function 0x80000006 and takes the 16-bit field of ECX that
 * starts at bit 16 (via BITMASK, defined elsewhere).  A positive value is
 * returned as is.  Otherwise:
 *   - on ARCH_X86 the descriptor-based get_l2_size_old() is consulted;
 *   - on every other build a warning goes to stderr and 256 is returned.
 */
static __inline__ int get_l2_size(void){

  int eax, ebx, ecx, edx;
  int l2_kb;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  l2_kb = BITMASK(ecx, 16, 0xffff);

#ifdef ARCH_X86

  /* Nothing usable reported: fall back to the CPUID function 2 lookup. */
  if (l2_kb <= 0) return get_l2_size_old();

  return l2_kb;

#else

  if (l2_kb > 0) return l2_kb;

  fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  return 256;

#endif
}
835
/*
 * Report the L3 cache size derived from CPUID function 0x80000006.
 *
 * The 14-bit field of EDX starting at bit 18 (extracted with BITMASK,
 * defined elsewhere) is multiplied by 512.  The multiplier suggests the
 * field counts 512 KB units, making the result KB -- confirm against the
 * CPUID documentation.
 */
static __inline__ int get_l3_size(void){

  int eax, ebx, ecx, edx;
  int l3_field;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  l3_field = BITMASK(edx, 18, 0x3fff);

  return 512 * l3_field;
}
844
845
846 static void init_parameter(void) {
847
848   int l2 = get_l2_size();
849
850   (void) l2; /* dirty trick to suppress unused variable warning for targets */
851              /* where the GEMM unrolling parameters do not depend on l2 */
852   
853   TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
854   TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
855   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
856   TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
857
858 #ifdef CGEMM3M_DEFAULT_Q
859   TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
860 #else
861   TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
862 #endif
863
864 #ifdef ZGEMM3M_DEFAULT_Q
865   TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
866 #else
867   TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
868 #endif
869
870 #ifdef EXPRECISION
871   TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
872   TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
873   TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
874 #endif
875
876 #if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
877
878 #ifdef DEBUG
879   fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
880 #endif
881
882   TABLE_NAME.sgemm_p =  64 * (l2 >> 7);
883   TABLE_NAME.dgemm_p =  32 * (l2 >> 7);
884   TABLE_NAME.cgemm_p =  32 * (l2 >> 7);
885   TABLE_NAME.zgemm_p =  16 * (l2 >> 7);
886 #ifdef EXPRECISION
887   TABLE_NAME.qgemm_p =  16 * (l2 >> 7);
888   TABLE_NAME.xgemm_p =   8 * (l2 >> 7);
889 #endif
890 #endif
891
892 #ifdef CORE_NORTHWOOD
893
894 #ifdef DEBUG
895   fprintf(stderr, "Northwood\n");
896 #endif
897
898   TABLE_NAME.sgemm_p =  96 * (l2 >> 7);
899   TABLE_NAME.dgemm_p =  48 * (l2 >> 7);
900   TABLE_NAME.cgemm_p =  48 * (l2 >> 7);
901   TABLE_NAME.zgemm_p =  24 * (l2 >> 7);
902 #ifdef EXPRECISION
903   TABLE_NAME.qgemm_p =  24 * (l2 >> 7);
904   TABLE_NAME.xgemm_p =  12 * (l2 >> 7);
905 #endif
906 #endif
907
908 #ifdef ATOM
909
910 #ifdef DEBUG
911   fprintf(stderr, "Atom\n");
912 #endif
913
914   TABLE_NAME.sgemm_p = 256;
915   TABLE_NAME.dgemm_p = 128;
916   TABLE_NAME.cgemm_p = 128;
917   TABLE_NAME.zgemm_p =  64;
918 #ifdef EXPRECISION
919   TABLE_NAME.qgemm_p =  64;
920   TABLE_NAME.xgemm_p =  32;
921 #endif
922 #endif
923
924 #ifdef CORE_PRESCOTT
925
926 #ifdef DEBUG
927   fprintf(stderr, "Prescott\n");
928 #endif
929
930   TABLE_NAME.sgemm_p =  56 * (l2 >> 7);
931   TABLE_NAME.dgemm_p =  28 * (l2 >> 7);
932   TABLE_NAME.cgemm_p =  28 * (l2 >> 7);
933   TABLE_NAME.zgemm_p =  14 * (l2 >> 7);
934 #ifdef EXPRECISION
935   TABLE_NAME.qgemm_p =  14 * (l2 >> 7);
936   TABLE_NAME.xgemm_p =   7 * (l2 >> 7);
937 #endif
938 #endif
939
940 #ifdef CORE2
941
942 #ifdef DEBUG
943   fprintf(stderr, "Core2\n");
944 #endif
945
946   TABLE_NAME.sgemm_p =  92 * (l2 >> 9) + 8;
947   TABLE_NAME.dgemm_p =  46 * (l2 >> 9) + 8;
948   TABLE_NAME.cgemm_p =  46 * (l2 >> 9) + 4;
949   TABLE_NAME.zgemm_p =  23 * (l2 >> 9) + 4;
950 #ifdef EXPRECISION
951   TABLE_NAME.qgemm_p =  92 * (l2 >> 9) + 8;
952   TABLE_NAME.xgemm_p =  46 * (l2 >> 9) + 4;
953 #endif
954 #endif
955
956 #ifdef PENRYN
957
958 #ifdef DEBUG
959   fprintf(stderr, "Penryn\n");
960 #endif
961
962   TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
963   TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
964   TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
965   TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
966 #ifdef EXPRECISION
967   TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
968   TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
969 #endif
970 #endif
971
972 #ifdef DUNNINGTON
973
974 #ifdef DEBUG
975   fprintf(stderr, "Dunnington\n");
976 #endif
977
978   TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
979   TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
980   TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
981   TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
982 #ifdef EXPRECISION
983   TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
984   TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
985 #endif
986 #endif
987
988
989 #ifdef NEHALEM
990
991 #ifdef DEBUG
992   fprintf(stderr, "Nehalem\n");
993 #endif
994
995   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
996   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
997   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
998   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
999 #ifdef EXPRECISION
1000   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1001   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1002 #endif
1003 #endif
1004
1005 #ifdef SANDYBRIDGE
1006
1007 #ifdef DEBUG
1008   fprintf(stderr, "Sandybridge\n");
1009 #endif
1010
1011   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1012   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1013   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1014   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1015 #ifdef EXPRECISION
1016   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1017   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1018 #endif
1019 #endif
1020
1021 #ifdef HASWELL
1022
1023 #ifdef DEBUG
1024   fprintf(stderr, "Haswell\n");
1025 #endif
1026
1027   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1028   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1029   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1030   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1031 #ifdef EXPRECISION
1032   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1033   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1034 #endif
1035 #endif
1036
1037 #ifdef SKYLAKEX
1038
1039 #ifdef DEBUG
1040   fprintf(stderr, "SkylakeX\n");
1041 #endif
1042
1043   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1044   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1045   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1046   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1047 #ifdef EXPRECISION
1048   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1049   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1050 #endif
1051 #endif
1052
1053
1054 #ifdef OPTERON
1055
1056 #ifdef DEBUG
1057   fprintf(stderr, "Opteron\n");
1058 #endif
1059
1060   TABLE_NAME.sgemm_p = 224 +  56 * (l2 >> 7);
1061   TABLE_NAME.dgemm_p = 112 +  28 * (l2 >> 7);
1062   TABLE_NAME.cgemm_p = 112 +  28 * (l2 >> 7);
1063   TABLE_NAME.zgemm_p =  56 +  14 * (l2 >> 7);
1064 #ifdef EXPRECISION
1065   TABLE_NAME.qgemm_p =  56 +  14 * (l2 >> 7);
1066   TABLE_NAME.xgemm_p =  28 +   7 * (l2 >> 7);
1067 #endif
1068 #endif
1069
1070 #ifdef BARCELONA
1071
1072 #ifdef DEBUG
1073   fprintf(stderr, "Barcelona\n");
1074 #endif
1075
1076   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1077   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1078   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1079   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1080 #ifdef EXPRECISION
1081   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1082   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1083 #endif
1084 #endif
1085
1086 #ifdef BOBCAT
1087
1088 #ifdef DEBUG
1089   fprintf(stderr, "Bobcate\n");
1090 #endif
1091
1092   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1093   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1094   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1095   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1096 #ifdef EXPRECISION
1097   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1098   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1099 #endif
1100 #endif
1101
1102 #ifdef BULLDOZER
1103
1104 #ifdef DEBUG
1105   fprintf(stderr, "Bulldozer\n");
1106 #endif
1107
1108   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1109   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1110   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1111   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1112 #ifdef EXPRECISION
1113   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1114   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1115 #endif
1116 #endif
1117
1118 #ifdef EXCAVATOR
1119
1120 #ifdef DEBUG
1121   fprintf(stderr, "Excavator\n");
1122 #endif
1123
1124   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1125   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1126   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1127   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1128 #ifdef EXPRECISION
1129   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1130   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1131 #endif
1132 #endif
1133
1134
1135 #ifdef PILEDRIVER
1136
1137 #ifdef DEBUG
1138   fprintf(stderr, "Piledriver\n");
1139 #endif
1140
1141   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1142   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1143   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1144   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1145 #ifdef EXPRECISION
1146   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1147   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1148 #endif
1149 #endif
1150
1151 #ifdef STEAMROLLER
1152
1153 #ifdef DEBUG
1154   fprintf(stderr, "Steamroller\n");
1155 #endif
1156
1157   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1158   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1159   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1160   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1161 #ifdef EXPRECISION
1162   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1163   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1164 #endif
1165 #endif
1166
1167 #ifdef ZEN
1168
1169 #ifdef DEBUG
1170   fprintf(stderr, "Zen\n");
1171 #endif
1172
1173   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1174   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1175   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1176   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1177 #ifdef EXPRECISION
1178   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1179   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1180 #endif
1181 #endif
1182
1183
1184 #ifdef NANO
1185
1186 #ifdef DEBUG
1187   fprintf(stderr, "NANO\n");
1188 #endif
1189
1190   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
1191   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
1192   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
1193   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
1194
1195
1196
1197 #ifdef EXPRECISION
1198   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
1199   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
1200 #endif
1201
1202 #endif
1203
1204
1205 #ifdef CGEMM3M_DEFAULT_P
1206   TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
1207 #else
1208   TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
1209 #endif
1210
1211 #ifdef ZGEMM3M_DEFAULT_P
1212   TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
1213 #else
1214   TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
1215 #endif
1216
1217 #ifdef EXPRECISION
1218   TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
1219 #endif
1220
1221
1222
1223   TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
1224   TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
1225   TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
1226   TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
1227
1228 #ifdef CGEMM3M_DEFAULT_UNROLL_M
1229   TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
1230 #else
1231   TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
1232 #endif
1233
1234 #ifdef ZGEMM3M_DEFAULT_UNROLL_M
1235   TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
1236 #else
1237   TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
1238 #endif
1239
1240 #ifdef QUAD_PRECISION
1241   TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
1242   TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
1243   TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
1244 #endif
1245
1246 #ifdef DEBUG
1247   fprintf(stderr, "L2 = %8d DGEMM_P  .. %d\n", l2, TABLE_NAME.dgemm_p);
1248 #endif
1249
1250   TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
1251                                ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q *  4 + TABLE_NAME.offsetA
1252                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1253                                ) / (TABLE_NAME.sgemm_q *  4) - 15) & ~15);
1254
1255   TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
1256                                ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q *  8 + TABLE_NAME.offsetA
1257                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1258                                ) / (TABLE_NAME.dgemm_q *  8) - 15) & ~15);
1259
1260 #ifdef EXPRECISION
1261   TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
1262                                ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
1263                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1264                                ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
1265 #endif
1266
1267   TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
1268                                ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q *  8 + TABLE_NAME.offsetA
1269                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1270                                ) / (TABLE_NAME.cgemm_q *  8) - 15) & ~15);
1271
1272   TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
1273                                ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
1274                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1275                                ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
1276
1277   TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
1278                                ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q *  8 + TABLE_NAME.offsetA
1279                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1280                                ) / (TABLE_NAME.cgemm3m_q *  8) - 15) & ~15);
1281
1282   TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
1283                                ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
1284                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1285                                ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
1286
1287
1288
1289
1290 #ifdef EXPRECISION
1291   TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
1292                                ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
1293                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1294                        ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
1295
1296   TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
1297                                ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
1298                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
1299                        ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
1300
1301 #endif
1302
1303
1304
1305 }
1306 #endif //defined(ARCH_ARM64)