83f2b047f218238fe4265712ab944d773a8bda1f
[platform/upstream/openblas.git] / kernel / setparam-ref.c
1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include <string.h>
41 #include "common.h"
42
43 #ifdef BUILD_KERNEL
44 #include "kernelTS.h"
45 #endif
46
47 #undef DEBUG
48
49 static void init_parameter(void);
50
51 gotoblas_t TABLE_NAME = {
52   DTB_DEFAULT_ENTRIES ,
53
54   GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
55
56   0, 0, 0,
57   SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
58 #ifdef HAVE_EXCLUSIVE_CACHE
59   1,
60 #else
61   0,
62 #endif
63
64   samax_kTS,  samin_kTS,  smax_kTS,  smin_kTS,
65   isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
66   snrm2_kTS,  sasum_kTS,  scopy_kTS, sdot_kTS,
67   dsdot_kTS,
68   srot_kTS,   saxpy_kTS,  sscal_kTS, sswap_kTS,
69   sgemv_nTS,  sgemv_tTS, sger_kTS,
70   ssymv_LTS, ssymv_UTS,
71
72   sgemm_kernelTS, sgemm_betaTS, 
73 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
74   sgemm_incopyTS, sgemm_itcopyTS, 
75 #else
76   sgemm_oncopyTS, sgemm_otcopyTS,
77 #endif
78   sgemm_oncopyTS, sgemm_otcopyTS,
79   strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
80 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
81   strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
82   strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
83 #else
84   strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
85   strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
86 #endif
87   strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
88   strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
89   strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
90 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
91   strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
92   strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
93 #else
94   strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
95   strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
96 #endif
97   strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
98   strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
99 #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
100   ssymm_iutcopyTS, ssymm_iltcopyTS, 
101 #else
102   ssymm_outcopyTS, ssymm_oltcopyTS,
103 #endif
104   ssymm_outcopyTS, ssymm_oltcopyTS,
105
106 #ifndef NO_LAPACK
107   sneg_tcopyTS, slaswp_ncopyTS,
108 #else
109   NULL,NULL,
110 #endif
111
112   0, 0, 0,
113   DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
114
115   damax_kTS,  damin_kTS,  dmax_kTS,  dmin_kTS,
116   idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
117   dnrm2_kTS,  dasum_kTS,  dcopy_kTS, ddot_kTS,
118   drot_kTS,   daxpy_kTS,  dscal_kTS, dswap_kTS,
119   dgemv_nTS,  dgemv_tTS,  dger_kTS,
120   dsymv_LTS,  dsymv_UTS,
121
122   dgemm_kernelTS, dgemm_betaTS, 
123 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
124   dgemm_incopyTS, dgemm_itcopyTS, 
125 #else
126   dgemm_oncopyTS, dgemm_otcopyTS,
127 #endif
128   dgemm_oncopyTS, dgemm_otcopyTS,
129   dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
130 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
131   dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
132   dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
133 #else
134   dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
135   dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
136 #endif
137   dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
138   dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
139   dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
140 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
141   dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
142   dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
143 #else
144   dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
145   dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
146 #endif
147   dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
148   dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
149 #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
150   dsymm_iutcopyTS, dsymm_iltcopyTS, 
151 #else
152   dsymm_outcopyTS, dsymm_oltcopyTS,
153 #endif
154   dsymm_outcopyTS, dsymm_oltcopyTS,
155
156 #ifndef NO_LAPACK
157   dneg_tcopyTS, dlaswp_ncopyTS,
158 #else
159   NULL, NULL,
160 #endif
161
162 #ifdef EXPRECISION
163
164   0, 0, 0,
165   QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
166
167   qamax_kTS,  qamin_kTS,  qmax_kTS,  qmin_kTS,
168   iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
169   qnrm2_kTS,  qasum_kTS,  qcopy_kTS, qdot_kTS,
170   qrot_kTS,   qaxpy_kTS,  qscal_kTS, qswap_kTS,
171   qgemv_nTS,  qgemv_tTS,  qger_kTS,
172   qsymv_LTS,  qsymv_UTS,
173
174   qgemm_kernelTS, qgemm_betaTS, 
175 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
176   qgemm_incopyTS, qgemm_itcopyTS, 
177 #else
178   qgemm_oncopyTS, qgemm_otcopyTS,
179 #endif
180   qgemm_oncopyTS, qgemm_otcopyTS,
181   qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
182 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
183   qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
184   qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
185 #else
186   qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
187   qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
188 #endif
189   qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
190   qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
191   qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
192 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
193   qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
194   qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
195 #else
196   qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
197   qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
198 #endif
199   qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
200   qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
201 #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
202   qsymm_iutcopyTS, qsymm_iltcopyTS, 
203 #else
204   qsymm_outcopyTS, qsymm_oltcopyTS,
205 #endif
206   qsymm_outcopyTS, qsymm_oltcopyTS,
207
208 #ifndef NO_LAPACK
209   qneg_tcopyTS, qlaswp_ncopyTS,
210 #else
211   NULL, NULL,
212 #endif
213
214 #endif
215
216   0, 0, 0,
217   CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
218
219   camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
220   cnrm2_kTS, casum_kTS, ccopy_kTS,
221   cdotu_kTS, cdotc_kTS, csrot_kTS,
222   caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS, 
223
224   cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS, 
225   cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS, 
226   cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS, 
227   csymv_LTS, csymv_UTS,
228   chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
229   
230   cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
231   cgemm_betaTS,
232
233 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
234   cgemm_incopyTS, cgemm_itcopyTS,
235 #else
236   cgemm_oncopyTS, cgemm_otcopyTS,
237 #endif
238   cgemm_oncopyTS, cgemm_otcopyTS,
239   
240   ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
241   ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
242   
243 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
244   ctrsm_iunucopyTS,  ctrsm_iunncopyTS,  ctrsm_iutucopyTS,  ctrsm_iutncopyTS,
245   ctrsm_ilnucopyTS,  ctrsm_ilnncopyTS,  ctrsm_iltucopyTS,  ctrsm_iltncopyTS,
246 #else
247   ctrsm_ounucopyTS,  ctrsm_ounncopyTS,  ctrsm_outucopyTS,  ctrsm_outncopyTS,
248   ctrsm_olnucopyTS,  ctrsm_olnncopyTS,  ctrsm_oltucopyTS,  ctrsm_oltncopyTS,
249 #endif
250   ctrsm_ounucopyTS,  ctrsm_ounncopyTS,  ctrsm_outucopyTS,  ctrsm_outncopyTS,
251   ctrsm_olnucopyTS,  ctrsm_olnncopyTS,  ctrsm_oltucopyTS,  ctrsm_oltncopyTS,
252   
253   ctrmm_kernel_RNTS,  ctrmm_kernel_RTTS,  ctrmm_kernel_RRTS,  ctrmm_kernel_RCTS,
254   ctrmm_kernel_LNTS,  ctrmm_kernel_LTTS,  ctrmm_kernel_LRTS,  ctrmm_kernel_LCTS,
255   
256 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
257   ctrmm_iunucopyTS,  ctrmm_iunncopyTS,  ctrmm_iutucopyTS,  ctrmm_iutncopyTS,
258   ctrmm_ilnucopyTS,  ctrmm_ilnncopyTS,  ctrmm_iltucopyTS,  ctrmm_iltncopyTS,
259 #else
260   ctrmm_ounucopyTS,  ctrmm_ounncopyTS,  ctrmm_outucopyTS,  ctrmm_outncopyTS,
261   ctrmm_olnucopyTS,  ctrmm_olnncopyTS,  ctrmm_oltucopyTS,  ctrmm_oltncopyTS,
262 #endif
263   ctrmm_ounucopyTS,  ctrmm_ounncopyTS,  ctrmm_outucopyTS,  ctrmm_outncopyTS,
264   ctrmm_olnucopyTS,  ctrmm_olnncopyTS,  ctrmm_oltucopyTS,  ctrmm_oltncopyTS,
265   
266 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
267   csymm_iutcopyTS,  csymm_iltcopyTS,
268 #else
269   csymm_outcopyTS,  csymm_oltcopyTS,
270 #endif
271   csymm_outcopyTS,  csymm_oltcopyTS,
272 #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
273   chemm_iutcopyTS,  chemm_iltcopyTS,
274 #else
275   chemm_outcopyTS,  chemm_oltcopyTS,
276 #endif
277   chemm_outcopyTS,  chemm_oltcopyTS,
278   
279   cgemm3m_kernelTS,
280   
281   cgemm3m_incopybTS,  cgemm3m_incopyrTS,
282   cgemm3m_incopyiTS,  cgemm3m_itcopybTS,
283   cgemm3m_itcopyrTS,  cgemm3m_itcopyiTS,
284   cgemm3m_oncopybTS,  cgemm3m_oncopyrTS,
285   cgemm3m_oncopyiTS,  cgemm3m_otcopybTS,
286   cgemm3m_otcopyrTS,  cgemm3m_otcopyiTS,
287   
288   csymm3m_iucopybTS,  csymm3m_ilcopybTS,
289   csymm3m_iucopyrTS,  csymm3m_ilcopyrTS,
290   csymm3m_iucopyiTS,  csymm3m_ilcopyiTS,
291   csymm3m_oucopybTS,  csymm3m_olcopybTS,
292   csymm3m_oucopyrTS,  csymm3m_olcopyrTS,
293   csymm3m_oucopyiTS,  csymm3m_olcopyiTS,
294
295   chemm3m_iucopybTS,  chemm3m_ilcopybTS,
296   chemm3m_iucopyrTS,  chemm3m_ilcopyrTS,
297   chemm3m_iucopyiTS,  chemm3m_ilcopyiTS, 
298
299   chemm3m_oucopybTS,  chemm3m_olcopybTS,
300   chemm3m_oucopyrTS,  chemm3m_olcopyrTS,
301   chemm3m_oucopyiTS,  chemm3m_olcopyiTS,
302
303 #ifndef NO_LAPACK
304   cneg_tcopyTS, claswp_ncopyTS,
305 #else
306   NULL, NULL,
307 #endif
308
309   0, 0, 0,
310   ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
311
312   zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
313   znrm2_kTS, zasum_kTS, zcopy_kTS,
314   zdotu_kTS, zdotc_kTS, zdrot_kTS,
315   zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS, 
316
317   zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS, 
318   zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS, 
319   zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS, 
320   zsymv_LTS, zsymv_UTS,
321   zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
322
323   zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
324   zgemm_betaTS,
325
326 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
327   zgemm_incopyTS, zgemm_itcopyTS,
328 #else
329   zgemm_oncopyTS, zgemm_otcopyTS,
330 #endif
331   zgemm_oncopyTS, zgemm_otcopyTS,
332   
333   ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
334   ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
335   
336 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
337   ztrsm_iunucopyTS,  ztrsm_iunncopyTS,  ztrsm_iutucopyTS,  ztrsm_iutncopyTS,
338   ztrsm_ilnucopyTS,  ztrsm_ilnncopyTS,  ztrsm_iltucopyTS,  ztrsm_iltncopyTS,
339 #else
340   ztrsm_ounucopyTS,  ztrsm_ounncopyTS,  ztrsm_outucopyTS,  ztrsm_outncopyTS,
341   ztrsm_olnucopyTS,  ztrsm_olnncopyTS,  ztrsm_oltucopyTS,  ztrsm_oltncopyTS,
342 #endif
343   ztrsm_ounucopyTS,  ztrsm_ounncopyTS,  ztrsm_outucopyTS,  ztrsm_outncopyTS,
344   ztrsm_olnucopyTS,  ztrsm_olnncopyTS,  ztrsm_oltucopyTS,  ztrsm_oltncopyTS,
345   
346   ztrmm_kernel_RNTS,  ztrmm_kernel_RTTS,  ztrmm_kernel_RRTS,  ztrmm_kernel_RCTS,
347   ztrmm_kernel_LNTS,  ztrmm_kernel_LTTS,  ztrmm_kernel_LRTS,  ztrmm_kernel_LCTS,
348   
349 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
350   ztrmm_iunucopyTS,  ztrmm_iunncopyTS,  ztrmm_iutucopyTS,  ztrmm_iutncopyTS,
351   ztrmm_ilnucopyTS,  ztrmm_ilnncopyTS,  ztrmm_iltucopyTS,  ztrmm_iltncopyTS,
352 #else
353   ztrmm_ounucopyTS,  ztrmm_ounncopyTS,  ztrmm_outucopyTS,  ztrmm_outncopyTS,
354   ztrmm_olnucopyTS,  ztrmm_olnncopyTS,  ztrmm_oltucopyTS,  ztrmm_oltncopyTS,
355 #endif
356   ztrmm_ounucopyTS,  ztrmm_ounncopyTS,  ztrmm_outucopyTS,  ztrmm_outncopyTS,
357   ztrmm_olnucopyTS,  ztrmm_olnncopyTS,  ztrmm_oltucopyTS,  ztrmm_oltncopyTS,
358   
359 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
360   zsymm_iutcopyTS,  zsymm_iltcopyTS,
361 #else
362   zsymm_outcopyTS,  zsymm_oltcopyTS,
363 #endif
364   zsymm_outcopyTS,  zsymm_oltcopyTS,
365 #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
366   zhemm_iutcopyTS,  zhemm_iltcopyTS,
367 #else
368   zhemm_outcopyTS,  zhemm_oltcopyTS,
369 #endif
370   zhemm_outcopyTS,  zhemm_oltcopyTS,
371   
372   zgemm3m_kernelTS,
373   
374   zgemm3m_incopybTS,  zgemm3m_incopyrTS,
375   zgemm3m_incopyiTS,  zgemm3m_itcopybTS,
376   zgemm3m_itcopyrTS,  zgemm3m_itcopyiTS,
377   zgemm3m_oncopybTS,  zgemm3m_oncopyrTS,
378   zgemm3m_oncopyiTS,  zgemm3m_otcopybTS,
379   zgemm3m_otcopyrTS,  zgemm3m_otcopyiTS,
380   
381   zsymm3m_iucopybTS,  zsymm3m_ilcopybTS,
382   zsymm3m_iucopyrTS,  zsymm3m_ilcopyrTS,
383   zsymm3m_iucopyiTS,  zsymm3m_ilcopyiTS,
384   zsymm3m_oucopybTS,  zsymm3m_olcopybTS,
385   zsymm3m_oucopyrTS,  zsymm3m_olcopyrTS,
386   zsymm3m_oucopyiTS,  zsymm3m_olcopyiTS,
387
388   zhemm3m_iucopybTS,  zhemm3m_ilcopybTS,
389   zhemm3m_iucopyrTS,  zhemm3m_ilcopyrTS,
390   zhemm3m_iucopyiTS,  zhemm3m_ilcopyiTS, 
391
392   zhemm3m_oucopybTS,  zhemm3m_olcopybTS,
393   zhemm3m_oucopyrTS,  zhemm3m_olcopyrTS,
394   zhemm3m_oucopyiTS,  zhemm3m_olcopyiTS,
395
396 #ifndef NO_LAPACK
397   zneg_tcopyTS, zlaswp_ncopyTS,
398 #else
399   NULL, NULL,
400 #endif
401
402 #ifdef EXPRECISION
403
404   0, 0, 0,
405   XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
406
407   xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
408   xnrm2_kTS, xasum_kTS, xcopy_kTS,
409   xdotu_kTS, xdotc_kTS, xqrot_kTS,
410   xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS, 
411
412   xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS, 
413   xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS, 
414   xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS, 
415   xsymv_LTS, xsymv_UTS,
416   xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
417
418   xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
419   xgemm_betaTS,
420
421 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
422   xgemm_incopyTS, xgemm_itcopyTS,
423 #else
424   xgemm_oncopyTS, xgemm_otcopyTS,
425 #endif
426   xgemm_oncopyTS, xgemm_otcopyTS,
427   
428   xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
429   xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
430   
431 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
432   xtrsm_iunucopyTS,  xtrsm_iunncopyTS,  xtrsm_iutucopyTS,  xtrsm_iutncopyTS,
433   xtrsm_ilnucopyTS,  xtrsm_ilnncopyTS,  xtrsm_iltucopyTS,  xtrsm_iltncopyTS,
434 #else
435   xtrsm_ounucopyTS,  xtrsm_ounncopyTS,  xtrsm_outucopyTS,  xtrsm_outncopyTS,
436   xtrsm_olnucopyTS,  xtrsm_olnncopyTS,  xtrsm_oltucopyTS,  xtrsm_oltncopyTS,
437 #endif
438   xtrsm_ounucopyTS,  xtrsm_ounncopyTS,  xtrsm_outucopyTS,  xtrsm_outncopyTS,
439   xtrsm_olnucopyTS,  xtrsm_olnncopyTS,  xtrsm_oltucopyTS,  xtrsm_oltncopyTS,
440   
441   xtrmm_kernel_RNTS,  xtrmm_kernel_RTTS,  xtrmm_kernel_RRTS,  xtrmm_kernel_RCTS,
442   xtrmm_kernel_LNTS,  xtrmm_kernel_LTTS,  xtrmm_kernel_LRTS,  xtrmm_kernel_LCTS,
443   
444 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
445   xtrmm_iunucopyTS,  xtrmm_iunncopyTS,  xtrmm_iutucopyTS,  xtrmm_iutncopyTS,
446   xtrmm_ilnucopyTS,  xtrmm_ilnncopyTS,  xtrmm_iltucopyTS,  xtrmm_iltncopyTS,
447 #else
448   xtrmm_ounucopyTS,  xtrmm_ounncopyTS,  xtrmm_outucopyTS,  xtrmm_outncopyTS,
449   xtrmm_olnucopyTS,  xtrmm_olnncopyTS,  xtrmm_oltucopyTS,  xtrmm_oltncopyTS,
450 #endif
451   xtrmm_ounucopyTS,  xtrmm_ounncopyTS,  xtrmm_outucopyTS,  xtrmm_outncopyTS,
452   xtrmm_olnucopyTS,  xtrmm_olnncopyTS,  xtrmm_oltucopyTS,  xtrmm_oltncopyTS,
453   
454 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
455   xsymm_iutcopyTS,  xsymm_iltcopyTS,
456 #else
457   xsymm_outcopyTS,  xsymm_oltcopyTS,
458 #endif
459   xsymm_outcopyTS,  xsymm_oltcopyTS,
460 #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
461   xhemm_iutcopyTS,  xhemm_iltcopyTS,
462 #else
463   xhemm_outcopyTS,  xhemm_oltcopyTS,
464 #endif
465   xhemm_outcopyTS,  xhemm_oltcopyTS,
466   
467   xgemm3m_kernelTS,
468   
469   xgemm3m_incopybTS,  xgemm3m_incopyrTS,
470   xgemm3m_incopyiTS,  xgemm3m_itcopybTS,
471   xgemm3m_itcopyrTS,  xgemm3m_itcopyiTS,
472   xgemm3m_oncopybTS,  xgemm3m_oncopyrTS,
473   xgemm3m_oncopyiTS,  xgemm3m_otcopybTS,
474   xgemm3m_otcopyrTS,  xgemm3m_otcopyiTS,
475   
476   xsymm3m_iucopybTS,  xsymm3m_ilcopybTS,
477   xsymm3m_iucopyrTS,  xsymm3m_ilcopyrTS,
478   xsymm3m_iucopyiTS,  xsymm3m_ilcopyiTS,
479   xsymm3m_oucopybTS,  xsymm3m_olcopybTS,
480   xsymm3m_oucopyrTS,  xsymm3m_olcopyrTS,
481   xsymm3m_oucopyiTS,  xsymm3m_olcopyiTS,
482
483   xhemm3m_iucopybTS,  xhemm3m_ilcopybTS,
484   xhemm3m_iucopyrTS,  xhemm3m_ilcopyrTS,
485   xhemm3m_iucopyiTS,  xhemm3m_ilcopyiTS, 
486
487   xhemm3m_oucopybTS,  xhemm3m_olcopybTS,
488   xhemm3m_oucopyrTS,  xhemm3m_olcopyrTS,
489   xhemm3m_oucopyiTS,  xhemm3m_olcopyiTS,
490
491 #ifndef NO_LAPACK
492   xneg_tcopyTS, xlaswp_ncopyTS,
493 #else
494   NULL, NULL,
495 #endif
496
497 #endif
498
499   init_parameter,
500
501   SNUMOPT, DNUMOPT, QNUMOPT,
502
503 };
504
#ifdef ARCH_X86
/*
 * Legacy L2 cache size probe based on the CPUID leaf 2 cache descriptors.
 *
 * Leaf 2 returns up to 15 one-byte descriptors packed into EAX[31:8], EBX,
 * ECX and EDX.  The low byte of EAX is skipped (it is not a descriptor), and
 * a register whose bit 31 is set carries no valid descriptors and is ignored.
 * Descriptors are examined in the order EAX, EBX, ECX, EDX, low byte first,
 * and the first one that names a known L2 configuration wins.
 *
 * Returns the L2 cache size in KB, or 0 when no known descriptor is found.
 */
static int get_l2_size_old(void){
  int regs[4];   /* eax, ebx, ecx, edx as returned by cpuid(2) */
  int r, shift;

  cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

  for (r = 0; r < 4; r++){

    /* Bit 31 set (value negative as a 32-bit int): register is reserved. */
    if (regs[r] < 0) continue;

    /* EAX (r == 0) starts at bits 15:8; the other registers at bits 7:0. */
    for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8){

      switch (BITMASK(regs[r], shift, 0xff)){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB cache: 3 * 1024 KB (the former value 3184 was a typo). */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;

      default :
        /* Not an L2 descriptor this table knows about; keep scanning. */
        break;
      }
    }
  }

  return 0;
}
#endif
594
/*
 * Report the L2 cache size taken from bits 31:16 of ECX of CPUID leaf
 * 0x80000006 (a size in KB per the vendor CPUID documentation).
 *
 * On ARCH_X86 builds, a zero result from that leaf falls back to the
 * descriptor-based probe get_l2_size_old(); on all other builds the value
 * is returned as is, so 0 is a possible result.
 */
static __inline__ int get_l2_size(void){

  int reg_a, reg_b, reg_c, reg_d;
  int size_kb;

  cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

  size_kb = BITMASK(reg_c, 16, 0xffff);

#ifdef ARCH_X86
  /* Extended leaf gave nothing usable: try the legacy descriptor table. */
  if (size_kb <= 0) {
    size_kb = get_l2_size_old();
  }
#endif

  return size_kb;
}
613
/*
 * Report the L3 cache size derived from bits 31:18 of EDX of CPUID leaf
 * 0x80000006.  That field is multiplied by 512 before being returned
 * (the field counts 512 KB units per the vendor CPUID documentation, so
 * the result is in KB).
 */
static __inline__ int get_l3_size(void){

  int reg_a, reg_b, reg_c, reg_d;
  int blocks;

  cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

  blocks = BITMASK(reg_d, 18, 0x3fff);

  return blocks * 512;
}
622
623
624 static void init_parameter(void) {
625
626   int l2 = get_l2_size();
627
628   TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
629   TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
630   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
631   TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
632 #ifdef EXPRECISION
633   TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
634   TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
635 #endif
636
637 #if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
638
639 #ifdef DEBUG
640   fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
641 #endif
642
643   TABLE_NAME.sgemm_p =  64 * (l2 >> 7);
644   TABLE_NAME.dgemm_p =  32 * (l2 >> 7);
645   TABLE_NAME.cgemm_p =  32 * (l2 >> 7);
646   TABLE_NAME.zgemm_p =  16 * (l2 >> 7);
647 #ifdef EXPRECISION
648   TABLE_NAME.qgemm_p =  16 * (l2 >> 7);
649   TABLE_NAME.xgemm_p =   8 * (l2 >> 7);
650 #endif
651 #endif
652
653 #ifdef CORE_NORTHWOOD
654
655 #ifdef DEBUG
656   fprintf(stderr, "Northwood\n");
657 #endif
658
659   TABLE_NAME.sgemm_p =  96 * (l2 >> 7);
660   TABLE_NAME.dgemm_p =  48 * (l2 >> 7);
661   TABLE_NAME.cgemm_p =  48 * (l2 >> 7);
662   TABLE_NAME.zgemm_p =  24 * (l2 >> 7);
663 #ifdef EXPRECISION
664   TABLE_NAME.qgemm_p =  24 * (l2 >> 7);
665   TABLE_NAME.xgemm_p =  12 * (l2 >> 7);
666 #endif
667 #endif
668
669 #ifdef ATOM
670
671 #ifdef DEBUG
672   fprintf(stderr, "Atom\n");
673 #endif
674
675   TABLE_NAME.sgemm_p = 256;
676   TABLE_NAME.dgemm_p = 128;
677   TABLE_NAME.cgemm_p = 128;
678   TABLE_NAME.zgemm_p =  64;
679 #ifdef EXPRECISION
680   TABLE_NAME.qgemm_p =  64;
681   TABLE_NAME.xgemm_p =  32;
682 #endif
683 #endif
684
685 #ifdef CORE_PRESCOTT
686
687 #ifdef DEBUG
688   fprintf(stderr, "Prescott\n");
689 #endif
690
691   TABLE_NAME.sgemm_p =  56 * (l2 >> 7);
692   TABLE_NAME.dgemm_p =  28 * (l2 >> 7);
693   TABLE_NAME.cgemm_p =  28 * (l2 >> 7);
694   TABLE_NAME.zgemm_p =  14 * (l2 >> 7);
695 #ifdef EXPRECISION
696   TABLE_NAME.qgemm_p =  14 * (l2 >> 7);
697   TABLE_NAME.xgemm_p =   7 * (l2 >> 7);
698 #endif
699 #endif
700
701 #ifdef CORE2
702
703 #ifdef DEBUG
704   fprintf(stderr, "Core2\n");
705 #endif
706
707   TABLE_NAME.sgemm_p =  92 * (l2 >> 9);
708   TABLE_NAME.dgemm_p =  46 * (l2 >> 9);
709   TABLE_NAME.cgemm_p =  46 * (l2 >> 9);
710   TABLE_NAME.zgemm_p =  23 * (l2 >> 9);
711 #ifdef EXPRECISION
712   TABLE_NAME.qgemm_p =  92 * (l2 >> 9);
713   TABLE_NAME.xgemm_p =  46 * (l2 >> 9);
714 #endif
715 #endif
716
717 #ifdef PENRYN
718
719 #ifdef DEBUG
720   fprintf(stderr, "Penryn\n");
721 #endif
722
723   TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
724   TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
725   TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
726   TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
727 #ifdef EXPRECISION
728   TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
729   TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
730 #endif
731 #endif
732
733 #ifdef NEHALEM
734
735 #ifdef DEBUG
736   fprintf(stderr, "Nehalem\n");
737 #endif
738
739   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
740   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
741   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
742   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
743 #ifdef EXPRECISION
744   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
745   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
746 #endif
747 #endif
748
749 #ifdef SANDYBRIDGE
750
751 #ifdef DEBUG
752   fprintf(stderr, "Sandybridge\n");
753 #endif
754
755   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
756   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
757   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
758   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
759 #ifdef EXPRECISION
760   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
761   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
762 #endif
763 #endif
764
765 #ifdef OPTERON
766
767 #ifdef DEBUG
768   fprintf(stderr, "Opteron\n");
769 #endif
770
771   TABLE_NAME.sgemm_p = 224 +  56 * (l2 >> 7);
772   TABLE_NAME.dgemm_p = 112 +  28 * (l2 >> 7);
773   TABLE_NAME.cgemm_p = 112 +  28 * (l2 >> 7);
774   TABLE_NAME.zgemm_p =  56 +  14 * (l2 >> 7);
775 #ifdef EXPRECISION
776   TABLE_NAME.qgemm_p =  56 +  14 * (l2 >> 7);
777   TABLE_NAME.xgemm_p =  28 +   7 * (l2 >> 7);
778 #endif
779 #endif
780
781 #ifdef BARCELONA
782
783 #ifdef DEBUG
784   fprintf(stderr, "Barcelona\n");
785 #endif
786
787   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
788   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
789   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
790   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
791 #ifdef EXPRECISION
792   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
793   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
794 #endif
795 #endif
796
797 #ifdef BOBCAT
798
799 #ifdef DEBUG
800   fprintf(stderr, "Bobcate\n");
801 #endif
802
803   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
804   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
805   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
806   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
807 #ifdef EXPRECISION
808   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
809   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
810 #endif
811 #endif
812
813 #ifdef BULLDOZER
814
815 #ifdef DEBUG
816   fprintf(stderr, "Bulldozer\n");
817 #endif
818
819   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
820   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
821   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
822   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
823 #ifdef EXPRECISION
824   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
825   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
826 #endif
827 #endif
828
829 #ifdef NANO
830
831 #ifdef DEBUG
832   fprintf(stderr, "NANO\n");
833 #endif
834
835   TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
836   TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
837   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
838   TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
839 #ifdef EXPRECISION
840   TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
841   TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
842 #endif
843 #endif
844
845
846   TABLE_NAME.sgemm_p = (TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1);
847   TABLE_NAME.dgemm_p = (TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1);
848   TABLE_NAME.cgemm_p = (TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) & ~(CGEMM_DEFAULT_UNROLL_M - 1);
849   TABLE_NAME.zgemm_p = (TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) & ~(ZGEMM_DEFAULT_UNROLL_M - 1);
850 #ifdef QUAD_PRECISION
851   TABLE_NAME.qgemm_p = (TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1);
852   TABLE_NAME.xgemm_p = (TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) & ~(XGEMM_DEFAULT_UNROLL_M - 1);
853 #endif
854
855 #ifdef DEBUG
856   fprintf(stderr, "L2 = %8d DGEMM_P  .. %d\n", l2, TABLE_NAME.dgemm_p);
857 #endif
858
859   TABLE_NAME.sgemm_r = (((BUFFER_SIZE - 
860                                ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q *  4 + TABLE_NAME.offsetA 
861                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
862                                ) / (TABLE_NAME.sgemm_q *  4) - 15) & ~15);
863
864   TABLE_NAME.dgemm_r = (((BUFFER_SIZE - 
865                                ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q *  8 + TABLE_NAME.offsetA 
866                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
867                                ) / (TABLE_NAME.dgemm_q *  8) - 15) & ~15);
868
869 #ifdef EXPRECISION
870   TABLE_NAME.qgemm_r = (((BUFFER_SIZE - 
871                                ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA 
872                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
873                                ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
874 #endif
875
876   TABLE_NAME.cgemm_r = (((BUFFER_SIZE - 
877                                ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q *  8 + TABLE_NAME.offsetA 
878                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
879                                ) / (TABLE_NAME.cgemm_q *  8) - 15) & ~15);
880
881   TABLE_NAME.zgemm_r = (((BUFFER_SIZE - 
882                                ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA 
883                                  + TABLE_NAME.align) & ~TABLE_NAME.align)
884                                ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
885
886 #ifdef EXPRECISION
887   TABLE_NAME.xgemm_r = (((BUFFER_SIZE - 
888                                ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA 
889                                  + TABLE_NAME.align) & ~TABLE_NAME.align)       
890                        ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
891 #endif
892
893 }