1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the
16 3. Neither the name of the ISCAS nor the names of its contributors may
17 be used to endorse or promote products derived from this software
18 without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 **********************************************************************************/
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin. */
35 /* All rights reserved. */
37 /* Redistribution and use in source and binary forms, with or */
38 /* without modification, are permitted provided that the following */
39 /* conditions are met: */
41 /* 1. Redistributions of source code must retain the above */
42 /* copyright notice, this list of conditions and the following */
45 /* 2. Redistributions in binary form must reproduce the above */
46 /* copyright notice, this list of conditions and the following */
47 /* disclaimer in the documentation and/or other materials */
48 /* provided with the distribution. */
50 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
51 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
52 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
53 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
54 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
55 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
56 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
57 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
58 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
59 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
60 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
61 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
62 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
63 /* POSSIBILITY OF SUCH DAMAGE. */
65 /* The views and conclusions contained in the software and */
66 /* documentation are those of the authors and should not be */
67 /* interpreted as representing official policies, either expressed */
68 /* or implied, of The University of Texas at Austin. */
69 /*********************************************************************/
/*
 * Default GEMM tuning macros for one target configuration: buffer offsets,
 * an alignment mask, and per-prefix (S/D/Q/C/Z/X) UNROLL_M, UNROLL_N, P, Q
 * and R values.
 * NOTE(review): the preprocessor guard selecting this group is not shown in
 * this listing -- confirm which target it applies to.
 */
79 #define GEMM_DEFAULT_OFFSET_A 64
80 #define GEMM_DEFAULT_OFFSET_B 256
81 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
83 #define SGEMM_DEFAULT_UNROLL_N 4
84 #define DGEMM_DEFAULT_UNROLL_N 4
85 #define QGEMM_DEFAULT_UNROLL_N 2
86 #define CGEMM_DEFAULT_UNROLL_N 2
87 #define ZGEMM_DEFAULT_UNROLL_N 2
88 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): the UNROLL_M names are defined twice below with different
 * values; presumably the two sets are alternative branches of a conditional
 * that is not shown here -- confirm.
 */
91 #define SGEMM_DEFAULT_UNROLL_M 4
92 #define DGEMM_DEFAULT_UNROLL_M 2
93 #define QGEMM_DEFAULT_UNROLL_M 2
94 #define CGEMM_DEFAULT_UNROLL_M 2
95 #define ZGEMM_DEFAULT_UNROLL_M 1
96 #define XGEMM_DEFAULT_UNROLL_M 1
98 #define SGEMM_DEFAULT_UNROLL_M 8
99 #define DGEMM_DEFAULT_UNROLL_M 4
100 #define QGEMM_DEFAULT_UNROLL_M 2
101 #define CGEMM_DEFAULT_UNROLL_M 4
102 #define ZGEMM_DEFAULT_UNROLL_M 2
103 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined elsewhere. */
106 #define SGEMM_DEFAULT_P sgemm_p
107 #define DGEMM_DEFAULT_P dgemm_p
108 #define QGEMM_DEFAULT_P qgemm_p
109 #define CGEMM_DEFAULT_P cgemm_p
110 #define ZGEMM_DEFAULT_P zgemm_p
111 #define XGEMM_DEFAULT_P xgemm_p
113 #define SGEMM_DEFAULT_R sgemm_r
114 #define DGEMM_DEFAULT_R dgemm_r
115 #define QGEMM_DEFAULT_R qgemm_r
116 #define CGEMM_DEFAULT_R cgemm_r
117 #define ZGEMM_DEFAULT_R zgemm_r
118 #define XGEMM_DEFAULT_R xgemm_r
/*
 * NOTE(review): Q is also defined twice (248, then 240); presumably two
 * branches of a conditional that is not shown here -- confirm.
 */
122 #define SGEMM_DEFAULT_Q 248
123 #define DGEMM_DEFAULT_Q 248
124 #define QGEMM_DEFAULT_Q 248
125 #define CGEMM_DEFAULT_Q 248
126 #define ZGEMM_DEFAULT_Q 248
127 #define XGEMM_DEFAULT_Q 248
131 #define SGEMM_DEFAULT_Q 240
132 #define DGEMM_DEFAULT_Q 240
133 #define QGEMM_DEFAULT_Q 240
134 #define CGEMM_DEFAULT_Q 240
135 #define ZGEMM_DEFAULT_Q 240
136 #define XGEMM_DEFAULT_Q 240
/* Flag macro, defined with no value. */
142 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM tuning macros guarded by BARCELONA, SHANGHAI or BOBCAT being defined.
 * NOTE(review): the matching #endif and any inner #if/#else lines are not
 * shown in this listing -- confirm the exact extent of the guard.
 */
146 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
151 #define GEMM_DEFAULT_OFFSET_A 64
152 #define GEMM_DEFAULT_OFFSET_B 832
153 #define GEMM_DEFAULT_ALIGN 0x0fffUL
155 #define SGEMM_DEFAULT_UNROLL_N 4
156 #define DGEMM_DEFAULT_UNROLL_N 4
157 #define QGEMM_DEFAULT_UNROLL_N 2
158 #define CGEMM_DEFAULT_UNROLL_N 2
159 #define ZGEMM_DEFAULT_UNROLL_N 2
160 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): UNROLL_M is defined twice below with different values;
 * presumably alternative branches of a conditional not shown here -- confirm.
 */
163 #define SGEMM_DEFAULT_UNROLL_M 4
164 #define DGEMM_DEFAULT_UNROLL_M 2
165 #define QGEMM_DEFAULT_UNROLL_M 2
166 #define CGEMM_DEFAULT_UNROLL_M 2
167 #define ZGEMM_DEFAULT_UNROLL_M 1
168 #define XGEMM_DEFAULT_UNROLL_M 1
170 #define SGEMM_DEFAULT_UNROLL_M 8
171 #define DGEMM_DEFAULT_UNROLL_M 4
172 #define QGEMM_DEFAULT_UNROLL_M 2
173 #define CGEMM_DEFAULT_UNROLL_M 4
174 #define ZGEMM_DEFAULT_UNROLL_M 2
175 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Two complete P/Q sets follow (P 496..62 with Q 248, then P 448..56 with
 * Q 224). NOTE(review): presumably alternative branches of a conditional
 * not shown here -- confirm.
 */
179 #define SGEMM_DEFAULT_P 496
180 #define DGEMM_DEFAULT_P 248
181 #define QGEMM_DEFAULT_P 124
182 #define CGEMM_DEFAULT_P 248
183 #define ZGEMM_DEFAULT_P 124
184 #define XGEMM_DEFAULT_P 62
186 #define SGEMM_DEFAULT_Q 248
187 #define DGEMM_DEFAULT_Q 248
188 #define QGEMM_DEFAULT_Q 248
189 #define CGEMM_DEFAULT_Q 248
190 #define ZGEMM_DEFAULT_Q 248
191 #define XGEMM_DEFAULT_Q 248
195 #define SGEMM_DEFAULT_P 448
196 #define DGEMM_DEFAULT_P 224
197 #define QGEMM_DEFAULT_P 112
198 #define CGEMM_DEFAULT_P 224
199 #define ZGEMM_DEFAULT_P 112
200 #define XGEMM_DEFAULT_P 56
202 #define SGEMM_DEFAULT_Q 224
203 #define DGEMM_DEFAULT_Q 224
204 #define QGEMM_DEFAULT_Q 224
205 #define CGEMM_DEFAULT_Q 224
206 #define ZGEMM_DEFAULT_Q 224
207 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are defined elsewhere. */
211 #define SGEMM_DEFAULT_R sgemm_r
212 #define QGEMM_DEFAULT_R qgemm_r
213 #define DGEMM_DEFAULT_R dgemm_r
214 #define CGEMM_DEFAULT_R cgemm_r
215 #define ZGEMM_DEFAULT_R zgemm_r
216 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro, defined with no value. */
219 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is declared elsewhere. */
221 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
231 #define GEMM_DEFAULT_OFFSET_A 64
232 #define GEMM_DEFAULT_OFFSET_B 832
233 #define GEMM_DEFAULT_ALIGN 0x0fffUL
237 #define QGEMM_DEFAULT_UNROLL_N 2
238 #define CGEMM_DEFAULT_UNROLL_N 2
239 #define ZGEMM_DEFAULT_UNROLL_N 2
240 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Two unroll sets follow: S/D UNROLL_N 4 with the smaller UNROLL_M values,
 * then S/D UNROLL_N 2 with the larger UNROLL_M values plus the GEMM3M and
 * GEMV_UNROLL macros. NOTE(review): presumably alternative branches of a
 * conditional not shown here -- confirm.
 */
243 #define SGEMM_DEFAULT_UNROLL_N 4
244 #define DGEMM_DEFAULT_UNROLL_N 4
245 #define SGEMM_DEFAULT_UNROLL_M 4
246 #define DGEMM_DEFAULT_UNROLL_M 2
247 #define QGEMM_DEFAULT_UNROLL_M 2
248 #define CGEMM_DEFAULT_UNROLL_M 2
249 #define ZGEMM_DEFAULT_UNROLL_M 1
250 #define XGEMM_DEFAULT_UNROLL_M 1
252 #define SGEMM_DEFAULT_UNROLL_N 2
253 #define DGEMM_DEFAULT_UNROLL_N 2
254 #define SGEMM_DEFAULT_UNROLL_M 16
255 #define DGEMM_DEFAULT_UNROLL_M 8
256 #define QGEMM_DEFAULT_UNROLL_M 2
257 #define CGEMM_DEFAULT_UNROLL_M 4
258 #define ZGEMM_DEFAULT_UNROLL_M 2
259 #define XGEMM_DEFAULT_UNROLL_M 1
260 #define CGEMM3M_DEFAULT_UNROLL_N 4
261 #define CGEMM3M_DEFAULT_UNROLL_M 8
262 #define ZGEMM3M_DEFAULT_UNROLL_N 4
263 #define ZGEMM3M_DEFAULT_UNROLL_M 4
264 #define GEMV_UNROLL 8
/*
 * S/D P values: the first pair is under ARCH_X86_64. NOTE(review): the
 * second pair presumably sits in an #else branch not shown here -- confirm.
 */
268 #if defined(ARCH_X86_64)
269 #define SGEMM_DEFAULT_P 768
270 #define DGEMM_DEFAULT_P 384
272 #define SGEMM_DEFAULT_P 448
273 #define DGEMM_DEFAULT_P 224
275 #define QGEMM_DEFAULT_P 112
276 #define CGEMM_DEFAULT_P 224
277 #define ZGEMM_DEFAULT_P 112
278 #define XGEMM_DEFAULT_P 56
/* S/D Q values follow the same ARCH_X86_64 split as the P values above. */
280 #if defined(ARCH_X86_64)
281 #define SGEMM_DEFAULT_Q 168
282 #define DGEMM_DEFAULT_Q 168
284 #define SGEMM_DEFAULT_Q 224
285 #define DGEMM_DEFAULT_Q 224
287 #define QGEMM_DEFAULT_Q 224
288 #define CGEMM_DEFAULT_Q 224
289 #define ZGEMM_DEFAULT_Q 224
290 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are defined elsewhere. */
292 #define SGEMM_DEFAULT_R sgemm_r
293 #define QGEMM_DEFAULT_R qgemm_r
294 #define DGEMM_DEFAULT_R dgemm_r
295 #define CGEMM_DEFAULT_R cgemm_r
296 #define ZGEMM_DEFAULT_R zgemm_r
297 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro, defined with no value. */
300 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is declared elsewhere. */
302 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
310 #define GEMM_DEFAULT_OFFSET_A 64
311 #define GEMM_DEFAULT_OFFSET_B 832
312 #define GEMM_DEFAULT_ALIGN 0x0fffUL
316 #define QGEMM_DEFAULT_UNROLL_N 2
317 #define CGEMM_DEFAULT_UNROLL_N 2
318 #define ZGEMM_DEFAULT_UNROLL_N 2
319 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Two unroll sets follow: S/D UNROLL_N 4 with the smaller UNROLL_M values,
 * then S/D UNROLL_N 2 with the larger UNROLL_M values plus the GEMM3M and
 * GEMV_UNROLL macros. NOTE(review): presumably alternative branches of a
 * conditional not shown here -- confirm.
 */
322 #define SGEMM_DEFAULT_UNROLL_N 4
323 #define DGEMM_DEFAULT_UNROLL_N 4
324 #define SGEMM_DEFAULT_UNROLL_M 4
325 #define DGEMM_DEFAULT_UNROLL_M 2
326 #define QGEMM_DEFAULT_UNROLL_M 2
327 #define CGEMM_DEFAULT_UNROLL_M 2
328 #define ZGEMM_DEFAULT_UNROLL_M 1
329 #define XGEMM_DEFAULT_UNROLL_M 1
331 #define SGEMM_DEFAULT_UNROLL_N 2
332 #define DGEMM_DEFAULT_UNROLL_N 2
333 #define SGEMM_DEFAULT_UNROLL_M 16
334 #define DGEMM_DEFAULT_UNROLL_M 8
335 #define QGEMM_DEFAULT_UNROLL_M 2
336 #define CGEMM_DEFAULT_UNROLL_M 4
337 #define ZGEMM_DEFAULT_UNROLL_M 2
338 #define XGEMM_DEFAULT_UNROLL_M 1
339 #define CGEMM3M_DEFAULT_UNROLL_N 4
340 #define CGEMM3M_DEFAULT_UNROLL_M 8
341 #define ZGEMM3M_DEFAULT_UNROLL_N 4
342 #define ZGEMM3M_DEFAULT_UNROLL_M 4
343 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P values: the first four are under ARCH_X86_64. NOTE(review): the
 * second four presumably sit in an #else branch not shown here -- confirm.
 */
346 #if defined(ARCH_X86_64)
347 #define SGEMM_DEFAULT_P 768
348 #define DGEMM_DEFAULT_P 768
349 #define ZGEMM_DEFAULT_P 384
350 #define CGEMM_DEFAULT_P 768
352 #define SGEMM_DEFAULT_P 448
353 #define DGEMM_DEFAULT_P 480
354 #define ZGEMM_DEFAULT_P 112
355 #define CGEMM_DEFAULT_P 224
357 #define QGEMM_DEFAULT_P 112
358 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values follow the same ARCH_X86_64 split as the P values above. */
360 #if defined(ARCH_X86_64)
361 #define SGEMM_DEFAULT_Q 192
362 #define DGEMM_DEFAULT_Q 168
363 #define ZGEMM_DEFAULT_Q 168
364 #define CGEMM_DEFAULT_Q 168
366 #define SGEMM_DEFAULT_Q 224
367 #define DGEMM_DEFAULT_Q 224
368 #define ZGEMM_DEFAULT_Q 224
369 #define CGEMM_DEFAULT_Q 224
371 #define QGEMM_DEFAULT_Q 224
372 #define XGEMM_DEFAULT_Q 224
/*
 * S and D use the literal R value 12288; the other prefixes expand to
 * identifiers (qgemm_r, ...) that are defined elsewhere.
 */
374 #define SGEMM_DEFAULT_R 12288
375 #define QGEMM_DEFAULT_R qgemm_r
376 #define DGEMM_DEFAULT_R 12288
377 #define CGEMM_DEFAULT_R cgemm_r
378 #define ZGEMM_DEFAULT_R zgemm_r
379 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro, defined with no value. */
382 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is declared elsewhere. */
384 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
393 #define GEMM_DEFAULT_OFFSET_A 0
394 #define GEMM_DEFAULT_OFFSET_B 384
395 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
397 #define SGEMM_DEFAULT_UNROLL_N 4
398 #define DGEMM_DEFAULT_UNROLL_N 4
399 #define QGEMM_DEFAULT_UNROLL_N 2
400 #define CGEMM_DEFAULT_UNROLL_N 2
401 #define ZGEMM_DEFAULT_UNROLL_N 2
402 #define XGEMM_DEFAULT_UNROLL_N 1
404 #define SGEMM_DEFAULT_UNROLL_M 2
405 #define DGEMM_DEFAULT_UNROLL_M 1
406 #define QGEMM_DEFAULT_UNROLL_M 2
407 #define CGEMM_DEFAULT_UNROLL_M 1
408 #define ZGEMM_DEFAULT_UNROLL_M 1
409 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are defined elsewhere. */
411 #define SGEMM_DEFAULT_R sgemm_r
412 #define DGEMM_DEFAULT_R dgemm_r
413 #define QGEMM_DEFAULT_R qgemm_r
414 #define CGEMM_DEFAULT_R cgemm_r
415 #define ZGEMM_DEFAULT_R zgemm_r
416 #define XGEMM_DEFAULT_R xgemm_r
/* P and Q are literal constants for this group. */
418 #define SGEMM_DEFAULT_P 208
419 #define DGEMM_DEFAULT_P 104
420 #define QGEMM_DEFAULT_P 56
421 #define CGEMM_DEFAULT_P 104
422 #define ZGEMM_DEFAULT_P 56
423 #define XGEMM_DEFAULT_P 28
425 #define SGEMM_DEFAULT_Q 208
426 #define DGEMM_DEFAULT_Q 208
427 #define QGEMM_DEFAULT_Q 208
428 #define CGEMM_DEFAULT_Q 208
429 #define ZGEMM_DEFAULT_Q 208
430 #define XGEMM_DEFAULT_Q 208
/* Flag macro, defined with no value. */
433 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
441 #define GEMM_DEFAULT_OFFSET_A 0
442 #define GEMM_DEFAULT_OFFSET_B 256
443 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
445 #define SGEMM_DEFAULT_UNROLL_N 4
446 #define DGEMM_DEFAULT_UNROLL_N 4
447 #define QGEMM_DEFAULT_UNROLL_N 2
448 #define CGEMM_DEFAULT_UNROLL_N 2
449 #define ZGEMM_DEFAULT_UNROLL_N 2
450 #define XGEMM_DEFAULT_UNROLL_N 1
452 #define SGEMM_DEFAULT_UNROLL_M 2
453 #define DGEMM_DEFAULT_UNROLL_M 1
454 #define QGEMM_DEFAULT_UNROLL_M 2
455 #define CGEMM_DEFAULT_UNROLL_M 1
456 #define ZGEMM_DEFAULT_UNROLL_M 1
457 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are defined elsewhere. */
459 #define SGEMM_DEFAULT_R sgemm_r
460 #define DGEMM_DEFAULT_R dgemm_r
461 #define QGEMM_DEFAULT_R qgemm_r
462 #define CGEMM_DEFAULT_R cgemm_r
463 #define ZGEMM_DEFAULT_R zgemm_r
464 #define XGEMM_DEFAULT_R xgemm_r
/* P is 128 for every prefix; Q varies per prefix. */
466 #define SGEMM_DEFAULT_P 128
467 #define DGEMM_DEFAULT_P 128
468 #define QGEMM_DEFAULT_P 128
469 #define CGEMM_DEFAULT_P 128
470 #define ZGEMM_DEFAULT_P 128
471 #define XGEMM_DEFAULT_P 128
473 #define SGEMM_DEFAULT_Q 512
474 #define DGEMM_DEFAULT_Q 256
475 #define QGEMM_DEFAULT_Q 256
476 #define CGEMM_DEFAULT_Q 256
477 #define ZGEMM_DEFAULT_Q 128
478 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
488 #define GEMM_DEFAULT_OFFSET_A 64
489 #define GEMM_DEFAULT_OFFSET_B 256
490 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/*
 * NOTE(review): two complete UNROLL_N/UNROLL_M sets follow with different
 * values; presumably alternative branches of a conditional not shown here
 * -- confirm.
 */
493 #define SGEMM_DEFAULT_UNROLL_N 4
494 #define DGEMM_DEFAULT_UNROLL_N 4
495 #define QGEMM_DEFAULT_UNROLL_N 2
496 #define CGEMM_DEFAULT_UNROLL_N 2
497 #define ZGEMM_DEFAULT_UNROLL_N 2
498 #define XGEMM_DEFAULT_UNROLL_N 1
500 #define SGEMM_DEFAULT_UNROLL_M 4
501 #define DGEMM_DEFAULT_UNROLL_M 2
502 #define QGEMM_DEFAULT_UNROLL_M 2
503 #define CGEMM_DEFAULT_UNROLL_M 2
504 #define ZGEMM_DEFAULT_UNROLL_M 1
505 #define XGEMM_DEFAULT_UNROLL_M 1
507 #define SGEMM_DEFAULT_UNROLL_N 8
508 #define DGEMM_DEFAULT_UNROLL_N 4
509 #define QGEMM_DEFAULT_UNROLL_N 2
510 #define CGEMM_DEFAULT_UNROLL_N 4
511 #define ZGEMM_DEFAULT_UNROLL_N 2
512 #define XGEMM_DEFAULT_UNROLL_N 1
514 #define SGEMM_DEFAULT_UNROLL_M 4
515 #define DGEMM_DEFAULT_UNROLL_M 4
516 #define QGEMM_DEFAULT_UNROLL_M 2
517 #define CGEMM_DEFAULT_UNROLL_M 2
518 #define ZGEMM_DEFAULT_UNROLL_M 2
519 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every prefix. */
522 #define SGEMM_DEFAULT_P 288
523 #define DGEMM_DEFAULT_P 288
524 #define QGEMM_DEFAULT_P 288
525 #define CGEMM_DEFAULT_P 288
526 #define ZGEMM_DEFAULT_P 288
527 #define XGEMM_DEFAULT_P 288
/* R expands to identifiers (sgemm_r, ...) that are defined elsewhere. */
529 #define SGEMM_DEFAULT_R sgemm_r
530 #define DGEMM_DEFAULT_R dgemm_r
531 #define QGEMM_DEFAULT_R qgemm_r
532 #define CGEMM_DEFAULT_R cgemm_r
533 #define ZGEMM_DEFAULT_R zgemm_r
534 #define XGEMM_DEFAULT_R xgemm_r
536 #define SGEMM_DEFAULT_Q 256
537 #define DGEMM_DEFAULT_Q 128
538 #define QGEMM_DEFAULT_Q 64
539 #define CGEMM_DEFAULT_Q 128
540 #define ZGEMM_DEFAULT_Q 64
541 #define XGEMM_DEFAULT_Q 32
/* Flag macro, defined with no value. */
544 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM tuning macros guarded by PENTIUM, PENTIUM2 or PENTIUM3 being defined.
 * NOTE(review): the matching #endif and any inner #if/#else lines are not
 * shown in this listing -- confirm the exact extent of the guard.
 */
548 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
557 #define GEMM_DEFAULT_OFFSET_A 0
558 #define GEMM_DEFAULT_OFFSET_B 0
559 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * NOTE(review): SGEMM/CGEMM UNROLL_M are defined twice (8/4, then 4/2);
 * presumably alternative branches of a conditional not shown here -- confirm.
 */
562 #define SGEMM_DEFAULT_UNROLL_M 8
563 #define CGEMM_DEFAULT_UNROLL_M 4
565 #define SGEMM_DEFAULT_UNROLL_M 4
566 #define CGEMM_DEFAULT_UNROLL_M 2
568 #define DGEMM_DEFAULT_UNROLL_M 2
569 #define SGEMM_DEFAULT_UNROLL_N 2
570 #define DGEMM_DEFAULT_UNROLL_N 2
571 #define QGEMM_DEFAULT_UNROLL_M 2
572 #define QGEMM_DEFAULT_UNROLL_N 2
573 #define CGEMM_DEFAULT_UNROLL_N 1
574 #define ZGEMM_DEFAULT_UNROLL_M 1
575 #define ZGEMM_DEFAULT_UNROLL_N 1
576 #define XGEMM_DEFAULT_UNROLL_M 1
577 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Per prefix: P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * defined elsewhere; Q is the literal 256.
 */
579 #define SGEMM_DEFAULT_P sgemm_p
580 #define SGEMM_DEFAULT_Q 256
581 #define SGEMM_DEFAULT_R sgemm_r
583 #define DGEMM_DEFAULT_P dgemm_p
584 #define DGEMM_DEFAULT_Q 256
585 #define DGEMM_DEFAULT_R dgemm_r
587 #define QGEMM_DEFAULT_P qgemm_p
588 #define QGEMM_DEFAULT_Q 256
589 #define QGEMM_DEFAULT_R qgemm_r
591 #define CGEMM_DEFAULT_P cgemm_p
592 #define CGEMM_DEFAULT_Q 256
593 #define CGEMM_DEFAULT_R cgemm_r
595 #define ZGEMM_DEFAULT_P zgemm_p
596 #define ZGEMM_DEFAULT_Q 256
597 #define ZGEMM_DEFAULT_R zgemm_r
599 #define XGEMM_DEFAULT_P xgemm_p
600 #define XGEMM_DEFAULT_Q 256
601 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
612 #define GEMM_DEFAULT_OFFSET_A 0
613 #define GEMM_DEFAULT_OFFSET_B 0
614 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets follow with different
 * values; presumably alternative branches of a conditional not shown here
 * -- confirm.
 */
617 #define SGEMM_DEFAULT_UNROLL_M 4
618 #define SGEMM_DEFAULT_UNROLL_N 4
619 #define DGEMM_DEFAULT_UNROLL_M 2
620 #define DGEMM_DEFAULT_UNROLL_N 4
621 #define QGEMM_DEFAULT_UNROLL_M 2
622 #define QGEMM_DEFAULT_UNROLL_N 2
623 #define CGEMM_DEFAULT_UNROLL_M 2
624 #define CGEMM_DEFAULT_UNROLL_N 2
625 #define ZGEMM_DEFAULT_UNROLL_M 1
626 #define ZGEMM_DEFAULT_UNROLL_N 2
627 #define XGEMM_DEFAULT_UNROLL_M 1
628 #define XGEMM_DEFAULT_UNROLL_N 1
630 #define SGEMM_DEFAULT_UNROLL_M 8
631 #define SGEMM_DEFAULT_UNROLL_N 2
632 #define DGEMM_DEFAULT_UNROLL_M 2
633 #define DGEMM_DEFAULT_UNROLL_N 2
634 #define QGEMM_DEFAULT_UNROLL_M 2
635 #define QGEMM_DEFAULT_UNROLL_N 2
636 #define CGEMM_DEFAULT_UNROLL_M 4
637 #define CGEMM_DEFAULT_UNROLL_N 1
638 #define ZGEMM_DEFAULT_UNROLL_M 1
639 #define ZGEMM_DEFAULT_UNROLL_N 1
640 #define XGEMM_DEFAULT_UNROLL_M 1
641 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Per prefix: P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * defined elsewhere; Q is the literal 256.
 */
645 #define SGEMM_DEFAULT_P sgemm_p
646 #define SGEMM_DEFAULT_Q 256
647 #define SGEMM_DEFAULT_R sgemm_r
649 #define DGEMM_DEFAULT_P dgemm_p
650 #define DGEMM_DEFAULT_Q 256
651 #define DGEMM_DEFAULT_R dgemm_r
653 #define QGEMM_DEFAULT_P qgemm_p
654 #define QGEMM_DEFAULT_Q 256
655 #define QGEMM_DEFAULT_R qgemm_r
657 #define CGEMM_DEFAULT_P cgemm_p
658 #define CGEMM_DEFAULT_Q 256
659 #define CGEMM_DEFAULT_R cgemm_r
661 #define ZGEMM_DEFAULT_P zgemm_p
662 #define ZGEMM_DEFAULT_Q 256
663 #define ZGEMM_DEFAULT_R zgemm_r
665 #define XGEMM_DEFAULT_P xgemm_p
666 #define XGEMM_DEFAULT_Q 256
667 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM tuning macros guarded by CORE_NORTHWOOD being defined.
 * NOTE(review): the matching #endif is not shown in this listing -- confirm
 * the exact extent of the guard.
 */
672 #ifdef CORE_NORTHWOOD
677 #define GEMM_DEFAULT_OFFSET_A 0
678 #define GEMM_DEFAULT_OFFSET_B 32
680 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
684 #define SGEMM_DEFAULT_UNROLL_M 8
685 #define DGEMM_DEFAULT_UNROLL_M 4
686 #define QGEMM_DEFAULT_UNROLL_M 2
687 #define CGEMM_DEFAULT_UNROLL_M 4
688 #define ZGEMM_DEFAULT_UNROLL_M 2
689 #define XGEMM_DEFAULT_UNROLL_M 1
691 #define SGEMM_DEFAULT_UNROLL_N 2
692 #define DGEMM_DEFAULT_UNROLL_N 2
693 #define QGEMM_DEFAULT_UNROLL_N 2
694 #define CGEMM_DEFAULT_UNROLL_N 1
695 #define ZGEMM_DEFAULT_UNROLL_N 1
696 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined elsewhere. */
698 #define SGEMM_DEFAULT_P sgemm_p
699 #define SGEMM_DEFAULT_R sgemm_r
701 #define DGEMM_DEFAULT_P dgemm_p
702 #define DGEMM_DEFAULT_R dgemm_r
704 #define QGEMM_DEFAULT_P qgemm_p
705 #define QGEMM_DEFAULT_R qgemm_r
707 #define CGEMM_DEFAULT_P cgemm_p
708 #define CGEMM_DEFAULT_R cgemm_r
710 #define ZGEMM_DEFAULT_P zgemm_p
711 #define ZGEMM_DEFAULT_R zgemm_r
713 #define XGEMM_DEFAULT_P xgemm_p
714 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every prefix. */
716 #define SGEMM_DEFAULT_Q 128
717 #define DGEMM_DEFAULT_Q 128
718 #define QGEMM_DEFAULT_Q 128
719 #define CGEMM_DEFAULT_Q 128
720 #define ZGEMM_DEFAULT_Q 128
721 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM tuning macros for another target configuration.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 * NOTE(review): OFFSET_A/OFFSET_B are defined twice (128/192, then 0/256);
 * presumably alternative branches of a conditional not shown here -- confirm.
 */
730 #define GEMM_DEFAULT_OFFSET_A 128
731 #define GEMM_DEFAULT_OFFSET_B 192
733 #define GEMM_DEFAULT_OFFSET_A 0
734 #define GEMM_DEFAULT_OFFSET_B 256
737 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * NOTE(review): UNROLL_M is defined twice below with different values;
 * presumably alternative branches of a conditional not shown here -- confirm.
 */
742 #define SGEMM_DEFAULT_UNROLL_M 4
743 #define DGEMM_DEFAULT_UNROLL_M 2
744 #define QGEMM_DEFAULT_UNROLL_M 2
745 #define CGEMM_DEFAULT_UNROLL_M 2
746 #define ZGEMM_DEFAULT_UNROLL_M 1
747 #define XGEMM_DEFAULT_UNROLL_M 1
749 #define SGEMM_DEFAULT_UNROLL_M 8
750 #define DGEMM_DEFAULT_UNROLL_M 4
751 #define QGEMM_DEFAULT_UNROLL_M 2
752 #define CGEMM_DEFAULT_UNROLL_M 4
753 #define ZGEMM_DEFAULT_UNROLL_M 2
754 #define XGEMM_DEFAULT_UNROLL_M 1
757 #define SGEMM_DEFAULT_UNROLL_N 4
758 #define DGEMM_DEFAULT_UNROLL_N 4
759 #define QGEMM_DEFAULT_UNROLL_N 2
760 #define CGEMM_DEFAULT_UNROLL_N 2
761 #define ZGEMM_DEFAULT_UNROLL_N 2
762 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined elsewhere. */
764 #define SGEMM_DEFAULT_P sgemm_p
765 #define SGEMM_DEFAULT_R sgemm_r
767 #define DGEMM_DEFAULT_P dgemm_p
768 #define DGEMM_DEFAULT_R dgemm_r
770 #define QGEMM_DEFAULT_P qgemm_p
771 #define QGEMM_DEFAULT_R qgemm_r
773 #define CGEMM_DEFAULT_P cgemm_p
774 #define CGEMM_DEFAULT_R cgemm_r
776 #define ZGEMM_DEFAULT_P zgemm_p
777 #define ZGEMM_DEFAULT_R zgemm_r
779 #define XGEMM_DEFAULT_P xgemm_p
780 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every prefix. */
782 #define SGEMM_DEFAULT_Q 128
783 #define DGEMM_DEFAULT_Q 128
784 #define QGEMM_DEFAULT_Q 128
785 #define CGEMM_DEFAULT_Q 128
786 #define ZGEMM_DEFAULT_Q 128
787 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM tuning macros for another target configuration; this group also
 * defines SWITCH_RATIO and the MASK helper macro.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
795 #define GEMM_DEFAULT_OFFSET_A 448
796 #define GEMM_DEFAULT_OFFSET_B 128
797 #define GEMM_DEFAULT_ALIGN 0x03fffUL
801 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets appear in this group
 * (one before MASK, one after); presumably alternative branches of a
 * conditional not shown here -- confirm.
 */
804 #define SGEMM_DEFAULT_UNROLL_M 8
805 #define DGEMM_DEFAULT_UNROLL_M 4
806 #define QGEMM_DEFAULT_UNROLL_M 2
807 #define CGEMM_DEFAULT_UNROLL_M 4
808 #define ZGEMM_DEFAULT_UNROLL_M 2
809 #define XGEMM_DEFAULT_UNROLL_M 1
811 #define SGEMM_DEFAULT_UNROLL_N 2
812 #define DGEMM_DEFAULT_UNROLL_N 2
813 #define QGEMM_DEFAULT_UNROLL_N 2
814 #define CGEMM_DEFAULT_UNROLL_N 1
815 #define ZGEMM_DEFAULT_UNROLL_N 1
816 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * MASK(a, b): computes ((a + b - 1) / b) * b, i.e. for positive integer
 * operands it rounds a up to a multiple of b. The argument b is expanded
 * three times, so it should not have side effects.
 */
818 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
821 #define SGEMM_DEFAULT_UNROLL_M 8
822 #define DGEMM_DEFAULT_UNROLL_M 4
823 #define QGEMM_DEFAULT_UNROLL_M 2
824 #define CGEMM_DEFAULT_UNROLL_M 4
825 #define ZGEMM_DEFAULT_UNROLL_M 2
826 #define XGEMM_DEFAULT_UNROLL_M 1
828 #define SGEMM_DEFAULT_UNROLL_N 4
829 #define DGEMM_DEFAULT_UNROLL_N 4
830 #define QGEMM_DEFAULT_UNROLL_N 2
831 #define CGEMM_DEFAULT_UNROLL_N 2
832 #define ZGEMM_DEFAULT_UNROLL_N 2
833 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined elsewhere. */
836 #define SGEMM_DEFAULT_P sgemm_p
837 #define SGEMM_DEFAULT_R sgemm_r
839 #define DGEMM_DEFAULT_P dgemm_p
840 #define DGEMM_DEFAULT_R dgemm_r
842 #define QGEMM_DEFAULT_P qgemm_p
843 #define QGEMM_DEFAULT_R qgemm_r
845 #define CGEMM_DEFAULT_P cgemm_p
846 #define CGEMM_DEFAULT_R cgemm_r
848 #define ZGEMM_DEFAULT_P zgemm_p
849 #define ZGEMM_DEFAULT_R zgemm_r
851 #define XGEMM_DEFAULT_P xgemm_p
852 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every prefix. */
854 #define SGEMM_DEFAULT_Q 256
855 #define DGEMM_DEFAULT_Q 256
856 #define QGEMM_DEFAULT_Q 256
857 #define CGEMM_DEFAULT_Q 256
858 #define ZGEMM_DEFAULT_Q 256
859 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM tuning macros for another target configuration; also sets
 * SWITCH_RATIO and GETRF_FACTOR.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
868 #define GEMM_DEFAULT_OFFSET_A 128
869 #define GEMM_DEFAULT_OFFSET_B 0
870 #define GEMM_DEFAULT_ALIGN 0x03fffUL
874 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets follow; the UNROLL_N
 * values are the same in both, the UNROLL_M values differ. Presumably
 * alternative branches of a conditional not shown here -- confirm.
 */
877 #define SGEMM_DEFAULT_UNROLL_M 4
878 #define DGEMM_DEFAULT_UNROLL_M 2
879 #define QGEMM_DEFAULT_UNROLL_M 2
880 #define CGEMM_DEFAULT_UNROLL_M 2
881 #define ZGEMM_DEFAULT_UNROLL_M 1
882 #define XGEMM_DEFAULT_UNROLL_M 1
884 #define SGEMM_DEFAULT_UNROLL_N 4
885 #define DGEMM_DEFAULT_UNROLL_N 4
886 #define QGEMM_DEFAULT_UNROLL_N 2
887 #define CGEMM_DEFAULT_UNROLL_N 2
888 #define ZGEMM_DEFAULT_UNROLL_N 2
889 #define XGEMM_DEFAULT_UNROLL_N 1
891 #define SGEMM_DEFAULT_UNROLL_M 8
892 #define DGEMM_DEFAULT_UNROLL_M 4
893 #define QGEMM_DEFAULT_UNROLL_M 2
894 #define CGEMM_DEFAULT_UNROLL_M 4
895 #define ZGEMM_DEFAULT_UNROLL_M 2
896 #define XGEMM_DEFAULT_UNROLL_M 1
898 #define SGEMM_DEFAULT_UNROLL_N 4
899 #define DGEMM_DEFAULT_UNROLL_N 4
900 #define QGEMM_DEFAULT_UNROLL_N 2
901 #define CGEMM_DEFAULT_UNROLL_N 2
902 #define ZGEMM_DEFAULT_UNROLL_N 2
903 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined elsewhere. */
906 #define SGEMM_DEFAULT_P sgemm_p
907 #define SGEMM_DEFAULT_R sgemm_r
909 #define DGEMM_DEFAULT_P dgemm_p
910 #define DGEMM_DEFAULT_R dgemm_r
912 #define QGEMM_DEFAULT_P qgemm_p
913 #define QGEMM_DEFAULT_R qgemm_r
915 #define CGEMM_DEFAULT_P cgemm_p
916 #define CGEMM_DEFAULT_R cgemm_r
918 #define ZGEMM_DEFAULT_P zgemm_p
919 #define ZGEMM_DEFAULT_R zgemm_r
921 #define XGEMM_DEFAULT_P xgemm_p
922 #define XGEMM_DEFAULT_R xgemm_r
924 #define SGEMM_DEFAULT_Q 512
925 #define DGEMM_DEFAULT_Q 256
926 #define QGEMM_DEFAULT_Q 128
927 #define CGEMM_DEFAULT_Q 512
928 #define ZGEMM_DEFAULT_Q 256
929 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; its consumer is not visible in this listing. */
931 #define GETRF_FACTOR 0.75
/*
 * GEMM tuning macros for another target configuration; also sets
 * SWITCH_RATIO, GETRF_FACTOR and GEMM_THREAD.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
939 #define GEMM_DEFAULT_OFFSET_A 128
940 #define GEMM_DEFAULT_OFFSET_B 0
941 #define GEMM_DEFAULT_ALIGN 0x03fffUL
945 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets follow; the UNROLL_N
 * values are the same in both, the UNROLL_M values differ. Presumably
 * alternative branches of a conditional not shown here -- confirm.
 */
948 #define SGEMM_DEFAULT_UNROLL_M 4
949 #define DGEMM_DEFAULT_UNROLL_M 2
950 #define QGEMM_DEFAULT_UNROLL_M 2
951 #define CGEMM_DEFAULT_UNROLL_M 2
952 #define ZGEMM_DEFAULT_UNROLL_M 1
953 #define XGEMM_DEFAULT_UNROLL_M 1
955 #define SGEMM_DEFAULT_UNROLL_N 4
956 #define DGEMM_DEFAULT_UNROLL_N 4
957 #define QGEMM_DEFAULT_UNROLL_N 2
958 #define CGEMM_DEFAULT_UNROLL_N 2
959 #define ZGEMM_DEFAULT_UNROLL_N 2
960 #define XGEMM_DEFAULT_UNROLL_N 1
962 #define SGEMM_DEFAULT_UNROLL_M 8
963 #define DGEMM_DEFAULT_UNROLL_M 4
964 #define QGEMM_DEFAULT_UNROLL_M 2
965 #define CGEMM_DEFAULT_UNROLL_M 4
966 #define ZGEMM_DEFAULT_UNROLL_M 2
967 #define XGEMM_DEFAULT_UNROLL_M 1
969 #define SGEMM_DEFAULT_UNROLL_N 4
970 #define DGEMM_DEFAULT_UNROLL_N 4
971 #define QGEMM_DEFAULT_UNROLL_N 2
972 #define CGEMM_DEFAULT_UNROLL_N 2
973 #define ZGEMM_DEFAULT_UNROLL_N 2
974 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined elsewhere. */
977 #define SGEMM_DEFAULT_P sgemm_p
978 #define SGEMM_DEFAULT_R sgemm_r
980 #define DGEMM_DEFAULT_P dgemm_p
981 #define DGEMM_DEFAULT_R dgemm_r
983 #define QGEMM_DEFAULT_P qgemm_p
984 #define QGEMM_DEFAULT_R qgemm_r
986 #define CGEMM_DEFAULT_P cgemm_p
987 #define CGEMM_DEFAULT_R cgemm_r
989 #define ZGEMM_DEFAULT_P zgemm_p
990 #define ZGEMM_DEFAULT_R zgemm_r
992 #define XGEMM_DEFAULT_P xgemm_p
993 #define XGEMM_DEFAULT_R xgemm_r
995 #define SGEMM_DEFAULT_Q 768
996 #define DGEMM_DEFAULT_Q 384
997 #define QGEMM_DEFAULT_Q 192
998 #define CGEMM_DEFAULT_Q 768
999 #define ZGEMM_DEFAULT_Q 384
1000 #define XGEMM_DEFAULT_Q 192
/* Floating-point constant; its consumer is not visible in this listing. */
1002 #define GETRF_FACTOR 0.75
/* GEMM_THREAD expands to gemm_thread_mn, which is declared elsewhere. */
1003 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning macros for another target configuration; also sets
 * SWITCH_RATIO, GETRF_FACTOR and the GEMM3M unroll factors.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
1011 #define GEMM_DEFAULT_OFFSET_A 32
1012 #define GEMM_DEFAULT_OFFSET_B 0
1013 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1017 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets follow; the UNROLL_M
 * values are the same in both, the UNROLL_N values differ. Presumably
 * alternative branches of a conditional not shown here -- confirm.
 */
1020 #define SGEMM_DEFAULT_UNROLL_M 4
1021 #define DGEMM_DEFAULT_UNROLL_M 2
1022 #define QGEMM_DEFAULT_UNROLL_M 2
1023 #define CGEMM_DEFAULT_UNROLL_M 2
1024 #define ZGEMM_DEFAULT_UNROLL_M 1
1025 #define XGEMM_DEFAULT_UNROLL_M 1
1027 #define SGEMM_DEFAULT_UNROLL_N 4
1028 #define DGEMM_DEFAULT_UNROLL_N 4
1029 #define QGEMM_DEFAULT_UNROLL_N 2
1030 #define CGEMM_DEFAULT_UNROLL_N 2
1031 #define ZGEMM_DEFAULT_UNROLL_N 2
1032 #define XGEMM_DEFAULT_UNROLL_N 1
1034 #define SGEMM_DEFAULT_UNROLL_M 4
1035 #define DGEMM_DEFAULT_UNROLL_M 2
1036 #define QGEMM_DEFAULT_UNROLL_M 2
1037 #define CGEMM_DEFAULT_UNROLL_M 2
1038 #define ZGEMM_DEFAULT_UNROLL_M 1
1039 #define XGEMM_DEFAULT_UNROLL_M 1
1041 #define SGEMM_DEFAULT_UNROLL_N 8
1042 #define DGEMM_DEFAULT_UNROLL_N 8
1043 #define QGEMM_DEFAULT_UNROLL_N 2
1044 #define CGEMM_DEFAULT_UNROLL_N 4
1045 #define ZGEMM_DEFAULT_UNROLL_N 4
1046 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal constant per prefix; R expands to identifiers
 * (sgemm_r, ...) that are defined elsewhere.
 */
1049 #define SGEMM_DEFAULT_P 504
1050 #define SGEMM_DEFAULT_R sgemm_r
1052 #define DGEMM_DEFAULT_P 504
1053 #define DGEMM_DEFAULT_R dgemm_r
1055 #define QGEMM_DEFAULT_P 504
1056 #define QGEMM_DEFAULT_R qgemm_r
1058 #define CGEMM_DEFAULT_P 252
1059 #define CGEMM_DEFAULT_R cgemm_r
1061 #define ZGEMM_DEFAULT_P 252
1062 #define ZGEMM_DEFAULT_R zgemm_r
1064 #define XGEMM_DEFAULT_P 252
1065 #define XGEMM_DEFAULT_R xgemm_r
1067 #define SGEMM_DEFAULT_Q 512
1068 #define DGEMM_DEFAULT_Q 256
1069 #define QGEMM_DEFAULT_Q 128
1070 #define CGEMM_DEFAULT_Q 512
1071 #define ZGEMM_DEFAULT_Q 256
1072 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; its consumer is not visible in this listing. */
1074 #define GETRF_FACTOR 0.72
/* Unroll factors for the CGEMM3M/ZGEMM3M variants. */
1076 #define CGEMM3M_DEFAULT_UNROLL_N 4
1077 #define CGEMM3M_DEFAULT_UNROLL_M 8
1078 #define ZGEMM3M_DEFAULT_UNROLL_N 2
1079 #define ZGEMM3M_DEFAULT_UNROLL_M 8
/*
 * GEMM tuning macros for another target configuration; also sets
 * SWITCH_RATIO, the GEMM3M unroll factors and GETRF_FACTOR.
 * NOTE(review): the guard selecting this group is not shown in this listing
 * -- confirm which target it applies to.
 */
1088 #define GEMM_DEFAULT_OFFSET_A 0
1089 #define GEMM_DEFAULT_OFFSET_B 0
1090 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1094 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets follow with different
 * values; presumably alternative branches of a conditional not shown here
 * -- confirm.
 */
1097 #define SGEMM_DEFAULT_UNROLL_M 4
1098 #define DGEMM_DEFAULT_UNROLL_M 2
1099 #define QGEMM_DEFAULT_UNROLL_M 2
1100 #define CGEMM_DEFAULT_UNROLL_M 2
1101 #define ZGEMM_DEFAULT_UNROLL_M 1
1102 #define XGEMM_DEFAULT_UNROLL_M 1
1104 #define SGEMM_DEFAULT_UNROLL_N 4
1105 #define DGEMM_DEFAULT_UNROLL_N 4
1106 #define QGEMM_DEFAULT_UNROLL_N 2
1107 #define CGEMM_DEFAULT_UNROLL_N 2
1108 #define ZGEMM_DEFAULT_UNROLL_N 2
1109 #define XGEMM_DEFAULT_UNROLL_N 1
1111 #define SGEMM_DEFAULT_UNROLL_M 16
1112 #define DGEMM_DEFAULT_UNROLL_M 8
1113 #define QGEMM_DEFAULT_UNROLL_M 2
1114 #define CGEMM_DEFAULT_UNROLL_M 8
1115 #define ZGEMM_DEFAULT_UNROLL_M 4
1116 #define XGEMM_DEFAULT_UNROLL_M 1
1118 #define SGEMM_DEFAULT_UNROLL_N 4
1119 #define DGEMM_DEFAULT_UNROLL_N 4
1120 #define QGEMM_DEFAULT_UNROLL_N 2
1121 #define CGEMM_DEFAULT_UNROLL_N 2
1122 #define ZGEMM_DEFAULT_UNROLL_N 4
1123 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal constant per prefix. R expands to identifiers defined
 * elsewhere, except CGEMM which uses the literal 1024. The commented-out
 * lines keep the alternative R definitions that are currently disabled.
 */
1126 #define SGEMM_DEFAULT_P 768
1127 #define SGEMM_DEFAULT_R sgemm_r
1128 //#define SGEMM_DEFAULT_R 1024
1130 #define DGEMM_DEFAULT_P 512
1131 #define DGEMM_DEFAULT_R dgemm_r
1132 //#define DGEMM_DEFAULT_R 1024
1134 #define QGEMM_DEFAULT_P 504
1135 #define QGEMM_DEFAULT_R qgemm_r
1137 #define CGEMM_DEFAULT_P 384
1138 //#define CGEMM_DEFAULT_R cgemm_r
1139 #define CGEMM_DEFAULT_R 1024
1141 #define ZGEMM_DEFAULT_P 512
1142 #define ZGEMM_DEFAULT_R zgemm_r
1143 //#define ZGEMM_DEFAULT_R 1024
1145 #define XGEMM_DEFAULT_P 252
1146 #define XGEMM_DEFAULT_R xgemm_r
1148 #define SGEMM_DEFAULT_Q 384
1149 #define DGEMM_DEFAULT_Q 256
1150 #define QGEMM_DEFAULT_Q 128
1151 #define CGEMM_DEFAULT_Q 192
1152 #define ZGEMM_DEFAULT_Q 192
1153 #define XGEMM_DEFAULT_Q 128
/* Unroll factors for the CGEMM3M/ZGEMM3M variants. */
1155 #define CGEMM3M_DEFAULT_UNROLL_N 4
1156 #define CGEMM3M_DEFAULT_UNROLL_M 8
1157 #define ZGEMM3M_DEFAULT_UNROLL_N 2
1158 #define ZGEMM3M_DEFAULT_UNROLL_M 8
/* Floating-point constant; its consumer is not visible in this listing. */
1160 #define GETRF_FACTOR 0.72
1169 #define GEMM_DEFAULT_OFFSET_A 0
1170 #define GEMM_DEFAULT_OFFSET_B 0
1171 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1175 #define SWITCH_RATIO 4
1179 #define SGEMM_DEFAULT_UNROLL_M 4
1180 #define DGEMM_DEFAULT_UNROLL_M 2
1181 #define QGEMM_DEFAULT_UNROLL_M 2
1182 #define CGEMM_DEFAULT_UNROLL_M 2
1183 #define ZGEMM_DEFAULT_UNROLL_M 1
1184 #define XGEMM_DEFAULT_UNROLL_M 1
1186 #define SGEMM_DEFAULT_UNROLL_N 4
1187 #define DGEMM_DEFAULT_UNROLL_N 4
1188 #define QGEMM_DEFAULT_UNROLL_N 2
1189 #define CGEMM_DEFAULT_UNROLL_N 2
1190 #define ZGEMM_DEFAULT_UNROLL_N 2
1191 #define XGEMM_DEFAULT_UNROLL_N 1
1195 #define SGEMM_DEFAULT_UNROLL_M 16
1196 #define DGEMM_DEFAULT_UNROLL_M 4
1197 #define QGEMM_DEFAULT_UNROLL_M 2
1198 #define CGEMM_DEFAULT_UNROLL_M 8
1199 #define ZGEMM_DEFAULT_UNROLL_M 4
1200 #define XGEMM_DEFAULT_UNROLL_M 1
1202 #define SGEMM_DEFAULT_UNROLL_N 4
1203 #define DGEMM_DEFAULT_UNROLL_N 4
1204 #define QGEMM_DEFAULT_UNROLL_N 2
1205 #define CGEMM_DEFAULT_UNROLL_N 2
1206 #define ZGEMM_DEFAULT_UNROLL_N 2
1207 #define XGEMM_DEFAULT_UNROLL_N 1
1213 #define SGEMM_DEFAULT_P 512
1214 #define SGEMM_DEFAULT_R sgemm_r
1215 #define DGEMM_DEFAULT_P 512
1216 #define DGEMM_DEFAULT_R dgemm_r
1217 #define QGEMM_DEFAULT_P 504
1218 #define QGEMM_DEFAULT_R qgemm_r
1219 #define CGEMM_DEFAULT_P 128
1220 #define CGEMM_DEFAULT_R 1024
1221 #define ZGEMM_DEFAULT_P 512
1222 #define ZGEMM_DEFAULT_R zgemm_r
1223 #define XGEMM_DEFAULT_P 252
1224 #define XGEMM_DEFAULT_R xgemm_r
1225 #define SGEMM_DEFAULT_Q 256
1226 #define DGEMM_DEFAULT_Q 256
1227 #define QGEMM_DEFAULT_Q 128
1228 #define CGEMM_DEFAULT_Q 256
1229 #define ZGEMM_DEFAULT_Q 192
1230 #define XGEMM_DEFAULT_Q 128
1234 #define SGEMM_DEFAULT_P 768
1235 #define DGEMM_DEFAULT_P 512
1236 #define CGEMM_DEFAULT_P 384
1237 #define ZGEMM_DEFAULT_P 256
1239 #define SGEMM_DEFAULT_Q 384
1241 #define DGEMM_DEFAULT_Q 128
1243 #define DGEMM_DEFAULT_Q 256
1245 #define CGEMM_DEFAULT_Q 192
1246 #define ZGEMM_DEFAULT_Q 128
1248 #define SGEMM_DEFAULT_R sgemm_r
1249 #define DGEMM_DEFAULT_R 13824
1250 #define CGEMM_DEFAULT_R cgemm_r
1251 #define ZGEMM_DEFAULT_R zgemm_r
1253 #define QGEMM_DEFAULT_Q 128
1254 #define QGEMM_DEFAULT_P 504
1255 #define QGEMM_DEFAULT_R qgemm_r
1256 #define XGEMM_DEFAULT_P 252
1257 #define XGEMM_DEFAULT_R xgemm_r
1258 #define XGEMM_DEFAULT_Q 128
/* Unroll defaults for the CGEMM3M and ZGEMM3M macro families. Both use
 * UNROLL_M 8; UNROLL_N is 4 for CGEMM3M and 2 for ZGEMM3M. */
1260 #define CGEMM3M_DEFAULT_UNROLL_N 4
1261 #define CGEMM3M_DEFAULT_UNROLL_M 8
1262 #define ZGEMM3M_DEFAULT_UNROLL_N 2
1263 #define ZGEMM3M_DEFAULT_UNROLL_M 8
/*
 * One parameter group: A/B offsets, an alignment mask, per-precision unroll
 * values, and P/Q/R defaults for the S, D, Q, C, Z and X prefixes.
 * NOTE(review): the target this group applies to is selected by a guard
 * that is not shown here -- confirm.
 */
1276 #define GEMM_DEFAULT_OFFSET_A 64
1277 #define GEMM_DEFAULT_OFFSET_B 0
1278 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* UNROLL_M is listed twice (4/2/2/2/1/1, then 8/4/2/4/2/1).
 * NOTE(review): presumably two branches of a conditional -- confirm. */
1283 #define SGEMM_DEFAULT_UNROLL_M 4
1284 #define DGEMM_DEFAULT_UNROLL_M 2
1285 #define QGEMM_DEFAULT_UNROLL_M 2
1286 #define CGEMM_DEFAULT_UNROLL_M 2
1287 #define ZGEMM_DEFAULT_UNROLL_M 1
1288 #define XGEMM_DEFAULT_UNROLL_M 1
1290 #define SGEMM_DEFAULT_UNROLL_M 8
1291 #define DGEMM_DEFAULT_UNROLL_M 4
1292 #define QGEMM_DEFAULT_UNROLL_M 2
1293 #define CGEMM_DEFAULT_UNROLL_M 4
1294 #define ZGEMM_DEFAULT_UNROLL_M 2
1295 #define XGEMM_DEFAULT_UNROLL_M 1
/* A single UNROLL_N set. */
1298 #define SGEMM_DEFAULT_UNROLL_N 4
1299 #define DGEMM_DEFAULT_UNROLL_N 2
1300 #define QGEMM_DEFAULT_UNROLL_N 2
1301 #define CGEMM_DEFAULT_UNROLL_N 2
1302 #define ZGEMM_DEFAULT_UNROLL_N 1
1303 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) defined elsewhere. */
1305 #define SGEMM_DEFAULT_P sgemm_p
1306 #define SGEMM_DEFAULT_R sgemm_r
1308 #define DGEMM_DEFAULT_P dgemm_p
1309 #define DGEMM_DEFAULT_R dgemm_r
1311 #define QGEMM_DEFAULT_P qgemm_p
1312 #define QGEMM_DEFAULT_R qgemm_r
1314 #define CGEMM_DEFAULT_P cgemm_p
1315 #define CGEMM_DEFAULT_R cgemm_r
1317 #define ZGEMM_DEFAULT_P zgemm_p
1318 #define ZGEMM_DEFAULT_R zgemm_r
1320 #define XGEMM_DEFAULT_P xgemm_p
1321 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 256 for every precision. */
1323 #define SGEMM_DEFAULT_Q 256
1324 #define DGEMM_DEFAULT_Q 256
1325 #define QGEMM_DEFAULT_Q 256
1326 #define CGEMM_DEFAULT_Q 256
1327 #define ZGEMM_DEFAULT_Q 256
1328 #define XGEMM_DEFAULT_Q 256
/*
 * One parameter group: offsets, alignment mask, unroll values and P/Q/R
 * defaults for all six precision prefixes.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1338 #define GEMM_DEFAULT_OFFSET_A 0
1339 #define GEMM_DEFAULT_OFFSET_B 128
1340 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* UNROLL_M and UNROLL_N are equal within each precision: 8 for S/D/Q and
 * 4 for C/Z/X. */
1342 #define SGEMM_DEFAULT_UNROLL_M 8
1343 #define SGEMM_DEFAULT_UNROLL_N 8
1344 #define DGEMM_DEFAULT_UNROLL_M 8
1345 #define DGEMM_DEFAULT_UNROLL_N 8
1346 #define QGEMM_DEFAULT_UNROLL_M 8
1347 #define QGEMM_DEFAULT_UNROLL_N 8
1348 #define CGEMM_DEFAULT_UNROLL_M 4
1349 #define CGEMM_DEFAULT_UNROLL_N 4
1350 #define ZGEMM_DEFAULT_UNROLL_M 4
1351 #define ZGEMM_DEFAULT_UNROLL_N 4
1352 #define XGEMM_DEFAULT_UNROLL_M 4
1353 #define XGEMM_DEFAULT_UNROLL_N 4
/* P expands to identifiers defined elsewhere. */
1355 #define SGEMM_DEFAULT_P sgemm_p
1356 #define DGEMM_DEFAULT_P dgemm_p
1357 #define QGEMM_DEFAULT_P qgemm_p
1358 #define CGEMM_DEFAULT_P cgemm_p
1359 #define ZGEMM_DEFAULT_P zgemm_p
1360 #define XGEMM_DEFAULT_P xgemm_p
/* Q is the literal 1024 for every precision. */
1362 #define SGEMM_DEFAULT_Q 1024
1363 #define DGEMM_DEFAULT_Q 1024
1364 #define QGEMM_DEFAULT_Q 1024
1365 #define CGEMM_DEFAULT_Q 1024
1366 #define ZGEMM_DEFAULT_Q 1024
1367 #define XGEMM_DEFAULT_Q 1024
/* R expands to identifiers defined elsewhere. */
1369 #define SGEMM_DEFAULT_R sgemm_r
1370 #define DGEMM_DEFAULT_R dgemm_r
1371 #define QGEMM_DEFAULT_R qgemm_r
1372 #define CGEMM_DEFAULT_R cgemm_r
1373 #define ZGEMM_DEFAULT_R zgemm_r
1374 #define XGEMM_DEFAULT_R xgemm_r
/* Floating-point constant; its consumer is not visible in this section.
 * NOTE(review): presumably tied to the group above -- confirm its scope. */
1378 #define GETRF_FACTOR 0.65
/*
 * Parameter group enabled when any of EV4, EV5 or EV6 is defined.
 * Only the S, D, C and Z prefixes are configured here.
 */
1382 #if defined(EV4) || defined(EV5) || defined(EV6)
1392 #define GEMM_DEFAULT_OFFSET_A 512
1393 #define GEMM_DEFAULT_OFFSET_B 512
1394 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll values: 4x4 for S/D, 2x2 for C/Z. */
1396 #define SGEMM_DEFAULT_UNROLL_M 4
1397 #define SGEMM_DEFAULT_UNROLL_N 4
1398 #define DGEMM_DEFAULT_UNROLL_M 4
1399 #define DGEMM_DEFAULT_UNROLL_N 4
1400 #define CGEMM_DEFAULT_UNROLL_M 2
1401 #define CGEMM_DEFAULT_UNROLL_N 2
1402 #define ZGEMM_DEFAULT_UNROLL_M 2
1403 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Three P/Q sets follow; only the first also defines R.
 * NOTE(review): presumably one set per EV4/EV5/EV6 variant, chosen by inner
 * guards that are not shown here -- confirm which set maps to which. */
1408 #define SGEMM_DEFAULT_P 32
1409 #define SGEMM_DEFAULT_Q 112
1410 #define SGEMM_DEFAULT_R 256
1412 #define DGEMM_DEFAULT_P 32
1413 #define DGEMM_DEFAULT_Q 56
1414 #define DGEMM_DEFAULT_R 256
1416 #define CGEMM_DEFAULT_P 32
1417 #define CGEMM_DEFAULT_Q 64
1418 #define CGEMM_DEFAULT_R 240
1420 #define ZGEMM_DEFAULT_P 32
1421 #define ZGEMM_DEFAULT_Q 32
1422 #define ZGEMM_DEFAULT_R 240
/* Second P/Q set (P = 64 for all four precisions). */
1426 #define SGEMM_DEFAULT_P 64
1427 #define SGEMM_DEFAULT_Q 256
1429 #define DGEMM_DEFAULT_P 64
1430 #define DGEMM_DEFAULT_Q 128
1432 #define CGEMM_DEFAULT_P 64
1433 #define CGEMM_DEFAULT_Q 128
1435 #define ZGEMM_DEFAULT_P 64
1436 #define ZGEMM_DEFAULT_Q 64
/* Third P/Q set (P = 256 except ZGEMM, which uses 128). */
1440 #define SGEMM_DEFAULT_P 256
1441 #define SGEMM_DEFAULT_Q 512
1443 #define DGEMM_DEFAULT_P 256
1444 #define DGEMM_DEFAULT_Q 256
1446 #define CGEMM_DEFAULT_P 256
1447 #define CGEMM_DEFAULT_Q 256
1449 #define ZGEMM_DEFAULT_P 128
1450 #define ZGEMM_DEFAULT_Q 256
/*
 * One parameter group for the S, D, C and Z prefixes: offsets, alignment
 * mask, unroll values, and P/Q defaults. No R values are defined here.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1460 #define GEMM_DEFAULT_OFFSET_A 0
1461 #define GEMM_DEFAULT_OFFSET_B 8192
1462 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll M x N: S 16x4, D 4x4, C 8x2, Z 2x2. */
1464 #define SGEMM_DEFAULT_UNROLL_M 16
1465 #define SGEMM_DEFAULT_UNROLL_N 4
1466 #define DGEMM_DEFAULT_UNROLL_M 4
1467 #define DGEMM_DEFAULT_UNROLL_N 4
1468 #define CGEMM_DEFAULT_UNROLL_M 8
1469 #define CGEMM_DEFAULT_UNROLL_N 2
1470 #define ZGEMM_DEFAULT_UNROLL_M 2
1471 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every precision. */
1473 #define SGEMM_DEFAULT_P 128
1474 #define DGEMM_DEFAULT_P 128
1475 #define CGEMM_DEFAULT_P 128
1476 #define ZGEMM_DEFAULT_P 128
/* Q varies by precision. */
1478 #define SGEMM_DEFAULT_Q 512
1479 #define DGEMM_DEFAULT_Q 256
1480 #define CGEMM_DEFAULT_Q 256
1481 #define ZGEMM_DEFAULT_Q 128
/*
 * One parameter group for the S, D, C and Z prefixes: offsets, alignment
 * mask, unroll values, and P/Q defaults. No R values are defined here.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1487 #define GEMM_DEFAULT_OFFSET_A 0
1488 #define GEMM_DEFAULT_OFFSET_B 1024
1489 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll M x N: S 16x4, D 4x4, C 8x2, Z 2x2. */
1491 #define SGEMM_DEFAULT_UNROLL_M 16
1492 #define SGEMM_DEFAULT_UNROLL_N 4
1493 #define DGEMM_DEFAULT_UNROLL_M 4
1494 #define DGEMM_DEFAULT_UNROLL_N 4
1495 #define CGEMM_DEFAULT_UNROLL_M 8
1496 #define CGEMM_DEFAULT_UNROLL_N 2
1497 #define ZGEMM_DEFAULT_UNROLL_M 2
1498 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P varies by precision. */
1500 #define SGEMM_DEFAULT_P 256
1501 #define DGEMM_DEFAULT_P 128
1502 #define CGEMM_DEFAULT_P 128
1503 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for every precision. */
1505 #define SGEMM_DEFAULT_Q 256
1506 #define DGEMM_DEFAULT_Q 256
1507 #define CGEMM_DEFAULT_Q 256
1508 #define ZGEMM_DEFAULT_Q 256
/*
 * One parameter group for the S, D, C and Z prefixes. P depends on the
 * value of L2_SIZE; Q does not.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1518 #define GEMM_DEFAULT_OFFSET_A 2688
1519 #define GEMM_DEFAULT_OFFSET_B 3072
1520 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: S 16x4, D 4x4, C 8x2, Z 2x2. */
1522 #define SGEMM_DEFAULT_UNROLL_M 16
1523 #define SGEMM_DEFAULT_UNROLL_N 4
1524 #define DGEMM_DEFAULT_UNROLL_M 4
1525 #define DGEMM_DEFAULT_UNROLL_N 4
1526 #define CGEMM_DEFAULT_UNROLL_M 8
1527 #define CGEMM_DEFAULT_UNROLL_N 2
1528 #define ZGEMM_DEFAULT_UNROLL_M 2
1529 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values used when L2_SIZE equals exactly 1024976.
 * NOTE(review): 1024976 is not a power of two (2^20 is 1048576). Check that
 * it matches the L2_SIZE value the build configuration actually supplies
 * before "correcting" it, otherwise this branch changes which P set is used. */
1532 #if L2_SIZE == 1024976
1533 #define SGEMM_DEFAULT_P 320
1534 #define DGEMM_DEFAULT_P 256
1535 #define CGEMM_DEFAULT_P 256
1536 #define ZGEMM_DEFAULT_P 256
/* Second P set (176 for all).
 * NOTE(review): presumably the fallback branch of the L2_SIZE test; the
 * separating directive is not shown -- confirm. */
1538 #define SGEMM_DEFAULT_P 176
1539 #define DGEMM_DEFAULT_P 176
1540 #define CGEMM_DEFAULT_P 176
1541 #define ZGEMM_DEFAULT_P 176
/* Q values. */
1545 #define SGEMM_DEFAULT_Q 512
1546 #define DGEMM_DEFAULT_Q 256
1547 #define CGEMM_DEFAULT_Q 256
1548 #define ZGEMM_DEFAULT_Q 128
/*
 * One parameter group for the S, D, C and Z prefixes.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
/* Both offsets are written as (32 * 0), which evaluates to 0. */
1559 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1560 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1561 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll M x N: 4x4 for S/D, 2x2 for C/Z. */
1563 #define SGEMM_DEFAULT_UNROLL_M 4
1564 #define SGEMM_DEFAULT_UNROLL_N 4
1565 #define DGEMM_DEFAULT_UNROLL_M 4
1566 #define DGEMM_DEFAULT_UNROLL_N 4
1567 #define CGEMM_DEFAULT_UNROLL_M 2
1568 #define CGEMM_DEFAULT_UNROLL_N 2
1569 #define ZGEMM_DEFAULT_UNROLL_M 2
1570 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every precision. */
1572 #define SGEMM_DEFAULT_P 512
1573 #define DGEMM_DEFAULT_P 512
1574 #define CGEMM_DEFAULT_P 512
1575 #define ZGEMM_DEFAULT_P 512
/* Q varies by precision. */
1577 #define SGEMM_DEFAULT_Q 1024
1578 #define DGEMM_DEFAULT_Q 512
1579 #define CGEMM_DEFAULT_Q 512
1580 #define ZGEMM_DEFAULT_Q 256
/* R is defined as the matching P macro, so each R expands to 512 here. */
1582 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
1583 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
1584 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
1585 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * One parameter group for the S, D, C and Z prefixes. No R values are
 * defined here.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
/* Both offsets are written as (32 * 0), which evaluates to 0. */
1595 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1596 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1597 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll M x N: 8x4 for S/D, 4x2 for C/Z. */
1599 #define SGEMM_DEFAULT_UNROLL_M 8
1600 #define SGEMM_DEFAULT_UNROLL_N 4
1601 #define DGEMM_DEFAULT_UNROLL_M 8
1602 #define DGEMM_DEFAULT_UNROLL_N 4
1603 #define CGEMM_DEFAULT_UNROLL_M 4
1604 #define CGEMM_DEFAULT_UNROLL_N 2
1605 #define ZGEMM_DEFAULT_UNROLL_M 4
1606 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every precision. */
1608 #define SGEMM_DEFAULT_P 128
1609 #define DGEMM_DEFAULT_P 128
1610 #define CGEMM_DEFAULT_P 128
1611 #define ZGEMM_DEFAULT_P 128
/* Q is listed twice (4096/3072/2048/1024, then 512/256/256/128).
 * NOTE(review): presumably two branches of a conditional that is not shown
 * here -- confirm which set is active. */
1613 #define SGEMM_DEFAULT_Q 4096
1614 #define DGEMM_DEFAULT_Q 3072
1615 #define CGEMM_DEFAULT_Q 2048
1616 #define ZGEMM_DEFAULT_Q 1024
1618 #define SGEMM_DEFAULT_Q 512
1619 #define DGEMM_DEFAULT_Q 256
1620 #define CGEMM_DEFAULT_Q 256
1621 #define ZGEMM_DEFAULT_Q 128
/*
 * Parameter group enabled when any of POWER3, POWER4 or POWER5 is defined.
 * Only the S, D, C and Z prefixes are configured here.
 */
1629 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
1630 #define GEMM_DEFAULT_OFFSET_A 0
1631 #define GEMM_DEFAULT_OFFSET_B 2048
1632 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll M x N: 4x4 for S/D, 2x2 for C/Z. */
1634 #define SGEMM_DEFAULT_UNROLL_M 4
1635 #define SGEMM_DEFAULT_UNROLL_N 4
1636 #define DGEMM_DEFAULT_UNROLL_M 4
1637 #define DGEMM_DEFAULT_UNROLL_N 4
1638 #define CGEMM_DEFAULT_UNROLL_M 2
1639 #define CGEMM_DEFAULT_UNROLL_N 2
1640 #define ZGEMM_DEFAULT_UNROLL_M 2
1641 #define ZGEMM_DEFAULT_UNROLL_N 2
/* First P/Q/R set, with literal values for S, D and Z.
 * NOTE(review): there are no CGEMM_DEFAULT_P/Q/R entries in this set, unlike
 * the sets below -- confirm whether the omission is intentional. */
1648 #define SGEMM_DEFAULT_P 256
1649 #define SGEMM_DEFAULT_Q 432
1650 #define SGEMM_DEFAULT_R 1012
1652 #define DGEMM_DEFAULT_P 256
1653 #define DGEMM_DEFAULT_Q 216
1654 #define DGEMM_DEFAULT_R 1012
1656 #define ZGEMM_DEFAULT_P 256
1657 #define ZGEMM_DEFAULT_Q 104
1658 #define ZGEMM_DEFAULT_R 1012
/* P values that depend on ALLOC_HUGETLB: 184 when it is defined.
 * NOTE(review): the 144 set is presumably the branch taken when it is not
 * defined; the separating directive is not shown -- confirm. */
1662 #ifdef ALLOC_HUGETLB
1663 #define SGEMM_DEFAULT_P 184
1664 #define DGEMM_DEFAULT_P 184
1665 #define CGEMM_DEFAULT_P 184
1666 #define ZGEMM_DEFAULT_P 184
1668 #define SGEMM_DEFAULT_P 144
1669 #define DGEMM_DEFAULT_P 144
1670 #define CGEMM_DEFAULT_P 144
1671 #define ZGEMM_DEFAULT_P 144
/* A second ALLOC_HUGETLB-dependent P choice with per-precision values.
 * NOTE(review): presumably for a different variant than the pair above;
 * the outer guard is not shown -- confirm. */
1676 #ifdef ALLOC_HUGETLB
1677 #define SGEMM_DEFAULT_P 512
1678 #define DGEMM_DEFAULT_P 256
1679 #define CGEMM_DEFAULT_P 256
1680 #define ZGEMM_DEFAULT_P 128
1682 #define SGEMM_DEFAULT_P 320
1683 #define DGEMM_DEFAULT_P 160
1684 #define CGEMM_DEFAULT_P 160
1685 #define ZGEMM_DEFAULT_P 80
/* Q is 256 for every precision. */
1688 #define SGEMM_DEFAULT_Q 256
1689 #define CGEMM_DEFAULT_Q 256
1690 #define DGEMM_DEFAULT_Q 256
1691 #define ZGEMM_DEFAULT_Q 256
/*
 * One parameter group for the S, D, C and Z prefixes: offsets, alignment
 * mask, unroll values, and P/Q defaults. No R values are defined here.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1703 #define GEMM_DEFAULT_OFFSET_A 384
1704 #define GEMM_DEFAULT_OFFSET_B 1024
1705 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: 4x4 for S/D, 2x4 for C/Z. */
1707 #define SGEMM_DEFAULT_UNROLL_M 4
1708 #define SGEMM_DEFAULT_UNROLL_N 4
1709 #define DGEMM_DEFAULT_UNROLL_M 4
1710 #define DGEMM_DEFAULT_UNROLL_N 4
1711 #define CGEMM_DEFAULT_UNROLL_M 2
1712 #define CGEMM_DEFAULT_UNROLL_N 4
1713 #define ZGEMM_DEFAULT_UNROLL_M 2
1714 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P varies by precision. */
1716 #define SGEMM_DEFAULT_P 992
1717 #define DGEMM_DEFAULT_P 480
1718 #define CGEMM_DEFAULT_P 488
1719 #define ZGEMM_DEFAULT_P 248
/* Q is 504 for S/D and 400 for C/Z. */
1721 #define SGEMM_DEFAULT_Q 504
1722 #define DGEMM_DEFAULT_Q 504
1723 #define CGEMM_DEFAULT_Q 400
1724 #define ZGEMM_DEFAULT_Q 400
/*
 * Parameter group enabled when both SPARC and V7 are defined.
 * Only the S, D, C and Z prefixes are configured; no R values are defined.
 */
1730 #if defined(SPARC) && defined(V7)
1735 #define GEMM_DEFAULT_OFFSET_A 0
1736 #define GEMM_DEFAULT_OFFSET_B 2048
1737 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: 2x8 for S/D, 1x4 for C/Z. */
1739 #define SGEMM_DEFAULT_UNROLL_M 2
1740 #define SGEMM_DEFAULT_UNROLL_N 8
1741 #define DGEMM_DEFAULT_UNROLL_M 2
1742 #define DGEMM_DEFAULT_UNROLL_N 8
1743 #define CGEMM_DEFAULT_UNROLL_M 1
1744 #define CGEMM_DEFAULT_UNROLL_N 4
1745 #define ZGEMM_DEFAULT_UNROLL_M 1
1746 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 256 for every precision. */
1748 #define SGEMM_DEFAULT_P 256
1749 #define DGEMM_DEFAULT_P 256
1750 #define CGEMM_DEFAULT_P 256
1751 #define ZGEMM_DEFAULT_P 256
/* Q varies by precision. */
1753 #define SGEMM_DEFAULT_Q 512
1754 #define DGEMM_DEFAULT_Q 256
1755 #define CGEMM_DEFAULT_Q 256
1756 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD expands to the identifier gemm_thread_mn, defined elsewhere. */
1759 #define GEMM_THREAD gemm_thread_mn
/*
 * Parameter group enabled when SPARC and V9 are both defined, or when
 * __sparc_v9__ is defined.
 * Only the S, D, C and Z prefixes are configured; no R values are defined.
 */
1762 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
1767 #define GEMM_DEFAULT_OFFSET_A 0
1768 #define GEMM_DEFAULT_OFFSET_B 2048
1769 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: 4x4 for S/D, 2x2 for C/Z. */
1771 #define SGEMM_DEFAULT_UNROLL_M 4
1772 #define SGEMM_DEFAULT_UNROLL_N 4
1773 #define DGEMM_DEFAULT_UNROLL_M 4
1774 #define DGEMM_DEFAULT_UNROLL_N 4
1775 #define CGEMM_DEFAULT_UNROLL_M 2
1776 #define CGEMM_DEFAULT_UNROLL_N 2
1777 #define ZGEMM_DEFAULT_UNROLL_M 2
1778 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every precision. */
1780 #define SGEMM_DEFAULT_P 512
1781 #define DGEMM_DEFAULT_P 512
1782 #define CGEMM_DEFAULT_P 512
1783 #define ZGEMM_DEFAULT_P 512
/* Q varies by precision. */
1785 #define SGEMM_DEFAULT_Q 1024
1786 #define DGEMM_DEFAULT_Q 512
1787 #define CGEMM_DEFAULT_Q 512
1788 #define ZGEMM_DEFAULT_Q 256
/*
 * One parameter group for the S, D, C and Z prefixes, all with literal
 * values.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1798 #define GEMM_DEFAULT_OFFSET_A 0
1799 #define GEMM_DEFAULT_OFFSET_B 0
1800 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: 2x8 for S/D, 1x4 for C/Z. */
1802 #define SGEMM_DEFAULT_UNROLL_M 2
1803 #define SGEMM_DEFAULT_UNROLL_N 8
1804 #define DGEMM_DEFAULT_UNROLL_M 2
1805 #define DGEMM_DEFAULT_UNROLL_N 8
1806 #define CGEMM_DEFAULT_UNROLL_M 1
1807 #define CGEMM_DEFAULT_UNROLL_N 4
1808 #define ZGEMM_DEFAULT_UNROLL_M 1
1809 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 108 for S/C and 112 for D/Z. */
1811 #define SGEMM_DEFAULT_P 108
1812 #define DGEMM_DEFAULT_P 112
1813 #define CGEMM_DEFAULT_P 108
1814 #define ZGEMM_DEFAULT_P 112
/* Q varies by precision. */
1816 #define SGEMM_DEFAULT_Q 288
1817 #define DGEMM_DEFAULT_Q 144
1818 #define CGEMM_DEFAULT_Q 144
1819 #define ZGEMM_DEFAULT_Q 72
/* R is 2000 for every precision. */
1821 #define SGEMM_DEFAULT_R 2000
1822 #define DGEMM_DEFAULT_R 2000
1823 #define CGEMM_DEFAULT_R 2000
1824 #define ZGEMM_DEFAULT_R 2000
/*
 * One parameter group for the S, D, C and Z prefixes. The existing comment
 * below says it was copied from the SICORTEX settings.
 * NOTE(review): the values here are not the same as the SICORTEX-style group
 * earlier in this file (for example the unroll and P values differ), so
 * treat the two groups as independent -- confirm the intended target.
 */
1830 ////Copy from SICORTEX
1834 #define GEMM_DEFAULT_OFFSET_A 0
1835 #define GEMM_DEFAULT_OFFSET_B 0
1836 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: S 8x4, D 4x4, C 4x2, Z 2x2. */
1838 #define SGEMM_DEFAULT_UNROLL_M 8
1839 #define SGEMM_DEFAULT_UNROLL_N 4
1841 #define DGEMM_DEFAULT_UNROLL_M 4
1842 #define DGEMM_DEFAULT_UNROLL_N 4
1844 #define CGEMM_DEFAULT_UNROLL_M 4
1845 #define CGEMM_DEFAULT_UNROLL_N 2
1847 #define ZGEMM_DEFAULT_UNROLL_M 2
1848 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q literals per precision. */
1850 #define SGEMM_DEFAULT_P 64
1851 #define DGEMM_DEFAULT_P 44
1852 #define CGEMM_DEFAULT_P 64
1853 #define ZGEMM_DEFAULT_P 32
1855 #define SGEMM_DEFAULT_Q 192
1856 #define DGEMM_DEFAULT_Q 92
1857 #define CGEMM_DEFAULT_Q 128
1858 #define ZGEMM_DEFAULT_Q 80
/* R is the literal 640 except for DGEMM, which expands to the identifier
 * dgemm_r (defined elsewhere). */
1860 #define SGEMM_DEFAULT_R 640
1861 #define DGEMM_DEFAULT_R dgemm_r
1862 #define CGEMM_DEFAULT_R 640
1863 #define ZGEMM_DEFAULT_R 640
/* Additional offset constants; their consumers are not visible here. */
1865 #define GEMM_OFFSET_A1 0x10000
1866 #define GEMM_OFFSET_B1 0x100000
/*
 * One parameter group for the S, D, C and Z prefixes, all with literal
 * values.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1875 #define GEMM_DEFAULT_OFFSET_A 0
1876 #define GEMM_DEFAULT_OFFSET_B 0
1877 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll is 2x2 for every precision. */
1879 #define SGEMM_DEFAULT_UNROLL_M 2
1880 #define SGEMM_DEFAULT_UNROLL_N 2
1882 #define DGEMM_DEFAULT_UNROLL_M 2
1883 #define DGEMM_DEFAULT_UNROLL_N 2
1885 #define CGEMM_DEFAULT_UNROLL_M 2
1886 #define CGEMM_DEFAULT_UNROLL_N 2
1888 #define ZGEMM_DEFAULT_UNROLL_M 2
1889 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q literals per precision. */
1891 #define SGEMM_DEFAULT_P 64
1892 #define DGEMM_DEFAULT_P 24
1893 #define CGEMM_DEFAULT_P 24
1894 #define ZGEMM_DEFAULT_P 20
1896 #define SGEMM_DEFAULT_Q 192
1897 #define DGEMM_DEFAULT_Q 128
1898 #define CGEMM_DEFAULT_Q 128
1899 #define ZGEMM_DEFAULT_Q 64
/* R is 512 for every precision. */
1901 #define SGEMM_DEFAULT_R 512
1902 #define DGEMM_DEFAULT_R 512
1903 #define CGEMM_DEFAULT_R 512
1904 #define ZGEMM_DEFAULT_R 512
/* Additional offset constants; their consumers are not visible here. */
1906 #define GEMM_OFFSET_A1 0x10000
1907 #define GEMM_OFFSET_B1 0x100000
/*
 * One parameter group for the S, D, C and Z prefixes, all with literal
 * values.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1917 #define GEMM_DEFAULT_OFFSET_A 0
1918 #define GEMM_DEFAULT_OFFSET_B 0
1919 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: 4x4 for S/D, 2x2 for C/Z. */
1921 #define SGEMM_DEFAULT_UNROLL_M 4
1922 #define SGEMM_DEFAULT_UNROLL_N 4
1924 #define DGEMM_DEFAULT_UNROLL_M 4
1925 #define DGEMM_DEFAULT_UNROLL_N 4
1927 #define CGEMM_DEFAULT_UNROLL_M 2
1928 #define CGEMM_DEFAULT_UNROLL_N 2
1930 #define ZGEMM_DEFAULT_UNROLL_M 2
1931 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R literals per precision. */
1933 #define SGEMM_DEFAULT_P 128
1934 #define DGEMM_DEFAULT_P 128
1935 #define CGEMM_DEFAULT_P 96
1936 #define ZGEMM_DEFAULT_P 64
1938 #define SGEMM_DEFAULT_Q 240
1939 #define DGEMM_DEFAULT_Q 120
1940 #define CGEMM_DEFAULT_Q 120
1941 #define ZGEMM_DEFAULT_Q 120
1943 #define SGEMM_DEFAULT_R 12288
1944 #define DGEMM_DEFAULT_R 8192
1945 #define CGEMM_DEFAULT_R 4096
1946 #define ZGEMM_DEFAULT_R 4096
/*
 * One parameter group for the S, D, C and Z prefixes, all with literal
 * values.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1958 #define GEMM_DEFAULT_OFFSET_A 0
1959 #define GEMM_DEFAULT_OFFSET_B 0
1960 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll M x N: 4x2 for S/D, 2x2 for C/Z. */
1962 #define SGEMM_DEFAULT_UNROLL_M 4
1963 #define SGEMM_DEFAULT_UNROLL_N 2
1965 #define DGEMM_DEFAULT_UNROLL_M 4
1966 #define DGEMM_DEFAULT_UNROLL_N 2
1968 #define CGEMM_DEFAULT_UNROLL_M 2
1969 #define CGEMM_DEFAULT_UNROLL_N 2
1971 #define ZGEMM_DEFAULT_UNROLL_M 2
1972 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R literals per precision. */
1974 #define SGEMM_DEFAULT_P 128
1975 #define DGEMM_DEFAULT_P 128
1976 #define CGEMM_DEFAULT_P 96
1977 #define ZGEMM_DEFAULT_P 64
1979 #define SGEMM_DEFAULT_Q 240
1980 #define DGEMM_DEFAULT_Q 120
1981 #define CGEMM_DEFAULT_Q 120
1982 #define ZGEMM_DEFAULT_Q 120
1984 #define SGEMM_DEFAULT_R 12288
1985 #define DGEMM_DEFAULT_R 8192
1986 #define CGEMM_DEFAULT_R 4096
1987 #define ZGEMM_DEFAULT_R 4096
/*
 * One parameter group for the S, D, C and Z prefixes, all with literal
 * values.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
1997 #define GEMM_DEFAULT_OFFSET_A 0
1998 #define GEMM_DEFAULT_OFFSET_B 0
1999 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll is 2x2 for every precision. */
2001 #define SGEMM_DEFAULT_UNROLL_M 2
2002 #define SGEMM_DEFAULT_UNROLL_N 2
2004 #define DGEMM_DEFAULT_UNROLL_M 2
2005 #define DGEMM_DEFAULT_UNROLL_N 2
2007 #define CGEMM_DEFAULT_UNROLL_M 2
2008 #define CGEMM_DEFAULT_UNROLL_N 2
2010 #define ZGEMM_DEFAULT_UNROLL_M 2
2011 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R literals per precision. */
2013 #define SGEMM_DEFAULT_P 128
2014 #define DGEMM_DEFAULT_P 128
2015 #define CGEMM_DEFAULT_P 96
2016 #define ZGEMM_DEFAULT_P 64
2018 #define SGEMM_DEFAULT_Q 240
2019 #define DGEMM_DEFAULT_Q 120
2020 #define CGEMM_DEFAULT_Q 120
2021 #define ZGEMM_DEFAULT_Q 120
2023 #define SGEMM_DEFAULT_R 12288
2024 #define DGEMM_DEFAULT_R 8192
2025 #define CGEMM_DEFAULT_R 4096
2026 #define ZGEMM_DEFAULT_R 4096
/*
 * One parameter group for the S, D, C and Z prefixes, all with literal
 * values. Every value matches the group immediately before it in this file.
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
2037 #define GEMM_DEFAULT_OFFSET_A 0
2038 #define GEMM_DEFAULT_OFFSET_B 0
2039 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll is 2x2 for every precision. */
2041 #define SGEMM_DEFAULT_UNROLL_M 2
2042 #define SGEMM_DEFAULT_UNROLL_N 2
2044 #define DGEMM_DEFAULT_UNROLL_M 2
2045 #define DGEMM_DEFAULT_UNROLL_N 2
2047 #define CGEMM_DEFAULT_UNROLL_M 2
2048 #define CGEMM_DEFAULT_UNROLL_N 2
2050 #define ZGEMM_DEFAULT_UNROLL_M 2
2051 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R literals per precision. */
2053 #define SGEMM_DEFAULT_P 128
2054 #define DGEMM_DEFAULT_P 128
2055 #define CGEMM_DEFAULT_P 96
2056 #define ZGEMM_DEFAULT_P 64
2058 #define SGEMM_DEFAULT_Q 240
2059 #define DGEMM_DEFAULT_Q 120
2060 #define CGEMM_DEFAULT_Q 120
2061 #define ZGEMM_DEFAULT_Q 120
2063 #define SGEMM_DEFAULT_R 12288
2064 #define DGEMM_DEFAULT_R 8192
2065 #define CGEMM_DEFAULT_R 4096
2066 #define ZGEMM_DEFAULT_R 4096
/*
 * One parameter group covering all six precision prefixes (S, D, Q, C, Z, X).
 * NOTE(review): the guard selecting this group is not shown here -- confirm.
 */
2080 #define GEMM_DEFAULT_OFFSET_A 0
2081 #define GEMM_DEFAULT_OFFSET_B 0
2082 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* A single UNROLL_N set. */
2084 #define SGEMM_DEFAULT_UNROLL_N 4
2085 #define DGEMM_DEFAULT_UNROLL_N 4
2086 #define QGEMM_DEFAULT_UNROLL_N 2
2087 #define CGEMM_DEFAULT_UNROLL_N 2
2088 #define ZGEMM_DEFAULT_UNROLL_N 2
2089 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL_M is listed twice (4/2/2/2/1/1, then 8/4/2/4/2/1).
 * NOTE(review): presumably two branches of a conditional that is not shown
 * here -- confirm which set is active. */
2092 #define SGEMM_DEFAULT_UNROLL_M 4
2093 #define DGEMM_DEFAULT_UNROLL_M 2
2094 #define QGEMM_DEFAULT_UNROLL_M 2
2095 #define CGEMM_DEFAULT_UNROLL_M 2
2096 #define ZGEMM_DEFAULT_UNROLL_M 1
2097 #define XGEMM_DEFAULT_UNROLL_M 1
2099 #define SGEMM_DEFAULT_UNROLL_M 8
2100 #define DGEMM_DEFAULT_UNROLL_M 4
2101 #define QGEMM_DEFAULT_UNROLL_M 2
2102 #define CGEMM_DEFAULT_UNROLL_M 4
2103 #define ZGEMM_DEFAULT_UNROLL_M 2
2104 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) defined elsewhere. */
2107 #define SGEMM_DEFAULT_P sgemm_p
2108 #define DGEMM_DEFAULT_P dgemm_p
2109 #define QGEMM_DEFAULT_P qgemm_p
2110 #define CGEMM_DEFAULT_P cgemm_p
2111 #define ZGEMM_DEFAULT_P zgemm_p
2112 #define XGEMM_DEFAULT_P xgemm_p
2114 #define SGEMM_DEFAULT_R sgemm_r
2115 #define DGEMM_DEFAULT_R dgemm_r
2116 #define QGEMM_DEFAULT_R qgemm_r
2117 #define CGEMM_DEFAULT_R cgemm_r
2118 #define ZGEMM_DEFAULT_R zgemm_r
2119 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for every precision. */
2121 #define SGEMM_DEFAULT_Q 128
2122 #define DGEMM_DEFAULT_Q 128
2123 #define QGEMM_DEFAULT_Q 128
2124 #define CGEMM_DEFAULT_Q 128
2125 #define ZGEMM_DEFAULT_Q 128
2126 #define XGEMM_DEFAULT_Q 128
/*
 * Fallback unroll values for the Q and X prefixes. Each #ifndef applies the
 * value 2 only when the macro has not already been defined, so groups that
 * set these macros themselves keep their own values.
 */
2132 #ifndef QGEMM_DEFAULT_UNROLL_M
2133 #define QGEMM_DEFAULT_UNROLL_M 2
2136 #ifndef QGEMM_DEFAULT_UNROLL_N
2137 #define QGEMM_DEFAULT_UNROLL_N 2
2140 #ifndef XGEMM_DEFAULT_UNROLL_M
2141 #define XGEMM_DEFAULT_UNROLL_M 2
2144 #ifndef XGEMM_DEFAULT_UNROLL_N
2145 #define XGEMM_DEFAULT_UNROLL_N 2
/*
 * Assembler shuffle shorthands. Each macro expands to an instruction
 * mnemonic plus an immediate and ends with a comma, so the remaining
 * operands are written after the macro at the point of use.
 * NOTE(review): SHUFPD_0..SHUFPD_3 are defined twice below; the guards
 * choosing between the two forms are not shown here -- confirm.
 */
/* First form: SHUFPD_n spelled with shufps and the immediates
 * 0x44, 0x4e, 0xe4, 0xee. */
2149 #define SHUFPD_0 shufps $0x44,
2150 #define SHUFPD_1 shufps $0x4e,
2151 #define SHUFPD_2 shufps $0xe4,
2152 #define SHUFPD_3 shufps $0xee,
/* Second form: SHUFPD_n spelled with shufpd and the immediate n. */
2156 #define SHUFPD_0 shufpd $0,
2160 #define SHUFPD_1 shufpd $1,
2164 #define SHUFPD_2 shufpd $2,
2168 #define SHUFPD_3 shufpd $3,
/* shufps with immediate 0x39. */
2172 #define SHUFPS_39 shufps $0x39,