1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant. The ALIGN value
 * has all of its low 17 bits set, so it is presumably used as an address
 * mask -- TODO confirm against its users. */
80 #define GEMM_DEFAULT_OFFSET_A 64
81 #define GEMM_DEFAULT_OFFSET_B 256
82 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* Per-routine UNROLL_N values. The S/D/Q/C/Z/X prefixes presumably name the
 * precision / complex variants of GEMM -- TODO confirm. */
84 #define SGEMM_DEFAULT_UNROLL_N 4
85 #define DGEMM_DEFAULT_UNROLL_N 4
86 #define QGEMM_DEFAULT_UNROLL_N 2
87 #define CGEMM_DEFAULT_UNROLL_N 2
88 #define ZGEMM_DEFAULT_UNROLL_N 2
89 #define XGEMM_DEFAULT_UNROLL_N 1
/* Two alternative UNROLL_M sets follow back to back.
 * NOTE(review): presumably a conditional that is not visible here picks one
 * of them -- confirm before relying on either set. */
92 #define SGEMM_DEFAULT_UNROLL_M 4
93 #define DGEMM_DEFAULT_UNROLL_M 2
94 #define QGEMM_DEFAULT_UNROLL_M 2
95 #define CGEMM_DEFAULT_UNROLL_M 2
96 #define ZGEMM_DEFAULT_UNROLL_M 1
97 #define XGEMM_DEFAULT_UNROLL_M 1
99 #define SGEMM_DEFAULT_UNROLL_M 8
100 #define DGEMM_DEFAULT_UNROLL_M 4
101 #define QGEMM_DEFAULT_UNROLL_M 2
102 #define CGEMM_DEFAULT_UNROLL_M 4
103 #define ZGEMM_DEFAULT_UNROLL_M 2
104 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R are not numeric here: each macro expands to an identifier
 * (sgemm_p, sgemm_r, ...) that is defined outside this view. */
107 #define SGEMM_DEFAULT_P sgemm_p
108 #define DGEMM_DEFAULT_P dgemm_p
109 #define QGEMM_DEFAULT_P qgemm_p
110 #define CGEMM_DEFAULT_P cgemm_p
111 #define ZGEMM_DEFAULT_P zgemm_p
112 #define XGEMM_DEFAULT_P xgemm_p
114 #define SGEMM_DEFAULT_R sgemm_r
115 #define DGEMM_DEFAULT_R dgemm_r
116 #define QGEMM_DEFAULT_R qgemm_r
117 #define CGEMM_DEFAULT_R cgemm_r
118 #define ZGEMM_DEFAULT_R zgemm_r
119 #define XGEMM_DEFAULT_R xgemm_r
/* Two alternative Q sets (248 for every routine, then 240 for every routine).
 * NOTE(review): the selecting conditional is not visible here -- confirm. */
123 #define SGEMM_DEFAULT_Q 248
124 #define DGEMM_DEFAULT_Q 248
125 #define QGEMM_DEFAULT_Q 248
126 #define CGEMM_DEFAULT_Q 248
127 #define ZGEMM_DEFAULT_Q 248
128 #define XGEMM_DEFAULT_Q 248
132 #define SGEMM_DEFAULT_Q 240
133 #define DGEMM_DEFAULT_Q 240
134 #define QGEMM_DEFAULT_Q 240
135 #define CGEMM_DEFAULT_Q 240
136 #define ZGEMM_DEFAULT_Q 240
137 #define XGEMM_DEFAULT_Q 240
/* Flag macro with an empty replacement list; only its presence can be tested. */
143 #define HAVE_EXCLUSIVE_CACHE
/* GEMM tuning parameters in the group opened when BARCELONA, SHANGHAI or
 * BOBCAT is defined.
 * NOTE(review): the matching #endif is not visible in this view -- confirm
 * where the group actually ends. */
147 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
/* Offsets for the A and B buffers plus an alignment constant whose low
 * 12 bits are all set (presumably an address mask -- TODO confirm). */
152 #define GEMM_DEFAULT_OFFSET_A 64
153 #define GEMM_DEFAULT_OFFSET_B 832
154 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* Per-routine UNROLL_N values. */
156 #define SGEMM_DEFAULT_UNROLL_N 4
157 #define DGEMM_DEFAULT_UNROLL_N 4
158 #define QGEMM_DEFAULT_UNROLL_N 2
159 #define CGEMM_DEFAULT_UNROLL_N 2
160 #define ZGEMM_DEFAULT_UNROLL_N 2
161 #define XGEMM_DEFAULT_UNROLL_N 1
/* Two alternative UNROLL_M sets follow back to back.
 * NOTE(review): presumably a conditional that is not visible here picks one
 * of them -- confirm. */
164 #define SGEMM_DEFAULT_UNROLL_M 4
165 #define DGEMM_DEFAULT_UNROLL_M 2
166 #define QGEMM_DEFAULT_UNROLL_M 2
167 #define CGEMM_DEFAULT_UNROLL_M 2
168 #define ZGEMM_DEFAULT_UNROLL_M 1
169 #define XGEMM_DEFAULT_UNROLL_M 1
171 #define SGEMM_DEFAULT_UNROLL_M 8
172 #define DGEMM_DEFAULT_UNROLL_M 4
173 #define QGEMM_DEFAULT_UNROLL_M 2
174 #define CGEMM_DEFAULT_UNROLL_M 4
175 #define ZGEMM_DEFAULT_UNROLL_M 2
176 #define XGEMM_DEFAULT_UNROLL_M 1
/* First alternative P/Q pair: P starts at 496 and halves down the list,
 * Q is 248 for every routine. */
180 #define SGEMM_DEFAULT_P 496
181 #define DGEMM_DEFAULT_P 248
182 #define QGEMM_DEFAULT_P 124
183 #define CGEMM_DEFAULT_P 248
184 #define ZGEMM_DEFAULT_P 124
185 #define XGEMM_DEFAULT_P 62
187 #define SGEMM_DEFAULT_Q 248
188 #define DGEMM_DEFAULT_Q 248
189 #define QGEMM_DEFAULT_Q 248
190 #define CGEMM_DEFAULT_Q 248
191 #define ZGEMM_DEFAULT_Q 248
192 #define XGEMM_DEFAULT_Q 248
/* Second alternative P/Q pair (P from 448, Q 224).
 * NOTE(review): the conditional choosing between the two pairs is not
 * visible here -- confirm. */
196 #define SGEMM_DEFAULT_P 448
197 #define DGEMM_DEFAULT_P 224
198 #define QGEMM_DEFAULT_P 112
199 #define CGEMM_DEFAULT_P 224
200 #define ZGEMM_DEFAULT_P 112
201 #define XGEMM_DEFAULT_P 56
203 #define SGEMM_DEFAULT_Q 224
204 #define DGEMM_DEFAULT_Q 224
205 #define QGEMM_DEFAULT_Q 224
206 #define CGEMM_DEFAULT_Q 224
207 #define ZGEMM_DEFAULT_Q 224
208 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are defined outside this view. */
212 #define SGEMM_DEFAULT_R sgemm_r
213 #define QGEMM_DEFAULT_R qgemm_r
214 #define DGEMM_DEFAULT_R dgemm_r
215 #define CGEMM_DEFAULT_R cgemm_r
216 #define ZGEMM_DEFAULT_R zgemm_r
217 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro with an empty replacement list; only its presence can be tested. */
220 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (defined elsewhere). */
222 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 12 bits are all set (presumably an address mask -- TODO confirm). */
232 #define GEMM_DEFAULT_OFFSET_A 64
233 #define GEMM_DEFAULT_OFFSET_B 832
234 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X routines; the S/D values are set further down
 * together with the UNROLL_M alternatives. */
238 #define QGEMM_DEFAULT_UNROLL_N 2
239 #define CGEMM_DEFAULT_UNROLL_N 2
240 #define ZGEMM_DEFAULT_UNROLL_N 2
241 #define XGEMM_DEFAULT_UNROLL_N 1
/* First alternative: S/D UNROLL_N of 4 with the smaller UNROLL_M set.
 * NOTE(review): presumably selected by a conditional that is not visible
 * here -- confirm. */
244 #define SGEMM_DEFAULT_UNROLL_N 4
245 #define DGEMM_DEFAULT_UNROLL_N 4
246 #define SGEMM_DEFAULT_UNROLL_M 4
247 #define DGEMM_DEFAULT_UNROLL_M 2
248 #define QGEMM_DEFAULT_UNROLL_M 2
249 #define CGEMM_DEFAULT_UNROLL_M 2
250 #define ZGEMM_DEFAULT_UNROLL_M 1
251 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second alternative: S/D UNROLL_N of 2 with the larger UNROLL_M set, plus
 * unroll values for the CGEMM3M / ZGEMM3M routines. */
253 #define SGEMM_DEFAULT_UNROLL_N 2
254 #define DGEMM_DEFAULT_UNROLL_N 2
255 #define SGEMM_DEFAULT_UNROLL_M 16
256 #define DGEMM_DEFAULT_UNROLL_M 8
257 #define QGEMM_DEFAULT_UNROLL_M 2
258 #define CGEMM_DEFAULT_UNROLL_M 4
259 #define ZGEMM_DEFAULT_UNROLL_M 2
260 #define XGEMM_DEFAULT_UNROLL_M 1
261 #define CGEMM3M_DEFAULT_UNROLL_N 4
262 #define CGEMM3M_DEFAULT_UNROLL_M 8
263 #define ZGEMM3M_DEFAULT_UNROLL_N 4
264 #define ZGEMM3M_DEFAULT_UNROLL_M 4
266 #define DGEMM_DEFAULT_UNROLL_MN 16
267 #define GEMV_UNROLL 8
/* S/D values of P depend on ARCH_X86_64: the first pair follows the #if.
 * NOTE(review): the second pair presumably sits in an #else branch that is
 * not visible here -- confirm. */
271 #if defined(ARCH_X86_64)
272 #define SGEMM_DEFAULT_P 768
273 #define DGEMM_DEFAULT_P 384
275 #define SGEMM_DEFAULT_P 448
276 #define DGEMM_DEFAULT_P 224
279 #define QGEMM_DEFAULT_P 112
280 #define CGEMM_DEFAULT_P 224
281 #define ZGEMM_DEFAULT_P 112
282 #define XGEMM_DEFAULT_P 56
/* Same ARCH_X86_64 split for the S/D values of Q. */
284 #if defined(ARCH_X86_64)
285 #define SGEMM_DEFAULT_Q 168
286 #define DGEMM_DEFAULT_Q 168
288 #define SGEMM_DEFAULT_Q 224
289 #define DGEMM_DEFAULT_Q 224
292 #define QGEMM_DEFAULT_Q 224
293 #define CGEMM_DEFAULT_Q 224
294 #define ZGEMM_DEFAULT_Q 224
295 #define XGEMM_DEFAULT_Q 224
/* Fixed numeric P/Q/R values for the 3M routines. */
297 #define CGEMM3M_DEFAULT_P 448
298 #define ZGEMM3M_DEFAULT_P 224
299 #define XGEMM3M_DEFAULT_P 112
300 #define CGEMM3M_DEFAULT_Q 224
301 #define ZGEMM3M_DEFAULT_Q 224
302 #define XGEMM3M_DEFAULT_Q 224
303 #define CGEMM3M_DEFAULT_R 12288
304 #define ZGEMM3M_DEFAULT_R 12288
305 #define XGEMM3M_DEFAULT_R 12288
/* R expands to identifiers (sgemm_r, ...) that are defined outside this view. */
307 #define SGEMM_DEFAULT_R sgemm_r
308 #define QGEMM_DEFAULT_R qgemm_r
309 #define DGEMM_DEFAULT_R dgemm_r
310 #define CGEMM_DEFAULT_R cgemm_r
311 #define ZGEMM_DEFAULT_R zgemm_r
312 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro with an empty replacement list; only its presence can be tested. */
315 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (defined elsewhere). */
317 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 12 bits are all set (presumably an address mask -- TODO confirm). */
325 #define GEMM_DEFAULT_OFFSET_A 64
326 #define GEMM_DEFAULT_OFFSET_B 832
327 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X routines; the S/D values are set further down
 * together with the UNROLL_M alternatives. */
331 #define QGEMM_DEFAULT_UNROLL_N 2
332 #define CGEMM_DEFAULT_UNROLL_N 2
333 #define ZGEMM_DEFAULT_UNROLL_N 2
334 #define XGEMM_DEFAULT_UNROLL_N 1
/* First alternative: S/D UNROLL_N of 4 with the smaller UNROLL_M set.
 * NOTE(review): presumably selected by a conditional that is not visible
 * here -- confirm. */
337 #define SGEMM_DEFAULT_UNROLL_N 4
338 #define DGEMM_DEFAULT_UNROLL_N 4
339 #define SGEMM_DEFAULT_UNROLL_M 4
340 #define DGEMM_DEFAULT_UNROLL_M 2
341 #define QGEMM_DEFAULT_UNROLL_M 2
342 #define CGEMM_DEFAULT_UNROLL_M 2
343 #define ZGEMM_DEFAULT_UNROLL_M 1
344 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second alternative: S/D UNROLL_N of 2 with the larger UNROLL_M set, plus
 * unroll values for the CGEMM3M / ZGEMM3M routines. */
346 #define SGEMM_DEFAULT_UNROLL_N 2
347 #define DGEMM_DEFAULT_UNROLL_N 2
348 #define SGEMM_DEFAULT_UNROLL_M 16
349 #define DGEMM_DEFAULT_UNROLL_M 8
350 #define QGEMM_DEFAULT_UNROLL_M 2
351 #define CGEMM_DEFAULT_UNROLL_M 4
352 #define ZGEMM_DEFAULT_UNROLL_M 2
353 #define XGEMM_DEFAULT_UNROLL_M 1
354 #define CGEMM3M_DEFAULT_UNROLL_N 4
355 #define CGEMM3M_DEFAULT_UNROLL_M 8
356 #define ZGEMM3M_DEFAULT_UNROLL_N 4
357 #define ZGEMM3M_DEFAULT_UNROLL_M 4
358 #define GEMV_UNROLL 8
/* S/D/Z/C values of P depend on ARCH_X86_64: the first four follow the #if.
 * NOTE(review): the second four presumably sit in an #else branch that is
 * not visible here -- confirm. */
361 #if defined(ARCH_X86_64)
362 #define SGEMM_DEFAULT_P 768
363 #define DGEMM_DEFAULT_P 768
364 #define ZGEMM_DEFAULT_P 384
365 #define CGEMM_DEFAULT_P 768
367 #define SGEMM_DEFAULT_P 448
368 #define DGEMM_DEFAULT_P 480
369 #define ZGEMM_DEFAULT_P 112
370 #define CGEMM_DEFAULT_P 224
372 #define QGEMM_DEFAULT_P 112
373 #define XGEMM_DEFAULT_P 56
/* Same ARCH_X86_64 split for the S/D/Z/C values of Q. */
375 #if defined(ARCH_X86_64)
376 #define SGEMM_DEFAULT_Q 192
377 #define DGEMM_DEFAULT_Q 168
378 #define ZGEMM_DEFAULT_Q 168
379 #define CGEMM_DEFAULT_Q 168
381 #define SGEMM_DEFAULT_Q 224
382 #define DGEMM_DEFAULT_Q 224
383 #define ZGEMM_DEFAULT_Q 224
384 #define CGEMM_DEFAULT_Q 224
386 #define QGEMM_DEFAULT_Q 224
387 #define XGEMM_DEFAULT_Q 224
/* Fixed numeric P/Q/R values for the 3M routines. */
389 #define CGEMM3M_DEFAULT_P 448
390 #define ZGEMM3M_DEFAULT_P 224
391 #define XGEMM3M_DEFAULT_P 112
392 #define CGEMM3M_DEFAULT_Q 224
393 #define ZGEMM3M_DEFAULT_Q 224
394 #define XGEMM3M_DEFAULT_Q 224
395 #define CGEMM3M_DEFAULT_R 12288
396 #define ZGEMM3M_DEFAULT_R 12288
397 #define XGEMM3M_DEFAULT_R 12288
/* R is a fixed 12288 for SGEMM and DGEMM; the other routines expand to
 * identifiers (qgemm_r, ...) that are defined outside this view. */
399 #define SGEMM_DEFAULT_R 12288
400 #define QGEMM_DEFAULT_R qgemm_r
401 #define DGEMM_DEFAULT_R 12288
402 #define CGEMM_DEFAULT_R cgemm_r
403 #define ZGEMM_DEFAULT_R zgemm_r
404 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro with an empty replacement list; only its presence can be tested. */
407 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (defined elsewhere). */
409 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 12 bits are all set (presumably an address mask -- TODO confirm). */
417 #define GEMM_DEFAULT_OFFSET_A 64
418 #define GEMM_DEFAULT_OFFSET_B 832
419 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X routines; the S/D values are set further down
 * together with the UNROLL_M alternatives. */
423 #define QGEMM_DEFAULT_UNROLL_N 2
424 #define CGEMM_DEFAULT_UNROLL_N 2
425 #define ZGEMM_DEFAULT_UNROLL_N 2
426 #define XGEMM_DEFAULT_UNROLL_N 1
/* First alternative: S/D UNROLL_N of 4 with the smaller UNROLL_M set.
 * NOTE(review): presumably selected by a conditional that is not visible
 * here -- confirm. */
429 #define SGEMM_DEFAULT_UNROLL_N 4
430 #define DGEMM_DEFAULT_UNROLL_N 4
431 #define SGEMM_DEFAULT_UNROLL_M 4
432 #define DGEMM_DEFAULT_UNROLL_M 2
433 #define QGEMM_DEFAULT_UNROLL_M 2
434 #define CGEMM_DEFAULT_UNROLL_M 2
435 #define ZGEMM_DEFAULT_UNROLL_M 1
436 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second alternative: S/D UNROLL_N of 2 with the larger UNROLL_M set, plus
 * unroll values for the CGEMM3M / ZGEMM3M routines. */
438 #define SGEMM_DEFAULT_UNROLL_N 2
439 #define DGEMM_DEFAULT_UNROLL_N 2
440 #define SGEMM_DEFAULT_UNROLL_M 16
441 #define DGEMM_DEFAULT_UNROLL_M 8
442 #define QGEMM_DEFAULT_UNROLL_M 2
443 #define CGEMM_DEFAULT_UNROLL_M 4
444 #define ZGEMM_DEFAULT_UNROLL_M 2
445 #define XGEMM_DEFAULT_UNROLL_M 1
446 #define CGEMM3M_DEFAULT_UNROLL_N 4
447 #define CGEMM3M_DEFAULT_UNROLL_M 8
448 #define ZGEMM3M_DEFAULT_UNROLL_N 4
449 #define ZGEMM3M_DEFAULT_UNROLL_M 4
450 #define GEMV_UNROLL 8
/* S/D/Z/C values of P depend on ARCH_X86_64: the first four follow the #if.
 * NOTE(review): the second four presumably sit in an #else branch that is
 * not visible here -- confirm. */
453 #if defined(ARCH_X86_64)
454 #define SGEMM_DEFAULT_P 768
455 #define DGEMM_DEFAULT_P 576
456 #define ZGEMM_DEFAULT_P 288
457 #define CGEMM_DEFAULT_P 576
459 #define SGEMM_DEFAULT_P 448
460 #define DGEMM_DEFAULT_P 480
461 #define ZGEMM_DEFAULT_P 112
462 #define CGEMM_DEFAULT_P 224
464 #define QGEMM_DEFAULT_P 112
465 #define XGEMM_DEFAULT_P 56
/* Same ARCH_X86_64 split for the S/D/Z/C values of Q. */
467 #if defined(ARCH_X86_64)
468 #define SGEMM_DEFAULT_Q 192
469 #define DGEMM_DEFAULT_Q 160
470 #define ZGEMM_DEFAULT_Q 160
471 #define CGEMM_DEFAULT_Q 160
473 #define SGEMM_DEFAULT_Q 224
474 #define DGEMM_DEFAULT_Q 224
475 #define ZGEMM_DEFAULT_Q 224
476 #define CGEMM_DEFAULT_Q 224
478 #define QGEMM_DEFAULT_Q 224
479 #define XGEMM_DEFAULT_Q 224
/* Fixed numeric P/Q/R values for the 3M routines. */
481 #define CGEMM3M_DEFAULT_P 448
482 #define ZGEMM3M_DEFAULT_P 224
483 #define XGEMM3M_DEFAULT_P 112
484 #define CGEMM3M_DEFAULT_Q 224
485 #define ZGEMM3M_DEFAULT_Q 224
486 #define XGEMM3M_DEFAULT_Q 224
487 #define CGEMM3M_DEFAULT_R 12288
488 #define ZGEMM3M_DEFAULT_R 12288
489 #define XGEMM3M_DEFAULT_R 12288
/* R is a fixed 12288 for SGEMM and DGEMM; the other routines expand to
 * identifiers (qgemm_r, ...) that are defined outside this view. */
491 #define SGEMM_DEFAULT_R 12288
492 #define QGEMM_DEFAULT_R qgemm_r
493 #define DGEMM_DEFAULT_R 12288
494 #define CGEMM_DEFAULT_R cgemm_r
495 #define ZGEMM_DEFAULT_R zgemm_r
496 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro with an empty replacement list; only its presence can be tested. */
499 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (defined elsewhere). */
501 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 12 bits are all set (presumably an address mask -- TODO confirm). */
510 #define GEMM_DEFAULT_OFFSET_A 64
511 #define GEMM_DEFAULT_OFFSET_B 832
512 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X routines; the S/D values are set further down
 * together with the UNROLL_M alternatives. */
516 #define QGEMM_DEFAULT_UNROLL_N 2
517 #define CGEMM_DEFAULT_UNROLL_N 2
518 #define ZGEMM_DEFAULT_UNROLL_N 2
519 #define XGEMM_DEFAULT_UNROLL_N 1
/* First alternative: S/D UNROLL_N of 4 with the smaller UNROLL_M set.
 * NOTE(review): presumably selected by a conditional that is not visible
 * here -- confirm. */
522 #define SGEMM_DEFAULT_UNROLL_N 4
523 #define DGEMM_DEFAULT_UNROLL_N 4
524 #define SGEMM_DEFAULT_UNROLL_M 4
525 #define DGEMM_DEFAULT_UNROLL_M 2
526 #define QGEMM_DEFAULT_UNROLL_M 2
527 #define CGEMM_DEFAULT_UNROLL_M 2
528 #define ZGEMM_DEFAULT_UNROLL_M 1
529 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second alternative: S/D UNROLL_N of 2 with the larger UNROLL_M set, plus
 * unroll values for the CGEMM3M / ZGEMM3M routines. */
531 #define SGEMM_DEFAULT_UNROLL_N 2
532 #define DGEMM_DEFAULT_UNROLL_N 2
533 #define SGEMM_DEFAULT_UNROLL_M 16
534 #define DGEMM_DEFAULT_UNROLL_M 8
535 #define QGEMM_DEFAULT_UNROLL_M 2
536 #define CGEMM_DEFAULT_UNROLL_M 4
537 #define ZGEMM_DEFAULT_UNROLL_M 2
538 #define XGEMM_DEFAULT_UNROLL_M 1
539 #define CGEMM3M_DEFAULT_UNROLL_N 4
540 #define CGEMM3M_DEFAULT_UNROLL_M 8
541 #define ZGEMM3M_DEFAULT_UNROLL_N 4
542 #define ZGEMM3M_DEFAULT_UNROLL_M 4
543 #define GEMV_UNROLL 8
/* S/D/Z/C values of P depend on ARCH_X86_64: the first four follow the #if.
 * NOTE(review): the second four presumably sit in an #else branch that is
 * not visible here -- confirm. */
546 #if defined(ARCH_X86_64)
547 #define SGEMM_DEFAULT_P 768
548 #define DGEMM_DEFAULT_P 576
549 #define ZGEMM_DEFAULT_P 288
550 #define CGEMM_DEFAULT_P 576
552 #define SGEMM_DEFAULT_P 448
553 #define DGEMM_DEFAULT_P 480
554 #define ZGEMM_DEFAULT_P 112
555 #define CGEMM_DEFAULT_P 224
557 #define QGEMM_DEFAULT_P 112
558 #define XGEMM_DEFAULT_P 56
/* Same ARCH_X86_64 split for the S/D/Z/C values of Q. */
560 #if defined(ARCH_X86_64)
561 #define SGEMM_DEFAULT_Q 192
562 #define DGEMM_DEFAULT_Q 160
563 #define ZGEMM_DEFAULT_Q 160
564 #define CGEMM_DEFAULT_Q 160
566 #define SGEMM_DEFAULT_Q 224
567 #define DGEMM_DEFAULT_Q 224
568 #define ZGEMM_DEFAULT_Q 224
569 #define CGEMM_DEFAULT_Q 224
571 #define QGEMM_DEFAULT_Q 224
572 #define XGEMM_DEFAULT_Q 224
/* Fixed numeric P/Q/R values for the 3M routines. */
574 #define CGEMM3M_DEFAULT_P 448
575 #define ZGEMM3M_DEFAULT_P 224
576 #define XGEMM3M_DEFAULT_P 112
577 #define CGEMM3M_DEFAULT_Q 224
578 #define ZGEMM3M_DEFAULT_Q 224
579 #define XGEMM3M_DEFAULT_Q 224
580 #define CGEMM3M_DEFAULT_R 12288
581 #define ZGEMM3M_DEFAULT_R 12288
582 #define XGEMM3M_DEFAULT_R 12288
/* R is a fixed 12288 for SGEMM and DGEMM; the other routines expand to
 * identifiers (qgemm_r, ...) that are defined outside this view. */
584 #define SGEMM_DEFAULT_R 12288
585 #define QGEMM_DEFAULT_R qgemm_r
586 #define DGEMM_DEFAULT_R 12288
587 #define CGEMM_DEFAULT_R cgemm_r
588 #define ZGEMM_DEFAULT_R zgemm_r
589 #define XGEMM_DEFAULT_R xgemm_r
/* Flag macro with an empty replacement list; only its presence can be tested. */
592 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (defined elsewhere). */
594 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Both buffer offsets are zero here; the alignment constant has its low
 * 14 bits set (presumably an address mask -- TODO confirm). */
602 #define GEMM_DEFAULT_OFFSET_A 0
603 #define GEMM_DEFAULT_OFFSET_B 0
604 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* NOTE(review): the meaning of SWITCH_RATIO is not visible in this view. */
608 #define SWITCH_RATIO 16
/* First alternative UNROLL_M / UNROLL_N set.
 * NOTE(review): presumably a conditional that is not visible here chooses
 * between this set and the one that follows -- confirm. */
612 #define SGEMM_DEFAULT_UNROLL_M 4
613 #define DGEMM_DEFAULT_UNROLL_M 2
614 #define QGEMM_DEFAULT_UNROLL_M 2
615 #define CGEMM_DEFAULT_UNROLL_M 2
616 #define ZGEMM_DEFAULT_UNROLL_M 1
617 #define XGEMM_DEFAULT_UNROLL_M 1
619 #define SGEMM_DEFAULT_UNROLL_N 4
620 #define DGEMM_DEFAULT_UNROLL_N 4
621 #define QGEMM_DEFAULT_UNROLL_N 2
622 #define CGEMM_DEFAULT_UNROLL_N 2
623 #define ZGEMM_DEFAULT_UNROLL_N 2
624 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second alternative UNROLL_M / UNROLL_N set, with UNROLL_MN for S and D. */
628 #define SGEMM_DEFAULT_UNROLL_M 16
629 #define DGEMM_DEFAULT_UNROLL_M 4
630 #define QGEMM_DEFAULT_UNROLL_M 2
631 #define CGEMM_DEFAULT_UNROLL_M 8
632 #define ZGEMM_DEFAULT_UNROLL_M 4
633 #define XGEMM_DEFAULT_UNROLL_M 1
635 #define SGEMM_DEFAULT_UNROLL_N 4
636 #define DGEMM_DEFAULT_UNROLL_N 8
637 #define QGEMM_DEFAULT_UNROLL_N 2
638 #define CGEMM_DEFAULT_UNROLL_N 2
639 #define ZGEMM_DEFAULT_UNROLL_N 2
640 #define XGEMM_DEFAULT_UNROLL_N 1
642 #define SGEMM_DEFAULT_UNROLL_MN 32
643 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First alternative P/R/Q set: numeric P and Q; R is an identifier defined
 * outside this view for every routine except CGEMM, which uses 1024. */
648 #define SGEMM_DEFAULT_P 512
649 #define SGEMM_DEFAULT_R sgemm_r
650 #define DGEMM_DEFAULT_P 512
651 #define DGEMM_DEFAULT_R dgemm_r
652 #define QGEMM_DEFAULT_P 504
653 #define QGEMM_DEFAULT_R qgemm_r
654 #define CGEMM_DEFAULT_P 128
655 #define CGEMM_DEFAULT_R 1024
656 #define ZGEMM_DEFAULT_P 512
657 #define ZGEMM_DEFAULT_R zgemm_r
658 #define XGEMM_DEFAULT_P 252
659 #define XGEMM_DEFAULT_R xgemm_r
660 #define SGEMM_DEFAULT_Q 256
661 #define DGEMM_DEFAULT_Q 256
662 #define QGEMM_DEFAULT_Q 128
663 #define CGEMM_DEFAULT_Q 256
664 #define ZGEMM_DEFAULT_Q 192
665 #define XGEMM_DEFAULT_Q 128
/* Second alternative P/Q/R set.
 * NOTE(review): the S/D values of Q appear twice in a row (320/128, then
 * 384/256); presumably a nested conditional that is not visible here picks
 * one pair -- confirm. */
669 #define SGEMM_DEFAULT_P 768
670 #define DGEMM_DEFAULT_P 512
671 #define CGEMM_DEFAULT_P 384
672 #define ZGEMM_DEFAULT_P 256
675 #define SGEMM_DEFAULT_Q 320
676 #define DGEMM_DEFAULT_Q 128
678 #define SGEMM_DEFAULT_Q 384
679 #define DGEMM_DEFAULT_Q 256
681 #define CGEMM_DEFAULT_Q 192
682 #define ZGEMM_DEFAULT_Q 128
684 #define SGEMM_DEFAULT_R sgemm_r
685 #define DGEMM_DEFAULT_R 13824
686 #define CGEMM_DEFAULT_R cgemm_r
687 #define ZGEMM_DEFAULT_R zgemm_r
689 #define QGEMM_DEFAULT_Q 128
690 #define QGEMM_DEFAULT_P 504
691 #define QGEMM_DEFAULT_R qgemm_r
692 #define XGEMM_DEFAULT_P 252
693 #define XGEMM_DEFAULT_R xgemm_r
694 #define XGEMM_DEFAULT_Q 128
/* Unroll and fixed numeric P/Q/R values for the 3M routines. */
696 #define CGEMM3M_DEFAULT_UNROLL_N 8
697 #define CGEMM3M_DEFAULT_UNROLL_M 4
698 #define ZGEMM3M_DEFAULT_UNROLL_N 8
699 #define ZGEMM3M_DEFAULT_UNROLL_M 2
701 #define CGEMM3M_DEFAULT_P 448
702 #define ZGEMM3M_DEFAULT_P 224
703 #define XGEMM3M_DEFAULT_P 112
704 #define CGEMM3M_DEFAULT_Q 224
705 #define ZGEMM3M_DEFAULT_Q 224
706 #define XGEMM3M_DEFAULT_Q 224
707 #define CGEMM3M_DEFAULT_R 12288
708 #define ZGEMM3M_DEFAULT_R 12288
709 #define XGEMM3M_DEFAULT_R 12288
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 16 bits are all set (presumably an address mask -- TODO confirm). */
720 #define GEMM_DEFAULT_OFFSET_A 0
721 #define GEMM_DEFAULT_OFFSET_B 384
722 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Per-routine UNROLL_N values. */
724 #define SGEMM_DEFAULT_UNROLL_N 4
725 #define DGEMM_DEFAULT_UNROLL_N 4
726 #define QGEMM_DEFAULT_UNROLL_N 2
727 #define CGEMM_DEFAULT_UNROLL_N 2
728 #define ZGEMM_DEFAULT_UNROLL_N 2
729 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per-routine UNROLL_M values (a single set in this group). */
731 #define SGEMM_DEFAULT_UNROLL_M 2
732 #define DGEMM_DEFAULT_UNROLL_M 1
733 #define QGEMM_DEFAULT_UNROLL_M 2
734 #define CGEMM_DEFAULT_UNROLL_M 1
735 #define ZGEMM_DEFAULT_UNROLL_M 1
736 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are defined outside this view. */
738 #define SGEMM_DEFAULT_R sgemm_r
739 #define DGEMM_DEFAULT_R dgemm_r
740 #define QGEMM_DEFAULT_R qgemm_r
741 #define CGEMM_DEFAULT_R cgemm_r
742 #define ZGEMM_DEFAULT_R zgemm_r
743 #define XGEMM_DEFAULT_R xgemm_r
/* Fixed numeric P values per routine; Q is 208 for every routine. */
745 #define SGEMM_DEFAULT_P 208
746 #define DGEMM_DEFAULT_P 104
747 #define QGEMM_DEFAULT_P 56
748 #define CGEMM_DEFAULT_P 104
749 #define ZGEMM_DEFAULT_P 56
750 #define XGEMM_DEFAULT_P 28
752 #define SGEMM_DEFAULT_Q 208
753 #define DGEMM_DEFAULT_Q 208
754 #define QGEMM_DEFAULT_Q 208
755 #define CGEMM_DEFAULT_Q 208
756 #define ZGEMM_DEFAULT_Q 208
757 #define XGEMM_DEFAULT_Q 208
/* Flag macro with an empty replacement list; only its presence can be tested. */
760 #define HAVE_EXCLUSIVE_CACHE
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 16 bits are all set (presumably an address mask -- TODO confirm). */
768 #define GEMM_DEFAULT_OFFSET_A 0
769 #define GEMM_DEFAULT_OFFSET_B 256
770 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Per-routine UNROLL_N values. */
772 #define SGEMM_DEFAULT_UNROLL_N 4
773 #define DGEMM_DEFAULT_UNROLL_N 4
774 #define QGEMM_DEFAULT_UNROLL_N 2
775 #define CGEMM_DEFAULT_UNROLL_N 2
776 #define ZGEMM_DEFAULT_UNROLL_N 2
777 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per-routine UNROLL_M values (a single set in this group). */
779 #define SGEMM_DEFAULT_UNROLL_M 2
780 #define DGEMM_DEFAULT_UNROLL_M 1
781 #define QGEMM_DEFAULT_UNROLL_M 2
782 #define CGEMM_DEFAULT_UNROLL_M 1
783 #define ZGEMM_DEFAULT_UNROLL_M 1
784 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are defined outside this view. */
786 #define SGEMM_DEFAULT_R sgemm_r
787 #define DGEMM_DEFAULT_R dgemm_r
788 #define QGEMM_DEFAULT_R qgemm_r
789 #define CGEMM_DEFAULT_R cgemm_r
790 #define ZGEMM_DEFAULT_R zgemm_r
791 #define XGEMM_DEFAULT_R xgemm_r
/* P is 128 for every routine; Q varies per routine. */
793 #define SGEMM_DEFAULT_P 128
794 #define DGEMM_DEFAULT_P 128
795 #define QGEMM_DEFAULT_P 128
796 #define CGEMM_DEFAULT_P 128
797 #define ZGEMM_DEFAULT_P 128
798 #define XGEMM_DEFAULT_P 128
800 #define SGEMM_DEFAULT_Q 512
801 #define DGEMM_DEFAULT_Q 256
802 #define QGEMM_DEFAULT_Q 256
803 #define CGEMM_DEFAULT_Q 256
804 #define ZGEMM_DEFAULT_Q 128
805 #define XGEMM_DEFAULT_Q 128
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 17 bits are all set (presumably an address mask -- TODO confirm). */
815 #define GEMM_DEFAULT_OFFSET_A 64
816 #define GEMM_DEFAULT_OFFSET_B 256
817 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* First alternative UNROLL_N / UNROLL_M set.
 * NOTE(review): presumably a conditional that is not visible here chooses
 * between this set and the one that follows -- confirm. */
820 #define SGEMM_DEFAULT_UNROLL_N 4
821 #define DGEMM_DEFAULT_UNROLL_N 4
822 #define QGEMM_DEFAULT_UNROLL_N 2
823 #define CGEMM_DEFAULT_UNROLL_N 2
824 #define ZGEMM_DEFAULT_UNROLL_N 2
825 #define XGEMM_DEFAULT_UNROLL_N 1
827 #define SGEMM_DEFAULT_UNROLL_M 4
828 #define DGEMM_DEFAULT_UNROLL_M 2
829 #define QGEMM_DEFAULT_UNROLL_M 2
830 #define CGEMM_DEFAULT_UNROLL_M 2
831 #define ZGEMM_DEFAULT_UNROLL_M 1
832 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second alternative UNROLL_N / UNROLL_M set. */
834 #define SGEMM_DEFAULT_UNROLL_N 8
835 #define DGEMM_DEFAULT_UNROLL_N 4
836 #define QGEMM_DEFAULT_UNROLL_N 2
837 #define CGEMM_DEFAULT_UNROLL_N 4
838 #define ZGEMM_DEFAULT_UNROLL_N 2
839 #define XGEMM_DEFAULT_UNROLL_N 1
841 #define SGEMM_DEFAULT_UNROLL_M 4
842 #define DGEMM_DEFAULT_UNROLL_M 4
843 #define QGEMM_DEFAULT_UNROLL_M 2
844 #define CGEMM_DEFAULT_UNROLL_M 2
845 #define ZGEMM_DEFAULT_UNROLL_M 2
846 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every routine. */
849 #define SGEMM_DEFAULT_P 288
850 #define DGEMM_DEFAULT_P 288
851 #define QGEMM_DEFAULT_P 288
852 #define CGEMM_DEFAULT_P 288
853 #define ZGEMM_DEFAULT_P 288
854 #define XGEMM_DEFAULT_P 288
/* R expands to identifiers (sgemm_r, ...) that are defined outside this view. */
856 #define SGEMM_DEFAULT_R sgemm_r
857 #define DGEMM_DEFAULT_R dgemm_r
858 #define QGEMM_DEFAULT_R qgemm_r
859 #define CGEMM_DEFAULT_R cgemm_r
860 #define ZGEMM_DEFAULT_R zgemm_r
861 #define XGEMM_DEFAULT_R xgemm_r
/* Fixed numeric Q values per routine. */
863 #define SGEMM_DEFAULT_Q 256
864 #define DGEMM_DEFAULT_Q 128
865 #define QGEMM_DEFAULT_Q 64
866 #define CGEMM_DEFAULT_Q 128
867 #define ZGEMM_DEFAULT_Q 64
868 #define XGEMM_DEFAULT_Q 32
/* Flag macro with an empty replacement list; only its presence can be tested. */
871 #define HAVE_EXCLUSIVE_CACHE
/* GEMM tuning parameters in the group opened when PENTIUM, PENTIUM2 or
 * PENTIUM3 is defined.
 * NOTE(review): the matching #endif is not visible in this view -- confirm
 * where the group actually ends. */
875 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
/* Both buffer offsets are zero; the alignment constant has its low 16 bits
 * set (presumably an address mask -- TODO confirm). */
884 #define GEMM_DEFAULT_OFFSET_A 0
885 #define GEMM_DEFAULT_OFFSET_B 0
886 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* SGEMM / CGEMM UNROLL_M appear twice in a row (8/4, then 4/2).
 * NOTE(review): presumably a conditional that is not visible here picks one
 * pair -- confirm. */
889 #define SGEMM_DEFAULT_UNROLL_M 8
890 #define CGEMM_DEFAULT_UNROLL_M 4
892 #define SGEMM_DEFAULT_UNROLL_M 4
893 #define CGEMM_DEFAULT_UNROLL_M 2
/* Remaining unroll values, defined once each. */
895 #define DGEMM_DEFAULT_UNROLL_M 2
896 #define SGEMM_DEFAULT_UNROLL_N 2
897 #define DGEMM_DEFAULT_UNROLL_N 2
898 #define QGEMM_DEFAULT_UNROLL_M 2
899 #define QGEMM_DEFAULT_UNROLL_N 2
900 #define CGEMM_DEFAULT_UNROLL_N 1
901 #define ZGEMM_DEFAULT_UNROLL_M 1
902 #define ZGEMM_DEFAULT_UNROLL_N 1
903 #define XGEMM_DEFAULT_UNROLL_M 1
904 #define XGEMM_DEFAULT_UNROLL_N 1
/* For each routine: P and R expand to identifiers defined outside this view,
 * while Q is the fixed value 256. */
906 #define SGEMM_DEFAULT_P sgemm_p
907 #define SGEMM_DEFAULT_Q 256
908 #define SGEMM_DEFAULT_R sgemm_r
910 #define DGEMM_DEFAULT_P dgemm_p
911 #define DGEMM_DEFAULT_Q 256
912 #define DGEMM_DEFAULT_R dgemm_r
914 #define QGEMM_DEFAULT_P qgemm_p
915 #define QGEMM_DEFAULT_Q 256
916 #define QGEMM_DEFAULT_R qgemm_r
918 #define CGEMM_DEFAULT_P cgemm_p
919 #define CGEMM_DEFAULT_Q 256
920 #define CGEMM_DEFAULT_R cgemm_r
922 #define ZGEMM_DEFAULT_P zgemm_p
923 #define ZGEMM_DEFAULT_Q 256
924 #define ZGEMM_DEFAULT_R zgemm_r
926 #define XGEMM_DEFAULT_P xgemm_p
927 #define XGEMM_DEFAULT_Q 256
928 #define XGEMM_DEFAULT_R xgemm_r
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Both buffer offsets are zero; the alignment constant has its low 16 bits
 * set (presumably an address mask -- TODO confirm). */
939 #define GEMM_DEFAULT_OFFSET_A 0
940 #define GEMM_DEFAULT_OFFSET_B 0
941 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First alternative set of interleaved UNROLL_M / UNROLL_N values.
 * NOTE(review): presumably a conditional that is not visible here chooses
 * between this set and the one that follows -- confirm. */
944 #define SGEMM_DEFAULT_UNROLL_M 4
945 #define SGEMM_DEFAULT_UNROLL_N 4
946 #define DGEMM_DEFAULT_UNROLL_M 2
947 #define DGEMM_DEFAULT_UNROLL_N 4
948 #define QGEMM_DEFAULT_UNROLL_M 2
949 #define QGEMM_DEFAULT_UNROLL_N 2
950 #define CGEMM_DEFAULT_UNROLL_M 2
951 #define CGEMM_DEFAULT_UNROLL_N 2
952 #define ZGEMM_DEFAULT_UNROLL_M 1
953 #define ZGEMM_DEFAULT_UNROLL_N 2
954 #define XGEMM_DEFAULT_UNROLL_M 1
955 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second alternative set of interleaved UNROLL_M / UNROLL_N values. */
957 #define SGEMM_DEFAULT_UNROLL_M 8
958 #define SGEMM_DEFAULT_UNROLL_N 2
959 #define DGEMM_DEFAULT_UNROLL_M 2
960 #define DGEMM_DEFAULT_UNROLL_N 2
961 #define QGEMM_DEFAULT_UNROLL_M 2
962 #define QGEMM_DEFAULT_UNROLL_N 2
963 #define CGEMM_DEFAULT_UNROLL_M 4
964 #define CGEMM_DEFAULT_UNROLL_N 1
965 #define ZGEMM_DEFAULT_UNROLL_M 1
966 #define ZGEMM_DEFAULT_UNROLL_N 1
967 #define XGEMM_DEFAULT_UNROLL_M 1
968 #define XGEMM_DEFAULT_UNROLL_N 1
/* For each routine: P and R expand to identifiers defined outside this view,
 * while Q is the fixed value 256. */
972 #define SGEMM_DEFAULT_P sgemm_p
973 #define SGEMM_DEFAULT_Q 256
974 #define SGEMM_DEFAULT_R sgemm_r
976 #define DGEMM_DEFAULT_P dgemm_p
977 #define DGEMM_DEFAULT_Q 256
978 #define DGEMM_DEFAULT_R dgemm_r
980 #define QGEMM_DEFAULT_P qgemm_p
981 #define QGEMM_DEFAULT_Q 256
982 #define QGEMM_DEFAULT_R qgemm_r
984 #define CGEMM_DEFAULT_P cgemm_p
985 #define CGEMM_DEFAULT_Q 256
986 #define CGEMM_DEFAULT_R cgemm_r
988 #define ZGEMM_DEFAULT_P zgemm_p
989 #define ZGEMM_DEFAULT_Q 256
990 #define ZGEMM_DEFAULT_R zgemm_r
992 #define XGEMM_DEFAULT_P xgemm_p
993 #define XGEMM_DEFAULT_Q 256
994 #define XGEMM_DEFAULT_R xgemm_r
/* GEMM tuning parameters in the group opened when CORE_NORTHWOOD is defined.
 * NOTE(review): the matching #endif and any enclosing conditional are not
 * visible in this view -- confirm where the group actually ends. */
999 #ifdef CORE_NORTHWOOD
/* Offsets for the A and B buffers plus an alignment constant whose low
 * 16 bits are all set (presumably an address mask -- TODO confirm). */
1004 #define GEMM_DEFAULT_OFFSET_A 0
1005 #define GEMM_DEFAULT_OFFSET_B 32
1007 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Per-routine UNROLL_M values (a single set in this group). */
1011 #define SGEMM_DEFAULT_UNROLL_M 8
1012 #define DGEMM_DEFAULT_UNROLL_M 4
1013 #define QGEMM_DEFAULT_UNROLL_M 2
1014 #define CGEMM_DEFAULT_UNROLL_M 4
1015 #define ZGEMM_DEFAULT_UNROLL_M 2
1016 #define XGEMM_DEFAULT_UNROLL_M 1
/* Per-routine UNROLL_N values. */
1018 #define SGEMM_DEFAULT_UNROLL_N 2
1019 #define DGEMM_DEFAULT_UNROLL_N 2
1020 #define QGEMM_DEFAULT_UNROLL_N 2
1021 #define CGEMM_DEFAULT_UNROLL_N 1
1022 #define ZGEMM_DEFAULT_UNROLL_N 1
1023 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined
 * outside this view. */
1025 #define SGEMM_DEFAULT_P sgemm_p
1026 #define SGEMM_DEFAULT_R sgemm_r
1028 #define DGEMM_DEFAULT_P dgemm_p
1029 #define DGEMM_DEFAULT_R dgemm_r
1031 #define QGEMM_DEFAULT_P qgemm_p
1032 #define QGEMM_DEFAULT_R qgemm_r
1034 #define CGEMM_DEFAULT_P cgemm_p
1035 #define CGEMM_DEFAULT_R cgemm_r
1037 #define ZGEMM_DEFAULT_P zgemm_p
1038 #define ZGEMM_DEFAULT_R zgemm_r
1040 #define XGEMM_DEFAULT_P xgemm_p
1041 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every routine. */
1043 #define SGEMM_DEFAULT_Q 128
1044 #define DGEMM_DEFAULT_Q 128
1045 #define QGEMM_DEFAULT_Q 128
1046 #define CGEMM_DEFAULT_Q 128
1047 #define ZGEMM_DEFAULT_Q 128
1048 #define XGEMM_DEFAULT_Q 128
/* GEMM tuning parameters in the group opened when CORE_PRESCOTT is defined.
 * NOTE(review): the matching #endif and any enclosing conditional are not
 * visible in this view -- confirm where the group actually ends. */
1051 #ifdef CORE_PRESCOTT
/* Two alternative offset pairs (128/192, then 0/256).
 * NOTE(review): presumably a conditional that is not visible here picks one
 * pair -- confirm. */
1057 #define GEMM_DEFAULT_OFFSET_A 128
1058 #define GEMM_DEFAULT_OFFSET_B 192
1060 #define GEMM_DEFAULT_OFFSET_A 0
1061 #define GEMM_DEFAULT_OFFSET_B 256
/* Alignment constant whose low 16 bits are all set (presumably an address
 * mask -- TODO confirm). */
1064 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Two alternative UNROLL_M sets follow back to back.
 * NOTE(review): the selecting conditional is not visible here -- confirm. */
1069 #define SGEMM_DEFAULT_UNROLL_M 4
1070 #define DGEMM_DEFAULT_UNROLL_M 2
1071 #define QGEMM_DEFAULT_UNROLL_M 2
1072 #define CGEMM_DEFAULT_UNROLL_M 2
1073 #define ZGEMM_DEFAULT_UNROLL_M 1
1074 #define XGEMM_DEFAULT_UNROLL_M 1
1076 #define SGEMM_DEFAULT_UNROLL_M 8
1077 #define DGEMM_DEFAULT_UNROLL_M 4
1078 #define QGEMM_DEFAULT_UNROLL_M 2
1079 #define CGEMM_DEFAULT_UNROLL_M 4
1080 #define ZGEMM_DEFAULT_UNROLL_M 2
1081 #define XGEMM_DEFAULT_UNROLL_M 1
/* Per-routine UNROLL_N values (a single set in this group). */
1084 #define SGEMM_DEFAULT_UNROLL_N 4
1085 #define DGEMM_DEFAULT_UNROLL_N 4
1086 #define QGEMM_DEFAULT_UNROLL_N 2
1087 #define CGEMM_DEFAULT_UNROLL_N 2
1088 #define ZGEMM_DEFAULT_UNROLL_N 2
1089 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined
 * outside this view. */
1091 #define SGEMM_DEFAULT_P sgemm_p
1092 #define SGEMM_DEFAULT_R sgemm_r
1094 #define DGEMM_DEFAULT_P dgemm_p
1095 #define DGEMM_DEFAULT_R dgemm_r
1097 #define QGEMM_DEFAULT_P qgemm_p
1098 #define QGEMM_DEFAULT_R qgemm_r
1100 #define CGEMM_DEFAULT_P cgemm_p
1101 #define CGEMM_DEFAULT_R cgemm_r
1103 #define ZGEMM_DEFAULT_P zgemm_p
1104 #define ZGEMM_DEFAULT_R zgemm_r
1106 #define XGEMM_DEFAULT_P xgemm_p
1107 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every routine. */
1109 #define SGEMM_DEFAULT_Q 128
1110 #define DGEMM_DEFAULT_Q 128
1111 #define QGEMM_DEFAULT_Q 128
1112 #define CGEMM_DEFAULT_Q 128
1113 #define ZGEMM_DEFAULT_Q 128
1114 #define XGEMM_DEFAULT_Q 128
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 14 bits are all set (presumably an address mask -- TODO confirm). */
1122 #define GEMM_DEFAULT_OFFSET_A 448
1123 #define GEMM_DEFAULT_OFFSET_B 128
1124 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* NOTE(review): the meaning of SWITCH_RATIO is not visible in this view. */
1128 #define SWITCH_RATIO 4
/* First alternative UNROLL_M / UNROLL_N set.
 * NOTE(review): presumably a conditional that is not visible here chooses
 * between this set and the one after MASK -- confirm. */
1131 #define SGEMM_DEFAULT_UNROLL_M 8
1132 #define DGEMM_DEFAULT_UNROLL_M 4
1133 #define QGEMM_DEFAULT_UNROLL_M 2
1134 #define CGEMM_DEFAULT_UNROLL_M 4
1135 #define ZGEMM_DEFAULT_UNROLL_M 2
1136 #define XGEMM_DEFAULT_UNROLL_M 1
1138 #define SGEMM_DEFAULT_UNROLL_N 2
1139 #define DGEMM_DEFAULT_UNROLL_N 2
1140 #define QGEMM_DEFAULT_UNROLL_N 2
1141 #define CGEMM_DEFAULT_UNROLL_N 1
1142 #define ZGEMM_DEFAULT_UNROLL_N 1
1143 #define XGEMM_DEFAULT_UNROLL_N 1
/* MASK(a, b): adds b - 1 to a, divides by b and multiplies by b again. With
 * integer operands and b > 0 this rounds a up to a multiple of b. The
 * argument b is expanded three times, so avoid side effects in it. */
1145 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/* Second alternative UNROLL_M / UNROLL_N set. */
1148 #define SGEMM_DEFAULT_UNROLL_M 8
1149 #define DGEMM_DEFAULT_UNROLL_M 4
1150 #define QGEMM_DEFAULT_UNROLL_M 2
1151 #define CGEMM_DEFAULT_UNROLL_M 4
1152 #define ZGEMM_DEFAULT_UNROLL_M 2
1153 #define XGEMM_DEFAULT_UNROLL_M 1
1155 #define SGEMM_DEFAULT_UNROLL_N 4
1156 #define DGEMM_DEFAULT_UNROLL_N 4
1157 #define QGEMM_DEFAULT_UNROLL_N 2
1158 #define CGEMM_DEFAULT_UNROLL_N 2
1159 #define ZGEMM_DEFAULT_UNROLL_N 2
1160 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined
 * outside this view. */
1163 #define SGEMM_DEFAULT_P sgemm_p
1164 #define SGEMM_DEFAULT_R sgemm_r
1166 #define DGEMM_DEFAULT_P dgemm_p
1167 #define DGEMM_DEFAULT_R dgemm_r
1169 #define QGEMM_DEFAULT_P qgemm_p
1170 #define QGEMM_DEFAULT_R qgemm_r
1172 #define CGEMM_DEFAULT_P cgemm_p
1173 #define CGEMM_DEFAULT_R cgemm_r
1175 #define ZGEMM_DEFAULT_P zgemm_p
1176 #define ZGEMM_DEFAULT_R zgemm_r
1178 #define XGEMM_DEFAULT_P xgemm_p
1179 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every routine. */
1181 #define SGEMM_DEFAULT_Q 256
1182 #define DGEMM_DEFAULT_Q 256
1183 #define QGEMM_DEFAULT_Q 256
1184 #define CGEMM_DEFAULT_Q 256
1185 #define ZGEMM_DEFAULT_Q 256
1186 #define XGEMM_DEFAULT_Q 256
/* GEMM tuning parameters for one build configuration.
 * NOTE(review): the preprocessor conditional that selects this group is not
 * visible in this view -- confirm which target it applies to. */

/* Offsets for the A and B buffers plus an alignment constant whose low
 * 14 bits are all set (presumably an address mask -- TODO confirm). */
1195 #define GEMM_DEFAULT_OFFSET_A 128
1196 #define GEMM_DEFAULT_OFFSET_B 0
1197 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* NOTE(review): the meaning of SWITCH_RATIO is not visible in this view. */
1201 #define SWITCH_RATIO 4
/* First alternative UNROLL_M / UNROLL_N set.
 * NOTE(review): presumably a conditional that is not visible here chooses
 * between this set and the one that follows -- confirm. */
1204 #define SGEMM_DEFAULT_UNROLL_M 4
1205 #define DGEMM_DEFAULT_UNROLL_M 2
1206 #define QGEMM_DEFAULT_UNROLL_M 2
1207 #define CGEMM_DEFAULT_UNROLL_M 2
1208 #define ZGEMM_DEFAULT_UNROLL_M 1
1209 #define XGEMM_DEFAULT_UNROLL_M 1
1211 #define SGEMM_DEFAULT_UNROLL_N 4
1212 #define DGEMM_DEFAULT_UNROLL_N 4
1213 #define QGEMM_DEFAULT_UNROLL_N 2
1214 #define CGEMM_DEFAULT_UNROLL_N 2
1215 #define ZGEMM_DEFAULT_UNROLL_N 2
1216 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second alternative set: larger UNROLL_M, same UNROLL_N values as above. */
1218 #define SGEMM_DEFAULT_UNROLL_M 8
1219 #define DGEMM_DEFAULT_UNROLL_M 4
1220 #define QGEMM_DEFAULT_UNROLL_M 2
1221 #define CGEMM_DEFAULT_UNROLL_M 4
1222 #define ZGEMM_DEFAULT_UNROLL_M 2
1223 #define XGEMM_DEFAULT_UNROLL_M 1
1225 #define SGEMM_DEFAULT_UNROLL_N 4
1226 #define DGEMM_DEFAULT_UNROLL_N 4
1227 #define QGEMM_DEFAULT_UNROLL_N 2
1228 #define CGEMM_DEFAULT_UNROLL_N 2
1229 #define ZGEMM_DEFAULT_UNROLL_N 2
1230 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are defined
 * outside this view. */
1233 #define SGEMM_DEFAULT_P sgemm_p
1234 #define SGEMM_DEFAULT_R sgemm_r
1236 #define DGEMM_DEFAULT_P dgemm_p
1237 #define DGEMM_DEFAULT_R dgemm_r
1239 #define QGEMM_DEFAULT_P qgemm_p
1240 #define QGEMM_DEFAULT_R qgemm_r
1242 #define CGEMM_DEFAULT_P cgemm_p
1243 #define CGEMM_DEFAULT_R cgemm_r
1245 #define ZGEMM_DEFAULT_P zgemm_p
1246 #define ZGEMM_DEFAULT_R zgemm_r
1248 #define XGEMM_DEFAULT_P xgemm_p
1249 #define XGEMM_DEFAULT_R xgemm_r
/* Fixed numeric Q values per routine. */
1251 #define SGEMM_DEFAULT_Q 512
1252 #define DGEMM_DEFAULT_Q 256
1253 #define QGEMM_DEFAULT_Q 128
1254 #define CGEMM_DEFAULT_Q 512
1255 #define ZGEMM_DEFAULT_Q 256
1256 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant 0.75.
 * NOTE(review): how GETRF_FACTOR is consumed is not visible in this view. */
1258 #define GETRF_FACTOR 0.75
/* GEMM default-parameter group.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1266 #define GEMM_DEFAULT_OFFSET_A 128
1267 #define GEMM_DEFAULT_OFFSET_B 0
1268 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1272 #define SWITCH_RATIO 4
/* Two complete UNROLL_M/UNROLL_N sets follow back to back; presumably an
 * #if/#else that is not shown here chooses between them -- TODO confirm. */
1275 #define SGEMM_DEFAULT_UNROLL_M 4
1276 #define DGEMM_DEFAULT_UNROLL_M 2
1277 #define QGEMM_DEFAULT_UNROLL_M 2
1278 #define CGEMM_DEFAULT_UNROLL_M 2
1279 #define ZGEMM_DEFAULT_UNROLL_M 1
1280 #define XGEMM_DEFAULT_UNROLL_M 1
1282 #define SGEMM_DEFAULT_UNROLL_N 4
1283 #define DGEMM_DEFAULT_UNROLL_N 4
1284 #define QGEMM_DEFAULT_UNROLL_N 2
1285 #define CGEMM_DEFAULT_UNROLL_N 2
1286 #define ZGEMM_DEFAULT_UNROLL_N 2
1287 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second UNROLL_M/UNROLL_N set. */
1289 #define SGEMM_DEFAULT_UNROLL_M 8
1290 #define DGEMM_DEFAULT_UNROLL_M 4
1291 #define QGEMM_DEFAULT_UNROLL_M 2
1292 #define CGEMM_DEFAULT_UNROLL_M 4
1293 #define ZGEMM_DEFAULT_UNROLL_M 2
1294 #define XGEMM_DEFAULT_UNROLL_M 1
1296 #define SGEMM_DEFAULT_UNROLL_N 4
1297 #define DGEMM_DEFAULT_UNROLL_N 4
1298 #define QGEMM_DEFAULT_UNROLL_N 2
1299 #define CGEMM_DEFAULT_UNROLL_N 2
1300 #define ZGEMM_DEFAULT_UNROLL_N 2
1301 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to lowercase identifiers (sgemm_p, sgemm_r, ...)
 * rather than literals; those names are not defined in this excerpt. */
1304 #define SGEMM_DEFAULT_P sgemm_p
1305 #define SGEMM_DEFAULT_R sgemm_r
1307 #define DGEMM_DEFAULT_P dgemm_p
1308 #define DGEMM_DEFAULT_R dgemm_r
1310 #define QGEMM_DEFAULT_P qgemm_p
1311 #define QGEMM_DEFAULT_R qgemm_r
1313 #define CGEMM_DEFAULT_P cgemm_p
1314 #define CGEMM_DEFAULT_R cgemm_r
1316 #define ZGEMM_DEFAULT_P zgemm_p
1317 #define ZGEMM_DEFAULT_R zgemm_r
1319 #define XGEMM_DEFAULT_P xgemm_p
1320 #define XGEMM_DEFAULT_R xgemm_r
/* Q defaults are literal constants in this group. */
1322 #define SGEMM_DEFAULT_Q 768
1323 #define DGEMM_DEFAULT_Q 384
1324 #define QGEMM_DEFAULT_Q 192
1325 #define CGEMM_DEFAULT_Q 768
1326 #define ZGEMM_DEFAULT_Q 384
1327 #define XGEMM_DEFAULT_Q 192
1329 #define GETRF_FACTOR 0.75
/* GEMM_THREAD is mapped to gemm_thread_mn, a name not defined in this excerpt. */
1330 #define GEMM_THREAD gemm_thread_mn
/* GEMM default-parameter group.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1338 #define GEMM_DEFAULT_OFFSET_A 32
1339 #define GEMM_DEFAULT_OFFSET_B 0
1340 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1344 #define SWITCH_RATIO 4
/* Two complete UNROLL_M/UNROLL_N sets follow back to back; presumably an
 * #if/#else that is not shown here chooses between them -- TODO confirm. */
1347 #define SGEMM_DEFAULT_UNROLL_M 4
1348 #define DGEMM_DEFAULT_UNROLL_M 2
1349 #define QGEMM_DEFAULT_UNROLL_M 2
1350 #define CGEMM_DEFAULT_UNROLL_M 2
1351 #define ZGEMM_DEFAULT_UNROLL_M 1
1352 #define XGEMM_DEFAULT_UNROLL_M 1
1354 #define SGEMM_DEFAULT_UNROLL_N 4
1355 #define DGEMM_DEFAULT_UNROLL_N 4
1356 #define QGEMM_DEFAULT_UNROLL_N 2
1357 #define CGEMM_DEFAULT_UNROLL_N 2
1358 #define ZGEMM_DEFAULT_UNROLL_N 2
1359 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: same UNROLL_M values as the first, larger UNROLL_N for S/D/C/Z. */
1361 #define SGEMM_DEFAULT_UNROLL_M 4
1362 #define DGEMM_DEFAULT_UNROLL_M 2
1363 #define QGEMM_DEFAULT_UNROLL_M 2
1364 #define CGEMM_DEFAULT_UNROLL_M 2
1365 #define ZGEMM_DEFAULT_UNROLL_M 1
1366 #define XGEMM_DEFAULT_UNROLL_M 1
1368 #define SGEMM_DEFAULT_UNROLL_N 8
1369 #define DGEMM_DEFAULT_UNROLL_N 8
1370 #define QGEMM_DEFAULT_UNROLL_N 2
1371 #define CGEMM_DEFAULT_UNROLL_N 4
1372 #define ZGEMM_DEFAULT_UNROLL_N 4
1373 #define XGEMM_DEFAULT_UNROLL_N 1
/* P defaults are literals here; R defaults still expand to lowercase
 * identifiers (sgemm_r, ...) that are not defined in this excerpt. */
1376 #define SGEMM_DEFAULT_P 504
1377 #define SGEMM_DEFAULT_R sgemm_r
1379 #define DGEMM_DEFAULT_P 504
1380 #define DGEMM_DEFAULT_R dgemm_r
1382 #define QGEMM_DEFAULT_P 504
1383 #define QGEMM_DEFAULT_R qgemm_r
1385 #define CGEMM_DEFAULT_P 252
1386 #define CGEMM_DEFAULT_R cgemm_r
1388 #define ZGEMM_DEFAULT_P 252
1389 #define ZGEMM_DEFAULT_R zgemm_r
1391 #define XGEMM_DEFAULT_P 252
1392 #define XGEMM_DEFAULT_R xgemm_r
1394 #define SGEMM_DEFAULT_Q 512
1395 #define DGEMM_DEFAULT_Q 256
1396 #define QGEMM_DEFAULT_Q 128
1397 #define CGEMM_DEFAULT_Q 512
1398 #define ZGEMM_DEFAULT_Q 256
1399 #define XGEMM_DEFAULT_Q 128
1401 #define GETRF_FACTOR 0.72
/* GEMM default-parameter group; this one also carries GEMM3M parameters.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1411 #define GEMM_DEFAULT_OFFSET_A 0
1412 #define GEMM_DEFAULT_OFFSET_B 0
1413 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1417 #define SWITCH_RATIO 4
/* Two complete UNROLL_M/UNROLL_N sets follow back to back; presumably an
 * #if/#else that is not shown here chooses between them -- TODO confirm. */
1420 #define SGEMM_DEFAULT_UNROLL_M 4
1421 #define DGEMM_DEFAULT_UNROLL_M 2
1422 #define QGEMM_DEFAULT_UNROLL_M 2
1423 #define CGEMM_DEFAULT_UNROLL_M 2
1424 #define ZGEMM_DEFAULT_UNROLL_M 1
1425 #define XGEMM_DEFAULT_UNROLL_M 1
1427 #define SGEMM_DEFAULT_UNROLL_N 4
1428 #define DGEMM_DEFAULT_UNROLL_N 4
1429 #define QGEMM_DEFAULT_UNROLL_N 2
1430 #define CGEMM_DEFAULT_UNROLL_N 2
1431 #define ZGEMM_DEFAULT_UNROLL_N 2
1432 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second UNROLL_M/UNROLL_N set. */
1434 #define SGEMM_DEFAULT_UNROLL_M 16
1435 #define DGEMM_DEFAULT_UNROLL_M 8
1436 #define QGEMM_DEFAULT_UNROLL_M 2
1437 #define CGEMM_DEFAULT_UNROLL_M 8
1438 #define ZGEMM_DEFAULT_UNROLL_M 1
1439 #define XGEMM_DEFAULT_UNROLL_M 1
1441 #define SGEMM_DEFAULT_UNROLL_N 4
1442 #define DGEMM_DEFAULT_UNROLL_N 4
1443 #define QGEMM_DEFAULT_UNROLL_N 2
1444 #define CGEMM_DEFAULT_UNROLL_N 2
1445 #define ZGEMM_DEFAULT_UNROLL_N 4
1446 #define XGEMM_DEFAULT_UNROLL_N 1
/* P defaults are literals; R defaults expand to lowercase identifiers
 * (sgemm_r, ...). The commented-out lines are disabled literal R values
 * (1024) kept in the file as alternatives. */
1449 #define SGEMM_DEFAULT_P 768
1450 #define SGEMM_DEFAULT_R sgemm_r
1451 //#define SGEMM_DEFAULT_R 1024
1453 #define DGEMM_DEFAULT_P 512
1454 #define DGEMM_DEFAULT_R dgemm_r
1455 //#define DGEMM_DEFAULT_R 1024
1457 #define QGEMM_DEFAULT_P 504
1458 #define QGEMM_DEFAULT_R qgemm_r
1460 #define CGEMM_DEFAULT_P 768
1461 #define CGEMM_DEFAULT_R cgemm_r
1462 //#define CGEMM_DEFAULT_R 1024
1464 #define ZGEMM_DEFAULT_P 512
1465 #define ZGEMM_DEFAULT_R zgemm_r
1466 //#define ZGEMM_DEFAULT_R 1024
1468 #define XGEMM_DEFAULT_P 252
1469 #define XGEMM_DEFAULT_R xgemm_r
1471 #define SGEMM_DEFAULT_Q 384
1472 #define DGEMM_DEFAULT_Q 256
1473 #define QGEMM_DEFAULT_Q 128
1474 #define CGEMM_DEFAULT_Q 512
1475 #define ZGEMM_DEFAULT_Q 192
1476 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z only, and literal P/Q/R values
 * for C/Z/X. */
1478 #define CGEMM3M_DEFAULT_UNROLL_N 8
1479 #define CGEMM3M_DEFAULT_UNROLL_M 4
1480 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1481 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1483 #define CGEMM3M_DEFAULT_P 448
1484 #define ZGEMM3M_DEFAULT_P 224
1485 #define XGEMM3M_DEFAULT_P 112
1486 #define CGEMM3M_DEFAULT_Q 224
1487 #define ZGEMM3M_DEFAULT_Q 224
1488 #define XGEMM3M_DEFAULT_Q 224
1489 #define CGEMM3M_DEFAULT_R 12288
1490 #define ZGEMM3M_DEFAULT_R 12288
1491 #define XGEMM3M_DEFAULT_R 12288
1495 #define GETRF_FACTOR 0.72
/* GEMM default-parameter group (SWITCH_RATIO 32, GEMM_PREFERED_SIZE 16).
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1504 #define GEMM_DEFAULT_OFFSET_A 0
1505 #define GEMM_DEFAULT_OFFSET_B 0
1506 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1510 #define SWITCH_RATIO 32
1511 #define GEMM_PREFERED_SIZE 16
/* Two complete UNROLL_M/UNROLL_N sets follow back to back; presumably an
 * #if/#else that is not shown here chooses between them -- TODO confirm. */
1515 #define SGEMM_DEFAULT_UNROLL_M 4
1516 #define DGEMM_DEFAULT_UNROLL_M 2
1517 #define QGEMM_DEFAULT_UNROLL_M 2
1518 #define CGEMM_DEFAULT_UNROLL_M 2
1519 #define ZGEMM_DEFAULT_UNROLL_M 1
1520 #define XGEMM_DEFAULT_UNROLL_M 1
1522 #define SGEMM_DEFAULT_UNROLL_N 4
1523 #define DGEMM_DEFAULT_UNROLL_N 4
1524 #define QGEMM_DEFAULT_UNROLL_N 2
1525 #define CGEMM_DEFAULT_UNROLL_N 2
1526 #define ZGEMM_DEFAULT_UNROLL_N 2
1527 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second UNROLL_M/UNROLL_N set, followed by UNROLL_MN for S and D only. */
1531 #define SGEMM_DEFAULT_UNROLL_M 16
1532 #define DGEMM_DEFAULT_UNROLL_M 4
1533 #define QGEMM_DEFAULT_UNROLL_M 2
1534 #define CGEMM_DEFAULT_UNROLL_M 8
1535 #define ZGEMM_DEFAULT_UNROLL_M 4
1536 #define XGEMM_DEFAULT_UNROLL_M 1
1538 #define SGEMM_DEFAULT_UNROLL_N 4
1539 #define DGEMM_DEFAULT_UNROLL_N 8
1540 #define QGEMM_DEFAULT_UNROLL_N 2
1541 #define CGEMM_DEFAULT_UNROLL_N 2
1542 #define ZGEMM_DEFAULT_UNROLL_N 2
1543 #define XGEMM_DEFAULT_UNROLL_N 1
1545 #define SGEMM_DEFAULT_UNROLL_MN 32
1546 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First full P/R/Q set. CGEMM_DEFAULT_R is a literal (1024) here while the
 * other R defaults expand to lowercase identifiers. */
1551 #define SGEMM_DEFAULT_P 512
1552 #define SGEMM_DEFAULT_R sgemm_r
1553 #define DGEMM_DEFAULT_P 512
1554 #define DGEMM_DEFAULT_R dgemm_r
1555 #define QGEMM_DEFAULT_P 504
1556 #define QGEMM_DEFAULT_R qgemm_r
1557 #define CGEMM_DEFAULT_P 128
1558 #define CGEMM_DEFAULT_R 1024
1559 #define ZGEMM_DEFAULT_P 512
1560 #define ZGEMM_DEFAULT_R zgemm_r
1561 #define XGEMM_DEFAULT_P 252
1562 #define XGEMM_DEFAULT_R xgemm_r
1563 #define SGEMM_DEFAULT_Q 256
1564 #define DGEMM_DEFAULT_Q 256
1565 #define QGEMM_DEFAULT_Q 128
1566 #define CGEMM_DEFAULT_Q 256
1567 #define ZGEMM_DEFAULT_Q 192
1568 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set. SGEMM/DGEMM Q appear twice in a row (320/128, then
 * 384/256); presumably a nested conditional not shown here picks one --
 * TODO confirm. */
1572 #define SGEMM_DEFAULT_P 768
1573 #define DGEMM_DEFAULT_P 512
1574 #define CGEMM_DEFAULT_P 384
1575 #define ZGEMM_DEFAULT_P 256
1578 #define SGEMM_DEFAULT_Q 320
1579 #define DGEMM_DEFAULT_Q 128
1581 #define SGEMM_DEFAULT_Q 384
1582 #define DGEMM_DEFAULT_Q 256
1584 #define CGEMM_DEFAULT_Q 192
1585 #define ZGEMM_DEFAULT_Q 128
/* In this set DGEMM_DEFAULT_R is the only literal R value (13824). */
1587 #define SGEMM_DEFAULT_R sgemm_r
1588 #define DGEMM_DEFAULT_R 13824
1589 #define CGEMM_DEFAULT_R cgemm_r
1590 #define ZGEMM_DEFAULT_R zgemm_r
1592 #define QGEMM_DEFAULT_Q 128
1593 #define QGEMM_DEFAULT_P 504
1594 #define QGEMM_DEFAULT_R qgemm_r
1595 #define XGEMM_DEFAULT_P 252
1596 #define XGEMM_DEFAULT_R xgemm_r
1597 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z only, and literal P/Q/R values
 * for C/Z/X. */
1599 #define CGEMM3M_DEFAULT_UNROLL_N 8
1600 #define CGEMM3M_DEFAULT_UNROLL_M 4
1601 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1602 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1604 #define CGEMM3M_DEFAULT_P 448
1605 #define ZGEMM3M_DEFAULT_P 224
1606 #define XGEMM3M_DEFAULT_P 112
1607 #define CGEMM3M_DEFAULT_Q 224
1608 #define ZGEMM3M_DEFAULT_Q 224
1609 #define XGEMM3M_DEFAULT_Q 224
1610 #define CGEMM3M_DEFAULT_R 12288
1611 #define ZGEMM3M_DEFAULT_R 12288
1612 #define XGEMM3M_DEFAULT_R 12288
/* GEMM default-parameter group (SWITCH_RATIO 32, GEMM_PREFERED_SIZE 32); it
 * also defines USE_SGEMM_KERNEL_DIRECT.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1624 #define GEMM_DEFAULT_OFFSET_A 0
1625 #define GEMM_DEFAULT_OFFSET_B 0
1626 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1630 #define SWITCH_RATIO 32
1631 #define GEMM_PREFERED_SIZE 32
1632 #define USE_SGEMM_KERNEL_DIRECT 1
/* Two complete UNROLL_M/UNROLL_N sets follow back to back; presumably an
 * #if/#else that is not shown here chooses between them -- TODO confirm. */
1636 #define SGEMM_DEFAULT_UNROLL_M 4
1637 #define DGEMM_DEFAULT_UNROLL_M 2
1638 #define QGEMM_DEFAULT_UNROLL_M 2
1639 #define CGEMM_DEFAULT_UNROLL_M 2
1640 #define ZGEMM_DEFAULT_UNROLL_M 1
1641 #define XGEMM_DEFAULT_UNROLL_M 1
1643 #define SGEMM_DEFAULT_UNROLL_N 4
1644 #define DGEMM_DEFAULT_UNROLL_N 4
1645 #define QGEMM_DEFAULT_UNROLL_N 2
1646 #define CGEMM_DEFAULT_UNROLL_N 2
1647 #define ZGEMM_DEFAULT_UNROLL_N 2
1648 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second UNROLL_M/UNROLL_N set, followed by UNROLL_MN for S and D only. */
1652 #define SGEMM_DEFAULT_UNROLL_M 16
1653 #define DGEMM_DEFAULT_UNROLL_M 4
1654 #define QGEMM_DEFAULT_UNROLL_M 2
1655 #define CGEMM_DEFAULT_UNROLL_M 8
1656 #define ZGEMM_DEFAULT_UNROLL_M 4
1657 #define XGEMM_DEFAULT_UNROLL_M 1
1659 #define SGEMM_DEFAULT_UNROLL_N 4
1660 #define DGEMM_DEFAULT_UNROLL_N 8
1661 #define QGEMM_DEFAULT_UNROLL_N 2
1662 #define CGEMM_DEFAULT_UNROLL_N 2
1663 #define ZGEMM_DEFAULT_UNROLL_N 2
1664 #define XGEMM_DEFAULT_UNROLL_N 1
1666 #define SGEMM_DEFAULT_UNROLL_MN 32
1667 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First full P/R/Q set. CGEMM_DEFAULT_R is a literal (1024) here while the
 * other R defaults expand to lowercase identifiers. */
1672 #define SGEMM_DEFAULT_P 512
1673 #define SGEMM_DEFAULT_R sgemm_r
1674 #define DGEMM_DEFAULT_P 512
1675 #define DGEMM_DEFAULT_R dgemm_r
1676 #define QGEMM_DEFAULT_P 504
1677 #define QGEMM_DEFAULT_R qgemm_r
1678 #define CGEMM_DEFAULT_P 128
1679 #define CGEMM_DEFAULT_R 1024
1680 #define ZGEMM_DEFAULT_P 512
1681 #define ZGEMM_DEFAULT_R zgemm_r
1682 #define XGEMM_DEFAULT_P 252
1683 #define XGEMM_DEFAULT_R xgemm_r
1684 #define SGEMM_DEFAULT_Q 256
1685 #define DGEMM_DEFAULT_Q 256
1686 #define QGEMM_DEFAULT_Q 128
1687 #define CGEMM_DEFAULT_Q 256
1688 #define ZGEMM_DEFAULT_Q 192
1689 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set. SGEMM/DGEMM Q appear twice in a row (320/128, then
 * 384/256); presumably a nested conditional not shown here picks one --
 * TODO confirm. */
1693 #define SGEMM_DEFAULT_P 768
1694 #define DGEMM_DEFAULT_P 512
1695 #define CGEMM_DEFAULT_P 384
1696 #define ZGEMM_DEFAULT_P 256
1699 #define SGEMM_DEFAULT_Q 320
1700 #define DGEMM_DEFAULT_Q 128
1702 #define SGEMM_DEFAULT_Q 384
1703 #define DGEMM_DEFAULT_Q 256
1705 #define CGEMM_DEFAULT_Q 192
1706 #define ZGEMM_DEFAULT_Q 128
/* In this set DGEMM_DEFAULT_R is the only literal R value (13824). */
1708 #define SGEMM_DEFAULT_R sgemm_r
1709 #define DGEMM_DEFAULT_R 13824
1710 #define CGEMM_DEFAULT_R cgemm_r
1711 #define ZGEMM_DEFAULT_R zgemm_r
1713 #define QGEMM_DEFAULT_Q 128
1714 #define QGEMM_DEFAULT_P 504
1715 #define QGEMM_DEFAULT_R qgemm_r
1716 #define XGEMM_DEFAULT_P 252
1717 #define XGEMM_DEFAULT_R xgemm_r
1718 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z only, and literal P/Q/R values
 * for C/Z/X. */
1720 #define CGEMM3M_DEFAULT_UNROLL_N 8
1721 #define CGEMM3M_DEFAULT_UNROLL_M 4
1722 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1723 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1725 #define CGEMM3M_DEFAULT_P 448
1726 #define ZGEMM3M_DEFAULT_P 224
1727 #define XGEMM3M_DEFAULT_P 112
1728 #define CGEMM3M_DEFAULT_Q 224
1729 #define ZGEMM3M_DEFAULT_Q 224
1730 #define XGEMM3M_DEFAULT_Q 224
1731 #define CGEMM3M_DEFAULT_R 12288
1732 #define ZGEMM3M_DEFAULT_R 12288
1733 #define XGEMM3M_DEFAULT_R 12288
/* GEMM default-parameter group (OFFSET_A 64, ALIGN 0x0ffffUL).
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1747 #define GEMM_DEFAULT_OFFSET_A 64
1748 #define GEMM_DEFAULT_OFFSET_B 0
1749 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Two UNROLL_M sets follow back to back, then a single UNROLL_N set;
 * presumably an #if/#else not shown here chooses the UNROLL_M set --
 * TODO confirm. */
1754 #define SGEMM_DEFAULT_UNROLL_M 4
1755 #define DGEMM_DEFAULT_UNROLL_M 2
1756 #define QGEMM_DEFAULT_UNROLL_M 2
1757 #define CGEMM_DEFAULT_UNROLL_M 2
1758 #define ZGEMM_DEFAULT_UNROLL_M 1
1759 #define XGEMM_DEFAULT_UNROLL_M 1
1761 #define SGEMM_DEFAULT_UNROLL_M 8
1762 #define DGEMM_DEFAULT_UNROLL_M 4
1763 #define QGEMM_DEFAULT_UNROLL_M 2
1764 #define CGEMM_DEFAULT_UNROLL_M 4
1765 #define ZGEMM_DEFAULT_UNROLL_M 2
1766 #define XGEMM_DEFAULT_UNROLL_M 1
1769 #define SGEMM_DEFAULT_UNROLL_N 4
1770 #define DGEMM_DEFAULT_UNROLL_N 2
1771 #define QGEMM_DEFAULT_UNROLL_N 2
1772 #define CGEMM_DEFAULT_UNROLL_N 2
1773 #define ZGEMM_DEFAULT_UNROLL_N 1
1774 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to lowercase identifiers (sgemm_p, sgemm_r, ...)
 * rather than literals; those names are not defined in this excerpt. */
1776 #define SGEMM_DEFAULT_P sgemm_p
1777 #define SGEMM_DEFAULT_R sgemm_r
1779 #define DGEMM_DEFAULT_P dgemm_p
1780 #define DGEMM_DEFAULT_R dgemm_r
1782 #define QGEMM_DEFAULT_P qgemm_p
1783 #define QGEMM_DEFAULT_R qgemm_r
1785 #define CGEMM_DEFAULT_P cgemm_p
1786 #define CGEMM_DEFAULT_R cgemm_r
1788 #define ZGEMM_DEFAULT_P zgemm_p
1789 #define ZGEMM_DEFAULT_R zgemm_r
1791 #define XGEMM_DEFAULT_P xgemm_p
1792 #define XGEMM_DEFAULT_R xgemm_r
/* Every Q default in this group is the same literal, 256. */
1794 #define SGEMM_DEFAULT_Q 256
1795 #define DGEMM_DEFAULT_Q 256
1796 #define QGEMM_DEFAULT_Q 256
1797 #define CGEMM_DEFAULT_Q 256
1798 #define ZGEMM_DEFAULT_Q 256
1799 #define XGEMM_DEFAULT_Q 256
/* GEMM default-parameter group (OFFSET_B 128, GETRF_FACTOR 0.65).
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1809 #define GEMM_DEFAULT_OFFSET_A 0
1810 #define GEMM_DEFAULT_OFFSET_B 128
1811 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* A single unroll set: 8x8 for S/D/Q and 4x4 for C/Z/X. */
1813 #define SGEMM_DEFAULT_UNROLL_M 8
1814 #define SGEMM_DEFAULT_UNROLL_N 8
1815 #define DGEMM_DEFAULT_UNROLL_M 8
1816 #define DGEMM_DEFAULT_UNROLL_N 8
1817 #define QGEMM_DEFAULT_UNROLL_M 8
1818 #define QGEMM_DEFAULT_UNROLL_N 8
1819 #define CGEMM_DEFAULT_UNROLL_M 4
1820 #define CGEMM_DEFAULT_UNROLL_N 4
1821 #define ZGEMM_DEFAULT_UNROLL_M 4
1822 #define ZGEMM_DEFAULT_UNROLL_N 4
1823 #define XGEMM_DEFAULT_UNROLL_M 4
1824 #define XGEMM_DEFAULT_UNROLL_N 4
/* P defaults expand to lowercase identifiers not defined in this excerpt. */
1826 #define SGEMM_DEFAULT_P sgemm_p
1827 #define DGEMM_DEFAULT_P dgemm_p
1828 #define QGEMM_DEFAULT_P qgemm_p
1829 #define CGEMM_DEFAULT_P cgemm_p
1830 #define ZGEMM_DEFAULT_P zgemm_p
1831 #define XGEMM_DEFAULT_P xgemm_p
/* Every Q default in this group is the same literal, 1024. */
1833 #define SGEMM_DEFAULT_Q 1024
1834 #define DGEMM_DEFAULT_Q 1024
1835 #define QGEMM_DEFAULT_Q 1024
1836 #define CGEMM_DEFAULT_Q 1024
1837 #define ZGEMM_DEFAULT_Q 1024
1838 #define XGEMM_DEFAULT_Q 1024
/* R defaults expand to lowercase identifiers not defined in this excerpt. */
1840 #define SGEMM_DEFAULT_R sgemm_r
1841 #define DGEMM_DEFAULT_R dgemm_r
1842 #define QGEMM_DEFAULT_R qgemm_r
1843 #define CGEMM_DEFAULT_R cgemm_r
1844 #define ZGEMM_DEFAULT_R zgemm_r
1845 #define XGEMM_DEFAULT_R xgemm_r
1849 #define GETRF_FACTOR 0.65
/* GEMM default parameters used when any of EV4, EV5 or EV6 is defined.
 * Only S/D/C/Z variants are configured in this group. */
1853 #if defined(EV4) || defined(EV5) || defined(EV6)
1863 #define GEMM_DEFAULT_OFFSET_A 512
1864 #define GEMM_DEFAULT_OFFSET_B 512
1865 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1867 #define SGEMM_DEFAULT_UNROLL_M 4
1868 #define SGEMM_DEFAULT_UNROLL_N 4
1869 #define DGEMM_DEFAULT_UNROLL_M 4
1870 #define DGEMM_DEFAULT_UNROLL_N 4
1871 #define CGEMM_DEFAULT_UNROLL_M 2
1872 #define CGEMM_DEFAULT_UNROLL_N 2
1873 #define ZGEMM_DEFAULT_UNROLL_M 2
1874 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Three P/Q sets follow; only the first also sets R. NOTE(review): presumably
 * one set per EV variant, selected by nested conditionals that are not
 * visible in this excerpt -- TODO confirm. */
1879 #define SGEMM_DEFAULT_P 32
1880 #define SGEMM_DEFAULT_Q 112
1881 #define SGEMM_DEFAULT_R 256
1883 #define DGEMM_DEFAULT_P 32
1884 #define DGEMM_DEFAULT_Q 56
1885 #define DGEMM_DEFAULT_R 256
1887 #define CGEMM_DEFAULT_P 32
1888 #define CGEMM_DEFAULT_Q 64
1889 #define CGEMM_DEFAULT_R 240
1891 #define ZGEMM_DEFAULT_P 32
1892 #define ZGEMM_DEFAULT_Q 32
1893 #define ZGEMM_DEFAULT_R 240
/* Second P/Q set (no R values). */
1897 #define SGEMM_DEFAULT_P 64
1898 #define SGEMM_DEFAULT_Q 256
1900 #define DGEMM_DEFAULT_P 64
1901 #define DGEMM_DEFAULT_Q 128
1903 #define CGEMM_DEFAULT_P 64
1904 #define CGEMM_DEFAULT_Q 128
1906 #define ZGEMM_DEFAULT_P 64
1907 #define ZGEMM_DEFAULT_Q 64
/* Third P/Q set (no R values). */
1911 #define SGEMM_DEFAULT_P 256
1912 #define SGEMM_DEFAULT_Q 512
1914 #define DGEMM_DEFAULT_P 256
1915 #define DGEMM_DEFAULT_Q 256
1917 #define CGEMM_DEFAULT_P 256
1918 #define CGEMM_DEFAULT_Q 256
1920 #define ZGEMM_DEFAULT_P 128
1921 #define ZGEMM_DEFAULT_Q 256
/* GEMM default-parameter group (OFFSET_B 8192); only S/D/C/Z unroll, P and Q
 * values are set, and no R values appear in the visible lines.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1931 #define GEMM_DEFAULT_OFFSET_A 0
1932 #define GEMM_DEFAULT_OFFSET_B 8192
1933 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1935 #define SGEMM_DEFAULT_UNROLL_M 16
1936 #define SGEMM_DEFAULT_UNROLL_N 4
1937 #define DGEMM_DEFAULT_UNROLL_M 4
1938 #define DGEMM_DEFAULT_UNROLL_N 4
1939 #define CGEMM_DEFAULT_UNROLL_M 8
1940 #define CGEMM_DEFAULT_UNROLL_N 2
1941 #define ZGEMM_DEFAULT_UNROLL_M 2
1942 #define ZGEMM_DEFAULT_UNROLL_N 2
1944 #define SGEMM_DEFAULT_P 128
1945 #define DGEMM_DEFAULT_P 128
1946 #define CGEMM_DEFAULT_P 128
1947 #define ZGEMM_DEFAULT_P 128
1949 #define SGEMM_DEFAULT_Q 512
1950 #define DGEMM_DEFAULT_Q 256
1951 #define CGEMM_DEFAULT_Q 256
1952 #define ZGEMM_DEFAULT_Q 128
/* GEMM default-parameter group (OFFSET_B 1024); only S/D/C/Z unroll, P and Q
 * values are set, and no R values appear in the visible lines.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1958 #define GEMM_DEFAULT_OFFSET_A 0
1959 #define GEMM_DEFAULT_OFFSET_B 1024
1960 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1962 #define SGEMM_DEFAULT_UNROLL_M 16
1963 #define SGEMM_DEFAULT_UNROLL_N 4
1964 #define DGEMM_DEFAULT_UNROLL_M 4
1965 #define DGEMM_DEFAULT_UNROLL_N 4
1966 #define CGEMM_DEFAULT_UNROLL_M 8
1967 #define CGEMM_DEFAULT_UNROLL_N 2
1968 #define ZGEMM_DEFAULT_UNROLL_M 2
1969 #define ZGEMM_DEFAULT_UNROLL_N 2
1971 #define SGEMM_DEFAULT_P 256
1972 #define DGEMM_DEFAULT_P 128
1973 #define CGEMM_DEFAULT_P 128
1974 #define ZGEMM_DEFAULT_P 64
/* Every Q default in this group is the same literal, 256. */
1976 #define SGEMM_DEFAULT_Q 256
1977 #define DGEMM_DEFAULT_Q 256
1978 #define CGEMM_DEFAULT_Q 256
1979 #define ZGEMM_DEFAULT_Q 256
/* GEMM default-parameter group (OFFSET_A 2688, OFFSET_B 3072); the P values
 * depend on L2_SIZE.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
1989 #define GEMM_DEFAULT_OFFSET_A 2688
1990 #define GEMM_DEFAULT_OFFSET_B 3072
1991 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1993 #define SGEMM_DEFAULT_UNROLL_M 16
1994 #define SGEMM_DEFAULT_UNROLL_N 4
1995 #define DGEMM_DEFAULT_UNROLL_M 4
1996 #define DGEMM_DEFAULT_UNROLL_N 4
1997 #define CGEMM_DEFAULT_UNROLL_M 8
1998 #define CGEMM_DEFAULT_UNROLL_N 2
1999 #define ZGEMM_DEFAULT_UNROLL_M 2
2000 #define ZGEMM_DEFAULT_UNROLL_N 2
/* The P set directly below applies when L2_SIZE equals 1024976.
 * NOTE(review): 1024976 is not 2^20 (1048576); confirm it matches the exact
 * L2_SIZE value the build configuration defines before changing it. */
2003 #if L2_SIZE == 1024976
2004 #define SGEMM_DEFAULT_P 320
2005 #define DGEMM_DEFAULT_P 256
2006 #define CGEMM_DEFAULT_P 256
2007 #define ZGEMM_DEFAULT_P 256
/* Alternative P set (176 for all four); presumably the #else branch, whose
 * directive is not visible in this excerpt -- TODO confirm. */
2009 #define SGEMM_DEFAULT_P 176
2010 #define DGEMM_DEFAULT_P 176
2011 #define CGEMM_DEFAULT_P 176
2012 #define ZGEMM_DEFAULT_P 176
2016 #define SGEMM_DEFAULT_Q 512
2017 #define DGEMM_DEFAULT_Q 256
2018 #define CGEMM_DEFAULT_Q 256
2019 #define ZGEMM_DEFAULT_Q 128
/* GEMM default-parameter group in which each R default is an alias of the
 * matching P default.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
2030 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2031 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
2032 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2034 #define SGEMM_DEFAULT_UNROLL_M 4
2035 #define SGEMM_DEFAULT_UNROLL_N 4
2036 #define DGEMM_DEFAULT_UNROLL_M 4
2037 #define DGEMM_DEFAULT_UNROLL_N 4
2038 #define CGEMM_DEFAULT_UNROLL_M 2
2039 #define CGEMM_DEFAULT_UNROLL_N 2
2040 #define ZGEMM_DEFAULT_UNROLL_M 2
2041 #define ZGEMM_DEFAULT_UNROLL_N 2
2043 #define SGEMM_DEFAULT_P 512
2044 #define DGEMM_DEFAULT_P 512
2045 #define CGEMM_DEFAULT_P 512
2046 #define ZGEMM_DEFAULT_P 512
2048 #define SGEMM_DEFAULT_Q 1024
2049 #define DGEMM_DEFAULT_Q 512
2050 #define CGEMM_DEFAULT_Q 512
2051 #define ZGEMM_DEFAULT_Q 256
/* R expands to the corresponding *_DEFAULT_P macro, so with the P values
 * above each R evaluates to 512. */
2053 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2054 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2055 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2056 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/* GEMM default-parameter group with two alternative Q sets.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
2066 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2067 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
2068 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2070 #define SGEMM_DEFAULT_UNROLL_M 8
2071 #define SGEMM_DEFAULT_UNROLL_N 4
2072 #define DGEMM_DEFAULT_UNROLL_M 8
2073 #define DGEMM_DEFAULT_UNROLL_N 4
2074 #define CGEMM_DEFAULT_UNROLL_M 4
2075 #define CGEMM_DEFAULT_UNROLL_N 2
2076 #define ZGEMM_DEFAULT_UNROLL_M 4
2077 #define ZGEMM_DEFAULT_UNROLL_N 2
2079 #define SGEMM_DEFAULT_P 128
2080 #define DGEMM_DEFAULT_P 128
2081 #define CGEMM_DEFAULT_P 128
2082 #define ZGEMM_DEFAULT_P 128
/* Two Q sets follow back to back (4096/3072/2048/1024, then 512/256/256/128);
 * presumably a conditional not shown here picks one -- TODO confirm. */
2084 #define SGEMM_DEFAULT_Q 4096
2085 #define DGEMM_DEFAULT_Q 3072
2086 #define CGEMM_DEFAULT_Q 2048
2087 #define ZGEMM_DEFAULT_Q 1024
2089 #define SGEMM_DEFAULT_Q 512
2090 #define DGEMM_DEFAULT_Q 256
2091 #define CGEMM_DEFAULT_Q 256
2092 #define ZGEMM_DEFAULT_Q 128
/* GEMM default parameters used when any of POWER3, POWER4 or POWER5 is
 * defined. */
2100 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
2101 #define GEMM_DEFAULT_OFFSET_A 0
2102 #define GEMM_DEFAULT_OFFSET_B 2048
2103 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2105 #define SGEMM_DEFAULT_UNROLL_M 4
2106 #define SGEMM_DEFAULT_UNROLL_N 4
2107 #define DGEMM_DEFAULT_UNROLL_M 4
2108 #define DGEMM_DEFAULT_UNROLL_N 4
2109 #define CGEMM_DEFAULT_UNROLL_M 2
2110 #define CGEMM_DEFAULT_UNROLL_N 2
2111 #define ZGEMM_DEFAULT_UNROLL_M 2
2112 #define ZGEMM_DEFAULT_UNROLL_N 2
/* First P/Q/R set: covers S, D and Z only; no CGEMM entries are present in
 * the visible lines. NOTE(review): presumably tied to one POWER variant by a
 * nested conditional that is not shown -- TODO confirm. */
2119 #define SGEMM_DEFAULT_P 256
2120 #define SGEMM_DEFAULT_Q 432
2121 #define SGEMM_DEFAULT_R 1012
2123 #define DGEMM_DEFAULT_P 256
2124 #define DGEMM_DEFAULT_Q 216
2125 #define DGEMM_DEFAULT_R 1012
2127 #define ZGEMM_DEFAULT_P 256
2128 #define ZGEMM_DEFAULT_Q 104
2129 #define ZGEMM_DEFAULT_R 1012
/* P values chosen by ALLOC_HUGETLB: 184 when it is defined; the 144 set that
 * follows is presumably the #else branch (directive not visible). */
2133 #ifdef ALLOC_HUGETLB
2134 #define SGEMM_DEFAULT_P 184
2135 #define DGEMM_DEFAULT_P 184
2136 #define CGEMM_DEFAULT_P 184
2137 #define ZGEMM_DEFAULT_P 184
2139 #define SGEMM_DEFAULT_P 144
2140 #define DGEMM_DEFAULT_P 144
2141 #define CGEMM_DEFAULT_P 144
2142 #define ZGEMM_DEFAULT_P 144
/* A second ALLOC_HUGETLB-dependent P choice: 512/256/256/128 when defined;
 * the 320/160/160/80 set is presumably the #else branch (not visible). */
2147 #ifdef ALLOC_HUGETLB
2148 #define SGEMM_DEFAULT_P 512
2149 #define DGEMM_DEFAULT_P 256
2150 #define CGEMM_DEFAULT_P 256
2151 #define ZGEMM_DEFAULT_P 128
2153 #define SGEMM_DEFAULT_P 320
2154 #define DGEMM_DEFAULT_P 160
2155 #define CGEMM_DEFAULT_P 160
2156 #define ZGEMM_DEFAULT_P 80
/* Q is 256 for all four variants. */
2159 #define SGEMM_DEFAULT_Q 256
2160 #define CGEMM_DEFAULT_Q 256
2161 #define DGEMM_DEFAULT_Q 256
2162 #define ZGEMM_DEFAULT_Q 256
/* GEMM default-parameter group (OFFSET_A 384, OFFSET_B 1024); only S/D/C/Z
 * unroll, P and Q values are set, and no R values appear in the visible lines.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
2174 #define GEMM_DEFAULT_OFFSET_A 384
2175 #define GEMM_DEFAULT_OFFSET_B 1024
2176 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2178 #define SGEMM_DEFAULT_UNROLL_M 4
2179 #define SGEMM_DEFAULT_UNROLL_N 4
2180 #define DGEMM_DEFAULT_UNROLL_M 4
2181 #define DGEMM_DEFAULT_UNROLL_N 4
2182 #define CGEMM_DEFAULT_UNROLL_M 2
2183 #define CGEMM_DEFAULT_UNROLL_N 4
2184 #define ZGEMM_DEFAULT_UNROLL_M 2
2185 #define ZGEMM_DEFAULT_UNROLL_N 4
2187 #define SGEMM_DEFAULT_P 992
2188 #define DGEMM_DEFAULT_P 480
2189 #define CGEMM_DEFAULT_P 488
2190 #define ZGEMM_DEFAULT_P 248
2192 #define SGEMM_DEFAULT_Q 504
2193 #define DGEMM_DEFAULT_Q 504
2194 #define CGEMM_DEFAULT_Q 400
2195 #define ZGEMM_DEFAULT_Q 400
/* GEMM default-parameter group (OFFSET_B 65536); only S/D/C/Z unroll, P and Q
 * values are set, and no R values appear in the visible lines.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
2206 #define GEMM_DEFAULT_OFFSET_A 0
2207 #define GEMM_DEFAULT_OFFSET_B 65536
2208 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2210 #define SGEMM_DEFAULT_UNROLL_M 16
2211 #define SGEMM_DEFAULT_UNROLL_N 8
2212 #define DGEMM_DEFAULT_UNROLL_M 16
2213 #define DGEMM_DEFAULT_UNROLL_N 4
2214 #define CGEMM_DEFAULT_UNROLL_M 8
2215 #define CGEMM_DEFAULT_UNROLL_N 4
2216 #define ZGEMM_DEFAULT_UNROLL_M 8
2217 #define ZGEMM_DEFAULT_UNROLL_N 2
2219 #define SGEMM_DEFAULT_P 1280
2220 #define DGEMM_DEFAULT_P 640
2221 #define CGEMM_DEFAULT_P 640
2222 #define ZGEMM_DEFAULT_P 320
2224 #define SGEMM_DEFAULT_Q 640
2225 #define DGEMM_DEFAULT_Q 720
2226 #define CGEMM_DEFAULT_Q 640
2227 #define ZGEMM_DEFAULT_Q 640
/* GEMM default parameters used when both SPARC and V7 are defined. */
2234 #if defined(SPARC) && defined(V7)
2239 #define GEMM_DEFAULT_OFFSET_A 0
2240 #define GEMM_DEFAULT_OFFSET_B 2048
2241 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors are narrow in M and wide in N (2x8 for S/D, 1x4 for C/Z). */
2243 #define SGEMM_DEFAULT_UNROLL_M 2
2244 #define SGEMM_DEFAULT_UNROLL_N 8
2245 #define DGEMM_DEFAULT_UNROLL_M 2
2246 #define DGEMM_DEFAULT_UNROLL_N 8
2247 #define CGEMM_DEFAULT_UNROLL_M 1
2248 #define CGEMM_DEFAULT_UNROLL_N 4
2249 #define ZGEMM_DEFAULT_UNROLL_M 1
2250 #define ZGEMM_DEFAULT_UNROLL_N 4
2252 #define SGEMM_DEFAULT_P 256
2253 #define DGEMM_DEFAULT_P 256
2254 #define CGEMM_DEFAULT_P 256
2255 #define ZGEMM_DEFAULT_P 256
2257 #define SGEMM_DEFAULT_Q 512
2258 #define DGEMM_DEFAULT_Q 256
2259 #define CGEMM_DEFAULT_Q 256
2260 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD is mapped to gemm_thread_mn, a name not defined in this excerpt. */
2263 #define GEMM_THREAD gemm_thread_mn
/* GEMM default parameters used when SPARC and V9 are both defined, or when
 * __sparc_v9__ is defined. */
2266 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2271 #define GEMM_DEFAULT_OFFSET_A 0
2272 #define GEMM_DEFAULT_OFFSET_B 2048
2273 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2275 #define SGEMM_DEFAULT_UNROLL_M 4
2276 #define SGEMM_DEFAULT_UNROLL_N 4
2277 #define DGEMM_DEFAULT_UNROLL_M 4
2278 #define DGEMM_DEFAULT_UNROLL_N 4
2279 #define CGEMM_DEFAULT_UNROLL_M 2
2280 #define CGEMM_DEFAULT_UNROLL_N 2
2281 #define ZGEMM_DEFAULT_UNROLL_M 2
2282 #define ZGEMM_DEFAULT_UNROLL_N 2
2284 #define SGEMM_DEFAULT_P 512
2285 #define DGEMM_DEFAULT_P 512
2286 #define CGEMM_DEFAULT_P 512
2287 #define ZGEMM_DEFAULT_P 512
2289 #define SGEMM_DEFAULT_Q 1024
2290 #define DGEMM_DEFAULT_Q 512
2291 #define CGEMM_DEFAULT_Q 512
2292 #define ZGEMM_DEFAULT_Q 256
/* GEMM default-parameter group with literal P/Q/R values for S/D/C/Z.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
2302 #define GEMM_DEFAULT_OFFSET_A 0
2303 #define GEMM_DEFAULT_OFFSET_B 0
2304 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2306 #define SGEMM_DEFAULT_UNROLL_M 2
2307 #define SGEMM_DEFAULT_UNROLL_N 8
2308 #define DGEMM_DEFAULT_UNROLL_M 2
2309 #define DGEMM_DEFAULT_UNROLL_N 8
2310 #define CGEMM_DEFAULT_UNROLL_M 1
2311 #define CGEMM_DEFAULT_UNROLL_N 4
2312 #define ZGEMM_DEFAULT_UNROLL_M 1
2313 #define ZGEMM_DEFAULT_UNROLL_N 4
2315 #define SGEMM_DEFAULT_P 108
2316 #define DGEMM_DEFAULT_P 112
2317 #define CGEMM_DEFAULT_P 108
2318 #define ZGEMM_DEFAULT_P 112
2320 #define SGEMM_DEFAULT_Q 288
2321 #define DGEMM_DEFAULT_Q 144
2322 #define CGEMM_DEFAULT_Q 144
2323 #define ZGEMM_DEFAULT_Q 72
/* R is the same literal, 2000, for all four variants. */
2325 #define SGEMM_DEFAULT_R 2000
2326 #define DGEMM_DEFAULT_R 2000
2327 #define CGEMM_DEFAULT_R 2000
2328 #define ZGEMM_DEFAULT_R 2000
/* GEMM default-parameter group that its own comment marks as copied from the
 * SICORTEX settings.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt, and which lines were copied cannot be confirmed from
 * here. */
2334 ////Copy from SICORTEX
2338 #define GEMM_DEFAULT_OFFSET_A 0
2339 #define GEMM_DEFAULT_OFFSET_B 0
2340 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2342 #define SGEMM_DEFAULT_UNROLL_M 8
2343 #define SGEMM_DEFAULT_UNROLL_N 4
2345 #define DGEMM_DEFAULT_UNROLL_M 4
2346 #define DGEMM_DEFAULT_UNROLL_N 4
2348 #define CGEMM_DEFAULT_UNROLL_M 4
2349 #define CGEMM_DEFAULT_UNROLL_N 2
2351 #define ZGEMM_DEFAULT_UNROLL_M 2
2352 #define ZGEMM_DEFAULT_UNROLL_N 2
2354 #define SGEMM_DEFAULT_P 64
2355 #define DGEMM_DEFAULT_P 44
2356 #define CGEMM_DEFAULT_P 64
2357 #define ZGEMM_DEFAULT_P 32
2359 #define SGEMM_DEFAULT_Q 192
2360 #define DGEMM_DEFAULT_Q 92
2361 #define CGEMM_DEFAULT_Q 128
2362 #define ZGEMM_DEFAULT_Q 80
/* DGEMM_DEFAULT_R expands to the identifier dgemm_r (not defined in this
 * excerpt); the other three R defaults are the literal 640. */
2364 #define SGEMM_DEFAULT_R 640
2365 #define DGEMM_DEFAULT_R dgemm_r
2366 #define CGEMM_DEFAULT_R 640
2367 #define ZGEMM_DEFAULT_R 640
/* Additional offsets; their consumers are not visible in this excerpt. */
2369 #define GEMM_OFFSET_A1 0x10000
2370 #define GEMM_OFFSET_B1 0x100000
/* GEMM default-parameter group with 2x2 unroll factors for all four variants.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
2379 #define GEMM_DEFAULT_OFFSET_A 0
2380 #define GEMM_DEFAULT_OFFSET_B 0
2381 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2383 #define SGEMM_DEFAULT_UNROLL_M 2
2384 #define SGEMM_DEFAULT_UNROLL_N 2
2386 #define DGEMM_DEFAULT_UNROLL_M 2
2387 #define DGEMM_DEFAULT_UNROLL_N 2
2389 #define CGEMM_DEFAULT_UNROLL_M 2
2390 #define CGEMM_DEFAULT_UNROLL_N 2
2392 #define ZGEMM_DEFAULT_UNROLL_M 2
2393 #define ZGEMM_DEFAULT_UNROLL_N 2
2395 #define SGEMM_DEFAULT_P 64
2396 #define DGEMM_DEFAULT_P 24
2397 #define CGEMM_DEFAULT_P 24
2398 #define ZGEMM_DEFAULT_P 20
2400 #define SGEMM_DEFAULT_Q 192
2401 #define DGEMM_DEFAULT_Q 128
2402 #define CGEMM_DEFAULT_Q 128
2403 #define ZGEMM_DEFAULT_Q 64
/* R is the same literal, 512, for all four variants. */
2405 #define SGEMM_DEFAULT_R 512
2406 #define DGEMM_DEFAULT_R 512
2407 #define CGEMM_DEFAULT_R 512
2408 #define ZGEMM_DEFAULT_R 512
/* Additional offsets; their consumers are not visible in this excerpt. */
2410 #define GEMM_OFFSET_A1 0x10000
2411 #define GEMM_OFFSET_B1 0x100000
/* GEMM default parameters used when any of P5600, MIPS1004K, I6400, P6600 or
 * I6500 is defined. */
2416 #if defined(P5600) || defined(MIPS1004K) || defined(I6400) || defined(P6600) || defined(I6500)
2420 #define GEMM_DEFAULT_OFFSET_A 0
2421 #define GEMM_DEFAULT_OFFSET_B 0
2422 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Two unroll sets follow back to back (a larger one, then 2x2 for all
 * variants); presumably a nested conditional not shown here chooses between
 * them -- TODO confirm. */
2425 #define SGEMM_DEFAULT_UNROLL_M 8
2426 #define SGEMM_DEFAULT_UNROLL_N 8
2428 #define DGEMM_DEFAULT_UNROLL_M 8
2429 #define DGEMM_DEFAULT_UNROLL_N 4
2431 #define CGEMM_DEFAULT_UNROLL_M 8
2432 #define CGEMM_DEFAULT_UNROLL_N 4
2434 #define ZGEMM_DEFAULT_UNROLL_M 4
2435 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Second unroll set: 2x2 everywhere. */
2437 #define SGEMM_DEFAULT_UNROLL_M 2
2438 #define SGEMM_DEFAULT_UNROLL_N 2
2440 #define DGEMM_DEFAULT_UNROLL_M 2
2441 #define DGEMM_DEFAULT_UNROLL_N 2
2443 #define CGEMM_DEFAULT_UNROLL_M 2
2444 #define CGEMM_DEFAULT_UNROLL_N 2
2446 #define ZGEMM_DEFAULT_UNROLL_M 2
2447 #define ZGEMM_DEFAULT_UNROLL_N 2
/* A single P/Q/R set, all literals. */
2450 #define SGEMM_DEFAULT_P 128
2451 #define DGEMM_DEFAULT_P 128
2452 #define CGEMM_DEFAULT_P 96
2453 #define ZGEMM_DEFAULT_P 64
2455 #define SGEMM_DEFAULT_Q 240
2456 #define DGEMM_DEFAULT_Q 120
2457 #define CGEMM_DEFAULT_Q 120
2458 #define ZGEMM_DEFAULT_Q 120
2460 #define SGEMM_DEFAULT_R 12288
2461 #define DGEMM_DEFAULT_R 8192
2462 #define CGEMM_DEFAULT_R 4096
2463 #define ZGEMM_DEFAULT_R 4096
/* GEMM default-parameter group: 4x4 unroll for S/D, 2x2 for C/Z, literal
 * P/Q/R values.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
2472 #define GEMM_DEFAULT_OFFSET_A 0
2473 #define GEMM_DEFAULT_OFFSET_B 0
2474 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2476 #define SGEMM_DEFAULT_UNROLL_M 4
2477 #define SGEMM_DEFAULT_UNROLL_N 4
2479 #define DGEMM_DEFAULT_UNROLL_M 4
2480 #define DGEMM_DEFAULT_UNROLL_N 4
2482 #define CGEMM_DEFAULT_UNROLL_M 2
2483 #define CGEMM_DEFAULT_UNROLL_N 2
2485 #define ZGEMM_DEFAULT_UNROLL_M 2
2486 #define ZGEMM_DEFAULT_UNROLL_N 2
2488 #define SGEMM_DEFAULT_P 128
2489 #define DGEMM_DEFAULT_P 128
2490 #define CGEMM_DEFAULT_P 96
2491 #define ZGEMM_DEFAULT_P 64
2493 #define SGEMM_DEFAULT_Q 240
2494 #define DGEMM_DEFAULT_Q 120
2495 #define CGEMM_DEFAULT_Q 120
2496 #define ZGEMM_DEFAULT_Q 120
2498 #define SGEMM_DEFAULT_R 12288
2499 #define DGEMM_DEFAULT_R 8192
2500 #define CGEMM_DEFAULT_R 4096
2501 #define ZGEMM_DEFAULT_R 4096
/* GEMM default-parameter group: 4x2 unroll for S/D, 2x2 for C/Z, literal
 * P/Q/R values.
 * NOTE(review): the preprocessor guard that selects this group is not visible
 * in this excerpt. */
2513 #define GEMM_DEFAULT_OFFSET_A 0
2514 #define GEMM_DEFAULT_OFFSET_B 0
2515 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2517 #define SGEMM_DEFAULT_UNROLL_M 4
2518 #define SGEMM_DEFAULT_UNROLL_N 2
2520 #define DGEMM_DEFAULT_UNROLL_M 4
2521 #define DGEMM_DEFAULT_UNROLL_N 2
2523 #define CGEMM_DEFAULT_UNROLL_M 2
2524 #define CGEMM_DEFAULT_UNROLL_N 2
2526 #define ZGEMM_DEFAULT_UNROLL_M 2
2527 #define ZGEMM_DEFAULT_UNROLL_N 2
2529 #define SGEMM_DEFAULT_P 128
2530 #define DGEMM_DEFAULT_P 128
2531 #define CGEMM_DEFAULT_P 96
2532 #define ZGEMM_DEFAULT_P 64
2534 #define SGEMM_DEFAULT_Q 240
2535 #define DGEMM_DEFAULT_Q 120
2536 #define CGEMM_DEFAULT_Q 120
2537 #define ZGEMM_DEFAULT_Q 120
2539 #define SGEMM_DEFAULT_R 12288
2540 #define DGEMM_DEFAULT_R 8192
2541 #define CGEMM_DEFAULT_R 4096
2542 #define ZGEMM_DEFAULT_R 4096
/* ARMv8 GEMM defaults. The offsets and alignment below are shared; the
 * unroll factors and P/Q/R values are then chosen per build target and core.
 * NOTE(review): the outer guard for this ARMv8 section is not visible in
 * this excerpt. */
2548 // Common ARMv8 parameters
2554 #define GEMM_DEFAULT_OFFSET_A 0
2555 #define GEMM_DEFAULT_OFFSET_B 0
2556 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Cross-compiling for Darwin (OS_DARWIN and CROSS both defined): 2x2 unroll
 * factors for all four variants. */
2561 #if defined(OS_DARWIN) && defined(CROSS)
2563 #define SGEMM_DEFAULT_UNROLL_M 2
2564 #define SGEMM_DEFAULT_UNROLL_N 2
2566 #define DGEMM_DEFAULT_UNROLL_M 2
2567 #define DGEMM_DEFAULT_UNROLL_N 2
2569 #define CGEMM_DEFAULT_UNROLL_M 2
2570 #define CGEMM_DEFAULT_UNROLL_N 2
2572 #define ZGEMM_DEFAULT_UNROLL_M 2
2573 #define ZGEMM_DEFAULT_UNROLL_N 2
2575 #define SGEMM_DEFAULT_P 128
2576 #define DGEMM_DEFAULT_P 128
2577 #define CGEMM_DEFAULT_P 96
2578 #define ZGEMM_DEFAULT_P 64
2580 #define SGEMM_DEFAULT_Q 240
2581 #define DGEMM_DEFAULT_Q 120
2582 #define CGEMM_DEFAULT_Q 120
2583 #define ZGEMM_DEFAULT_Q 120
2585 #define SGEMM_DEFAULT_R 12288
2586 #define DGEMM_DEFAULT_R 8192
2587 #define CGEMM_DEFAULT_R 4096
2588 #define ZGEMM_DEFAULT_R 4096
2590 #else // Linux / Native
/* Per-core selection. First branch: CORTEXA53, CORTEXA57, CORTEXA72 or
 * CORTEXA73. NOTE(review): this multi-line condition ends in a line
 * continuation whose final line is not present in this excerpt, so the full
 * list of cores cannot be confirmed from here. */
2592 #if defined(CORTEXA53) || defined(CORTEXA57) || \
2593 defined(CORTEXA72) || defined(CORTEXA73) || \
2596 #define SGEMM_DEFAULT_UNROLL_M 16
2597 #define SGEMM_DEFAULT_UNROLL_N 4
2599 #define DGEMM_DEFAULT_UNROLL_M 8
2600 #define DGEMM_DEFAULT_UNROLL_N 4
2602 #define CGEMM_DEFAULT_UNROLL_M 8
2603 #define CGEMM_DEFAULT_UNROLL_N 4
2605 #define ZGEMM_DEFAULT_UNROLL_M 4
2606 #define ZGEMM_DEFAULT_UNROLL_N 4
2608 #define SGEMM_DEFAULT_P 512
2609 #define DGEMM_DEFAULT_P 256
2610 #define CGEMM_DEFAULT_P 256
2611 #define ZGEMM_DEFAULT_P 128
2613 #define SGEMM_DEFAULT_Q 1024
2614 #define DGEMM_DEFAULT_Q 512
2615 #define CGEMM_DEFAULT_Q 512
2616 #define ZGEMM_DEFAULT_Q 512
2618 #define SGEMM_DEFAULT_R 4096
2619 #define DGEMM_DEFAULT_R 4096
2620 #define CGEMM_DEFAULT_R 4096
2621 #define ZGEMM_DEFAULT_R 2048
/* THUNDERX: P/Q/R values match the Darwin cross-compile branch above; the
 * only difference is the 4x4 SGEMM unroll. */
2623 #elif defined(THUNDERX)
2625 #define SGEMM_DEFAULT_UNROLL_M 4
2626 #define SGEMM_DEFAULT_UNROLL_N 4
2628 #define DGEMM_DEFAULT_UNROLL_M 2
2629 #define DGEMM_DEFAULT_UNROLL_N 2
2631 #define CGEMM_DEFAULT_UNROLL_M 2
2632 #define CGEMM_DEFAULT_UNROLL_N 2
2634 #define ZGEMM_DEFAULT_UNROLL_M 2
2635 #define ZGEMM_DEFAULT_UNROLL_N 2
2637 #define SGEMM_DEFAULT_P 128
2638 #define DGEMM_DEFAULT_P 128
2639 #define CGEMM_DEFAULT_P 96
2640 #define ZGEMM_DEFAULT_P 64
2642 #define SGEMM_DEFAULT_Q 240
2643 #define DGEMM_DEFAULT_Q 120
2644 #define CGEMM_DEFAULT_Q 120
2645 #define ZGEMM_DEFAULT_Q 120
2647 #define SGEMM_DEFAULT_R 12288
2648 #define DGEMM_DEFAULT_R 8192
2649 #define CGEMM_DEFAULT_R 4096
2650 #define ZGEMM_DEFAULT_R 4096
/* THUNDERX2T99: same unroll factors as the Cortex branch, with its own
 * P/Q/R values. */
2652 #elif defined(THUNDERX2T99)
2654 #define SGEMM_DEFAULT_UNROLL_M 16
2655 #define SGEMM_DEFAULT_UNROLL_N 4
2657 #define DGEMM_DEFAULT_UNROLL_M 8
2658 #define DGEMM_DEFAULT_UNROLL_N 4
2660 #define CGEMM_DEFAULT_UNROLL_M 8
2661 #define CGEMM_DEFAULT_UNROLL_N 4
2663 #define ZGEMM_DEFAULT_UNROLL_M 4
2664 #define ZGEMM_DEFAULT_UNROLL_N 4
2666 #define SGEMM_DEFAULT_P 128
2667 #define DGEMM_DEFAULT_P 160
2668 #define CGEMM_DEFAULT_P 128
2669 #define ZGEMM_DEFAULT_P 128
2671 #define SGEMM_DEFAULT_Q 352
2672 #define DGEMM_DEFAULT_Q 128
2673 #define CGEMM_DEFAULT_Q 224
2674 #define ZGEMM_DEFAULT_Q 112
2676 #define SGEMM_DEFAULT_R 4096
2677 #define DGEMM_DEFAULT_R 4096
2678 #define CGEMM_DEFAULT_R 4096
2679 #define ZGEMM_DEFAULT_R 4096
/* Fallback for any other core: every value here is identical to the
 * THUNDERX2T99 branch above, so the two must be kept in sync if that is
 * intentional. */
2681 #else // Other/undetected ARMv8 cores
2683 #define SGEMM_DEFAULT_UNROLL_M 16
2684 #define SGEMM_DEFAULT_UNROLL_N 4
2686 #define DGEMM_DEFAULT_UNROLL_M 8
2687 #define DGEMM_DEFAULT_UNROLL_N 4
2689 #define CGEMM_DEFAULT_UNROLL_M 8
2690 #define CGEMM_DEFAULT_UNROLL_N 4
2692 #define ZGEMM_DEFAULT_UNROLL_M 4
2693 #define ZGEMM_DEFAULT_UNROLL_N 4
2695 #define SGEMM_DEFAULT_P 128
2696 #define DGEMM_DEFAULT_P 160
2697 #define CGEMM_DEFAULT_P 128
2698 #define ZGEMM_DEFAULT_P 128
2700 #define SGEMM_DEFAULT_Q 352
2701 #define DGEMM_DEFAULT_Q 128
2702 #define CGEMM_DEFAULT_Q 224
2703 #define ZGEMM_DEFAULT_Q 112
2705 #define SGEMM_DEFAULT_R 4096
2706 #define DGEMM_DEFAULT_R 4096
2707 #define CGEMM_DEFAULT_R 4096
2708 #define ZGEMM_DEFAULT_R 4096
/* Closes the Darwin-cross vs. Linux/native conditional opened above. */
2712 #endif // Linux / Darwin
/* Parameter set for a target whose selecting conditional is not present in
 * this listing. Offsets are 0, the alignment value is 0x03fff, unroll is 2x2
 * for all four prefixes. */
2720 #define GEMM_DEFAULT_OFFSET_A 0
2721 #define GEMM_DEFAULT_OFFSET_B 0
2722 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2724 #define SGEMM_DEFAULT_UNROLL_M 2
2725 #define SGEMM_DEFAULT_UNROLL_N 2
2727 #define DGEMM_DEFAULT_UNROLL_M 2
2728 #define DGEMM_DEFAULT_UNROLL_N 2
2730 #define CGEMM_DEFAULT_UNROLL_M 2
2731 #define CGEMM_DEFAULT_UNROLL_N 2
2733 #define ZGEMM_DEFAULT_UNROLL_M 2
2734 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values; NOTE(review): presumably blocking sizes -- confirm. */
2736 #define SGEMM_DEFAULT_P 128
2737 #define DGEMM_DEFAULT_P 128
2738 #define CGEMM_DEFAULT_P 96
2739 #define ZGEMM_DEFAULT_P 64
2741 #define SGEMM_DEFAULT_Q 240
2742 #define DGEMM_DEFAULT_Q 120
2743 #define CGEMM_DEFAULT_Q 120
2744 #define ZGEMM_DEFAULT_Q 120
2746 #define SGEMM_DEFAULT_R 12288
2747 #define DGEMM_DEFAULT_R 8192
2748 #define CGEMM_DEFAULT_R 4096
2749 #define ZGEMM_DEFAULT_R 4096
/* Parameter set for a target whose selecting conditional is not present in
 * this listing. Offsets are 0, the alignment value is 0x03fff, unroll is 4x4
 * for the S and D prefixes and 2x2 for the C and Z prefixes. */
2761 #define GEMM_DEFAULT_OFFSET_A 0
2762 #define GEMM_DEFAULT_OFFSET_B 0
2763 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2765 #define SGEMM_DEFAULT_UNROLL_M 4
2766 #define SGEMM_DEFAULT_UNROLL_N 4
2768 #define DGEMM_DEFAULT_UNROLL_M 4
2769 #define DGEMM_DEFAULT_UNROLL_N 4
2771 #define CGEMM_DEFAULT_UNROLL_M 2
2772 #define CGEMM_DEFAULT_UNROLL_N 2
2774 #define ZGEMM_DEFAULT_UNROLL_M 2
2775 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values; NOTE(review): presumably blocking sizes -- confirm. */
2777 #define SGEMM_DEFAULT_P 128
2778 #define DGEMM_DEFAULT_P 128
2779 #define CGEMM_DEFAULT_P 96
2780 #define ZGEMM_DEFAULT_P 64
2782 #define SGEMM_DEFAULT_Q 240
2783 #define DGEMM_DEFAULT_Q 120
2784 #define CGEMM_DEFAULT_Q 120
2785 #define ZGEMM_DEFAULT_Q 120
2787 #define SGEMM_DEFAULT_R 12288
2788 #define DGEMM_DEFAULT_R 8192
2789 #define CGEMM_DEFAULT_R 4096
2790 #define ZGEMM_DEFAULT_R 4096
/* Parameter set for a target whose selecting conditional is not present in
 * this listing. Offsets are 0 and the alignment value is 0x03fff. Unroll is
 * 4x4 for S and D and 2x2 for C and Z. */
2802 #define GEMM_DEFAULT_OFFSET_A 0
2803 #define GEMM_DEFAULT_OFFSET_B 0
2804 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2806 #define SGEMM_DEFAULT_UNROLL_M 4
2807 #define SGEMM_DEFAULT_UNROLL_N 4
2809 #define DGEMM_DEFAULT_UNROLL_M 4
2810 #define DGEMM_DEFAULT_UNROLL_N 4
2812 #define CGEMM_DEFAULT_UNROLL_M 2
2813 #define CGEMM_DEFAULT_UNROLL_N 2
2815 #define ZGEMM_DEFAULT_UNROLL_M 2
2816 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values; NOTE(review): presumably blocking sizes -- confirm. */
2818 #define SGEMM_DEFAULT_P 128
2819 #define DGEMM_DEFAULT_P 128
2820 #define CGEMM_DEFAULT_P 96
2821 #define ZGEMM_DEFAULT_P 64
2823 #define SGEMM_DEFAULT_Q 240
2824 #define DGEMM_DEFAULT_Q 120
2825 #define CGEMM_DEFAULT_Q 120
2826 #define ZGEMM_DEFAULT_Q 120
2828 #define SGEMM_DEFAULT_R 12288
2829 #define DGEMM_DEFAULT_R 8192
2830 #define CGEMM_DEFAULT_R 4096
2831 #define ZGEMM_DEFAULT_R 4096
/* Parameter set used when ZARCH_GENERIC is defined: offsets 0, alignment
 * value 0x03fff, 2x2 unroll for all four prefixes.
 * NOTE(review): the directive that closes this #if is not present in this
 * listing -- confirm it in the full file. */
2839 #if defined(ZARCH_GENERIC)
2843 #define GEMM_DEFAULT_OFFSET_A 0
2844 #define GEMM_DEFAULT_OFFSET_B 0
2845 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2847 #define SGEMM_DEFAULT_UNROLL_M 2
2848 #define SGEMM_DEFAULT_UNROLL_N 2
2850 #define DGEMM_DEFAULT_UNROLL_M 2
2851 #define DGEMM_DEFAULT_UNROLL_N 2
2853 #define CGEMM_DEFAULT_UNROLL_M 2
2854 #define CGEMM_DEFAULT_UNROLL_N 2
2856 #define ZGEMM_DEFAULT_UNROLL_M 2
2857 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values; NOTE(review): presumably blocking sizes -- confirm. */
2859 #define SGEMM_DEFAULT_P 128
2860 #define DGEMM_DEFAULT_P 128
2861 #define CGEMM_DEFAULT_P 96
2862 #define ZGEMM_DEFAULT_P 64
2864 #define SGEMM_DEFAULT_Q 240
2865 #define DGEMM_DEFAULT_Q 120
2866 #define CGEMM_DEFAULT_Q 120
2867 #define ZGEMM_DEFAULT_Q 120
2869 #define SGEMM_DEFAULT_R 12288
2870 #define DGEMM_DEFAULT_R 8192
2871 #define CGEMM_DEFAULT_R 4096
2872 #define ZGEMM_DEFAULT_R 4096
/* Parameter set for a target whose selecting conditional is not present in
 * this listing. Offsets are 0, the alignment value is 0x03fff, unroll is 8x4
 * for the S and D prefixes and 4x4 for the C and Z prefixes. */
2882 #define GEMM_DEFAULT_OFFSET_A 0
2883 #define GEMM_DEFAULT_OFFSET_B 0
2884 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2886 #define SGEMM_DEFAULT_UNROLL_M 8
2887 #define SGEMM_DEFAULT_UNROLL_N 4
2889 #define DGEMM_DEFAULT_UNROLL_M 8
2890 #define DGEMM_DEFAULT_UNROLL_N 4
2892 #define CGEMM_DEFAULT_UNROLL_M 4
2893 #define CGEMM_DEFAULT_UNROLL_N 4
2895 #define ZGEMM_DEFAULT_UNROLL_M 4
2896 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P, Q and R values; NOTE(review): presumably blocking sizes -- confirm. */
2898 #define SGEMM_DEFAULT_P 456
2899 #define DGEMM_DEFAULT_P 320
2900 #define CGEMM_DEFAULT_P 480
2901 #define ZGEMM_DEFAULT_P 224
2903 #define SGEMM_DEFAULT_Q 488
2904 #define DGEMM_DEFAULT_Q 384
2905 #define CGEMM_DEFAULT_Q 128
2906 #define ZGEMM_DEFAULT_Q 352
2908 #define SGEMM_DEFAULT_R 8192
2909 #define DGEMM_DEFAULT_R 4096
2910 #define CGEMM_DEFAULT_R 4096
2911 #define ZGEMM_DEFAULT_R 2048
/* Parameter set for a target whose selecting conditional is not present in
 * this listing. Offsets are 0 and the alignment value is 0x03fff. Unroll is
 * 8x4 for S and D and 4x4 for C and Z. */
2922 #define GEMM_DEFAULT_OFFSET_A 0
2923 #define GEMM_DEFAULT_OFFSET_B 0
2924 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2926 #define SGEMM_DEFAULT_UNROLL_M 8
2927 #define SGEMM_DEFAULT_UNROLL_N 4
2929 #define DGEMM_DEFAULT_UNROLL_M 8
2930 #define DGEMM_DEFAULT_UNROLL_N 4
2932 #define CGEMM_DEFAULT_UNROLL_M 4
2933 #define CGEMM_DEFAULT_UNROLL_N 4
2935 #define ZGEMM_DEFAULT_UNROLL_M 4
2936 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P, Q and R values; NOTE(review): presumably blocking sizes -- confirm. */
2938 #define SGEMM_DEFAULT_P 456
2939 #define DGEMM_DEFAULT_P 320
2940 #define CGEMM_DEFAULT_P 480
2941 #define ZGEMM_DEFAULT_P 224
2943 #define SGEMM_DEFAULT_Q 488
2944 #define DGEMM_DEFAULT_Q 384
2945 #define CGEMM_DEFAULT_Q 128
2946 #define ZGEMM_DEFAULT_Q 352
2948 #define SGEMM_DEFAULT_R 8192
2949 #define DGEMM_DEFAULT_R 4096
2950 #define CGEMM_DEFAULT_R 4096
2951 #define ZGEMM_DEFAULT_R 2048
/* Parameter set for a target whose selecting conditional is not present in
 * this listing. Unlike the neighbouring sets it also covers the Q and X
 * prefixes and uses 0x0ffff (65535) as the alignment value. */
2964 #define GEMM_DEFAULT_OFFSET_A 0
2965 #define GEMM_DEFAULT_OFFSET_B 0
2966 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* UNROLL_N is 2 for every prefix except X, which uses 1. */
2968 #define SGEMM_DEFAULT_UNROLL_N 2
2969 #define DGEMM_DEFAULT_UNROLL_N 2
2970 #define QGEMM_DEFAULT_UNROLL_N 2
2971 #define CGEMM_DEFAULT_UNROLL_N 2
2972 #define ZGEMM_DEFAULT_UNROLL_N 2
2973 #define XGEMM_DEFAULT_UNROLL_N 1
/* The UNROLL_M group appears twice with identical values.
 * NOTE(review): presumably the two copies sit in alternative branches of a
 * conditional whose directives are not present in this listing -- confirm. */
2976 #define SGEMM_DEFAULT_UNROLL_M 2
2977 #define DGEMM_DEFAULT_UNROLL_M 2
2978 #define QGEMM_DEFAULT_UNROLL_M 2
2979 #define CGEMM_DEFAULT_UNROLL_M 2
2980 #define ZGEMM_DEFAULT_UNROLL_M 2
2981 #define XGEMM_DEFAULT_UNROLL_M 1
2983 #define SGEMM_DEFAULT_UNROLL_M 2
2984 #define DGEMM_DEFAULT_UNROLL_M 2
2985 #define QGEMM_DEFAULT_UNROLL_M 2
2986 #define CGEMM_DEFAULT_UNROLL_M 2
2987 #define ZGEMM_DEFAULT_UNROLL_M 2
2988 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to lowercase identifiers rather than numeric literals.
 * NOTE(review): presumably variables declared elsewhere and set at run time
 * -- confirm where sgemm_p, sgemm_r and the others are defined. */
2991 #define SGEMM_DEFAULT_P sgemm_p
2992 #define DGEMM_DEFAULT_P dgemm_p
2993 #define QGEMM_DEFAULT_P qgemm_p
2994 #define CGEMM_DEFAULT_P cgemm_p
2995 #define ZGEMM_DEFAULT_P zgemm_p
2996 #define XGEMM_DEFAULT_P xgemm_p
2998 #define SGEMM_DEFAULT_R sgemm_r
2999 #define DGEMM_DEFAULT_R dgemm_r
3000 #define QGEMM_DEFAULT_R qgemm_r
3001 #define CGEMM_DEFAULT_R cgemm_r
3002 #define ZGEMM_DEFAULT_R zgemm_r
3003 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for all six prefixes. */
3005 #define SGEMM_DEFAULT_Q 128
3006 #define DGEMM_DEFAULT_Q 128
3007 #define QGEMM_DEFAULT_Q 128
3008 #define CGEMM_DEFAULT_Q 128
3009 #define ZGEMM_DEFAULT_Q 128
3010 #define XGEMM_DEFAULT_Q 128
/* Fallback defaults: each QGEMM/XGEMM unroll macro is defined as 2 only when
 * it has not already been defined.
 * NOTE(review): the #endif matching each #ifndef is not present in this
 * listing -- confirm in the full file. */
3016 #ifndef QGEMM_DEFAULT_UNROLL_M
3017 #define QGEMM_DEFAULT_UNROLL_M 2
3020 #ifndef QGEMM_DEFAULT_UNROLL_N
3021 #define QGEMM_DEFAULT_UNROLL_N 2
3024 #ifndef XGEMM_DEFAULT_UNROLL_M
3025 #define XGEMM_DEFAULT_UNROLL_M 2
3028 #ifndef XGEMM_DEFAULT_UNROLL_N
3029 #define XGEMM_DEFAULT_UNROLL_N 2
/* Instruction-prefix macros: each expands to a mnemonic, an immediate and a
 * trailing comma, so the remaining operands are written at the use site.
 * SHUFPD_0..SHUFPD_3 are given twice: first as shufps with immediates
 * 0x44, 0x4e, 0xe4 and 0xee, then as shufpd with immediates 0 to 3.
 * NOTE(review): presumably the two groups are selected by conditionals whose
 * directives are not present in this listing -- confirm in the full file. */
3033 #define SHUFPD_0 shufps $0x44,
3034 #define SHUFPD_1 shufps $0x4e,
3035 #define SHUFPD_2 shufps $0xe4,
3036 #define SHUFPD_3 shufps $0xee,
3040 #define SHUFPD_0 shufpd $0,
3044 #define SHUFPD_1 shufpd $1,
3048 #define SHUFPD_2 shufpd $2,
3052 #define SHUFPD_3 shufpd $3,
/* SHUFPS_39 expands to shufps with immediate 0x39. */
3056 #define SHUFPS_39 shufps $0x39,