1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following disclaimer. */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/*
 * GEMM default parameter set: OFFSET_A/OFFSET_B/ALIGN, followed by the
 * UNROLL_N, UNROLL_M, P, R and Q defaults for the S/D/Q/C/Z/X-prefixed
 * GEMM variants.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
80 #define GEMM_DEFAULT_OFFSET_A 64
81 #define GEMM_DEFAULT_OFFSET_B 256
82 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
84 #define SGEMM_DEFAULT_UNROLL_N 4
85 #define DGEMM_DEFAULT_UNROLL_N 4
86 #define QGEMM_DEFAULT_UNROLL_N 2
87 #define CGEMM_DEFAULT_UNROLL_N 2
88 #define ZGEMM_DEFAULT_UNROLL_N 2
89 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M group (4/2/2/2/1/1). */
92 #define SGEMM_DEFAULT_UNROLL_M 4
93 #define DGEMM_DEFAULT_UNROLL_M 2
94 #define QGEMM_DEFAULT_UNROLL_M 2
95 #define CGEMM_DEFAULT_UNROLL_M 2
96 #define ZGEMM_DEFAULT_UNROLL_M 1
97 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group: the same six macros with the values 8/4/2/4/2/1.
 * NOTE(review): presumably the two groups are alternative #if/#else
 * branches whose directives are not visible here -- confirm.
 */
99 #define SGEMM_DEFAULT_UNROLL_M 8
100 #define DGEMM_DEFAULT_UNROLL_M 4
101 #define QGEMM_DEFAULT_UNROLL_M 2
102 #define CGEMM_DEFAULT_UNROLL_M 4
103 #define ZGEMM_DEFAULT_UNROLL_M 2
104 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * The P and R defaults expand to identifiers (sgemm_p, sgemm_r, ...) rather
 * than to integer literals; those identifiers are declared outside this
 * excerpt.
 */
107 #define SGEMM_DEFAULT_P sgemm_p
108 #define DGEMM_DEFAULT_P dgemm_p
109 #define QGEMM_DEFAULT_P qgemm_p
110 #define CGEMM_DEFAULT_P cgemm_p
111 #define ZGEMM_DEFAULT_P zgemm_p
112 #define XGEMM_DEFAULT_P xgemm_p
114 #define SGEMM_DEFAULT_R sgemm_r
115 #define DGEMM_DEFAULT_R dgemm_r
116 #define QGEMM_DEFAULT_R qgemm_r
117 #define CGEMM_DEFAULT_R cgemm_r
118 #define ZGEMM_DEFAULT_R zgemm_r
119 #define XGEMM_DEFAULT_R xgemm_r
/* First Q group: every variant uses 248. */
123 #define SGEMM_DEFAULT_Q 248
124 #define DGEMM_DEFAULT_Q 248
125 #define QGEMM_DEFAULT_Q 248
126 #define CGEMM_DEFAULT_Q 248
127 #define ZGEMM_DEFAULT_Q 248
128 #define XGEMM_DEFAULT_Q 248
/*
 * Second Q group: every variant uses 240.
 * NOTE(review): the condition choosing between 248 and 240 is not visible
 * in this excerpt -- confirm.
 */
132 #define SGEMM_DEFAULT_Q 240
133 #define DGEMM_DEFAULT_Q 240
134 #define QGEMM_DEFAULT_Q 240
135 #define CGEMM_DEFAULT_Q 240
136 #define ZGEMM_DEFAULT_Q 240
137 #define XGEMM_DEFAULT_Q 240
/* Feature flag; defined without a value. */
143 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM default parameter set introduced by a test for BARCELONA, SHANGHAI
 * or BOBCAT.
 * NOTE(review): the matching #endif is not visible in this excerpt; the
 * definitions below presumably all belong to this branch -- confirm.
 */
147 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
152 #define GEMM_DEFAULT_OFFSET_A 64
153 #define GEMM_DEFAULT_OFFSET_B 832
154 #define GEMM_DEFAULT_ALIGN 0x0fffUL
156 #define SGEMM_DEFAULT_UNROLL_N 4
157 #define DGEMM_DEFAULT_UNROLL_N 4
158 #define QGEMM_DEFAULT_UNROLL_N 2
159 #define CGEMM_DEFAULT_UNROLL_N 2
160 #define ZGEMM_DEFAULT_UNROLL_N 2
161 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M group (4/2/2/2/1/1). */
164 #define SGEMM_DEFAULT_UNROLL_M 4
165 #define DGEMM_DEFAULT_UNROLL_M 2
166 #define QGEMM_DEFAULT_UNROLL_M 2
167 #define CGEMM_DEFAULT_UNROLL_M 2
168 #define ZGEMM_DEFAULT_UNROLL_M 1
169 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group (8/4/2/4/2/1).
 * NOTE(review): presumably an alternative #if/#else branch; the directive
 * is not visible here -- confirm.
 */
171 #define SGEMM_DEFAULT_UNROLL_M 8
172 #define DGEMM_DEFAULT_UNROLL_M 4
173 #define QGEMM_DEFAULT_UNROLL_M 2
174 #define CGEMM_DEFAULT_UNROLL_M 4
175 #define ZGEMM_DEFAULT_UNROLL_M 2
176 #define XGEMM_DEFAULT_UNROLL_M 1
/* First P/Q pairing: literal P values 496..62 together with Q = 248. */
180 #define SGEMM_DEFAULT_P 496
181 #define DGEMM_DEFAULT_P 248
182 #define QGEMM_DEFAULT_P 124
183 #define CGEMM_DEFAULT_P 248
184 #define ZGEMM_DEFAULT_P 124
185 #define XGEMM_DEFAULT_P 62
187 #define SGEMM_DEFAULT_Q 248
188 #define DGEMM_DEFAULT_Q 248
189 #define QGEMM_DEFAULT_Q 248
190 #define CGEMM_DEFAULT_Q 248
191 #define ZGEMM_DEFAULT_Q 248
192 #define XGEMM_DEFAULT_Q 248
/*
 * Second P/Q pairing: literal P values 448..56 together with Q = 224.
 * NOTE(review): the condition choosing between the two pairings is not
 * visible in this excerpt -- confirm.
 */
196 #define SGEMM_DEFAULT_P 448
197 #define DGEMM_DEFAULT_P 224
198 #define QGEMM_DEFAULT_P 112
199 #define CGEMM_DEFAULT_P 224
200 #define ZGEMM_DEFAULT_P 112
201 #define XGEMM_DEFAULT_P 56
203 #define SGEMM_DEFAULT_Q 224
204 #define DGEMM_DEFAULT_Q 224
205 #define QGEMM_DEFAULT_Q 224
206 #define CGEMM_DEFAULT_Q 224
207 #define ZGEMM_DEFAULT_Q 224
208 #define XGEMM_DEFAULT_Q 224
/* R defaults expand to identifiers declared outside this excerpt. */
212 #define SGEMM_DEFAULT_R sgemm_r
213 #define QGEMM_DEFAULT_R qgemm_r
214 #define DGEMM_DEFAULT_R dgemm_r
215 #define CGEMM_DEFAULT_R cgemm_r
216 #define ZGEMM_DEFAULT_R zgemm_r
217 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag; defined without a value. */
220 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
222 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set: offsets/alignment, unroll factors (including
 * the 3M variants), P/Q/R defaults, GEMV_UNROLL and the threading selector.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
232 #define GEMM_DEFAULT_OFFSET_A 64
233 #define GEMM_DEFAULT_OFFSET_B 832
234 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X variants; the S/D values are set in the groups below. */
238 #define QGEMM_DEFAULT_UNROLL_N 2
239 #define CGEMM_DEFAULT_UNROLL_N 2
240 #define ZGEMM_DEFAULT_UNROLL_N 2
241 #define XGEMM_DEFAULT_UNROLL_N 1
/* First group: S/D UNROLL_N = 4 with UNROLL_M = 4/2/2/2/1/1. */
244 #define SGEMM_DEFAULT_UNROLL_N 4
245 #define DGEMM_DEFAULT_UNROLL_N 4
246 #define SGEMM_DEFAULT_UNROLL_M 4
247 #define DGEMM_DEFAULT_UNROLL_M 2
248 #define QGEMM_DEFAULT_UNROLL_M 2
249 #define CGEMM_DEFAULT_UNROLL_M 2
250 #define ZGEMM_DEFAULT_UNROLL_M 1
251 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second group: S/D UNROLL_N = 2 with UNROLL_M = 16/8/2/4/2/1, plus the
 * CGEMM3M/ZGEMM3M unroll factors.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
253 #define SGEMM_DEFAULT_UNROLL_N 2
254 #define DGEMM_DEFAULT_UNROLL_N 2
255 #define SGEMM_DEFAULT_UNROLL_M 16
256 #define DGEMM_DEFAULT_UNROLL_M 8
257 #define QGEMM_DEFAULT_UNROLL_M 2
258 #define CGEMM_DEFAULT_UNROLL_M 4
259 #define ZGEMM_DEFAULT_UNROLL_M 2
260 #define XGEMM_DEFAULT_UNROLL_M 1
261 #define CGEMM3M_DEFAULT_UNROLL_N 4
262 #define CGEMM3M_DEFAULT_UNROLL_M 8
263 #define ZGEMM3M_DEFAULT_UNROLL_N 4
264 #define ZGEMM3M_DEFAULT_UNROLL_M 4
266 #define DGEMM_DEFAULT_UNROLL_MN 16
267 #define GEMV_UNROLL 8
/*
 * S/D P defaults: 768/384 directly after the ARCH_X86_64 test, then a
 * second pair 448/224.
 * NOTE(review): the second pair is presumably the #else branch; the #else
 * and #endif directives are not visible here -- confirm.
 */
271 #if defined(ARCH_X86_64)
272 #define SGEMM_DEFAULT_P 768
273 #define DGEMM_DEFAULT_P 384
275 #define SGEMM_DEFAULT_P 448
276 #define DGEMM_DEFAULT_P 224
279 #define QGEMM_DEFAULT_P 112
280 #define CGEMM_DEFAULT_P 224
281 #define ZGEMM_DEFAULT_P 112
282 #define XGEMM_DEFAULT_P 56
/* S/D Q defaults follow the same two-pair pattern: 168 first, then 224. */
284 #if defined(ARCH_X86_64)
285 #define SGEMM_DEFAULT_Q 168
286 #define DGEMM_DEFAULT_Q 168
288 #define SGEMM_DEFAULT_Q 224
289 #define DGEMM_DEFAULT_Q 224
292 #define QGEMM_DEFAULT_Q 224
293 #define CGEMM_DEFAULT_Q 224
294 #define ZGEMM_DEFAULT_Q 224
295 #define XGEMM_DEFAULT_Q 224
/* P/Q/R defaults for the 3M variants are integer literals. */
297 #define CGEMM3M_DEFAULT_P 448
298 #define ZGEMM3M_DEFAULT_P 224
299 #define XGEMM3M_DEFAULT_P 112
300 #define CGEMM3M_DEFAULT_Q 224
301 #define ZGEMM3M_DEFAULT_Q 224
302 #define XGEMM3M_DEFAULT_Q 224
303 #define CGEMM3M_DEFAULT_R 12288
304 #define ZGEMM3M_DEFAULT_R 12288
305 #define XGEMM3M_DEFAULT_R 12288
/* R defaults expand to identifiers declared outside this excerpt. */
307 #define SGEMM_DEFAULT_R sgemm_r
308 #define QGEMM_DEFAULT_R qgemm_r
309 #define DGEMM_DEFAULT_R dgemm_r
310 #define CGEMM_DEFAULT_R cgemm_r
311 #define ZGEMM_DEFAULT_R zgemm_r
312 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag; defined without a value. */
315 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
317 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set: offsets/alignment, unroll factors (including
 * the 3M variants), P/Q/R defaults, GEMV_UNROLL and the threading selector.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
325 #define GEMM_DEFAULT_OFFSET_A 64
326 #define GEMM_DEFAULT_OFFSET_B 832
327 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X variants; the S/D values are set in the groups below. */
331 #define QGEMM_DEFAULT_UNROLL_N 2
332 #define CGEMM_DEFAULT_UNROLL_N 2
333 #define ZGEMM_DEFAULT_UNROLL_N 2
334 #define XGEMM_DEFAULT_UNROLL_N 1
/* First group: S/D UNROLL_N = 4 with UNROLL_M = 4/2/2/2/1/1. */
337 #define SGEMM_DEFAULT_UNROLL_N 4
338 #define DGEMM_DEFAULT_UNROLL_N 4
339 #define SGEMM_DEFAULT_UNROLL_M 4
340 #define DGEMM_DEFAULT_UNROLL_M 2
341 #define QGEMM_DEFAULT_UNROLL_M 2
342 #define CGEMM_DEFAULT_UNROLL_M 2
343 #define ZGEMM_DEFAULT_UNROLL_M 1
344 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second group: S/D UNROLL_N = 2 with UNROLL_M = 16/8/2/4/2/1, plus the
 * CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
346 #define SGEMM_DEFAULT_UNROLL_N 2
347 #define DGEMM_DEFAULT_UNROLL_N 2
348 #define SGEMM_DEFAULT_UNROLL_M 16
349 #define DGEMM_DEFAULT_UNROLL_M 8
350 #define QGEMM_DEFAULT_UNROLL_M 2
351 #define CGEMM_DEFAULT_UNROLL_M 4
352 #define ZGEMM_DEFAULT_UNROLL_M 2
353 #define XGEMM_DEFAULT_UNROLL_M 1
354 #define CGEMM3M_DEFAULT_UNROLL_N 4
355 #define CGEMM3M_DEFAULT_UNROLL_M 8
356 #define ZGEMM3M_DEFAULT_UNROLL_N 4
357 #define ZGEMM3M_DEFAULT_UNROLL_M 4
358 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P defaults: 768/768/384/768 directly after the ARCH_X86_64 test,
 * then a second set 448/480/112/224.
 * NOTE(review): the second set is presumably the #else branch; the #else
 * and #endif directives are not visible here -- confirm.
 */
361 #if defined(ARCH_X86_64)
362 #define SGEMM_DEFAULT_P 768
363 #define DGEMM_DEFAULT_P 768
364 #define ZGEMM_DEFAULT_P 384
365 #define CGEMM_DEFAULT_P 768
367 #define SGEMM_DEFAULT_P 448
368 #define DGEMM_DEFAULT_P 480
369 #define ZGEMM_DEFAULT_P 112
370 #define CGEMM_DEFAULT_P 224
372 #define QGEMM_DEFAULT_P 112
373 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q defaults follow the same pattern: 192/168/168/168, then 224 for all four. */
375 #if defined(ARCH_X86_64)
376 #define SGEMM_DEFAULT_Q 192
377 #define DGEMM_DEFAULT_Q 168
378 #define ZGEMM_DEFAULT_Q 168
379 #define CGEMM_DEFAULT_Q 168
381 #define SGEMM_DEFAULT_Q 224
382 #define DGEMM_DEFAULT_Q 224
383 #define ZGEMM_DEFAULT_Q 224
384 #define CGEMM_DEFAULT_Q 224
386 #define QGEMM_DEFAULT_Q 224
387 #define XGEMM_DEFAULT_Q 224
/* P/Q/R defaults for the 3M variants are integer literals. */
389 #define CGEMM3M_DEFAULT_P 448
390 #define ZGEMM3M_DEFAULT_P 224
391 #define XGEMM3M_DEFAULT_P 112
392 #define CGEMM3M_DEFAULT_Q 224
393 #define ZGEMM3M_DEFAULT_Q 224
394 #define XGEMM3M_DEFAULT_Q 224
395 #define CGEMM3M_DEFAULT_R 12288
396 #define ZGEMM3M_DEFAULT_R 12288
397 #define XGEMM3M_DEFAULT_R 12288
/*
 * R defaults: S and D use the literal 12288; Q/C/Z/X expand to identifiers
 * declared outside this excerpt.
 */
399 #define SGEMM_DEFAULT_R 12288
400 #define QGEMM_DEFAULT_R qgemm_r
401 #define DGEMM_DEFAULT_R 12288
402 #define CGEMM_DEFAULT_R cgemm_r
403 #define ZGEMM_DEFAULT_R zgemm_r
404 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag; defined without a value. */
407 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
409 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set: offsets/alignment, unroll factors (including
 * the 3M variants), P/Q/R defaults, GEMV_UNROLL and the threading selector.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
417 #define GEMM_DEFAULT_OFFSET_A 64
418 #define GEMM_DEFAULT_OFFSET_B 832
419 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X variants; the S/D values are set in the groups below. */
423 #define QGEMM_DEFAULT_UNROLL_N 2
424 #define CGEMM_DEFAULT_UNROLL_N 2
425 #define ZGEMM_DEFAULT_UNROLL_N 2
426 #define XGEMM_DEFAULT_UNROLL_N 1
/* First group: S/D UNROLL_N = 4 with UNROLL_M = 4/2/2/2/1/1. */
429 #define SGEMM_DEFAULT_UNROLL_N 4
430 #define DGEMM_DEFAULT_UNROLL_N 4
431 #define SGEMM_DEFAULT_UNROLL_M 4
432 #define DGEMM_DEFAULT_UNROLL_M 2
433 #define QGEMM_DEFAULT_UNROLL_M 2
434 #define CGEMM_DEFAULT_UNROLL_M 2
435 #define ZGEMM_DEFAULT_UNROLL_M 1
436 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second group: S/D UNROLL_N = 2 with UNROLL_M = 16/8/2/4/2/1, plus the
 * CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
438 #define SGEMM_DEFAULT_UNROLL_N 2
439 #define DGEMM_DEFAULT_UNROLL_N 2
440 #define SGEMM_DEFAULT_UNROLL_M 16
441 #define DGEMM_DEFAULT_UNROLL_M 8
442 #define QGEMM_DEFAULT_UNROLL_M 2
443 #define CGEMM_DEFAULT_UNROLL_M 4
444 #define ZGEMM_DEFAULT_UNROLL_M 2
445 #define XGEMM_DEFAULT_UNROLL_M 1
446 #define CGEMM3M_DEFAULT_UNROLL_N 4
447 #define CGEMM3M_DEFAULT_UNROLL_M 8
448 #define ZGEMM3M_DEFAULT_UNROLL_N 4
449 #define ZGEMM3M_DEFAULT_UNROLL_M 4
450 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P defaults: 768/576/288/576 directly after the ARCH_X86_64 test,
 * then a second set 448/480/112/224.
 * NOTE(review): the second set is presumably the #else branch; the #else
 * and #endif directives are not visible here -- confirm.
 */
453 #if defined(ARCH_X86_64)
454 #define SGEMM_DEFAULT_P 768
455 #define DGEMM_DEFAULT_P 576
456 #define ZGEMM_DEFAULT_P 288
457 #define CGEMM_DEFAULT_P 576
459 #define SGEMM_DEFAULT_P 448
460 #define DGEMM_DEFAULT_P 480
461 #define ZGEMM_DEFAULT_P 112
462 #define CGEMM_DEFAULT_P 224
464 #define QGEMM_DEFAULT_P 112
465 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q defaults follow the same pattern: 192/160/160/160, then 224 for all four. */
467 #if defined(ARCH_X86_64)
468 #define SGEMM_DEFAULT_Q 192
469 #define DGEMM_DEFAULT_Q 160
470 #define ZGEMM_DEFAULT_Q 160
471 #define CGEMM_DEFAULT_Q 160
473 #define SGEMM_DEFAULT_Q 224
474 #define DGEMM_DEFAULT_Q 224
475 #define ZGEMM_DEFAULT_Q 224
476 #define CGEMM_DEFAULT_Q 224
478 #define QGEMM_DEFAULT_Q 224
479 #define XGEMM_DEFAULT_Q 224
/* P/Q/R defaults for the 3M variants are integer literals. */
481 #define CGEMM3M_DEFAULT_P 448
482 #define ZGEMM3M_DEFAULT_P 224
483 #define XGEMM3M_DEFAULT_P 112
484 #define CGEMM3M_DEFAULT_Q 224
485 #define ZGEMM3M_DEFAULT_Q 224
486 #define XGEMM3M_DEFAULT_Q 224
487 #define CGEMM3M_DEFAULT_R 12288
488 #define ZGEMM3M_DEFAULT_R 12288
489 #define XGEMM3M_DEFAULT_R 12288
/*
 * R defaults: S and D use the literal 12288; Q/C/Z/X expand to identifiers
 * declared outside this excerpt.
 */
491 #define SGEMM_DEFAULT_R 12288
492 #define QGEMM_DEFAULT_R qgemm_r
493 #define DGEMM_DEFAULT_R 12288
494 #define CGEMM_DEFAULT_R cgemm_r
495 #define ZGEMM_DEFAULT_R zgemm_r
496 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag; defined without a value. */
499 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
501 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set: offsets/alignment, unroll factors (including
 * the 3M variants), P/Q/R defaults, GEMV_UNROLL and the threading selector.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
510 #define GEMM_DEFAULT_OFFSET_A 64
511 #define GEMM_DEFAULT_OFFSET_B 832
512 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* UNROLL_N for the Q/C/Z/X variants; the S/D values are set in the groups below. */
516 #define QGEMM_DEFAULT_UNROLL_N 2
517 #define CGEMM_DEFAULT_UNROLL_N 2
518 #define ZGEMM_DEFAULT_UNROLL_N 2
519 #define XGEMM_DEFAULT_UNROLL_N 1
/* First group: S/D UNROLL_N = 4 with UNROLL_M = 4/2/2/2/1/1. */
522 #define SGEMM_DEFAULT_UNROLL_N 4
523 #define DGEMM_DEFAULT_UNROLL_N 4
524 #define SGEMM_DEFAULT_UNROLL_M 4
525 #define DGEMM_DEFAULT_UNROLL_M 2
526 #define QGEMM_DEFAULT_UNROLL_M 2
527 #define CGEMM_DEFAULT_UNROLL_M 2
528 #define ZGEMM_DEFAULT_UNROLL_M 1
529 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second group: S/D UNROLL_N = 2 with UNROLL_M = 16/8/2/4/2/1, plus the
 * CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
531 #define SGEMM_DEFAULT_UNROLL_N 2
532 #define DGEMM_DEFAULT_UNROLL_N 2
533 #define SGEMM_DEFAULT_UNROLL_M 16
534 #define DGEMM_DEFAULT_UNROLL_M 8
535 #define QGEMM_DEFAULT_UNROLL_M 2
536 #define CGEMM_DEFAULT_UNROLL_M 4
537 #define ZGEMM_DEFAULT_UNROLL_M 2
538 #define XGEMM_DEFAULT_UNROLL_M 1
539 #define CGEMM3M_DEFAULT_UNROLL_N 4
540 #define CGEMM3M_DEFAULT_UNROLL_M 8
541 #define ZGEMM3M_DEFAULT_UNROLL_N 4
542 #define ZGEMM3M_DEFAULT_UNROLL_M 4
543 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P defaults: 768/576/288/576 directly after the ARCH_X86_64 test,
 * then a second set 448/480/112/224.
 * NOTE(review): the second set is presumably the #else branch; the #else
 * and #endif directives are not visible here -- confirm.
 */
546 #if defined(ARCH_X86_64)
547 #define SGEMM_DEFAULT_P 768
548 #define DGEMM_DEFAULT_P 576
549 #define ZGEMM_DEFAULT_P 288
550 #define CGEMM_DEFAULT_P 576
552 #define SGEMM_DEFAULT_P 448
553 #define DGEMM_DEFAULT_P 480
554 #define ZGEMM_DEFAULT_P 112
555 #define CGEMM_DEFAULT_P 224
557 #define QGEMM_DEFAULT_P 112
558 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q defaults follow the same pattern: 192/160/160/160, then 224 for all four. */
560 #if defined(ARCH_X86_64)
561 #define SGEMM_DEFAULT_Q 192
562 #define DGEMM_DEFAULT_Q 160
563 #define ZGEMM_DEFAULT_Q 160
564 #define CGEMM_DEFAULT_Q 160
566 #define SGEMM_DEFAULT_Q 224
567 #define DGEMM_DEFAULT_Q 224
568 #define ZGEMM_DEFAULT_Q 224
569 #define CGEMM_DEFAULT_Q 224
571 #define QGEMM_DEFAULT_Q 224
572 #define XGEMM_DEFAULT_Q 224
/* P/Q/R defaults for the 3M variants are integer literals. */
574 #define CGEMM3M_DEFAULT_P 448
575 #define ZGEMM3M_DEFAULT_P 224
576 #define XGEMM3M_DEFAULT_P 112
577 #define CGEMM3M_DEFAULT_Q 224
578 #define ZGEMM3M_DEFAULT_Q 224
579 #define XGEMM3M_DEFAULT_Q 224
580 #define CGEMM3M_DEFAULT_R 12288
581 #define ZGEMM3M_DEFAULT_R 12288
582 #define XGEMM3M_DEFAULT_R 12288
/*
 * R defaults: S and D use the literal 12288; Q/C/Z/X expand to identifiers
 * declared outside this excerpt.
 */
584 #define SGEMM_DEFAULT_R 12288
585 #define QGEMM_DEFAULT_R qgemm_r
586 #define DGEMM_DEFAULT_R 12288
587 #define CGEMM_DEFAULT_R cgemm_r
588 #define ZGEMM_DEFAULT_R zgemm_r
589 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag; defined without a value. */
592 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
594 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set with a single value per macro: offsets and
 * alignment, UNROLL_N/UNROLL_M, R (identifiers), P and Q (literals).
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
603 #define GEMM_DEFAULT_OFFSET_A 0
604 #define GEMM_DEFAULT_OFFSET_B 384
605 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
607 #define SGEMM_DEFAULT_UNROLL_N 4
608 #define DGEMM_DEFAULT_UNROLL_N 4
609 #define QGEMM_DEFAULT_UNROLL_N 2
610 #define CGEMM_DEFAULT_UNROLL_N 2
611 #define ZGEMM_DEFAULT_UNROLL_N 2
612 #define XGEMM_DEFAULT_UNROLL_N 1
614 #define SGEMM_DEFAULT_UNROLL_M 2
615 #define DGEMM_DEFAULT_UNROLL_M 1
616 #define QGEMM_DEFAULT_UNROLL_M 2
617 #define CGEMM_DEFAULT_UNROLL_M 1
618 #define ZGEMM_DEFAULT_UNROLL_M 1
619 #define XGEMM_DEFAULT_UNROLL_M 1
/* R defaults expand to identifiers declared outside this excerpt. */
621 #define SGEMM_DEFAULT_R sgemm_r
622 #define DGEMM_DEFAULT_R dgemm_r
623 #define QGEMM_DEFAULT_R qgemm_r
624 #define CGEMM_DEFAULT_R cgemm_r
625 #define ZGEMM_DEFAULT_R zgemm_r
626 #define XGEMM_DEFAULT_R xgemm_r
/* P defaults are literals that differ per variant (208 down to 28). */
628 #define SGEMM_DEFAULT_P 208
629 #define DGEMM_DEFAULT_P 104
630 #define QGEMM_DEFAULT_P 56
631 #define CGEMM_DEFAULT_P 104
632 #define ZGEMM_DEFAULT_P 56
633 #define XGEMM_DEFAULT_P 28
/* Q is 208 for every variant. */
635 #define SGEMM_DEFAULT_Q 208
636 #define DGEMM_DEFAULT_Q 208
637 #define QGEMM_DEFAULT_Q 208
638 #define CGEMM_DEFAULT_Q 208
639 #define ZGEMM_DEFAULT_Q 208
640 #define XGEMM_DEFAULT_Q 208
/* Feature flag; defined without a value. */
643 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM default parameter set with a single value per macro: offsets and
 * alignment, UNROLL_N/UNROLL_M, R (identifiers), P and Q (literals).
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
651 #define GEMM_DEFAULT_OFFSET_A 0
652 #define GEMM_DEFAULT_OFFSET_B 256
653 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
655 #define SGEMM_DEFAULT_UNROLL_N 4
656 #define DGEMM_DEFAULT_UNROLL_N 4
657 #define QGEMM_DEFAULT_UNROLL_N 2
658 #define CGEMM_DEFAULT_UNROLL_N 2
659 #define ZGEMM_DEFAULT_UNROLL_N 2
660 #define XGEMM_DEFAULT_UNROLL_N 1
662 #define SGEMM_DEFAULT_UNROLL_M 2
663 #define DGEMM_DEFAULT_UNROLL_M 1
664 #define QGEMM_DEFAULT_UNROLL_M 2
665 #define CGEMM_DEFAULT_UNROLL_M 1
666 #define ZGEMM_DEFAULT_UNROLL_M 1
667 #define XGEMM_DEFAULT_UNROLL_M 1
/* R defaults expand to identifiers declared outside this excerpt. */
669 #define SGEMM_DEFAULT_R sgemm_r
670 #define DGEMM_DEFAULT_R dgemm_r
671 #define QGEMM_DEFAULT_R qgemm_r
672 #define CGEMM_DEFAULT_R cgemm_r
673 #define ZGEMM_DEFAULT_R zgemm_r
674 #define XGEMM_DEFAULT_R xgemm_r
/* P is 128 for every variant. */
676 #define SGEMM_DEFAULT_P 128
677 #define DGEMM_DEFAULT_P 128
678 #define QGEMM_DEFAULT_P 128
679 #define CGEMM_DEFAULT_P 128
680 #define ZGEMM_DEFAULT_P 128
681 #define XGEMM_DEFAULT_P 128
/* Q differs per variant: 512 for S, 256 for D/Q/C, 128 for Z/X. */
683 #define SGEMM_DEFAULT_Q 512
684 #define DGEMM_DEFAULT_Q 256
685 #define QGEMM_DEFAULT_Q 256
686 #define CGEMM_DEFAULT_Q 256
687 #define ZGEMM_DEFAULT_Q 128
688 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM default parameter set: offsets/alignment, two alternative
 * UNROLL_N/UNROLL_M groups, then P, R and Q defaults.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
698 #define GEMM_DEFAULT_OFFSET_A 64
699 #define GEMM_DEFAULT_OFFSET_B 256
700 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* First unroll group: UNROLL_N = 4/4/2/2/2/1, UNROLL_M = 4/2/2/2/1/1. */
703 #define SGEMM_DEFAULT_UNROLL_N 4
704 #define DGEMM_DEFAULT_UNROLL_N 4
705 #define QGEMM_DEFAULT_UNROLL_N 2
706 #define CGEMM_DEFAULT_UNROLL_N 2
707 #define ZGEMM_DEFAULT_UNROLL_N 2
708 #define XGEMM_DEFAULT_UNROLL_N 1
710 #define SGEMM_DEFAULT_UNROLL_M 4
711 #define DGEMM_DEFAULT_UNROLL_M 2
712 #define QGEMM_DEFAULT_UNROLL_M 2
713 #define CGEMM_DEFAULT_UNROLL_M 2
714 #define ZGEMM_DEFAULT_UNROLL_M 1
715 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second unroll group: UNROLL_N = 8/4/2/4/2/1, UNROLL_M = 4/4/2/2/2/1.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
717 #define SGEMM_DEFAULT_UNROLL_N 8
718 #define DGEMM_DEFAULT_UNROLL_N 4
719 #define QGEMM_DEFAULT_UNROLL_N 2
720 #define CGEMM_DEFAULT_UNROLL_N 4
721 #define ZGEMM_DEFAULT_UNROLL_N 2
722 #define XGEMM_DEFAULT_UNROLL_N 1
724 #define SGEMM_DEFAULT_UNROLL_M 4
725 #define DGEMM_DEFAULT_UNROLL_M 4
726 #define QGEMM_DEFAULT_UNROLL_M 2
727 #define CGEMM_DEFAULT_UNROLL_M 2
728 #define ZGEMM_DEFAULT_UNROLL_M 2
729 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every variant. */
732 #define SGEMM_DEFAULT_P 288
733 #define DGEMM_DEFAULT_P 288
734 #define QGEMM_DEFAULT_P 288
735 #define CGEMM_DEFAULT_P 288
736 #define ZGEMM_DEFAULT_P 288
737 #define XGEMM_DEFAULT_P 288
/* R defaults expand to identifiers declared outside this excerpt. */
739 #define SGEMM_DEFAULT_R sgemm_r
740 #define DGEMM_DEFAULT_R dgemm_r
741 #define QGEMM_DEFAULT_R qgemm_r
742 #define CGEMM_DEFAULT_R cgemm_r
743 #define ZGEMM_DEFAULT_R zgemm_r
744 #define XGEMM_DEFAULT_R xgemm_r
/* Q differs per variant (256 down to 32). */
746 #define SGEMM_DEFAULT_Q 256
747 #define DGEMM_DEFAULT_Q 128
748 #define QGEMM_DEFAULT_Q 64
749 #define CGEMM_DEFAULT_Q 128
750 #define ZGEMM_DEFAULT_Q 64
751 #define XGEMM_DEFAULT_Q 32
/* Feature flag; defined without a value. */
754 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM default parameter set introduced by a test for PENTIUM, PENTIUM2 or
 * PENTIUM3.
 * NOTE(review): the matching #endif is not visible in this excerpt; the
 * definitions below presumably all belong to this branch -- confirm.
 */
758 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
767 #define GEMM_DEFAULT_OFFSET_A 0
768 #define GEMM_DEFAULT_OFFSET_B 0
769 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * SGEMM/CGEMM UNROLL_M appear twice: 8/4 first, then 4/2.
 * NOTE(review): presumably alternative #if/#else branches; the directives
 * are not visible here -- confirm the selecting condition.
 */
772 #define SGEMM_DEFAULT_UNROLL_M 8
773 #define CGEMM_DEFAULT_UNROLL_M 4
775 #define SGEMM_DEFAULT_UNROLL_M 4
776 #define CGEMM_DEFAULT_UNROLL_M 2
/* Remaining unroll factors have a single definition each. */
778 #define DGEMM_DEFAULT_UNROLL_M 2
779 #define SGEMM_DEFAULT_UNROLL_N 2
780 #define DGEMM_DEFAULT_UNROLL_N 2
781 #define QGEMM_DEFAULT_UNROLL_M 2
782 #define QGEMM_DEFAULT_UNROLL_N 2
783 #define CGEMM_DEFAULT_UNROLL_N 1
784 #define ZGEMM_DEFAULT_UNROLL_M 1
785 #define ZGEMM_DEFAULT_UNROLL_N 1
786 #define XGEMM_DEFAULT_UNROLL_M 1
787 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Per-variant P/Q/R triples: P and R expand to identifiers declared outside
 * this excerpt; Q is the literal 256 for every variant.
 */
789 #define SGEMM_DEFAULT_P sgemm_p
790 #define SGEMM_DEFAULT_Q 256
791 #define SGEMM_DEFAULT_R sgemm_r
793 #define DGEMM_DEFAULT_P dgemm_p
794 #define DGEMM_DEFAULT_Q 256
795 #define DGEMM_DEFAULT_R dgemm_r
797 #define QGEMM_DEFAULT_P qgemm_p
798 #define QGEMM_DEFAULT_Q 256
799 #define QGEMM_DEFAULT_R qgemm_r
801 #define CGEMM_DEFAULT_P cgemm_p
802 #define CGEMM_DEFAULT_Q 256
803 #define CGEMM_DEFAULT_R cgemm_r
805 #define ZGEMM_DEFAULT_P zgemm_p
806 #define ZGEMM_DEFAULT_Q 256
807 #define ZGEMM_DEFAULT_R zgemm_r
809 #define XGEMM_DEFAULT_P xgemm_p
810 #define XGEMM_DEFAULT_Q 256
811 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM default parameter set: offsets/alignment, two alternative
 * interleaved UNROLL_M/UNROLL_N groups, then per-variant P/Q/R triples.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
822 #define GEMM_DEFAULT_OFFSET_A 0
823 #define GEMM_DEFAULT_OFFSET_B 0
824 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First unroll group (M/N pairs per variant). */
827 #define SGEMM_DEFAULT_UNROLL_M 4
828 #define SGEMM_DEFAULT_UNROLL_N 4
829 #define DGEMM_DEFAULT_UNROLL_M 2
830 #define DGEMM_DEFAULT_UNROLL_N 4
831 #define QGEMM_DEFAULT_UNROLL_M 2
832 #define QGEMM_DEFAULT_UNROLL_N 2
833 #define CGEMM_DEFAULT_UNROLL_M 2
834 #define CGEMM_DEFAULT_UNROLL_N 2
835 #define ZGEMM_DEFAULT_UNROLL_M 1
836 #define ZGEMM_DEFAULT_UNROLL_N 2
837 #define XGEMM_DEFAULT_UNROLL_M 1
838 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group: the same twelve macros with different values.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
840 #define SGEMM_DEFAULT_UNROLL_M 8
841 #define SGEMM_DEFAULT_UNROLL_N 2
842 #define DGEMM_DEFAULT_UNROLL_M 2
843 #define DGEMM_DEFAULT_UNROLL_N 2
844 #define QGEMM_DEFAULT_UNROLL_M 2
845 #define QGEMM_DEFAULT_UNROLL_N 2
846 #define CGEMM_DEFAULT_UNROLL_M 4
847 #define CGEMM_DEFAULT_UNROLL_N 1
848 #define ZGEMM_DEFAULT_UNROLL_M 1
849 #define ZGEMM_DEFAULT_UNROLL_N 1
850 #define XGEMM_DEFAULT_UNROLL_M 1
851 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Per-variant P/Q/R triples: P and R expand to identifiers declared outside
 * this excerpt; Q is the literal 256 for every variant.
 */
855 #define SGEMM_DEFAULT_P sgemm_p
856 #define SGEMM_DEFAULT_Q 256
857 #define SGEMM_DEFAULT_R sgemm_r
859 #define DGEMM_DEFAULT_P dgemm_p
860 #define DGEMM_DEFAULT_Q 256
861 #define DGEMM_DEFAULT_R dgemm_r
863 #define QGEMM_DEFAULT_P qgemm_p
864 #define QGEMM_DEFAULT_Q 256
865 #define QGEMM_DEFAULT_R qgemm_r
867 #define CGEMM_DEFAULT_P cgemm_p
868 #define CGEMM_DEFAULT_Q 256
869 #define CGEMM_DEFAULT_R cgemm_r
871 #define ZGEMM_DEFAULT_P zgemm_p
872 #define ZGEMM_DEFAULT_Q 256
873 #define ZGEMM_DEFAULT_R zgemm_r
875 #define XGEMM_DEFAULT_P xgemm_p
876 #define XGEMM_DEFAULT_Q 256
877 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM default parameter set introduced by a test for CORE_NORTHWOOD.
 * NOTE(review): the matching #endif is not visible in this excerpt; the
 * definitions below presumably all belong to this branch -- confirm.
 */
882 #ifdef CORE_NORTHWOOD
887 #define GEMM_DEFAULT_OFFSET_A 0
888 #define GEMM_DEFAULT_OFFSET_B 32
890 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
894 #define SGEMM_DEFAULT_UNROLL_M 8
895 #define DGEMM_DEFAULT_UNROLL_M 4
896 #define QGEMM_DEFAULT_UNROLL_M 2
897 #define CGEMM_DEFAULT_UNROLL_M 4
898 #define ZGEMM_DEFAULT_UNROLL_M 2
899 #define XGEMM_DEFAULT_UNROLL_M 1
901 #define SGEMM_DEFAULT_UNROLL_N 2
902 #define DGEMM_DEFAULT_UNROLL_N 2
903 #define QGEMM_DEFAULT_UNROLL_N 2
904 #define CGEMM_DEFAULT_UNROLL_N 1
905 #define ZGEMM_DEFAULT_UNROLL_N 1
906 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to identifiers declared outside this excerpt. */
908 #define SGEMM_DEFAULT_P sgemm_p
909 #define SGEMM_DEFAULT_R sgemm_r
911 #define DGEMM_DEFAULT_P dgemm_p
912 #define DGEMM_DEFAULT_R dgemm_r
914 #define QGEMM_DEFAULT_P qgemm_p
915 #define QGEMM_DEFAULT_R qgemm_r
917 #define CGEMM_DEFAULT_P cgemm_p
918 #define CGEMM_DEFAULT_R cgemm_r
920 #define ZGEMM_DEFAULT_P zgemm_p
921 #define ZGEMM_DEFAULT_R zgemm_r
923 #define XGEMM_DEFAULT_P xgemm_p
924 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every variant. */
926 #define SGEMM_DEFAULT_Q 128
927 #define DGEMM_DEFAULT_Q 128
928 #define QGEMM_DEFAULT_Q 128
929 #define CGEMM_DEFAULT_Q 128
930 #define ZGEMM_DEFAULT_Q 128
931 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM default parameter set: two alternative OFFSET_A/OFFSET_B pairs, the
 * alignment mask, two alternative UNROLL_M groups, UNROLL_N, P/R
 * (identifiers) and Q (literals).
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
/* First offset pair: 128/192. */
940 #define GEMM_DEFAULT_OFFSET_A 128
941 #define GEMM_DEFAULT_OFFSET_B 192
/*
 * Second offset pair: 0/256.
 * NOTE(review): presumably an alternative #if/#else branch; the directive
 * is not visible here -- confirm the selecting condition.
 */
943 #define GEMM_DEFAULT_OFFSET_A 0
944 #define GEMM_DEFAULT_OFFSET_B 256
947 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First UNROLL_M group (4/2/2/2/1/1). */
952 #define SGEMM_DEFAULT_UNROLL_M 4
953 #define DGEMM_DEFAULT_UNROLL_M 2
954 #define QGEMM_DEFAULT_UNROLL_M 2
955 #define CGEMM_DEFAULT_UNROLL_M 2
956 #define ZGEMM_DEFAULT_UNROLL_M 1
957 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group (8/4/2/4/2/1).
 * NOTE(review): presumably an alternative branch; directive not visible.
 */
959 #define SGEMM_DEFAULT_UNROLL_M 8
960 #define DGEMM_DEFAULT_UNROLL_M 4
961 #define QGEMM_DEFAULT_UNROLL_M 2
962 #define CGEMM_DEFAULT_UNROLL_M 4
963 #define ZGEMM_DEFAULT_UNROLL_M 2
964 #define XGEMM_DEFAULT_UNROLL_M 1
967 #define SGEMM_DEFAULT_UNROLL_N 4
968 #define DGEMM_DEFAULT_UNROLL_N 4
969 #define QGEMM_DEFAULT_UNROLL_N 2
970 #define CGEMM_DEFAULT_UNROLL_N 2
971 #define ZGEMM_DEFAULT_UNROLL_N 2
972 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to identifiers declared outside this excerpt. */
974 #define SGEMM_DEFAULT_P sgemm_p
975 #define SGEMM_DEFAULT_R sgemm_r
977 #define DGEMM_DEFAULT_P dgemm_p
978 #define DGEMM_DEFAULT_R dgemm_r
980 #define QGEMM_DEFAULT_P qgemm_p
981 #define QGEMM_DEFAULT_R qgemm_r
983 #define CGEMM_DEFAULT_P cgemm_p
984 #define CGEMM_DEFAULT_R cgemm_r
986 #define ZGEMM_DEFAULT_P zgemm_p
987 #define ZGEMM_DEFAULT_R zgemm_r
989 #define XGEMM_DEFAULT_P xgemm_p
990 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every variant. */
992 #define SGEMM_DEFAULT_Q 128
993 #define DGEMM_DEFAULT_Q 128
994 #define QGEMM_DEFAULT_Q 128
995 #define CGEMM_DEFAULT_Q 128
996 #define ZGEMM_DEFAULT_Q 128
997 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM default parameter set: offsets/alignment, SWITCH_RATIO, two
 * alternative UNROLL_M/UNROLL_N groups (with the MASK helper defined between
 * them), P/R (identifiers) and Q (literals).
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
1005 #define GEMM_DEFAULT_OFFSET_A 448
1006 #define GEMM_DEFAULT_OFFSET_B 128
1007 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1011 #define SWITCH_RATIO 4
/* First unroll group: UNROLL_M = 8/4/2/4/2/1, UNROLL_N = 2/2/2/1/1/1. */
1014 #define SGEMM_DEFAULT_UNROLL_M 8
1015 #define DGEMM_DEFAULT_UNROLL_M 4
1016 #define QGEMM_DEFAULT_UNROLL_M 2
1017 #define CGEMM_DEFAULT_UNROLL_M 4
1018 #define ZGEMM_DEFAULT_UNROLL_M 2
1019 #define XGEMM_DEFAULT_UNROLL_M 1
1021 #define SGEMM_DEFAULT_UNROLL_N 2
1022 #define DGEMM_DEFAULT_UNROLL_N 2
1023 #define QGEMM_DEFAULT_UNROLL_N 2
1024 #define CGEMM_DEFAULT_UNROLL_N 1
1025 #define ZGEMM_DEFAULT_UNROLL_N 1
1026 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * MASK(a, b): adds b - 1 to a, divides by b and multiplies by b again; with
 * positive integer operands this rounds a up to a multiple of b. The
 * argument b is expanded three times, so avoid side effects in it.
 */
1028 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/*
 * Second unroll group: same UNROLL_M values, UNROLL_N = 4/4/2/2/2/1.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
1031 #define SGEMM_DEFAULT_UNROLL_M 8
1032 #define DGEMM_DEFAULT_UNROLL_M 4
1033 #define QGEMM_DEFAULT_UNROLL_M 2
1034 #define CGEMM_DEFAULT_UNROLL_M 4
1035 #define ZGEMM_DEFAULT_UNROLL_M 2
1036 #define XGEMM_DEFAULT_UNROLL_M 1
1038 #define SGEMM_DEFAULT_UNROLL_N 4
1039 #define DGEMM_DEFAULT_UNROLL_N 4
1040 #define QGEMM_DEFAULT_UNROLL_N 2
1041 #define CGEMM_DEFAULT_UNROLL_N 2
1042 #define ZGEMM_DEFAULT_UNROLL_N 2
1043 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to identifiers declared outside this excerpt. */
1046 #define SGEMM_DEFAULT_P sgemm_p
1047 #define SGEMM_DEFAULT_R sgemm_r
1049 #define DGEMM_DEFAULT_P dgemm_p
1050 #define DGEMM_DEFAULT_R dgemm_r
1052 #define QGEMM_DEFAULT_P qgemm_p
1053 #define QGEMM_DEFAULT_R qgemm_r
1055 #define CGEMM_DEFAULT_P cgemm_p
1056 #define CGEMM_DEFAULT_R cgemm_r
1058 #define ZGEMM_DEFAULT_P zgemm_p
1059 #define ZGEMM_DEFAULT_R zgemm_r
1061 #define XGEMM_DEFAULT_P xgemm_p
1062 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every variant. */
1064 #define SGEMM_DEFAULT_Q 256
1065 #define DGEMM_DEFAULT_Q 256
1066 #define QGEMM_DEFAULT_Q 256
1067 #define CGEMM_DEFAULT_Q 256
1068 #define ZGEMM_DEFAULT_Q 256
1069 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM default parameter set: offsets/alignment, SWITCH_RATIO, two
 * alternative UNROLL_M/UNROLL_N groups, P/R (identifiers), Q (literals) and
 * GETRF_FACTOR.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
1078 #define GEMM_DEFAULT_OFFSET_A 128
1079 #define GEMM_DEFAULT_OFFSET_B 0
1080 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1084 #define SWITCH_RATIO 4
/* First unroll group: UNROLL_M = 4/2/2/2/1/1, UNROLL_N = 4/4/2/2/2/1. */
1087 #define SGEMM_DEFAULT_UNROLL_M 4
1088 #define DGEMM_DEFAULT_UNROLL_M 2
1089 #define QGEMM_DEFAULT_UNROLL_M 2
1090 #define CGEMM_DEFAULT_UNROLL_M 2
1091 #define ZGEMM_DEFAULT_UNROLL_M 1
1092 #define XGEMM_DEFAULT_UNROLL_M 1
1094 #define SGEMM_DEFAULT_UNROLL_N 4
1095 #define DGEMM_DEFAULT_UNROLL_N 4
1096 #define QGEMM_DEFAULT_UNROLL_N 2
1097 #define CGEMM_DEFAULT_UNROLL_N 2
1098 #define ZGEMM_DEFAULT_UNROLL_N 2
1099 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group: UNROLL_M = 8/4/2/4/2/1, UNROLL_N unchanged.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
1101 #define SGEMM_DEFAULT_UNROLL_M 8
1102 #define DGEMM_DEFAULT_UNROLL_M 4
1103 #define QGEMM_DEFAULT_UNROLL_M 2
1104 #define CGEMM_DEFAULT_UNROLL_M 4
1105 #define ZGEMM_DEFAULT_UNROLL_M 2
1106 #define XGEMM_DEFAULT_UNROLL_M 1
1108 #define SGEMM_DEFAULT_UNROLL_N 4
1109 #define DGEMM_DEFAULT_UNROLL_N 4
1110 #define QGEMM_DEFAULT_UNROLL_N 2
1111 #define CGEMM_DEFAULT_UNROLL_N 2
1112 #define ZGEMM_DEFAULT_UNROLL_N 2
1113 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to identifiers declared outside this excerpt. */
1116 #define SGEMM_DEFAULT_P sgemm_p
1117 #define SGEMM_DEFAULT_R sgemm_r
1119 #define DGEMM_DEFAULT_P dgemm_p
1120 #define DGEMM_DEFAULT_R dgemm_r
1122 #define QGEMM_DEFAULT_P qgemm_p
1123 #define QGEMM_DEFAULT_R qgemm_r
1125 #define CGEMM_DEFAULT_P cgemm_p
1126 #define CGEMM_DEFAULT_R cgemm_r
1128 #define ZGEMM_DEFAULT_P zgemm_p
1129 #define ZGEMM_DEFAULT_R zgemm_r
1131 #define XGEMM_DEFAULT_P xgemm_p
1132 #define XGEMM_DEFAULT_R xgemm_r
/* Q differs per variant: 512/256/128 for S/D/Q and again for C/Z/X. */
1134 #define SGEMM_DEFAULT_Q 512
1135 #define DGEMM_DEFAULT_Q 256
1136 #define QGEMM_DEFAULT_Q 128
1137 #define CGEMM_DEFAULT_Q 512
1138 #define ZGEMM_DEFAULT_Q 256
1139 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; its consumer is outside this excerpt. */
1141 #define GETRF_FACTOR 0.75
/*
 * GEMM default parameter set: offsets/alignment, SWITCH_RATIO, two
 * alternative UNROLL_M/UNROLL_N groups, P/R (identifiers), Q (literals),
 * GETRF_FACTOR and the threading selector.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
1149 #define GEMM_DEFAULT_OFFSET_A 128
1150 #define GEMM_DEFAULT_OFFSET_B 0
1151 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1155 #define SWITCH_RATIO 4
/* First unroll group: UNROLL_M = 4/2/2/2/1/1, UNROLL_N = 4/4/2/2/2/1. */
1158 #define SGEMM_DEFAULT_UNROLL_M 4
1159 #define DGEMM_DEFAULT_UNROLL_M 2
1160 #define QGEMM_DEFAULT_UNROLL_M 2
1161 #define CGEMM_DEFAULT_UNROLL_M 2
1162 #define ZGEMM_DEFAULT_UNROLL_M 1
1163 #define XGEMM_DEFAULT_UNROLL_M 1
1165 #define SGEMM_DEFAULT_UNROLL_N 4
1166 #define DGEMM_DEFAULT_UNROLL_N 4
1167 #define QGEMM_DEFAULT_UNROLL_N 2
1168 #define CGEMM_DEFAULT_UNROLL_N 2
1169 #define ZGEMM_DEFAULT_UNROLL_N 2
1170 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group: UNROLL_M = 8/4/2/4/2/1, UNROLL_N unchanged.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
1172 #define SGEMM_DEFAULT_UNROLL_M 8
1173 #define DGEMM_DEFAULT_UNROLL_M 4
1174 #define QGEMM_DEFAULT_UNROLL_M 2
1175 #define CGEMM_DEFAULT_UNROLL_M 4
1176 #define ZGEMM_DEFAULT_UNROLL_M 2
1177 #define XGEMM_DEFAULT_UNROLL_M 1
1179 #define SGEMM_DEFAULT_UNROLL_N 4
1180 #define DGEMM_DEFAULT_UNROLL_N 4
1181 #define QGEMM_DEFAULT_UNROLL_N 2
1182 #define CGEMM_DEFAULT_UNROLL_N 2
1183 #define ZGEMM_DEFAULT_UNROLL_N 2
1184 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R defaults expand to identifiers declared outside this excerpt. */
1187 #define SGEMM_DEFAULT_P sgemm_p
1188 #define SGEMM_DEFAULT_R sgemm_r
1190 #define DGEMM_DEFAULT_P dgemm_p
1191 #define DGEMM_DEFAULT_R dgemm_r
1193 #define QGEMM_DEFAULT_P qgemm_p
1194 #define QGEMM_DEFAULT_R qgemm_r
1196 #define CGEMM_DEFAULT_P cgemm_p
1197 #define CGEMM_DEFAULT_R cgemm_r
1199 #define ZGEMM_DEFAULT_P zgemm_p
1200 #define ZGEMM_DEFAULT_R zgemm_r
1202 #define XGEMM_DEFAULT_P xgemm_p
1203 #define XGEMM_DEFAULT_R xgemm_r
/* Q differs per variant: 768/384/192 for S/D/Q and again for C/Z/X. */
1205 #define SGEMM_DEFAULT_Q 768
1206 #define DGEMM_DEFAULT_Q 384
1207 #define QGEMM_DEFAULT_Q 192
1208 #define CGEMM_DEFAULT_Q 768
1209 #define ZGEMM_DEFAULT_Q 384
1210 #define XGEMM_DEFAULT_Q 192
/* Floating-point constant; its consumer is outside this excerpt. */
1212 #define GETRF_FACTOR 0.75
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
1213 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set: offsets/alignment, SWITCH_RATIO, two
 * alternative UNROLL_M/UNROLL_N groups, literal P values paired with
 * identifier R values, Q (literals) and GETRF_FACTOR.
 * NOTE(review): the preprocessor guard that selects this set is not visible
 * in this excerpt -- confirm which build target it applies to.
 */
1221 #define GEMM_DEFAULT_OFFSET_A 32
1222 #define GEMM_DEFAULT_OFFSET_B 0
1223 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1227 #define SWITCH_RATIO 4
/* First unroll group: UNROLL_M = 4/2/2/2/1/1, UNROLL_N = 4/4/2/2/2/1. */
1230 #define SGEMM_DEFAULT_UNROLL_M 4
1231 #define DGEMM_DEFAULT_UNROLL_M 2
1232 #define QGEMM_DEFAULT_UNROLL_M 2
1233 #define CGEMM_DEFAULT_UNROLL_M 2
1234 #define ZGEMM_DEFAULT_UNROLL_M 1
1235 #define XGEMM_DEFAULT_UNROLL_M 1
1237 #define SGEMM_DEFAULT_UNROLL_N 4
1238 #define DGEMM_DEFAULT_UNROLL_N 4
1239 #define QGEMM_DEFAULT_UNROLL_N 2
1240 #define CGEMM_DEFAULT_UNROLL_N 2
1241 #define ZGEMM_DEFAULT_UNROLL_N 2
1242 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group: UNROLL_M unchanged, UNROLL_N = 8/8/2/4/4/1.
 * NOTE(review): presumably an alternative #if/#else branch to the first
 * group; the directive is not visible here -- confirm.
 */
1244 #define SGEMM_DEFAULT_UNROLL_M 4
1245 #define DGEMM_DEFAULT_UNROLL_M 2
1246 #define QGEMM_DEFAULT_UNROLL_M 2
1247 #define CGEMM_DEFAULT_UNROLL_M 2
1248 #define ZGEMM_DEFAULT_UNROLL_M 1
1249 #define XGEMM_DEFAULT_UNROLL_M 1
1251 #define SGEMM_DEFAULT_UNROLL_N 8
1252 #define DGEMM_DEFAULT_UNROLL_N 8
1253 #define QGEMM_DEFAULT_UNROLL_N 2
1254 #define CGEMM_DEFAULT_UNROLL_N 4
1255 #define ZGEMM_DEFAULT_UNROLL_N 4
1256 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal (504 for S/D/Q, 252 for C/Z/X); R expands to identifiers
 * declared outside this excerpt.
 */
1259 #define SGEMM_DEFAULT_P 504
1260 #define SGEMM_DEFAULT_R sgemm_r
1262 #define DGEMM_DEFAULT_P 504
1263 #define DGEMM_DEFAULT_R dgemm_r
1265 #define QGEMM_DEFAULT_P 504
1266 #define QGEMM_DEFAULT_R qgemm_r
1268 #define CGEMM_DEFAULT_P 252
1269 #define CGEMM_DEFAULT_R cgemm_r
1271 #define ZGEMM_DEFAULT_P 252
1272 #define ZGEMM_DEFAULT_R zgemm_r
1274 #define XGEMM_DEFAULT_P 252
1275 #define XGEMM_DEFAULT_R xgemm_r
/* Q differs per variant: 512/256/128 for S/D/Q and again for C/Z/X. */
1277 #define SGEMM_DEFAULT_Q 512
1278 #define DGEMM_DEFAULT_Q 256
1279 #define QGEMM_DEFAULT_Q 128
1280 #define CGEMM_DEFAULT_Q 512
1281 #define ZGEMM_DEFAULT_Q 256
1282 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; its consumer is outside this excerpt. */
1284 #define GETRF_FACTOR 0.72
/*
 * GEMM tuning defaults for one build target: buffer offsets, alignment
 * constant, SWITCH_RATIO, unroll factors, P/Q/R defaults, GEMM3M defaults and
 * GETRF_FACTOR.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1294 #define GEMM_DEFAULT_OFFSET_A 0
1295 #define GEMM_DEFAULT_OFFSET_B 0
1296 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1300 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M / UNROLL_N listings follow with
 * different values (M: 4/2/2/2/1/1 vs 16/8/2/8/1/1). They look like the two
 * branches of a conditional -- confirm which one is active for a given build.
 */
1303 #define SGEMM_DEFAULT_UNROLL_M 4
1304 #define DGEMM_DEFAULT_UNROLL_M 2
1305 #define QGEMM_DEFAULT_UNROLL_M 2
1306 #define CGEMM_DEFAULT_UNROLL_M 2
1307 #define ZGEMM_DEFAULT_UNROLL_M 1
1308 #define XGEMM_DEFAULT_UNROLL_M 1
1310 #define SGEMM_DEFAULT_UNROLL_N 4
1311 #define DGEMM_DEFAULT_UNROLL_N 4
1312 #define QGEMM_DEFAULT_UNROLL_N 2
1313 #define CGEMM_DEFAULT_UNROLL_N 2
1314 #define ZGEMM_DEFAULT_UNROLL_N 2
1315 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll listing. */
1317 #define SGEMM_DEFAULT_UNROLL_M 16
1318 #define DGEMM_DEFAULT_UNROLL_M 8
1319 #define QGEMM_DEFAULT_UNROLL_M 2
1320 #define CGEMM_DEFAULT_UNROLL_M 8
1321 #define ZGEMM_DEFAULT_UNROLL_M 1
1322 #define XGEMM_DEFAULT_UNROLL_M 1
1324 #define SGEMM_DEFAULT_UNROLL_N 4
1325 #define DGEMM_DEFAULT_UNROLL_N 4
1326 #define QGEMM_DEFAULT_UNROLL_N 2
1327 #define CGEMM_DEFAULT_UNROLL_N 2
1328 #define ZGEMM_DEFAULT_UNROLL_N 4
1329 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal; R expands to an identifier (sgemm_r, ...). The lines
 * starting with "//" keep a disabled literal alternative (1024) for R.
 */
1332 #define SGEMM_DEFAULT_P 768
1333 #define SGEMM_DEFAULT_R sgemm_r
1334 //#define SGEMM_DEFAULT_R 1024
1336 #define DGEMM_DEFAULT_P 512
1337 #define DGEMM_DEFAULT_R dgemm_r
1338 //#define DGEMM_DEFAULT_R 1024
1340 #define QGEMM_DEFAULT_P 504
1341 #define QGEMM_DEFAULT_R qgemm_r
1343 #define CGEMM_DEFAULT_P 768
1344 #define CGEMM_DEFAULT_R cgemm_r
1345 //#define CGEMM_DEFAULT_R 1024
1347 #define ZGEMM_DEFAULT_P 512
1348 #define ZGEMM_DEFAULT_R zgemm_r
1349 //#define ZGEMM_DEFAULT_R 1024
1351 #define XGEMM_DEFAULT_P 252
1352 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a fixed literal per prefix. */
1354 #define SGEMM_DEFAULT_Q 384
1355 #define DGEMM_DEFAULT_Q 256
1356 #define QGEMM_DEFAULT_Q 128
1357 #define CGEMM_DEFAULT_Q 512
1358 #define ZGEMM_DEFAULT_Q 192
1359 #define XGEMM_DEFAULT_Q 128
/* GEMM3M defaults: unroll factors for C and Z only, P/Q/R for C, Z and X. */
1361 #define CGEMM3M_DEFAULT_UNROLL_N 8
1362 #define CGEMM3M_DEFAULT_UNROLL_M 4
1363 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1364 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1366 #define CGEMM3M_DEFAULT_P 448
1367 #define ZGEMM3M_DEFAULT_P 224
1368 #define XGEMM3M_DEFAULT_P 112
1369 #define CGEMM3M_DEFAULT_Q 224
1370 #define ZGEMM3M_DEFAULT_Q 224
1371 #define XGEMM3M_DEFAULT_Q 224
1372 #define CGEMM3M_DEFAULT_R 12288
1373 #define ZGEMM3M_DEFAULT_R 12288
1374 #define XGEMM3M_DEFAULT_R 12288
1378 #define GETRF_FACTOR 0.72
/*
 * GEMM tuning defaults for one build target: buffer offsets, alignment
 * constant, SWITCH_RATIO, unroll factors (including UNROLL_MN), P/Q/R
 * defaults and GEMM3M defaults.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1387 #define GEMM_DEFAULT_OFFSET_A 0
1388 #define GEMM_DEFAULT_OFFSET_B 0
1389 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1393 #define SWITCH_RATIO 4
/*
 * NOTE(review): two complete UNROLL_M / UNROLL_N listings follow with
 * different values. They look like the two branches of a conditional --
 * confirm which one is active for a given build.
 */
1397 #define SGEMM_DEFAULT_UNROLL_M 4
1398 #define DGEMM_DEFAULT_UNROLL_M 2
1399 #define QGEMM_DEFAULT_UNROLL_M 2
1400 #define CGEMM_DEFAULT_UNROLL_M 2
1401 #define ZGEMM_DEFAULT_UNROLL_M 1
1402 #define XGEMM_DEFAULT_UNROLL_M 1
1404 #define SGEMM_DEFAULT_UNROLL_N 4
1405 #define DGEMM_DEFAULT_UNROLL_N 4
1406 #define QGEMM_DEFAULT_UNROLL_N 2
1407 #define CGEMM_DEFAULT_UNROLL_N 2
1408 #define ZGEMM_DEFAULT_UNROLL_N 2
1409 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll listing; only this one is followed by UNROLL_MN (S and D). */
1413 #define SGEMM_DEFAULT_UNROLL_M 16
1414 #define DGEMM_DEFAULT_UNROLL_M 4
1415 #define QGEMM_DEFAULT_UNROLL_M 2
1416 #define CGEMM_DEFAULT_UNROLL_M 8
1417 #define ZGEMM_DEFAULT_UNROLL_M 4
1418 #define XGEMM_DEFAULT_UNROLL_M 1
1420 #define SGEMM_DEFAULT_UNROLL_N 4
1421 #define DGEMM_DEFAULT_UNROLL_N 8
1422 #define QGEMM_DEFAULT_UNROLL_N 2
1423 #define CGEMM_DEFAULT_UNROLL_N 2
1424 #define ZGEMM_DEFAULT_UNROLL_N 2
1425 #define XGEMM_DEFAULT_UNROLL_N 1
1427 #define SGEMM_DEFAULT_UNROLL_MN 32
1428 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * First P/Q/R listing: all six prefixes. R is an identifier (sgemm_r, ...)
 * except CGEMM_DEFAULT_R, which is the literal 1024.
 * NOTE(review): confirm the literal CGEMM_DEFAULT_R is intentional given that
 * every other R in this listing is an identifier.
 */
1433 #define SGEMM_DEFAULT_P 512
1434 #define SGEMM_DEFAULT_R sgemm_r
1435 #define DGEMM_DEFAULT_P 512
1436 #define DGEMM_DEFAULT_R dgemm_r
1437 #define QGEMM_DEFAULT_P 504
1438 #define QGEMM_DEFAULT_R qgemm_r
1439 #define CGEMM_DEFAULT_P 128
1440 #define CGEMM_DEFAULT_R 1024
1441 #define ZGEMM_DEFAULT_P 512
1442 #define ZGEMM_DEFAULT_R zgemm_r
1443 #define XGEMM_DEFAULT_P 252
1444 #define XGEMM_DEFAULT_R xgemm_r
1445 #define SGEMM_DEFAULT_Q 256
1446 #define DGEMM_DEFAULT_Q 256
1447 #define QGEMM_DEFAULT_Q 128
1448 #define CGEMM_DEFAULT_Q 256
1449 #define ZGEMM_DEFAULT_Q 192
1450 #define XGEMM_DEFAULT_Q 128
/*
 * Second P/Q/R listing.
 * NOTE(review): SGEMM_DEFAULT_Q and DGEMM_DEFAULT_Q appear twice inside this
 * listing (320/128, then 384/256), presumably as the branches of a nested
 * conditional -- confirm the selecting condition.
 */
1454 #define SGEMM_DEFAULT_P 768
1455 #define DGEMM_DEFAULT_P 512
1456 #define CGEMM_DEFAULT_P 384
1457 #define ZGEMM_DEFAULT_P 256
1460 #define SGEMM_DEFAULT_Q 320
1461 #define DGEMM_DEFAULT_Q 128
1463 #define SGEMM_DEFAULT_Q 384
1464 #define DGEMM_DEFAULT_Q 256
1466 #define CGEMM_DEFAULT_Q 192
1467 #define ZGEMM_DEFAULT_Q 128
/* Here DGEMM_DEFAULT_R is the literal 13824 while S/C/Z use identifiers. */
1469 #define SGEMM_DEFAULT_R sgemm_r
1470 #define DGEMM_DEFAULT_R 13824
1471 #define CGEMM_DEFAULT_R cgemm_r
1472 #define ZGEMM_DEFAULT_R zgemm_r
1474 #define QGEMM_DEFAULT_Q 128
1475 #define QGEMM_DEFAULT_P 504
1476 #define QGEMM_DEFAULT_R qgemm_r
1477 #define XGEMM_DEFAULT_P 252
1478 #define XGEMM_DEFAULT_R xgemm_r
1479 #define XGEMM_DEFAULT_Q 128
/* GEMM3M defaults: unroll factors for C and Z only, P/Q/R for C, Z and X. */
1481 #define CGEMM3M_DEFAULT_UNROLL_N 8
1482 #define CGEMM3M_DEFAULT_UNROLL_M 4
1483 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1484 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1486 #define CGEMM3M_DEFAULT_P 448
1487 #define ZGEMM3M_DEFAULT_P 224
1488 #define XGEMM3M_DEFAULT_P 112
1489 #define CGEMM3M_DEFAULT_Q 224
1490 #define ZGEMM3M_DEFAULT_Q 224
1491 #define XGEMM3M_DEFAULT_Q 224
1492 #define CGEMM3M_DEFAULT_R 12288
1493 #define ZGEMM3M_DEFAULT_R 12288
1494 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM tuning defaults for one build target: buffer offsets, alignment
 * constant, unroll factors and P/Q/R defaults.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
/* 0x0ffffUL is 2^16 - 1; presumably used as an alignment mask -- TODO confirm. */
1508 #define GEMM_DEFAULT_OFFSET_A 64
1509 #define GEMM_DEFAULT_OFFSET_B 0
1510 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * NOTE(review): UNROLL_M is listed twice (4/2/2/2/1/1, then 8/4/2/4/2/1)
 * while UNROLL_N is listed once. The two UNROLL_M tables look like the
 * branches of a conditional -- confirm which one is active.
 */
1515 #define SGEMM_DEFAULT_UNROLL_M 4
1516 #define DGEMM_DEFAULT_UNROLL_M 2
1517 #define QGEMM_DEFAULT_UNROLL_M 2
1518 #define CGEMM_DEFAULT_UNROLL_M 2
1519 #define ZGEMM_DEFAULT_UNROLL_M 1
1520 #define XGEMM_DEFAULT_UNROLL_M 1
1522 #define SGEMM_DEFAULT_UNROLL_M 8
1523 #define DGEMM_DEFAULT_UNROLL_M 4
1524 #define QGEMM_DEFAULT_UNROLL_M 2
1525 #define CGEMM_DEFAULT_UNROLL_M 4
1526 #define ZGEMM_DEFAULT_UNROLL_M 2
1527 #define XGEMM_DEFAULT_UNROLL_M 1
1530 #define SGEMM_DEFAULT_UNROLL_N 4
1531 #define DGEMM_DEFAULT_UNROLL_N 2
1532 #define QGEMM_DEFAULT_UNROLL_N 2
1533 #define CGEMM_DEFAULT_UNROLL_N 2
1534 #define ZGEMM_DEFAULT_UNROLL_N 1
1535 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than numeric
 * literals; presumably those names are declared elsewhere -- TODO confirm.
 */
1537 #define SGEMM_DEFAULT_P sgemm_p
1538 #define SGEMM_DEFAULT_R sgemm_r
1540 #define DGEMM_DEFAULT_P dgemm_p
1541 #define DGEMM_DEFAULT_R dgemm_r
1543 #define QGEMM_DEFAULT_P qgemm_p
1544 #define QGEMM_DEFAULT_R qgemm_r
1546 #define CGEMM_DEFAULT_P cgemm_p
1547 #define CGEMM_DEFAULT_R cgemm_r
1549 #define ZGEMM_DEFAULT_P zgemm_p
1550 #define ZGEMM_DEFAULT_R zgemm_r
1552 #define XGEMM_DEFAULT_P xgemm_p
1553 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every prefix. */
1555 #define SGEMM_DEFAULT_Q 256
1556 #define DGEMM_DEFAULT_Q 256
1557 #define QGEMM_DEFAULT_Q 256
1558 #define CGEMM_DEFAULT_Q 256
1559 #define ZGEMM_DEFAULT_Q 256
1560 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one build target: buffer offsets, alignment
 * constant, unroll factors, P/Q/R defaults and GETRF_FACTOR.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1570 #define GEMM_DEFAULT_OFFSET_A 0
1571 #define GEMM_DEFAULT_OFFSET_B 128
1572 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 8x8 for the S, D and Q prefixes, 4x4 for C, Z and X. */
1574 #define SGEMM_DEFAULT_UNROLL_M 8
1575 #define SGEMM_DEFAULT_UNROLL_N 8
1576 #define DGEMM_DEFAULT_UNROLL_M 8
1577 #define DGEMM_DEFAULT_UNROLL_N 8
1578 #define QGEMM_DEFAULT_UNROLL_M 8
1579 #define QGEMM_DEFAULT_UNROLL_N 8
1580 #define CGEMM_DEFAULT_UNROLL_M 4
1581 #define CGEMM_DEFAULT_UNROLL_N 4
1582 #define ZGEMM_DEFAULT_UNROLL_M 4
1583 #define ZGEMM_DEFAULT_UNROLL_N 4
1584 #define XGEMM_DEFAULT_UNROLL_M 4
1585 #define XGEMM_DEFAULT_UNROLL_N 4
/* P expands to identifiers (sgemm_p, ...), not literals. */
1587 #define SGEMM_DEFAULT_P sgemm_p
1588 #define DGEMM_DEFAULT_P dgemm_p
1589 #define QGEMM_DEFAULT_P qgemm_p
1590 #define CGEMM_DEFAULT_P cgemm_p
1591 #define ZGEMM_DEFAULT_P zgemm_p
1592 #define XGEMM_DEFAULT_P xgemm_p
/* Q is 1024 for every prefix. */
1594 #define SGEMM_DEFAULT_Q 1024
1595 #define DGEMM_DEFAULT_Q 1024
1596 #define QGEMM_DEFAULT_Q 1024
1597 #define CGEMM_DEFAULT_Q 1024
1598 #define ZGEMM_DEFAULT_Q 1024
1599 #define XGEMM_DEFAULT_Q 1024
/* R expands to identifiers (sgemm_r, ...), not literals. */
1601 #define SGEMM_DEFAULT_R sgemm_r
1602 #define DGEMM_DEFAULT_R dgemm_r
1603 #define QGEMM_DEFAULT_R qgemm_r
1604 #define CGEMM_DEFAULT_R cgemm_r
1605 #define ZGEMM_DEFAULT_R zgemm_r
1606 #define XGEMM_DEFAULT_R xgemm_r
1610 #define GETRF_FACTOR 0.65
/*
 * GEMM tuning defaults compiled when any of EV4, EV5 or EV6 is defined.
 * Only the S, D, C and Z prefixes are configured here.
 * NOTE(review): the matching #endif and any nested conditionals are not shown
 * beside these lines -- confirm the full extent of this guard.
 */
1614 #if defined(EV4) || defined(EV5) || defined(EV6)
1624 #define GEMM_DEFAULT_OFFSET_A 512
1625 #define GEMM_DEFAULT_OFFSET_B 512
1626 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
1628 #define SGEMM_DEFAULT_UNROLL_M 4
1629 #define SGEMM_DEFAULT_UNROLL_N 4
1630 #define DGEMM_DEFAULT_UNROLL_M 4
1631 #define DGEMM_DEFAULT_UNROLL_N 4
1632 #define CGEMM_DEFAULT_UNROLL_M 2
1633 #define CGEMM_DEFAULT_UNROLL_N 2
1634 #define ZGEMM_DEFAULT_UNROLL_M 2
1635 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * NOTE(review): three P/Q listings follow with increasing values. Only the
 * first one also sets R. They presumably belong to separate conditional
 * branches (one per CPU macro tested above) -- TODO confirm which is which.
 */
1640 #define SGEMM_DEFAULT_P 32
1641 #define SGEMM_DEFAULT_Q 112
1642 #define SGEMM_DEFAULT_R 256
1644 #define DGEMM_DEFAULT_P 32
1645 #define DGEMM_DEFAULT_Q 56
1646 #define DGEMM_DEFAULT_R 256
1648 #define CGEMM_DEFAULT_P 32
1649 #define CGEMM_DEFAULT_Q 64
1650 #define CGEMM_DEFAULT_R 240
1652 #define ZGEMM_DEFAULT_P 32
1653 #define ZGEMM_DEFAULT_Q 32
1654 #define ZGEMM_DEFAULT_R 240
/* Second P/Q listing (no R). */
1658 #define SGEMM_DEFAULT_P 64
1659 #define SGEMM_DEFAULT_Q 256
1661 #define DGEMM_DEFAULT_P 64
1662 #define DGEMM_DEFAULT_Q 128
1664 #define CGEMM_DEFAULT_P 64
1665 #define CGEMM_DEFAULT_Q 128
1667 #define ZGEMM_DEFAULT_P 64
1668 #define ZGEMM_DEFAULT_Q 64
/* Third P/Q listing (no R). */
1672 #define SGEMM_DEFAULT_P 256
1673 #define SGEMM_DEFAULT_Q 512
1675 #define DGEMM_DEFAULT_P 256
1676 #define DGEMM_DEFAULT_Q 256
1678 #define CGEMM_DEFAULT_P 256
1679 #define CGEMM_DEFAULT_Q 256
1681 #define ZGEMM_DEFAULT_P 128
1682 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q defaults.
 * No R default is set in this group.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1692 #define GEMM_DEFAULT_OFFSET_A 0
1693 #define GEMM_DEFAULT_OFFSET_B 8192
1694 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors (M x N): S 16x4, D 4x4, C 8x2, Z 2x2. */
1696 #define SGEMM_DEFAULT_UNROLL_M 16
1697 #define SGEMM_DEFAULT_UNROLL_N 4
1698 #define DGEMM_DEFAULT_UNROLL_M 4
1699 #define DGEMM_DEFAULT_UNROLL_N 4
1700 #define CGEMM_DEFAULT_UNROLL_M 8
1701 #define CGEMM_DEFAULT_UNROLL_N 2
1702 #define ZGEMM_DEFAULT_UNROLL_M 2
1703 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
1705 #define SGEMM_DEFAULT_P 128
1706 #define DGEMM_DEFAULT_P 128
1707 #define CGEMM_DEFAULT_P 128
1708 #define ZGEMM_DEFAULT_P 128
/* Q differs per prefix. */
1710 #define SGEMM_DEFAULT_Q 512
1711 #define DGEMM_DEFAULT_Q 256
1712 #define CGEMM_DEFAULT_Q 256
1713 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q defaults.
 * No R default is set in this group.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1719 #define GEMM_DEFAULT_OFFSET_A 0
1720 #define GEMM_DEFAULT_OFFSET_B 1024
1721 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors (M x N): S 16x4, D 4x4, C 8x2, Z 2x2. */
1723 #define SGEMM_DEFAULT_UNROLL_M 16
1724 #define SGEMM_DEFAULT_UNROLL_N 4
1725 #define DGEMM_DEFAULT_UNROLL_M 4
1726 #define DGEMM_DEFAULT_UNROLL_N 4
1727 #define CGEMM_DEFAULT_UNROLL_M 8
1728 #define CGEMM_DEFAULT_UNROLL_N 2
1729 #define ZGEMM_DEFAULT_UNROLL_M 2
1730 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
1732 #define SGEMM_DEFAULT_P 256
1733 #define DGEMM_DEFAULT_P 128
1734 #define CGEMM_DEFAULT_P 128
1735 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for every prefix. */
1737 #define SGEMM_DEFAULT_Q 256
1738 #define DGEMM_DEFAULT_Q 256
1739 #define CGEMM_DEFAULT_Q 256
1740 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q defaults, with P
 * chosen by an L2_SIZE comparison.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1750 #define GEMM_DEFAULT_OFFSET_A 2688
1751 #define GEMM_DEFAULT_OFFSET_B 3072
1752 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M x N): S 16x4, D 4x4, C 8x2, Z 2x2. */
1754 #define SGEMM_DEFAULT_UNROLL_M 16
1755 #define SGEMM_DEFAULT_UNROLL_N 4
1756 #define DGEMM_DEFAULT_UNROLL_M 4
1757 #define DGEMM_DEFAULT_UNROLL_N 4
1758 #define CGEMM_DEFAULT_UNROLL_M 8
1759 #define CGEMM_DEFAULT_UNROLL_N 2
1760 #define ZGEMM_DEFAULT_UNROLL_M 2
1761 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * P depends on an exact L2_SIZE match.
 * NOTE(review): 1024976 is not 1 MiB (1048576). Confirm it equals the
 * L2_SIZE value the build actually supplies; if it does not, this comparison
 * never matches and the larger P values are never used.
 */
1764 #if L2_SIZE == 1024976
1765 #define SGEMM_DEFAULT_P 320
1766 #define DGEMM_DEFAULT_P 256
1767 #define CGEMM_DEFAULT_P 256
1768 #define ZGEMM_DEFAULT_P 256
/* Second P listing; presumably the #else branch of the test above -- TODO confirm. */
1770 #define SGEMM_DEFAULT_P 176
1771 #define DGEMM_DEFAULT_P 176
1772 #define CGEMM_DEFAULT_P 176
1773 #define ZGEMM_DEFAULT_P 176
/* Q differs per prefix. */
1777 #define SGEMM_DEFAULT_Q 512
1778 #define DGEMM_DEFAULT_Q 256
1779 #define CGEMM_DEFAULT_Q 256
1780 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q/R defaults.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
1791 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1792 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1793 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
1795 #define SGEMM_DEFAULT_UNROLL_M 4
1796 #define SGEMM_DEFAULT_UNROLL_N 4
1797 #define DGEMM_DEFAULT_UNROLL_M 4
1798 #define DGEMM_DEFAULT_UNROLL_N 4
1799 #define CGEMM_DEFAULT_UNROLL_M 2
1800 #define CGEMM_DEFAULT_UNROLL_N 2
1801 #define ZGEMM_DEFAULT_UNROLL_M 2
1802 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
1804 #define SGEMM_DEFAULT_P 512
1805 #define DGEMM_DEFAULT_P 512
1806 #define CGEMM_DEFAULT_P 512
1807 #define ZGEMM_DEFAULT_P 512
/* Q differs per prefix. */
1809 #define SGEMM_DEFAULT_Q 1024
1810 #define DGEMM_DEFAULT_Q 512
1811 #define CGEMM_DEFAULT_Q 512
1812 #define ZGEMM_DEFAULT_Q 256
/* R is an alias of the matching P macro, so it follows whatever P expands to. */
1814 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
1815 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
1816 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
1817 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q defaults.
 * No R default is set in this group.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
1827 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1828 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1829 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 8x4 for S and D, 4x2 for C and Z. */
1831 #define SGEMM_DEFAULT_UNROLL_M 8
1832 #define SGEMM_DEFAULT_UNROLL_N 4
1833 #define DGEMM_DEFAULT_UNROLL_M 8
1834 #define DGEMM_DEFAULT_UNROLL_N 4
1835 #define CGEMM_DEFAULT_UNROLL_M 4
1836 #define CGEMM_DEFAULT_UNROLL_N 2
1837 #define ZGEMM_DEFAULT_UNROLL_M 4
1838 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
1840 #define SGEMM_DEFAULT_P 128
1841 #define DGEMM_DEFAULT_P 128
1842 #define CGEMM_DEFAULT_P 128
1843 #define ZGEMM_DEFAULT_P 128
/*
 * NOTE(review): Q is listed twice (4096/3072/2048/1024, then
 * 512/256/256/128). The two tables look like the branches of a conditional
 * -- confirm the selecting condition.
 */
1845 #define SGEMM_DEFAULT_Q 4096
1846 #define DGEMM_DEFAULT_Q 3072
1847 #define CGEMM_DEFAULT_Q 2048
1848 #define ZGEMM_DEFAULT_Q 1024
1850 #define SGEMM_DEFAULT_Q 512
1851 #define DGEMM_DEFAULT_Q 256
1852 #define CGEMM_DEFAULT_Q 256
1853 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults compiled when any of POWER3, POWER4 or POWER5 is
 * defined (S, D, C and Z prefixes only).
 * NOTE(review): the matching #endif and the nested per-CPU conditionals are
 * not shown beside these lines -- confirm the full extent of this guard.
 */
1861 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
1862 #define GEMM_DEFAULT_OFFSET_A 0
1863 #define GEMM_DEFAULT_OFFSET_B 2048
1864 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
1866 #define SGEMM_DEFAULT_UNROLL_M 4
1867 #define SGEMM_DEFAULT_UNROLL_N 4
1868 #define DGEMM_DEFAULT_UNROLL_M 4
1869 #define DGEMM_DEFAULT_UNROLL_N 4
1870 #define CGEMM_DEFAULT_UNROLL_M 2
1871 #define CGEMM_DEFAULT_UNROLL_N 2
1872 #define ZGEMM_DEFAULT_UNROLL_M 2
1873 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * First listing: explicit P/Q/R for S, D and Z.
 * NOTE(review): there are no CGEMM_DEFAULT_P/Q/R entries in this listing,
 * unlike the listings that follow -- confirm whether that is intentional.
 */
1880 #define SGEMM_DEFAULT_P 256
1881 #define SGEMM_DEFAULT_Q 432
1882 #define SGEMM_DEFAULT_R 1012
1884 #define DGEMM_DEFAULT_P 256
1885 #define DGEMM_DEFAULT_Q 216
1886 #define DGEMM_DEFAULT_R 1012
1888 #define ZGEMM_DEFAULT_P 256
1889 #define ZGEMM_DEFAULT_Q 104
1890 #define ZGEMM_DEFAULT_R 1012
/*
 * Two listings follow that each open with #ifdef ALLOC_HUGETLB and give P
 * twice; the second table in each is presumably the #else branch used when
 * ALLOC_HUGETLB is not defined -- TODO confirm.
 */
1894 #ifdef ALLOC_HUGETLB
1895 #define SGEMM_DEFAULT_P 184
1896 #define DGEMM_DEFAULT_P 184
1897 #define CGEMM_DEFAULT_P 184
1898 #define ZGEMM_DEFAULT_P 184
1900 #define SGEMM_DEFAULT_P 144
1901 #define DGEMM_DEFAULT_P 144
1902 #define CGEMM_DEFAULT_P 144
1903 #define ZGEMM_DEFAULT_P 144
1908 #ifdef ALLOC_HUGETLB
1909 #define SGEMM_DEFAULT_P 512
1910 #define DGEMM_DEFAULT_P 256
1911 #define CGEMM_DEFAULT_P 256
1912 #define ZGEMM_DEFAULT_P 128
1914 #define SGEMM_DEFAULT_P 320
1915 #define DGEMM_DEFAULT_P 160
1916 #define CGEMM_DEFAULT_P 160
1917 #define ZGEMM_DEFAULT_P 80
/* Q is 256 for every prefix (listed in S, C, D, Z order). */
1920 #define SGEMM_DEFAULT_Q 256
1921 #define CGEMM_DEFAULT_Q 256
1922 #define DGEMM_DEFAULT_Q 256
1923 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q defaults.
 * No R default is set in this group.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1935 #define GEMM_DEFAULT_OFFSET_A 384
1936 #define GEMM_DEFAULT_OFFSET_B 1024
1937 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M x N): S 4x4, D 4x4, C 2x4, Z 2x4. */
1939 #define SGEMM_DEFAULT_UNROLL_M 4
1940 #define SGEMM_DEFAULT_UNROLL_N 4
1941 #define DGEMM_DEFAULT_UNROLL_M 4
1942 #define DGEMM_DEFAULT_UNROLL_N 4
1943 #define CGEMM_DEFAULT_UNROLL_M 2
1944 #define CGEMM_DEFAULT_UNROLL_N 4
1945 #define ZGEMM_DEFAULT_UNROLL_M 2
1946 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P differs per prefix. */
1948 #define SGEMM_DEFAULT_P 992
1949 #define DGEMM_DEFAULT_P 480
1950 #define CGEMM_DEFAULT_P 488
1951 #define ZGEMM_DEFAULT_P 248
/* Q: 504 for S and D, 400 for C and Z. */
1953 #define SGEMM_DEFAULT_Q 504
1954 #define DGEMM_DEFAULT_Q 504
1955 #define CGEMM_DEFAULT_Q 400
1956 #define ZGEMM_DEFAULT_Q 400
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and P/Q defaults, plus R
 * for D and Z.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
1967 #define GEMM_DEFAULT_OFFSET_A 384
1968 #define GEMM_DEFAULT_OFFSET_B 1024
1969 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M x N): S 2x2, D 16x4, C 2x2, Z 8x2. */
1971 #define SGEMM_DEFAULT_UNROLL_M 2
1972 #define SGEMM_DEFAULT_UNROLL_N 2
1973 #define DGEMM_DEFAULT_UNROLL_M 16
1974 #define DGEMM_DEFAULT_UNROLL_N 4
1975 #define CGEMM_DEFAULT_UNROLL_M 2
1976 #define CGEMM_DEFAULT_UNROLL_N 2
1977 #define ZGEMM_DEFAULT_UNROLL_M 8
1978 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
1980 #define SGEMM_DEFAULT_P 992
1981 #define DGEMM_DEFAULT_P 480
1982 #define CGEMM_DEFAULT_P 488
1983 #define ZGEMM_DEFAULT_P 240
/* Q differs per prefix. */
1985 #define SGEMM_DEFAULT_Q 504
1986 #define DGEMM_DEFAULT_Q 720
1987 #define CGEMM_DEFAULT_Q 400
1988 #define ZGEMM_DEFAULT_Q 360
/*
 * R is set only for D and Z here.
 * NOTE(review): SGEMM_DEFAULT_R and CGEMM_DEFAULT_R are not defined in this
 * group -- confirm they receive a value elsewhere.
 */
1990 #define DGEMM_DEFAULT_R 14400
1991 #define ZGEMM_DEFAULT_R 7200
/*
 * GEMM tuning defaults compiled when both SPARC and V7 are defined
 * (S, D, C and Z prefixes only). No R default is set in this group.
 * NOTE(review): the matching #endif is not shown beside these lines --
 * confirm the full extent of this guard.
 */
1998 #if defined(SPARC) && defined(V7)
2003 #define GEMM_DEFAULT_OFFSET_A 0
2004 #define GEMM_DEFAULT_OFFSET_B 2048
2005 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2x8 for S and D, 1x4 for C and Z. */
2007 #define SGEMM_DEFAULT_UNROLL_M 2
2008 #define SGEMM_DEFAULT_UNROLL_N 8
2009 #define DGEMM_DEFAULT_UNROLL_M 2
2010 #define DGEMM_DEFAULT_UNROLL_N 8
2011 #define CGEMM_DEFAULT_UNROLL_M 1
2012 #define CGEMM_DEFAULT_UNROLL_N 4
2013 #define ZGEMM_DEFAULT_UNROLL_M 1
2014 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 256 for every prefix. */
2016 #define SGEMM_DEFAULT_P 256
2017 #define DGEMM_DEFAULT_P 256
2018 #define CGEMM_DEFAULT_P 256
2019 #define ZGEMM_DEFAULT_P 256
/* Q differs per prefix. */
2021 #define SGEMM_DEFAULT_Q 512
2022 #define DGEMM_DEFAULT_Q 256
2023 #define CGEMM_DEFAULT_Q 256
2024 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD expands to the identifier gemm_thread_mn. */
2027 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning defaults compiled when SPARC and V9 are both defined, or when
 * __sparc_v9__ is defined (S, D, C and Z prefixes only).
 * No R default is set in this group.
 * NOTE(review): the matching #endif is not shown beside these lines --
 * confirm the full extent of this guard.
 */
2030 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2035 #define GEMM_DEFAULT_OFFSET_A 0
2036 #define GEMM_DEFAULT_OFFSET_B 2048
2037 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
2039 #define SGEMM_DEFAULT_UNROLL_M 4
2040 #define SGEMM_DEFAULT_UNROLL_N 4
2041 #define DGEMM_DEFAULT_UNROLL_M 4
2042 #define DGEMM_DEFAULT_UNROLL_N 4
2043 #define CGEMM_DEFAULT_UNROLL_M 2
2044 #define CGEMM_DEFAULT_UNROLL_N 2
2045 #define ZGEMM_DEFAULT_UNROLL_M 2
2046 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
2048 #define SGEMM_DEFAULT_P 512
2049 #define DGEMM_DEFAULT_P 512
2050 #define CGEMM_DEFAULT_P 512
2051 #define ZGEMM_DEFAULT_P 512
/* Q differs per prefix. */
2053 #define SGEMM_DEFAULT_Q 1024
2054 #define DGEMM_DEFAULT_Q 512
2055 #define CGEMM_DEFAULT_Q 512
2056 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors and literal P/Q/R values.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2066 #define GEMM_DEFAULT_OFFSET_A 0
2067 #define GEMM_DEFAULT_OFFSET_B 0
2068 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2x8 for S and D, 1x4 for C and Z. */
2070 #define SGEMM_DEFAULT_UNROLL_M 2
2071 #define SGEMM_DEFAULT_UNROLL_N 8
2072 #define DGEMM_DEFAULT_UNROLL_M 2
2073 #define DGEMM_DEFAULT_UNROLL_N 8
2074 #define CGEMM_DEFAULT_UNROLL_M 1
2075 #define CGEMM_DEFAULT_UNROLL_N 4
2076 #define ZGEMM_DEFAULT_UNROLL_M 1
2077 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P: 108 for S and C, 112 for D and Z. */
2079 #define SGEMM_DEFAULT_P 108
2080 #define DGEMM_DEFAULT_P 112
2081 #define CGEMM_DEFAULT_P 108
2082 #define ZGEMM_DEFAULT_P 112
/* Q differs per prefix. */
2084 #define SGEMM_DEFAULT_Q 288
2085 #define DGEMM_DEFAULT_Q 144
2086 #define CGEMM_DEFAULT_Q 144
2087 #define ZGEMM_DEFAULT_Q 72
/* R is the literal 2000 for every prefix. */
2089 #define SGEMM_DEFAULT_R 2000
2090 #define DGEMM_DEFAULT_R 2000
2091 #define CGEMM_DEFAULT_R 2000
2092 #define ZGEMM_DEFAULT_R 2000
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors, P/Q/R defaults and the
 * GEMM_OFFSET_A1 / GEMM_OFFSET_B1 constants.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2098 ////Copy from SICORTEX
2102 #define GEMM_DEFAULT_OFFSET_A 0
2103 #define GEMM_DEFAULT_OFFSET_B 0
2104 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M x N): S 8x4, D 4x4, C 4x2, Z 2x2. */
2106 #define SGEMM_DEFAULT_UNROLL_M 8
2107 #define SGEMM_DEFAULT_UNROLL_N 4
2109 #define DGEMM_DEFAULT_UNROLL_M 4
2110 #define DGEMM_DEFAULT_UNROLL_N 4
2112 #define CGEMM_DEFAULT_UNROLL_M 4
2113 #define CGEMM_DEFAULT_UNROLL_N 2
2115 #define ZGEMM_DEFAULT_UNROLL_M 2
2116 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2118 #define SGEMM_DEFAULT_P 64
2119 #define DGEMM_DEFAULT_P 44
2120 #define CGEMM_DEFAULT_P 64
2121 #define ZGEMM_DEFAULT_P 32
/* Q differs per prefix. */
2123 #define SGEMM_DEFAULT_Q 192
2124 #define DGEMM_DEFAULT_Q 92
2125 #define CGEMM_DEFAULT_Q 128
2126 #define ZGEMM_DEFAULT_Q 80
/*
 * R is the literal 640 for S, C and Z, but DGEMM_DEFAULT_R expands to the
 * identifier dgemm_r.
 * NOTE(review): confirm the D-only identifier is intentional.
 */
2128 #define SGEMM_DEFAULT_R 640
2129 #define DGEMM_DEFAULT_R dgemm_r
2130 #define CGEMM_DEFAULT_R 640
2131 #define ZGEMM_DEFAULT_R 640
/* 0x10000 is 65536 and 0x100000 is 1048576. */
2133 #define GEMM_OFFSET_A1 0x10000
2134 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * buffer offsets, alignment constant, unroll factors, literal P/Q/R values
 * and the GEMM_OFFSET_A1 / GEMM_OFFSET_B1 constants.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2143 #define GEMM_DEFAULT_OFFSET_A 0
2144 #define GEMM_DEFAULT_OFFSET_B 0
2145 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors are 2x2 for every prefix. */
2147 #define SGEMM_DEFAULT_UNROLL_M 2
2148 #define SGEMM_DEFAULT_UNROLL_N 2
2150 #define DGEMM_DEFAULT_UNROLL_M 2
2151 #define DGEMM_DEFAULT_UNROLL_N 2
2153 #define CGEMM_DEFAULT_UNROLL_M 2
2154 #define CGEMM_DEFAULT_UNROLL_N 2
2156 #define ZGEMM_DEFAULT_UNROLL_M 2
2157 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2159 #define SGEMM_DEFAULT_P 64
2160 #define DGEMM_DEFAULT_P 24
2161 #define CGEMM_DEFAULT_P 24
2162 #define ZGEMM_DEFAULT_P 20
/* Q differs per prefix. */
2164 #define SGEMM_DEFAULT_Q 192
2165 #define DGEMM_DEFAULT_Q 128
2166 #define CGEMM_DEFAULT_Q 128
2167 #define ZGEMM_DEFAULT_Q 64
/* R is the literal 512 for every prefix. */
2169 #define SGEMM_DEFAULT_R 512
2170 #define DGEMM_DEFAULT_R 512
2171 #define CGEMM_DEFAULT_R 512
2172 #define ZGEMM_DEFAULT_R 512
/* 0x10000 is 65536 and 0x100000 is 1048576. */
2174 #define GEMM_OFFSET_A1 0x10000
2175 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * zero buffer offsets, alignment constant, unroll factors and literal P/Q/R.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2185 #define GEMM_DEFAULT_OFFSET_A 0
2186 #define GEMM_DEFAULT_OFFSET_B 0
2187 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
2189 #define SGEMM_DEFAULT_UNROLL_M 4
2190 #define SGEMM_DEFAULT_UNROLL_N 4
2192 #define DGEMM_DEFAULT_UNROLL_M 4
2193 #define DGEMM_DEFAULT_UNROLL_N 4
2195 #define CGEMM_DEFAULT_UNROLL_M 2
2196 #define CGEMM_DEFAULT_UNROLL_N 2
2198 #define ZGEMM_DEFAULT_UNROLL_M 2
2199 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2201 #define SGEMM_DEFAULT_P 128
2202 #define DGEMM_DEFAULT_P 128
2203 #define CGEMM_DEFAULT_P 96
2204 #define ZGEMM_DEFAULT_P 64
/* Q: 240 for S, 120 for D, C and Z. */
2206 #define SGEMM_DEFAULT_Q 240
2207 #define DGEMM_DEFAULT_Q 120
2208 #define CGEMM_DEFAULT_Q 120
2209 #define ZGEMM_DEFAULT_Q 120
/* R is a literal for every prefix. */
2211 #define SGEMM_DEFAULT_R 12288
2212 #define DGEMM_DEFAULT_R 8192
2213 #define CGEMM_DEFAULT_R 4096
2214 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * zero buffer offsets, alignment constant, unroll factors and literal P/Q/R.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2226 #define GEMM_DEFAULT_OFFSET_A 0
2227 #define GEMM_DEFAULT_OFFSET_B 0
2228 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x2 for S and D, 2x2 for C and Z. */
2230 #define SGEMM_DEFAULT_UNROLL_M 4
2231 #define SGEMM_DEFAULT_UNROLL_N 2
2233 #define DGEMM_DEFAULT_UNROLL_M 4
2234 #define DGEMM_DEFAULT_UNROLL_N 2
2236 #define CGEMM_DEFAULT_UNROLL_M 2
2237 #define CGEMM_DEFAULT_UNROLL_N 2
2239 #define ZGEMM_DEFAULT_UNROLL_M 2
2240 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2242 #define SGEMM_DEFAULT_P 128
2243 #define DGEMM_DEFAULT_P 128
2244 #define CGEMM_DEFAULT_P 96
2245 #define ZGEMM_DEFAULT_P 64
/* Q: 240 for S, 120 for D, C and Z. */
2247 #define SGEMM_DEFAULT_Q 240
2248 #define DGEMM_DEFAULT_Q 120
2249 #define CGEMM_DEFAULT_Q 120
2250 #define ZGEMM_DEFAULT_Q 120
/* R is a literal for every prefix. */
2252 #define SGEMM_DEFAULT_R 12288
2253 #define DGEMM_DEFAULT_R 8192
2254 #define CGEMM_DEFAULT_R 4096
2255 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults compiled when CORTEXA57 is defined (S, D, C and Z
 * prefixes only): zero buffer offsets, alignment constant, unroll factors
 * and literal P/Q/R.
 * NOTE(review): the matching #endif is not shown beside these lines --
 * confirm the full extent of this guard.
 */
2262 #if defined(CORTEXA57)
2266 #define GEMM_DEFAULT_OFFSET_A 0
2267 #define GEMM_DEFAULT_OFFSET_B 0
2268 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors are 4x4 for every prefix. */
2270 #define SGEMM_DEFAULT_UNROLL_M 4
2271 #define SGEMM_DEFAULT_UNROLL_N 4
2273 #define DGEMM_DEFAULT_UNROLL_M 4
2274 #define DGEMM_DEFAULT_UNROLL_N 4
2276 #define CGEMM_DEFAULT_UNROLL_M 4
2277 #define CGEMM_DEFAULT_UNROLL_N 4
2279 #define ZGEMM_DEFAULT_UNROLL_M 4
2280 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P differs per prefix. */
2282 #define SGEMM_DEFAULT_P 512
2283 #define DGEMM_DEFAULT_P 256
2284 #define CGEMM_DEFAULT_P 256
2285 #define ZGEMM_DEFAULT_P 128
/* Q: 1024 for S, 512 for D, C and Z. */
2287 #define SGEMM_DEFAULT_Q 1024
2288 #define DGEMM_DEFAULT_Q 512
2289 #define CGEMM_DEFAULT_Q 512
2290 #define ZGEMM_DEFAULT_Q 512
/* R: 4096 for S, D and C, 2048 for Z. */
2292 #define SGEMM_DEFAULT_R 4096
2293 #define DGEMM_DEFAULT_R 4096
2294 #define CGEMM_DEFAULT_R 4096
2295 #define ZGEMM_DEFAULT_R 2048
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * zero buffer offsets, alignment constant, unroll factors and literal P/Q/R.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2305 #define GEMM_DEFAULT_OFFSET_A 0
2306 #define GEMM_DEFAULT_OFFSET_B 0
2307 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S, 2x2 for D, C and Z. */
2309 #define SGEMM_DEFAULT_UNROLL_M 4
2310 #define SGEMM_DEFAULT_UNROLL_N 4
2312 #define DGEMM_DEFAULT_UNROLL_M 2
2313 #define DGEMM_DEFAULT_UNROLL_N 2
2315 #define CGEMM_DEFAULT_UNROLL_M 2
2316 #define CGEMM_DEFAULT_UNROLL_N 2
2318 #define ZGEMM_DEFAULT_UNROLL_M 2
2319 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2321 #define SGEMM_DEFAULT_P 128
2322 #define DGEMM_DEFAULT_P 128
2323 #define CGEMM_DEFAULT_P 96
2324 #define ZGEMM_DEFAULT_P 64
/* Q: 240 for S, 120 for D, C and Z. */
2326 #define SGEMM_DEFAULT_Q 240
2327 #define DGEMM_DEFAULT_Q 120
2328 #define CGEMM_DEFAULT_Q 120
2329 #define ZGEMM_DEFAULT_Q 120
/* R is a literal for every prefix. */
2331 #define SGEMM_DEFAULT_R 12288
2332 #define DGEMM_DEFAULT_R 8192
2333 #define CGEMM_DEFAULT_R 4096
2334 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * zero buffer offsets, alignment constant, unroll factors and literal P/Q/R.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2345 #define GEMM_DEFAULT_OFFSET_A 0
2346 #define GEMM_DEFAULT_OFFSET_B 0
2347 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors are 2x2 for every prefix. */
2349 #define SGEMM_DEFAULT_UNROLL_M 2
2350 #define SGEMM_DEFAULT_UNROLL_N 2
2352 #define DGEMM_DEFAULT_UNROLL_M 2
2353 #define DGEMM_DEFAULT_UNROLL_N 2
2355 #define CGEMM_DEFAULT_UNROLL_M 2
2356 #define CGEMM_DEFAULT_UNROLL_N 2
2358 #define ZGEMM_DEFAULT_UNROLL_M 2
2359 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2361 #define SGEMM_DEFAULT_P 128
2362 #define DGEMM_DEFAULT_P 128
2363 #define CGEMM_DEFAULT_P 96
2364 #define ZGEMM_DEFAULT_P 64
/* Q: 240 for S, 120 for D, C and Z. */
2366 #define SGEMM_DEFAULT_Q 240
2367 #define DGEMM_DEFAULT_Q 120
2368 #define CGEMM_DEFAULT_Q 120
2369 #define ZGEMM_DEFAULT_Q 120
/* R is a literal for every prefix. */
2371 #define SGEMM_DEFAULT_R 12288
2372 #define DGEMM_DEFAULT_R 8192
2373 #define CGEMM_DEFAULT_R 4096
2374 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * zero buffer offsets, alignment constant, unroll factors and literal P/Q/R.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2386 #define GEMM_DEFAULT_OFFSET_A 0
2387 #define GEMM_DEFAULT_OFFSET_B 0
2388 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
2390 #define SGEMM_DEFAULT_UNROLL_M 4
2391 #define SGEMM_DEFAULT_UNROLL_N 4
2393 #define DGEMM_DEFAULT_UNROLL_M 4
2394 #define DGEMM_DEFAULT_UNROLL_N 4
2396 #define CGEMM_DEFAULT_UNROLL_M 2
2397 #define CGEMM_DEFAULT_UNROLL_N 2
2399 #define ZGEMM_DEFAULT_UNROLL_M 2
2400 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2402 #define SGEMM_DEFAULT_P 128
2403 #define DGEMM_DEFAULT_P 128
2404 #define CGEMM_DEFAULT_P 96
2405 #define ZGEMM_DEFAULT_P 64
/* Q: 240 for S, 120 for D, C and Z. */
2407 #define SGEMM_DEFAULT_Q 240
2408 #define DGEMM_DEFAULT_Q 120
2409 #define CGEMM_DEFAULT_Q 120
2410 #define ZGEMM_DEFAULT_Q 120
/* R is a literal for every prefix. */
2412 #define SGEMM_DEFAULT_R 12288
2413 #define DGEMM_DEFAULT_R 8192
2414 #define CGEMM_DEFAULT_R 4096
2415 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults for one build target (S, D, C and Z prefixes only):
 * zero buffer offsets, alignment constant, unroll factors and literal P/Q/R.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2427 #define GEMM_DEFAULT_OFFSET_A 0
2428 #define GEMM_DEFAULT_OFFSET_B 0
2429 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
2431 #define SGEMM_DEFAULT_UNROLL_M 4
2432 #define SGEMM_DEFAULT_UNROLL_N 4
2434 #define DGEMM_DEFAULT_UNROLL_M 4
2435 #define DGEMM_DEFAULT_UNROLL_N 4
2437 #define CGEMM_DEFAULT_UNROLL_M 2
2438 #define CGEMM_DEFAULT_UNROLL_N 2
2440 #define ZGEMM_DEFAULT_UNROLL_M 2
2441 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix. */
2443 #define SGEMM_DEFAULT_P 128
2444 #define DGEMM_DEFAULT_P 128
2445 #define CGEMM_DEFAULT_P 96
2446 #define ZGEMM_DEFAULT_P 64
/* Q: 240 for S, 120 for D, C and Z. */
2448 #define SGEMM_DEFAULT_Q 240
2449 #define DGEMM_DEFAULT_Q 120
2450 #define CGEMM_DEFAULT_Q 120
2451 #define ZGEMM_DEFAULT_Q 120
/* R is a literal for every prefix. */
2453 #define SGEMM_DEFAULT_R 12288
2454 #define DGEMM_DEFAULT_R 8192
2455 #define CGEMM_DEFAULT_R 4096
2456 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults for one build target, covering all six prefixes
 * (S, D, Q, C, Z, X): zero buffer offsets, alignment constant, unroll
 * factors and P/Q/R defaults.
 * NOTE(review): the guard that selects this group is not shown beside these
 * lines -- confirm the target before changing any value.
 */
2470 #define GEMM_DEFAULT_OFFSET_A 0
2471 #define GEMM_DEFAULT_OFFSET_B 0
2472 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors along N: 2 for every prefix except X, which is 1. */
2474 #define SGEMM_DEFAULT_UNROLL_N 2
2475 #define DGEMM_DEFAULT_UNROLL_N 2
2476 #define QGEMM_DEFAULT_UNROLL_N 2
2477 #define CGEMM_DEFAULT_UNROLL_N 2
2478 #define ZGEMM_DEFAULT_UNROLL_N 2
2479 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): UNROLL_M is listed twice with identical values. The two
 * tables look like the branches of a conditional; if so, the conditional has
 * no effect on these values -- confirm whether it can be collapsed.
 */
2482 #define SGEMM_DEFAULT_UNROLL_M 2
2483 #define DGEMM_DEFAULT_UNROLL_M 2
2484 #define QGEMM_DEFAULT_UNROLL_M 2
2485 #define CGEMM_DEFAULT_UNROLL_M 2
2486 #define ZGEMM_DEFAULT_UNROLL_M 2
2487 #define XGEMM_DEFAULT_UNROLL_M 1
2489 #define SGEMM_DEFAULT_UNROLL_M 2
2490 #define DGEMM_DEFAULT_UNROLL_M 2
2491 #define QGEMM_DEFAULT_UNROLL_M 2
2492 #define CGEMM_DEFAULT_UNROLL_M 2
2493 #define ZGEMM_DEFAULT_UNROLL_M 2
2494 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than numeric
 * literals; presumably those names are declared elsewhere -- TODO confirm.
 */
2497 #define SGEMM_DEFAULT_P sgemm_p
2498 #define DGEMM_DEFAULT_P dgemm_p
2499 #define QGEMM_DEFAULT_P qgemm_p
2500 #define CGEMM_DEFAULT_P cgemm_p
2501 #define ZGEMM_DEFAULT_P zgemm_p
2502 #define XGEMM_DEFAULT_P xgemm_p
2504 #define SGEMM_DEFAULT_R sgemm_r
2505 #define DGEMM_DEFAULT_R dgemm_r
2506 #define QGEMM_DEFAULT_R qgemm_r
2507 #define CGEMM_DEFAULT_R cgemm_r
2508 #define ZGEMM_DEFAULT_R zgemm_r
2509 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every prefix. */
2511 #define SGEMM_DEFAULT_Q 128
2512 #define DGEMM_DEFAULT_Q 128
2513 #define QGEMM_DEFAULT_Q 128
2514 #define CGEMM_DEFAULT_Q 128
2515 #define ZGEMM_DEFAULT_Q 128
2516 #define XGEMM_DEFAULT_Q 128
/*
 * Fallbacks: give the Q and X unroll factors (M and N) the value 2 when no
 * earlier definition of the respective macro exists.
 * NOTE(review): the #endif that closes each #ifndef is not shown beside
 * these lines -- confirm each guard is closed right after its #define.
 */
2522 #ifndef QGEMM_DEFAULT_UNROLL_M
2523 #define QGEMM_DEFAULT_UNROLL_M 2
2526 #ifndef QGEMM_DEFAULT_UNROLL_N
2527 #define QGEMM_DEFAULT_UNROLL_N 2
2530 #ifndef XGEMM_DEFAULT_UNROLL_M
2531 #define XGEMM_DEFAULT_UNROLL_M 2
2534 #ifndef XGEMM_DEFAULT_UNROLL_N
2535 #define XGEMM_DEFAULT_UNROLL_N 2
/*
 * Shorthands for x86 shuffle instructions in AT&T syntax. Each expansion ends
 * with a comma, so the register operands are written after the macro at the
 * use site.
 *
 * SHUFPD_0..SHUFPD_3 are defined twice: first with shufps, then with shufpd.
 * The shufps immediates 0x44, 0x4e, 0xe4 and 0xee move the same 64-bit halves
 * as shufpd $0, $1, $2 and $3, so both forms yield the same register bits.
 * NOTE(review): the conditional choosing between the two forms is not shown
 * beside these lines -- confirm which targets take the shufps form.
 */
2539 #define SHUFPD_0 shufps $0x44,
2540 #define SHUFPD_1 shufps $0x4e,
2541 #define SHUFPD_2 shufps $0xe4,
2542 #define SHUFPD_3 shufps $0xee,
/* shufpd forms of the same four shuffles. */
2546 #define SHUFPD_0 shufpd $0,
2550 #define SHUFPD_1 shufpd $1,
2554 #define SHUFPD_2 shufpd $2,
2558 #define SHUFPD_3 shufpd $3,
/* Single-precision shuffle with immediate 0x39. */
2562 #define SHUFPS_39 shufps $0x39,