1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/*
 * Default SBGEMM parameters: unroll factors (N = 4, M = 8, MN = 32) and
 * P/Q/R values (256 each).
 * NOTE(review): presumably the UNROLL values describe the kernel tile and
 * P/Q/R are blocking sizes -- confirm against the code that consumes them.
 */
75 #define SBGEMM_DEFAULT_UNROLL_N 4
76 #define SBGEMM_DEFAULT_UNROLL_M 8
77 #define SBGEMM_DEFAULT_UNROLL_MN 32
78 #define SBGEMM_DEFAULT_P 256
79 #define SBGEMM_DEFAULT_R 256
80 #define SBGEMM_DEFAULT_Q 256
/*
 * GEMM parameter set: buffer offsets, an alignment constant, and per-routine
 * UNROLL_M / UNROLL_N / P / Q / R defaults for the S, D, Q, C, Z and X
 * routine families.
 * NOTE(review): presumably S/D/Q are real and C/Z/X complex precisions, and
 * GEMM_DEFAULT_ALIGN is an address mask -- confirm against the users of
 * these macros.
 */
86 #define GEMM_DEFAULT_OFFSET_A 64
87 #define GEMM_DEFAULT_OFFSET_B 256
88 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
90 #define SGEMM_DEFAULT_UNROLL_N 4
91 #define DGEMM_DEFAULT_UNROLL_N 4
92 #define QGEMM_DEFAULT_UNROLL_N 2
93 #define CGEMM_DEFAULT_UNROLL_N 2
94 #define ZGEMM_DEFAULT_UNROLL_N 2
95 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M group. */
98 #define SGEMM_DEFAULT_UNROLL_M 4
99 #define DGEMM_DEFAULT_UNROLL_M 2
100 #define QGEMM_DEFAULT_UNROLL_M 2
101 #define CGEMM_DEFAULT_UNROLL_M 2
102 #define ZGEMM_DEFAULT_UNROLL_M 1
103 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group (S, D, C and Z values are doubled).
 * NOTE(review): the same macros are defined just above with different
 * values and no conditional directive separating the two groups is visible
 * in this excerpt -- confirm in the full file which guard selects each one.
 */
105 #define SGEMM_DEFAULT_UNROLL_M 8
106 #define DGEMM_DEFAULT_UNROLL_M 4
107 #define QGEMM_DEFAULT_UNROLL_M 2
108 #define CGEMM_DEFAULT_UNROLL_M 4
109 #define ZGEMM_DEFAULT_UNROLL_M 2
110 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * P and R expand to lower-case identifiers (sgemm_p, sgemm_r, ...) rather
 * than literals; those identifiers are not defined in this excerpt.
 */
113 #define SGEMM_DEFAULT_P sgemm_p
114 #define DGEMM_DEFAULT_P dgemm_p
115 #define QGEMM_DEFAULT_P qgemm_p
116 #define CGEMM_DEFAULT_P cgemm_p
117 #define ZGEMM_DEFAULT_P zgemm_p
118 #define XGEMM_DEFAULT_P xgemm_p
120 #define SGEMM_DEFAULT_R sgemm_r
121 #define DGEMM_DEFAULT_R dgemm_r
122 #define QGEMM_DEFAULT_R qgemm_r
123 #define CGEMM_DEFAULT_R cgemm_r
124 #define ZGEMM_DEFAULT_R zgemm_r
125 #define XGEMM_DEFAULT_R xgemm_r
/* First Q group: 248 for every routine family. */
129 #define SGEMM_DEFAULT_Q 248
130 #define DGEMM_DEFAULT_Q 248
131 #define QGEMM_DEFAULT_Q 248
132 #define CGEMM_DEFAULT_Q 248
133 #define ZGEMM_DEFAULT_Q 248
134 #define XGEMM_DEFAULT_Q 248
/*
 * Second Q group: 240 for every routine family.
 * NOTE(review): redefines the Q macros above; the selecting condition is
 * not visible in this excerpt.
 */
138 #define SGEMM_DEFAULT_Q 240
139 #define DGEMM_DEFAULT_Q 240
140 #define QGEMM_DEFAULT_Q 240
141 #define CGEMM_DEFAULT_Q 240
142 #define ZGEMM_DEFAULT_Q 240
143 #define XGEMM_DEFAULT_Q 240
/* Feature flag with an empty expansion; only its definedness can be tested. */
149 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM parameter set selected when BARCELONA, SHANGHAI or BOBCAT is defined.
 * NOTE(review): the matching #endif is not visible in this excerpt, so the
 * exact extent of this conditional must be confirmed in the full file.
 */
153 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
158 #define GEMM_DEFAULT_OFFSET_A 64
159 #define GEMM_DEFAULT_OFFSET_B 832
160 #define GEMM_DEFAULT_ALIGN 0x0fffUL
162 #define SGEMM_DEFAULT_UNROLL_N 4
163 #define DGEMM_DEFAULT_UNROLL_N 4
164 #define QGEMM_DEFAULT_UNROLL_N 2
165 #define CGEMM_DEFAULT_UNROLL_N 2
166 #define ZGEMM_DEFAULT_UNROLL_N 2
167 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M group. */
170 #define SGEMM_DEFAULT_UNROLL_M 4
171 #define DGEMM_DEFAULT_UNROLL_M 2
172 #define QGEMM_DEFAULT_UNROLL_M 2
173 #define CGEMM_DEFAULT_UNROLL_M 2
174 #define ZGEMM_DEFAULT_UNROLL_M 1
175 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group.
 * NOTE(review): redefines the macros above with different values; the
 * directive that chooses between the groups is not visible here.
 */
177 #define SGEMM_DEFAULT_UNROLL_M 8
178 #define DGEMM_DEFAULT_UNROLL_M 4
179 #define QGEMM_DEFAULT_UNROLL_M 2
180 #define CGEMM_DEFAULT_UNROLL_M 4
181 #define ZGEMM_DEFAULT_UNROLL_M 2
182 #define XGEMM_DEFAULT_UNROLL_M 1
/* First P/Q group: P from 496 down to 62, Q fixed at 248. */
186 #define SGEMM_DEFAULT_P 496
187 #define DGEMM_DEFAULT_P 248
188 #define QGEMM_DEFAULT_P 124
189 #define CGEMM_DEFAULT_P 248
190 #define ZGEMM_DEFAULT_P 124
191 #define XGEMM_DEFAULT_P 62
193 #define SGEMM_DEFAULT_Q 248
194 #define DGEMM_DEFAULT_Q 248
195 #define QGEMM_DEFAULT_Q 248
196 #define CGEMM_DEFAULT_Q 248
197 #define ZGEMM_DEFAULT_Q 248
198 #define XGEMM_DEFAULT_Q 248
/*
 * Second P/Q group: P from 448 down to 56, Q fixed at 224.
 * NOTE(review): redefines the P/Q macros above; the selecting condition is
 * not visible in this excerpt.
 */
202 #define SGEMM_DEFAULT_P 448
203 #define DGEMM_DEFAULT_P 224
204 #define QGEMM_DEFAULT_P 112
205 #define CGEMM_DEFAULT_P 224
206 #define ZGEMM_DEFAULT_P 112
207 #define XGEMM_DEFAULT_P 56
209 #define SGEMM_DEFAULT_Q 224
210 #define DGEMM_DEFAULT_Q 224
211 #define QGEMM_DEFAULT_Q 224
212 #define CGEMM_DEFAULT_Q 224
213 #define ZGEMM_DEFAULT_Q 224
214 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
218 #define SGEMM_DEFAULT_R sgemm_r
219 #define QGEMM_DEFAULT_R qgemm_r
220 #define DGEMM_DEFAULT_R dgemm_r
221 #define CGEMM_DEFAULT_R cgemm_r
222 #define ZGEMM_DEFAULT_R zgemm_r
223 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag with an empty expansion. */
226 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this excerpt. */
228 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM parameter set: offsets 64/832, alignment constant 0x0fff, per-routine
 * UNROLL and P/Q/R defaults, plus GEMM3M and GEMV settings.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
238 #define GEMM_DEFAULT_OFFSET_A 64
239 #define GEMM_DEFAULT_OFFSET_B 832
240 #define GEMM_DEFAULT_ALIGN 0x0fffUL
244 #define QGEMM_DEFAULT_UNROLL_N 2
245 #define CGEMM_DEFAULT_UNROLL_N 2
246 #define ZGEMM_DEFAULT_UNROLL_N 2
247 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll group: S/D UNROLL_N = 4 with the smaller UNROLL_M values. */
250 #define SGEMM_DEFAULT_UNROLL_N 4
251 #define DGEMM_DEFAULT_UNROLL_N 4
252 #define SGEMM_DEFAULT_UNROLL_M 4
253 #define DGEMM_DEFAULT_UNROLL_M 2
254 #define QGEMM_DEFAULT_UNROLL_M 2
255 #define CGEMM_DEFAULT_UNROLL_M 2
256 #define ZGEMM_DEFAULT_UNROLL_M 1
257 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second unroll group: S/D UNROLL_N = 2 with wider UNROLL_M (16 / 8), plus
 * the GEMM3M unroll factors.
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
259 #define SGEMM_DEFAULT_UNROLL_N 2
260 #define DGEMM_DEFAULT_UNROLL_N 2
261 #define SGEMM_DEFAULT_UNROLL_M 16
262 #define DGEMM_DEFAULT_UNROLL_M 8
263 #define QGEMM_DEFAULT_UNROLL_M 2
264 #define CGEMM_DEFAULT_UNROLL_M 4
265 #define ZGEMM_DEFAULT_UNROLL_M 2
266 #define XGEMM_DEFAULT_UNROLL_M 1
267 #define CGEMM3M_DEFAULT_UNROLL_N 4
268 #define CGEMM3M_DEFAULT_UNROLL_M 8
269 #define ZGEMM3M_DEFAULT_UNROLL_N 4
270 #define ZGEMM3M_DEFAULT_UNROLL_M 4
272 #define DGEMM_DEFAULT_UNROLL_MN 16
273 #define GEMV_UNROLL 8
/*
 * S/D P values when ARCH_X86_64 is defined, followed by a second pair.
 * NOTE(review): the second pair presumably belongs to an #else branch that
 * is not visible here; no #else/#endif for this #if appears in the excerpt.
 */
277 #if defined(ARCH_X86_64)
278 #define SGEMM_DEFAULT_P 768
279 #define DGEMM_DEFAULT_P 384
281 #define SGEMM_DEFAULT_P 448
282 #define DGEMM_DEFAULT_P 224
285 #define QGEMM_DEFAULT_P 112
286 #define CGEMM_DEFAULT_P 224
287 #define ZGEMM_DEFAULT_P 112
288 #define XGEMM_DEFAULT_P 56
/* S/D Q values: same two-alternative layout as the P values above. */
290 #if defined(ARCH_X86_64)
291 #define SGEMM_DEFAULT_Q 168
292 #define DGEMM_DEFAULT_Q 168
294 #define SGEMM_DEFAULT_Q 224
295 #define DGEMM_DEFAULT_Q 224
298 #define QGEMM_DEFAULT_Q 224
299 #define CGEMM_DEFAULT_Q 224
300 #define ZGEMM_DEFAULT_Q 224
301 #define XGEMM_DEFAULT_Q 224
/* GEMM3M P/Q/R values are literal constants. */
303 #define CGEMM3M_DEFAULT_P 448
304 #define ZGEMM3M_DEFAULT_P 224
305 #define XGEMM3M_DEFAULT_P 112
306 #define CGEMM3M_DEFAULT_Q 224
307 #define ZGEMM3M_DEFAULT_Q 224
308 #define XGEMM3M_DEFAULT_Q 224
309 #define CGEMM3M_DEFAULT_R 12288
310 #define ZGEMM3M_DEFAULT_R 12288
311 #define XGEMM3M_DEFAULT_R 12288
/* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
313 #define SGEMM_DEFAULT_R sgemm_r
314 #define QGEMM_DEFAULT_R qgemm_r
315 #define DGEMM_DEFAULT_R dgemm_r
316 #define CGEMM_DEFAULT_R cgemm_r
317 #define ZGEMM_DEFAULT_R zgemm_r
318 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag with an empty expansion. */
321 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this excerpt. */
323 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM parameter set: offsets 64/832, alignment constant 0x0fff, per-routine
 * UNROLL and P/Q/R defaults, plus GEMM3M and GEMV settings.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
331 #define GEMM_DEFAULT_OFFSET_A 64
332 #define GEMM_DEFAULT_OFFSET_B 832
333 #define GEMM_DEFAULT_ALIGN 0x0fffUL
337 #define QGEMM_DEFAULT_UNROLL_N 2
338 #define CGEMM_DEFAULT_UNROLL_N 2
339 #define ZGEMM_DEFAULT_UNROLL_N 2
340 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll group: S/D UNROLL_N = 4 with the smaller UNROLL_M values. */
343 #define SGEMM_DEFAULT_UNROLL_N 4
344 #define DGEMM_DEFAULT_UNROLL_N 4
345 #define SGEMM_DEFAULT_UNROLL_M 4
346 #define DGEMM_DEFAULT_UNROLL_M 2
347 #define QGEMM_DEFAULT_UNROLL_M 2
348 #define CGEMM_DEFAULT_UNROLL_M 2
349 #define ZGEMM_DEFAULT_UNROLL_M 1
350 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second unroll group: S/D UNROLL_N = 2 with wider UNROLL_M (16 / 8), plus
 * the GEMM3M unroll factors and GEMV_UNROLL.
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
352 #define SGEMM_DEFAULT_UNROLL_N 2
353 #define DGEMM_DEFAULT_UNROLL_N 2
354 #define SGEMM_DEFAULT_UNROLL_M 16
355 #define DGEMM_DEFAULT_UNROLL_M 8
356 #define QGEMM_DEFAULT_UNROLL_M 2
357 #define CGEMM_DEFAULT_UNROLL_M 4
358 #define ZGEMM_DEFAULT_UNROLL_M 2
359 #define XGEMM_DEFAULT_UNROLL_M 1
360 #define CGEMM3M_DEFAULT_UNROLL_N 4
361 #define CGEMM3M_DEFAULT_UNROLL_M 8
362 #define ZGEMM3M_DEFAULT_UNROLL_N 4
363 #define ZGEMM3M_DEFAULT_UNROLL_M 4
364 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P values when ARCH_X86_64 is defined, followed by a second set.
 * NOTE(review): the second set presumably belongs to an #else branch that
 * is not visible here; no #else/#endif for this #if appears in the excerpt.
 */
367 #if defined(ARCH_X86_64)
368 #define SGEMM_DEFAULT_P 768
369 #define DGEMM_DEFAULT_P 768
370 #define ZGEMM_DEFAULT_P 384
371 #define CGEMM_DEFAULT_P 768
373 #define SGEMM_DEFAULT_P 448
374 #define DGEMM_DEFAULT_P 480
375 #define ZGEMM_DEFAULT_P 112
376 #define CGEMM_DEFAULT_P 224
378 #define QGEMM_DEFAULT_P 112
379 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values: same two-alternative layout as the P values above. */
381 #if defined(ARCH_X86_64)
382 #define SGEMM_DEFAULT_Q 192
383 #define DGEMM_DEFAULT_Q 168
384 #define ZGEMM_DEFAULT_Q 168
385 #define CGEMM_DEFAULT_Q 168
387 #define SGEMM_DEFAULT_Q 224
388 #define DGEMM_DEFAULT_Q 224
389 #define ZGEMM_DEFAULT_Q 224
390 #define CGEMM_DEFAULT_Q 224
392 #define QGEMM_DEFAULT_Q 224
393 #define XGEMM_DEFAULT_Q 224
/* GEMM3M P/Q/R values are literal constants. */
395 #define CGEMM3M_DEFAULT_P 448
396 #define ZGEMM3M_DEFAULT_P 224
397 #define XGEMM3M_DEFAULT_P 112
398 #define CGEMM3M_DEFAULT_Q 224
399 #define ZGEMM3M_DEFAULT_Q 224
400 #define XGEMM3M_DEFAULT_Q 224
401 #define CGEMM3M_DEFAULT_R 12288
402 #define ZGEMM3M_DEFAULT_R 12288
403 #define XGEMM3M_DEFAULT_R 12288
/*
 * R: S and D use the literal 12288; Q, C, Z and X expand to identifiers
 * (qgemm_r, ...) that are not defined in this excerpt.
 */
405 #define SGEMM_DEFAULT_R 12288
406 #define QGEMM_DEFAULT_R qgemm_r
407 #define DGEMM_DEFAULT_R 12288
408 #define CGEMM_DEFAULT_R cgemm_r
409 #define ZGEMM_DEFAULT_R zgemm_r
410 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag with an empty expansion. */
413 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this excerpt. */
415 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM parameter set: offsets 64/832, alignment constant 0x0fff, per-routine
 * UNROLL and P/Q/R defaults, plus GEMM3M and GEMV settings.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
423 #define GEMM_DEFAULT_OFFSET_A 64
424 #define GEMM_DEFAULT_OFFSET_B 832
425 #define GEMM_DEFAULT_ALIGN 0x0fffUL
429 #define QGEMM_DEFAULT_UNROLL_N 2
430 #define CGEMM_DEFAULT_UNROLL_N 2
431 #define ZGEMM_DEFAULT_UNROLL_N 2
432 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll group: S/D UNROLL_N = 4 with the smaller UNROLL_M values. */
435 #define SGEMM_DEFAULT_UNROLL_N 4
436 #define DGEMM_DEFAULT_UNROLL_N 4
437 #define SGEMM_DEFAULT_UNROLL_M 4
438 #define DGEMM_DEFAULT_UNROLL_M 2
439 #define QGEMM_DEFAULT_UNROLL_M 2
440 #define CGEMM_DEFAULT_UNROLL_M 2
441 #define ZGEMM_DEFAULT_UNROLL_M 1
442 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second unroll group: S/D UNROLL_N = 2 with wider UNROLL_M (16 / 8), plus
 * the GEMM3M unroll factors and GEMV_UNROLL.
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
444 #define SGEMM_DEFAULT_UNROLL_N 2
445 #define DGEMM_DEFAULT_UNROLL_N 2
446 #define SGEMM_DEFAULT_UNROLL_M 16
447 #define DGEMM_DEFAULT_UNROLL_M 8
448 #define QGEMM_DEFAULT_UNROLL_M 2
449 #define CGEMM_DEFAULT_UNROLL_M 4
450 #define ZGEMM_DEFAULT_UNROLL_M 2
451 #define XGEMM_DEFAULT_UNROLL_M 1
452 #define CGEMM3M_DEFAULT_UNROLL_N 4
453 #define CGEMM3M_DEFAULT_UNROLL_M 8
454 #define ZGEMM3M_DEFAULT_UNROLL_N 4
455 #define ZGEMM3M_DEFAULT_UNROLL_M 4
456 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P values when ARCH_X86_64 is defined, followed by a second set.
 * NOTE(review): the second set presumably belongs to an #else branch that
 * is not visible here; no #else/#endif for this #if appears in the excerpt.
 */
459 #if defined(ARCH_X86_64)
460 #define SGEMM_DEFAULT_P 768
461 #define DGEMM_DEFAULT_P 576
462 #define ZGEMM_DEFAULT_P 288
463 #define CGEMM_DEFAULT_P 576
465 #define SGEMM_DEFAULT_P 448
466 #define DGEMM_DEFAULT_P 480
467 #define ZGEMM_DEFAULT_P 112
468 #define CGEMM_DEFAULT_P 224
470 #define QGEMM_DEFAULT_P 112
471 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values: same two-alternative layout as the P values above. */
473 #if defined(ARCH_X86_64)
474 #define SGEMM_DEFAULT_Q 192
475 #define DGEMM_DEFAULT_Q 160
476 #define ZGEMM_DEFAULT_Q 160
477 #define CGEMM_DEFAULT_Q 160
479 #define SGEMM_DEFAULT_Q 224
480 #define DGEMM_DEFAULT_Q 224
481 #define ZGEMM_DEFAULT_Q 224
482 #define CGEMM_DEFAULT_Q 224
484 #define QGEMM_DEFAULT_Q 224
485 #define XGEMM_DEFAULT_Q 224
/* GEMM3M P/Q/R values are literal constants. */
487 #define CGEMM3M_DEFAULT_P 448
488 #define ZGEMM3M_DEFAULT_P 224
489 #define XGEMM3M_DEFAULT_P 112
490 #define CGEMM3M_DEFAULT_Q 224
491 #define ZGEMM3M_DEFAULT_Q 224
492 #define XGEMM3M_DEFAULT_Q 224
493 #define CGEMM3M_DEFAULT_R 12288
494 #define ZGEMM3M_DEFAULT_R 12288
495 #define XGEMM3M_DEFAULT_R 12288
/*
 * R: S and D use the literal 12288; Q, C, Z and X expand to identifiers
 * (qgemm_r, ...) that are not defined in this excerpt.
 */
497 #define SGEMM_DEFAULT_R 12288
498 #define QGEMM_DEFAULT_R qgemm_r
499 #define DGEMM_DEFAULT_R 12288
500 #define CGEMM_DEFAULT_R cgemm_r
501 #define ZGEMM_DEFAULT_R zgemm_r
502 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag with an empty expansion. */
505 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this excerpt. */
507 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM parameter set: offsets 64/832, alignment constant 0x0fff, per-routine
 * UNROLL and P/Q/R defaults, plus GEMM3M and GEMV settings.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
516 #define GEMM_DEFAULT_OFFSET_A 64
517 #define GEMM_DEFAULT_OFFSET_B 832
518 #define GEMM_DEFAULT_ALIGN 0x0fffUL
522 #define QGEMM_DEFAULT_UNROLL_N 2
523 #define CGEMM_DEFAULT_UNROLL_N 2
524 #define ZGEMM_DEFAULT_UNROLL_N 2
525 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll group: S/D UNROLL_N = 4 with the smaller UNROLL_M values. */
528 #define SGEMM_DEFAULT_UNROLL_N 4
529 #define DGEMM_DEFAULT_UNROLL_N 4
530 #define SGEMM_DEFAULT_UNROLL_M 4
531 #define DGEMM_DEFAULT_UNROLL_M 2
532 #define QGEMM_DEFAULT_UNROLL_M 2
533 #define CGEMM_DEFAULT_UNROLL_M 2
534 #define ZGEMM_DEFAULT_UNROLL_M 1
535 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second unroll group: S/D UNROLL_N = 2 with wider UNROLL_M (16 / 8), plus
 * the GEMM3M unroll factors and GEMV_UNROLL.
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
537 #define SGEMM_DEFAULT_UNROLL_N 2
538 #define DGEMM_DEFAULT_UNROLL_N 2
539 #define SGEMM_DEFAULT_UNROLL_M 16
540 #define DGEMM_DEFAULT_UNROLL_M 8
541 #define QGEMM_DEFAULT_UNROLL_M 2
542 #define CGEMM_DEFAULT_UNROLL_M 4
543 #define ZGEMM_DEFAULT_UNROLL_M 2
544 #define XGEMM_DEFAULT_UNROLL_M 1
545 #define CGEMM3M_DEFAULT_UNROLL_N 4
546 #define CGEMM3M_DEFAULT_UNROLL_M 8
547 #define ZGEMM3M_DEFAULT_UNROLL_N 4
548 #define ZGEMM3M_DEFAULT_UNROLL_M 4
549 #define GEMV_UNROLL 8
/*
 * S/D/Z/C P values when ARCH_X86_64 is defined, followed by a second set.
 * NOTE(review): the second set presumably belongs to an #else branch that
 * is not visible here; no #else/#endif for this #if appears in the excerpt.
 */
552 #if defined(ARCH_X86_64)
553 #define SGEMM_DEFAULT_P 768
554 #define DGEMM_DEFAULT_P 576
555 #define ZGEMM_DEFAULT_P 288
556 #define CGEMM_DEFAULT_P 576
558 #define SGEMM_DEFAULT_P 448
559 #define DGEMM_DEFAULT_P 480
560 #define ZGEMM_DEFAULT_P 112
561 #define CGEMM_DEFAULT_P 224
563 #define QGEMM_DEFAULT_P 112
564 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values: same two-alternative layout as the P values above. */
566 #if defined(ARCH_X86_64)
567 #define SGEMM_DEFAULT_Q 192
568 #define DGEMM_DEFAULT_Q 160
569 #define ZGEMM_DEFAULT_Q 160
570 #define CGEMM_DEFAULT_Q 160
572 #define SGEMM_DEFAULT_Q 224
573 #define DGEMM_DEFAULT_Q 224
574 #define ZGEMM_DEFAULT_Q 224
575 #define CGEMM_DEFAULT_Q 224
577 #define QGEMM_DEFAULT_Q 224
578 #define XGEMM_DEFAULT_Q 224
/* GEMM3M P/Q/R values are literal constants. */
580 #define CGEMM3M_DEFAULT_P 448
581 #define ZGEMM3M_DEFAULT_P 224
582 #define XGEMM3M_DEFAULT_P 112
583 #define CGEMM3M_DEFAULT_Q 224
584 #define ZGEMM3M_DEFAULT_Q 224
585 #define XGEMM3M_DEFAULT_Q 224
586 #define CGEMM3M_DEFAULT_R 12288
587 #define ZGEMM3M_DEFAULT_R 12288
588 #define XGEMM3M_DEFAULT_R 12288
/*
 * R: S and D use the literal 12288; Q, C, Z and X expand to identifiers
 * (qgemm_r, ...) that are not defined in this excerpt.
 */
590 #define SGEMM_DEFAULT_R 12288
591 #define QGEMM_DEFAULT_R qgemm_r
592 #define DGEMM_DEFAULT_R 12288
593 #define CGEMM_DEFAULT_R cgemm_r
594 #define ZGEMM_DEFAULT_R zgemm_r
595 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag with an empty expansion. */
598 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this excerpt. */
600 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM parameter set: zero offsets, alignment constant 0x03fff,
 * SWITCH_RATIO 16, and two alternative groups of UNROLL and P/Q/R values,
 * followed by GEMM3M settings.
 * NOTE(review): neither the guard selecting this set nor the directives
 * choosing between the alternative groups are visible in this excerpt.
 */
608 #define GEMM_DEFAULT_OFFSET_A 0
609 #define GEMM_DEFAULT_OFFSET_B 0
610 #define GEMM_DEFAULT_ALIGN 0x03fffUL
614 #define SWITCH_RATIO 16
/* First unroll group. */
618 #define SGEMM_DEFAULT_UNROLL_M 4
619 #define DGEMM_DEFAULT_UNROLL_M 2
620 #define QGEMM_DEFAULT_UNROLL_M 2
621 #define CGEMM_DEFAULT_UNROLL_M 2
622 #define ZGEMM_DEFAULT_UNROLL_M 1
623 #define XGEMM_DEFAULT_UNROLL_M 1
625 #define SGEMM_DEFAULT_UNROLL_N 4
626 #define DGEMM_DEFAULT_UNROLL_N 4
627 #define QGEMM_DEFAULT_UNROLL_N 2
628 #define CGEMM_DEFAULT_UNROLL_N 2
629 #define ZGEMM_DEFAULT_UNROLL_N 2
630 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group (larger M values, DGEMM UNROLL_N = 8) plus UNROLL_MN.
 * NOTE(review): redefines the macros of the first group.
 */
634 #define SGEMM_DEFAULT_UNROLL_M 8
635 #define DGEMM_DEFAULT_UNROLL_M 4
636 #define QGEMM_DEFAULT_UNROLL_M 2
637 #define CGEMM_DEFAULT_UNROLL_M 8
638 #define ZGEMM_DEFAULT_UNROLL_M 4
639 #define XGEMM_DEFAULT_UNROLL_M 1
641 #define SGEMM_DEFAULT_UNROLL_N 4
642 #define DGEMM_DEFAULT_UNROLL_N 8
643 #define QGEMM_DEFAULT_UNROLL_N 2
644 #define CGEMM_DEFAULT_UNROLL_N 2
645 #define ZGEMM_DEFAULT_UNROLL_N 2
646 #define XGEMM_DEFAULT_UNROLL_N 1
648 #define SGEMM_DEFAULT_UNROLL_MN 32
649 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * First P/Q/R group. R is an identifier (sgemm_r, ...) for every family
 * except CGEMM, which uses the literal 1024.
 */
655 #define SGEMM_DEFAULT_P 512
656 #define SGEMM_DEFAULT_R sgemm_r
657 #define DGEMM_DEFAULT_P 512
658 #define DGEMM_DEFAULT_R dgemm_r
659 #define QGEMM_DEFAULT_P 504
660 #define QGEMM_DEFAULT_R qgemm_r
661 #define CGEMM_DEFAULT_P 128
662 #define CGEMM_DEFAULT_R 1024
663 #define ZGEMM_DEFAULT_P 512
664 #define ZGEMM_DEFAULT_R zgemm_r
665 #define XGEMM_DEFAULT_P 252
666 #define XGEMM_DEFAULT_R xgemm_r
667 #define SGEMM_DEFAULT_Q 256
668 #define DGEMM_DEFAULT_Q 256
669 #define QGEMM_DEFAULT_Q 128
670 #define CGEMM_DEFAULT_Q 256
671 #define ZGEMM_DEFAULT_Q 192
672 #define XGEMM_DEFAULT_Q 128
/*
 * Second P/Q/R group.
 * NOTE(review): redefines the P/Q/R macros above. SGEMM_DEFAULT_Q and
 * DGEMM_DEFAULT_Q themselves appear twice inside this group (320/128, then
 * 320/256), which suggests a nested conditional that is not visible here.
 */
676 #define SGEMM_DEFAULT_P 320
677 #define DGEMM_DEFAULT_P 512
678 #define CGEMM_DEFAULT_P 256
679 #define ZGEMM_DEFAULT_P 192
682 #define SGEMM_DEFAULT_Q 320
683 #define DGEMM_DEFAULT_Q 128
685 #define SGEMM_DEFAULT_Q 320
686 #define DGEMM_DEFAULT_Q 256
688 #define CGEMM_DEFAULT_Q 256
689 #define ZGEMM_DEFAULT_Q 192
/* In this group DGEMM_DEFAULT_R is the literal 13824; S, C and Z use identifiers. */
691 #define SGEMM_DEFAULT_R sgemm_r
692 #define DGEMM_DEFAULT_R 13824
693 #define CGEMM_DEFAULT_R cgemm_r
694 #define ZGEMM_DEFAULT_R zgemm_r
696 #define QGEMM_DEFAULT_Q 128
697 #define QGEMM_DEFAULT_P 504
698 #define QGEMM_DEFAULT_R qgemm_r
699 #define XGEMM_DEFAULT_P 252
700 #define XGEMM_DEFAULT_R xgemm_r
701 #define XGEMM_DEFAULT_Q 128
/* GEMM3M unroll factors and P/Q/R values (all literal constants). */
703 #define CGEMM3M_DEFAULT_UNROLL_N 4
704 #define CGEMM3M_DEFAULT_UNROLL_M 8
705 #define ZGEMM3M_DEFAULT_UNROLL_N 4
706 #define ZGEMM3M_DEFAULT_UNROLL_M 4
708 #define CGEMM3M_DEFAULT_P 320
709 #define ZGEMM3M_DEFAULT_P 256
710 #define XGEMM3M_DEFAULT_P 112
711 #define CGEMM3M_DEFAULT_Q 320
712 #define ZGEMM3M_DEFAULT_Q 256
713 #define XGEMM3M_DEFAULT_Q 224
714 #define CGEMM3M_DEFAULT_R 12288
715 #define ZGEMM3M_DEFAULT_R 12288
716 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM parameter set: offsets 0/384, alignment constant 0x0ffff, one group
 * each of UNROLL_N, UNROLL_M, R, P and Q values.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
727 #define GEMM_DEFAULT_OFFSET_A 0
728 #define GEMM_DEFAULT_OFFSET_B 384
729 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
731 #define SGEMM_DEFAULT_UNROLL_N 4
732 #define DGEMM_DEFAULT_UNROLL_N 4
733 #define QGEMM_DEFAULT_UNROLL_N 2
734 #define CGEMM_DEFAULT_UNROLL_N 2
735 #define ZGEMM_DEFAULT_UNROLL_N 2
736 #define XGEMM_DEFAULT_UNROLL_N 1
738 #define SGEMM_DEFAULT_UNROLL_M 2
739 #define DGEMM_DEFAULT_UNROLL_M 1
740 #define QGEMM_DEFAULT_UNROLL_M 2
741 #define CGEMM_DEFAULT_UNROLL_M 1
742 #define ZGEMM_DEFAULT_UNROLL_M 1
743 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
745 #define SGEMM_DEFAULT_R sgemm_r
746 #define DGEMM_DEFAULT_R dgemm_r
747 #define QGEMM_DEFAULT_R qgemm_r
748 #define CGEMM_DEFAULT_R cgemm_r
749 #define ZGEMM_DEFAULT_R zgemm_r
750 #define XGEMM_DEFAULT_R xgemm_r
/* P is a literal per routine family (208 down to 28); Q is 208 for all. */
752 #define SGEMM_DEFAULT_P 208
753 #define DGEMM_DEFAULT_P 104
754 #define QGEMM_DEFAULT_P 56
755 #define CGEMM_DEFAULT_P 104
756 #define ZGEMM_DEFAULT_P 56
757 #define XGEMM_DEFAULT_P 28
759 #define SGEMM_DEFAULT_Q 208
760 #define DGEMM_DEFAULT_Q 208
761 #define QGEMM_DEFAULT_Q 208
762 #define CGEMM_DEFAULT_Q 208
763 #define ZGEMM_DEFAULT_Q 208
764 #define XGEMM_DEFAULT_Q 208
/* Feature flag with an empty expansion. */
767 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM parameter set: offsets 0/256, alignment constant 0x0ffff, one group
 * each of UNROLL_N, UNROLL_M, R, P and Q values.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
775 #define GEMM_DEFAULT_OFFSET_A 0
776 #define GEMM_DEFAULT_OFFSET_B 256
777 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
779 #define SGEMM_DEFAULT_UNROLL_N 4
780 #define DGEMM_DEFAULT_UNROLL_N 4
781 #define QGEMM_DEFAULT_UNROLL_N 2
782 #define CGEMM_DEFAULT_UNROLL_N 2
783 #define ZGEMM_DEFAULT_UNROLL_N 2
784 #define XGEMM_DEFAULT_UNROLL_N 1
786 #define SGEMM_DEFAULT_UNROLL_M 2
787 #define DGEMM_DEFAULT_UNROLL_M 1
788 #define QGEMM_DEFAULT_UNROLL_M 2
789 #define CGEMM_DEFAULT_UNROLL_M 1
790 #define ZGEMM_DEFAULT_UNROLL_M 1
791 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
793 #define SGEMM_DEFAULT_R sgemm_r
794 #define DGEMM_DEFAULT_R dgemm_r
795 #define QGEMM_DEFAULT_R qgemm_r
796 #define CGEMM_DEFAULT_R cgemm_r
797 #define ZGEMM_DEFAULT_R zgemm_r
798 #define XGEMM_DEFAULT_R xgemm_r
/* P is 128 for every routine family; Q ranges from 512 (S) down to 128 (Z, X). */
800 #define SGEMM_DEFAULT_P 128
801 #define DGEMM_DEFAULT_P 128
802 #define QGEMM_DEFAULT_P 128
803 #define CGEMM_DEFAULT_P 128
804 #define ZGEMM_DEFAULT_P 128
805 #define XGEMM_DEFAULT_P 128
807 #define SGEMM_DEFAULT_Q 512
808 #define DGEMM_DEFAULT_Q 256
809 #define QGEMM_DEFAULT_Q 256
810 #define CGEMM_DEFAULT_Q 256
811 #define ZGEMM_DEFAULT_Q 128
812 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM parameter set: offsets 64/256, alignment constant 0x01ffff, two
 * alternative UNROLL groups, then P, R and Q values.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
822 #define GEMM_DEFAULT_OFFSET_A 64
823 #define GEMM_DEFAULT_OFFSET_B 256
824 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* First unroll group. */
827 #define SGEMM_DEFAULT_UNROLL_N 4
828 #define DGEMM_DEFAULT_UNROLL_N 4
829 #define QGEMM_DEFAULT_UNROLL_N 2
830 #define CGEMM_DEFAULT_UNROLL_N 2
831 #define ZGEMM_DEFAULT_UNROLL_N 2
832 #define XGEMM_DEFAULT_UNROLL_N 1
834 #define SGEMM_DEFAULT_UNROLL_M 4
835 #define DGEMM_DEFAULT_UNROLL_M 2
836 #define QGEMM_DEFAULT_UNROLL_M 2
837 #define CGEMM_DEFAULT_UNROLL_M 2
838 #define ZGEMM_DEFAULT_UNROLL_M 1
839 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second unroll group (SGEMM N = 8, CGEMM N = 4, D/Z M doubled).
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
841 #define SGEMM_DEFAULT_UNROLL_N 8
842 #define DGEMM_DEFAULT_UNROLL_N 4
843 #define QGEMM_DEFAULT_UNROLL_N 2
844 #define CGEMM_DEFAULT_UNROLL_N 4
845 #define ZGEMM_DEFAULT_UNROLL_N 2
846 #define XGEMM_DEFAULT_UNROLL_N 1
848 #define SGEMM_DEFAULT_UNROLL_M 4
849 #define DGEMM_DEFAULT_UNROLL_M 4
850 #define QGEMM_DEFAULT_UNROLL_M 2
851 #define CGEMM_DEFAULT_UNROLL_M 2
852 #define ZGEMM_DEFAULT_UNROLL_M 2
853 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every routine family. */
856 #define SGEMM_DEFAULT_P 288
857 #define DGEMM_DEFAULT_P 288
858 #define QGEMM_DEFAULT_P 288
859 #define CGEMM_DEFAULT_P 288
860 #define ZGEMM_DEFAULT_P 288
861 #define XGEMM_DEFAULT_P 288
/* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
863 #define SGEMM_DEFAULT_R sgemm_r
864 #define DGEMM_DEFAULT_R dgemm_r
865 #define QGEMM_DEFAULT_R qgemm_r
866 #define CGEMM_DEFAULT_R cgemm_r
867 #define ZGEMM_DEFAULT_R zgemm_r
868 #define XGEMM_DEFAULT_R xgemm_r
/* Q ranges from 256 (S) down to 32 (X). */
870 #define SGEMM_DEFAULT_Q 256
871 #define DGEMM_DEFAULT_Q 128
872 #define QGEMM_DEFAULT_Q 64
873 #define CGEMM_DEFAULT_Q 128
874 #define ZGEMM_DEFAULT_Q 64
875 #define XGEMM_DEFAULT_Q 32
/* Feature flag with an empty expansion. */
878 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM parameter set selected when PENTIUM, PENTIUM2 or PENTIUM3 is defined.
 * NOTE(review): the matching #endif is not visible in this excerpt, so the
 * exact extent of this conditional must be confirmed in the full file.
 */
882 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
891 #define GEMM_DEFAULT_OFFSET_A 0
892 #define GEMM_DEFAULT_OFFSET_B 0
893 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * SGEMM/CGEMM UNROLL_M are given twice (8/4, then 4/2).
 * NOTE(review): presumably the two pairs sit in different branches of a
 * conditional that is not visible here -- confirm in the full file.
 */
896 #define SGEMM_DEFAULT_UNROLL_M 8
897 #define CGEMM_DEFAULT_UNROLL_M 4
899 #define SGEMM_DEFAULT_UNROLL_M 4
900 #define CGEMM_DEFAULT_UNROLL_M 2
902 #define DGEMM_DEFAULT_UNROLL_M 2
903 #define SGEMM_DEFAULT_UNROLL_N 2
904 #define DGEMM_DEFAULT_UNROLL_N 2
905 #define QGEMM_DEFAULT_UNROLL_M 2
906 #define QGEMM_DEFAULT_UNROLL_N 2
907 #define CGEMM_DEFAULT_UNROLL_N 1
908 #define ZGEMM_DEFAULT_UNROLL_M 1
909 #define ZGEMM_DEFAULT_UNROLL_N 1
910 #define XGEMM_DEFAULT_UNROLL_M 1
911 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Per routine family: P and R expand to identifiers (sgemm_p, sgemm_r, ...)
 * that are not defined in this excerpt; Q is the literal 256.
 */
913 #define SGEMM_DEFAULT_P sgemm_p
914 #define SGEMM_DEFAULT_Q 256
915 #define SGEMM_DEFAULT_R sgemm_r
917 #define DGEMM_DEFAULT_P dgemm_p
918 #define DGEMM_DEFAULT_Q 256
919 #define DGEMM_DEFAULT_R dgemm_r
921 #define QGEMM_DEFAULT_P qgemm_p
922 #define QGEMM_DEFAULT_Q 256
923 #define QGEMM_DEFAULT_R qgemm_r
925 #define CGEMM_DEFAULT_P cgemm_p
926 #define CGEMM_DEFAULT_Q 256
927 #define CGEMM_DEFAULT_R cgemm_r
929 #define ZGEMM_DEFAULT_P zgemm_p
930 #define ZGEMM_DEFAULT_Q 256
931 #define ZGEMM_DEFAULT_R zgemm_r
933 #define XGEMM_DEFAULT_P xgemm_p
934 #define XGEMM_DEFAULT_Q 256
935 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM parameter set: zero offsets, alignment constant 0x0ffff, two
 * alternative UNROLL groups, then P/Q/R per routine family.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
946 #define GEMM_DEFAULT_OFFSET_A 0
947 #define GEMM_DEFAULT_OFFSET_B 0
948 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First unroll group. */
951 #define SGEMM_DEFAULT_UNROLL_M 4
952 #define SGEMM_DEFAULT_UNROLL_N 4
953 #define DGEMM_DEFAULT_UNROLL_M 2
954 #define DGEMM_DEFAULT_UNROLL_N 4
955 #define QGEMM_DEFAULT_UNROLL_M 2
956 #define QGEMM_DEFAULT_UNROLL_N 2
957 #define CGEMM_DEFAULT_UNROLL_M 2
958 #define CGEMM_DEFAULT_UNROLL_N 2
959 #define ZGEMM_DEFAULT_UNROLL_M 1
960 #define ZGEMM_DEFAULT_UNROLL_N 2
961 #define XGEMM_DEFAULT_UNROLL_M 1
962 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group.
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
964 #define SGEMM_DEFAULT_UNROLL_M 8
965 #define SGEMM_DEFAULT_UNROLL_N 2
966 #define DGEMM_DEFAULT_UNROLL_M 2
967 #define DGEMM_DEFAULT_UNROLL_N 2
968 #define QGEMM_DEFAULT_UNROLL_M 2
969 #define QGEMM_DEFAULT_UNROLL_N 2
970 #define CGEMM_DEFAULT_UNROLL_M 4
971 #define CGEMM_DEFAULT_UNROLL_N 1
972 #define ZGEMM_DEFAULT_UNROLL_M 1
973 #define ZGEMM_DEFAULT_UNROLL_N 1
974 #define XGEMM_DEFAULT_UNROLL_M 1
975 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Per routine family: P and R expand to identifiers (sgemm_p, sgemm_r, ...)
 * that are not defined in this excerpt; Q is the literal 256.
 */
979 #define SGEMM_DEFAULT_P sgemm_p
980 #define SGEMM_DEFAULT_Q 256
981 #define SGEMM_DEFAULT_R sgemm_r
983 #define DGEMM_DEFAULT_P dgemm_p
984 #define DGEMM_DEFAULT_Q 256
985 #define DGEMM_DEFAULT_R dgemm_r
987 #define QGEMM_DEFAULT_P qgemm_p
988 #define QGEMM_DEFAULT_Q 256
989 #define QGEMM_DEFAULT_R qgemm_r
991 #define CGEMM_DEFAULT_P cgemm_p
992 #define CGEMM_DEFAULT_Q 256
993 #define CGEMM_DEFAULT_R cgemm_r
995 #define ZGEMM_DEFAULT_P zgemm_p
996 #define ZGEMM_DEFAULT_Q 256
997 #define ZGEMM_DEFAULT_R zgemm_r
999 #define XGEMM_DEFAULT_P xgemm_p
1000 #define XGEMM_DEFAULT_Q 256
1001 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM parameter set selected when CORE_NORTHWOOD is defined.
 * NOTE(review): the matching #endif is not visible in this excerpt.
 */
1006 #ifdef CORE_NORTHWOOD
1011 #define GEMM_DEFAULT_OFFSET_A 0
1012 #define GEMM_DEFAULT_OFFSET_B 32
1014 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1018 #define SGEMM_DEFAULT_UNROLL_M 8
1019 #define DGEMM_DEFAULT_UNROLL_M 4
1020 #define QGEMM_DEFAULT_UNROLL_M 2
1021 #define CGEMM_DEFAULT_UNROLL_M 4
1022 #define ZGEMM_DEFAULT_UNROLL_M 2
1023 #define XGEMM_DEFAULT_UNROLL_M 1
1025 #define SGEMM_DEFAULT_UNROLL_N 2
1026 #define DGEMM_DEFAULT_UNROLL_N 2
1027 #define QGEMM_DEFAULT_UNROLL_N 2
1028 #define CGEMM_DEFAULT_UNROLL_N 1
1029 #define ZGEMM_DEFAULT_UNROLL_N 1
1030 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
 * defined in this excerpt.
 */
1032 #define SGEMM_DEFAULT_P sgemm_p
1033 #define SGEMM_DEFAULT_R sgemm_r
1035 #define DGEMM_DEFAULT_P dgemm_p
1036 #define DGEMM_DEFAULT_R dgemm_r
1038 #define QGEMM_DEFAULT_P qgemm_p
1039 #define QGEMM_DEFAULT_R qgemm_r
1041 #define CGEMM_DEFAULT_P cgemm_p
1042 #define CGEMM_DEFAULT_R cgemm_r
1044 #define ZGEMM_DEFAULT_P zgemm_p
1045 #define ZGEMM_DEFAULT_R zgemm_r
1047 #define XGEMM_DEFAULT_P xgemm_p
1048 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for every routine family. */
1050 #define SGEMM_DEFAULT_Q 128
1051 #define DGEMM_DEFAULT_Q 128
1052 #define QGEMM_DEFAULT_Q 128
1053 #define CGEMM_DEFAULT_Q 128
1054 #define ZGEMM_DEFAULT_Q 128
1055 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM parameter set selected when CORE_PRESCOTT is defined.
 * NOTE(review): the matching #endif is not visible in this excerpt.
 */
1058 #ifdef CORE_PRESCOTT
/*
 * Two alternative offset pairs (128/192, then 0/256).
 * NOTE(review): the directive choosing between them is not visible here.
 */
1064 #define GEMM_DEFAULT_OFFSET_A 128
1065 #define GEMM_DEFAULT_OFFSET_B 192
1067 #define GEMM_DEFAULT_OFFSET_A 0
1068 #define GEMM_DEFAULT_OFFSET_B 256
1071 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First UNROLL_M group. */
1076 #define SGEMM_DEFAULT_UNROLL_M 4
1077 #define DGEMM_DEFAULT_UNROLL_M 2
1078 #define QGEMM_DEFAULT_UNROLL_M 2
1079 #define CGEMM_DEFAULT_UNROLL_M 2
1080 #define ZGEMM_DEFAULT_UNROLL_M 1
1081 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group.
 * NOTE(review): redefines the macros above with different values; the
 * selecting condition is not visible in this excerpt.
 */
1083 #define SGEMM_DEFAULT_UNROLL_M 8
1084 #define DGEMM_DEFAULT_UNROLL_M 4
1085 #define QGEMM_DEFAULT_UNROLL_M 2
1086 #define CGEMM_DEFAULT_UNROLL_M 4
1087 #define ZGEMM_DEFAULT_UNROLL_M 2
1088 #define XGEMM_DEFAULT_UNROLL_M 1
1091 #define SGEMM_DEFAULT_UNROLL_N 4
1092 #define DGEMM_DEFAULT_UNROLL_N 4
1093 #define QGEMM_DEFAULT_UNROLL_N 2
1094 #define CGEMM_DEFAULT_UNROLL_N 2
1095 #define ZGEMM_DEFAULT_UNROLL_N 2
1096 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
 * defined in this excerpt.
 */
1098 #define SGEMM_DEFAULT_P sgemm_p
1099 #define SGEMM_DEFAULT_R sgemm_r
1101 #define DGEMM_DEFAULT_P dgemm_p
1102 #define DGEMM_DEFAULT_R dgemm_r
1104 #define QGEMM_DEFAULT_P qgemm_p
1105 #define QGEMM_DEFAULT_R qgemm_r
1107 #define CGEMM_DEFAULT_P cgemm_p
1108 #define CGEMM_DEFAULT_R cgemm_r
1110 #define ZGEMM_DEFAULT_P zgemm_p
1111 #define ZGEMM_DEFAULT_R zgemm_r
1113 #define XGEMM_DEFAULT_P xgemm_p
1114 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for every routine family. */
1116 #define SGEMM_DEFAULT_Q 128
1117 #define DGEMM_DEFAULT_Q 128
1118 #define QGEMM_DEFAULT_Q 128
1119 #define CGEMM_DEFAULT_Q 128
1120 #define ZGEMM_DEFAULT_Q 128
1121 #define XGEMM_DEFAULT_Q 128
/*
 * Start of a GEMM parameter set: offsets 448/128, alignment constant
 * 0x03fff, SWITCH_RATIO 4, and a first group of UNROLL_M / UNROLL_N values.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt, and the same UNROLL macros are defined again further down with
 * different N values -- confirm which condition selects each group.
 */
1129 #define GEMM_DEFAULT_OFFSET_A 448
1130 #define GEMM_DEFAULT_OFFSET_B 128
1131 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1135 #define SWITCH_RATIO 4
1138 #define SGEMM_DEFAULT_UNROLL_M 8
1139 #define DGEMM_DEFAULT_UNROLL_M 4
1140 #define QGEMM_DEFAULT_UNROLL_M 2
1141 #define CGEMM_DEFAULT_UNROLL_M 4
1142 #define ZGEMM_DEFAULT_UNROLL_M 2
1143 #define XGEMM_DEFAULT_UNROLL_M 1
1145 #define SGEMM_DEFAULT_UNROLL_N 2
1146 #define DGEMM_DEFAULT_UNROLL_N 2
1147 #define QGEMM_DEFAULT_UNROLL_N 2
1148 #define CGEMM_DEFAULT_UNROLL_N 1
1149 #define ZGEMM_DEFAULT_UNROLL_N 1
1150 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * MASK(a, b): adds (b - 1) to a, divides by b, then multiplies by b.
 * With integer operands the division truncates, so for non-negative a and
 * positive b the result is a rounded up to the next multiple of b
 * (assumes integer arguments -- TODO confirm at the call sites).
 * Despite the name, no bit masking is performed.
 * Every argument use is parenthesized, but b is expanded three times, so
 * arguments with side effects must be avoided.
 */
1152 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/*
 * UNROLL_M / UNROLL_N group followed by P/R/Q values.
 * NOTE(review): these UNROLL macros are also defined a few lines earlier in
 * the file (same M values, smaller N values); the directive that chooses
 * between the two groups is not visible in this excerpt.
 */
1155 #define SGEMM_DEFAULT_UNROLL_M 8
1156 #define DGEMM_DEFAULT_UNROLL_M 4
1157 #define QGEMM_DEFAULT_UNROLL_M 2
1158 #define CGEMM_DEFAULT_UNROLL_M 4
1159 #define ZGEMM_DEFAULT_UNROLL_M 2
1160 #define XGEMM_DEFAULT_UNROLL_M 1
1162 #define SGEMM_DEFAULT_UNROLL_N 4
1163 #define DGEMM_DEFAULT_UNROLL_N 4
1164 #define QGEMM_DEFAULT_UNROLL_N 2
1165 #define CGEMM_DEFAULT_UNROLL_N 2
1166 #define ZGEMM_DEFAULT_UNROLL_N 2
1167 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
 * defined in this excerpt.
 */
1170 #define SGEMM_DEFAULT_P sgemm_p
1171 #define SGEMM_DEFAULT_R sgemm_r
1173 #define DGEMM_DEFAULT_P dgemm_p
1174 #define DGEMM_DEFAULT_R dgemm_r
1176 #define QGEMM_DEFAULT_P qgemm_p
1177 #define QGEMM_DEFAULT_R qgemm_r
1179 #define CGEMM_DEFAULT_P cgemm_p
1180 #define CGEMM_DEFAULT_R cgemm_r
1182 #define ZGEMM_DEFAULT_P zgemm_p
1183 #define ZGEMM_DEFAULT_R zgemm_r
1185 #define XGEMM_DEFAULT_P xgemm_p
1186 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 256 for every routine family. */
1188 #define SGEMM_DEFAULT_Q 256
1189 #define DGEMM_DEFAULT_Q 256
1190 #define QGEMM_DEFAULT_Q 256
1191 #define CGEMM_DEFAULT_Q 256
1192 #define ZGEMM_DEFAULT_Q 256
1193 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM parameter set: offsets 128/0, alignment constant 0x03fff,
 * SWITCH_RATIO 4, two alternative UNROLL groups, P/R/Q values and
 * GETRF_FACTOR.
 * NOTE(review): the guard that selects this set is not visible in this
 * excerpt.
 */
1202 #define GEMM_DEFAULT_OFFSET_A 128
1203 #define GEMM_DEFAULT_OFFSET_B 0
1204 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1208 #define SWITCH_RATIO 4
/* First unroll group. */
1211 #define SGEMM_DEFAULT_UNROLL_M 4
1212 #define DGEMM_DEFAULT_UNROLL_M 2
1213 #define QGEMM_DEFAULT_UNROLL_M 2
1214 #define CGEMM_DEFAULT_UNROLL_M 2
1215 #define ZGEMM_DEFAULT_UNROLL_M 1
1216 #define XGEMM_DEFAULT_UNROLL_M 1
1218 #define SGEMM_DEFAULT_UNROLL_N 4
1219 #define DGEMM_DEFAULT_UNROLL_N 4
1220 #define QGEMM_DEFAULT_UNROLL_N 2
1221 #define CGEMM_DEFAULT_UNROLL_N 2
1222 #define ZGEMM_DEFAULT_UNROLL_N 2
1223 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll group (larger M values, identical N values).
 * NOTE(review): redefines the macros of the first group; the directive
 * choosing between the two is not visible in this excerpt.
 */
1225 #define SGEMM_DEFAULT_UNROLL_M 8
1226 #define DGEMM_DEFAULT_UNROLL_M 4
1227 #define QGEMM_DEFAULT_UNROLL_M 2
1228 #define CGEMM_DEFAULT_UNROLL_M 4
1229 #define ZGEMM_DEFAULT_UNROLL_M 2
1230 #define XGEMM_DEFAULT_UNROLL_M 1
1232 #define SGEMM_DEFAULT_UNROLL_N 4
1233 #define DGEMM_DEFAULT_UNROLL_N 4
1234 #define QGEMM_DEFAULT_UNROLL_N 2
1235 #define CGEMM_DEFAULT_UNROLL_N 2
1236 #define ZGEMM_DEFAULT_UNROLL_N 2
1237 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
 * defined in this excerpt.
 */
1240 #define SGEMM_DEFAULT_P sgemm_p
1241 #define SGEMM_DEFAULT_R sgemm_r
1243 #define DGEMM_DEFAULT_P dgemm_p
1244 #define DGEMM_DEFAULT_R dgemm_r
1246 #define QGEMM_DEFAULT_P qgemm_p
1247 #define QGEMM_DEFAULT_R qgemm_r
1249 #define CGEMM_DEFAULT_P cgemm_p
1250 #define CGEMM_DEFAULT_R cgemm_r
1252 #define ZGEMM_DEFAULT_P zgemm_p
1253 #define ZGEMM_DEFAULT_R zgemm_r
1255 #define XGEMM_DEFAULT_P xgemm_p
1256 #define XGEMM_DEFAULT_R xgemm_r
/* Q literals: 512 / 256 / 128 for S / D / Q and again for C / Z / X. */
1258 #define SGEMM_DEFAULT_Q 512
1259 #define DGEMM_DEFAULT_Q 256
1260 #define QGEMM_DEFAULT_Q 128
1261 #define CGEMM_DEFAULT_Q 512
1262 #define ZGEMM_DEFAULT_Q 256
1263 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; NOTE(review): its consumer is not visible in this excerpt. */
1265 #define GETRF_FACTOR 0.75
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, SWITCH_RATIO, per-precision unroll factors, P/Q/R defaults,
 * GETRF_FACTOR and the GEMM_THREAD selection.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1273 #define GEMM_DEFAULT_OFFSET_A 128
1274 #define GEMM_DEFAULT_OFFSET_B 0
1275 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1279 #define SWITCH_RATIO 4
/*
 * NOTE(review): UNROLL_M and UNROLL_N are each defined twice below, with
 * different UNROLL_M values in the second set. The directives choosing
 * between the two sets (presumably #if/#else) are not visible here.
 */
1282 #define SGEMM_DEFAULT_UNROLL_M 4
1283 #define DGEMM_DEFAULT_UNROLL_M 2
1284 #define QGEMM_DEFAULT_UNROLL_M 2
1285 #define CGEMM_DEFAULT_UNROLL_M 2
1286 #define ZGEMM_DEFAULT_UNROLL_M 1
1287 #define XGEMM_DEFAULT_UNROLL_M 1
1289 #define SGEMM_DEFAULT_UNROLL_N 4
1290 #define DGEMM_DEFAULT_UNROLL_N 4
1291 #define QGEMM_DEFAULT_UNROLL_N 2
1292 #define CGEMM_DEFAULT_UNROLL_N 2
1293 #define ZGEMM_DEFAULT_UNROLL_N 2
1294 #define XGEMM_DEFAULT_UNROLL_N 1
1296 #define SGEMM_DEFAULT_UNROLL_M 8
1297 #define DGEMM_DEFAULT_UNROLL_M 4
1298 #define QGEMM_DEFAULT_UNROLL_M 2
1299 #define CGEMM_DEFAULT_UNROLL_M 4
1300 #define ZGEMM_DEFAULT_UNROLL_M 2
1301 #define XGEMM_DEFAULT_UNROLL_M 1
1303 #define SGEMM_DEFAULT_UNROLL_N 4
1304 #define DGEMM_DEFAULT_UNROLL_N 4
1305 #define QGEMM_DEFAULT_UNROLL_N 2
1306 #define CGEMM_DEFAULT_UNROLL_N 2
1307 #define ZGEMM_DEFAULT_UNROLL_N 2
1308 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R defaults expand to the identifiers sgemm_p, sgemm_r, ... which
 * are declared outside this chunk; Q defaults are literal constants.
 */
1311 #define SGEMM_DEFAULT_P sgemm_p
1312 #define SGEMM_DEFAULT_R sgemm_r
1314 #define DGEMM_DEFAULT_P dgemm_p
1315 #define DGEMM_DEFAULT_R dgemm_r
1317 #define QGEMM_DEFAULT_P qgemm_p
1318 #define QGEMM_DEFAULT_R qgemm_r
1320 #define CGEMM_DEFAULT_P cgemm_p
1321 #define CGEMM_DEFAULT_R cgemm_r
1323 #define ZGEMM_DEFAULT_P zgemm_p
1324 #define ZGEMM_DEFAULT_R zgemm_r
1326 #define XGEMM_DEFAULT_P xgemm_p
1327 #define XGEMM_DEFAULT_R xgemm_r
1329 #define SGEMM_DEFAULT_Q 768
1330 #define DGEMM_DEFAULT_Q 384
1331 #define QGEMM_DEFAULT_Q 192
1332 #define CGEMM_DEFAULT_Q 768
1333 #define ZGEMM_DEFAULT_Q 384
1334 #define XGEMM_DEFAULT_Q 192
1336 #define GETRF_FACTOR 0.75
/* GEMM_THREAD expands to gemm_thread_mn, which is declared outside this chunk. */
1337 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, SWITCH_RATIO, per-precision unroll factors, P/Q/R defaults and
 * GETRF_FACTOR.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1345 #define GEMM_DEFAULT_OFFSET_A 32
1346 #define GEMM_DEFAULT_OFFSET_B 0
1347 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1351 #define SWITCH_RATIO 4
/*
 * NOTE(review): UNROLL_M and UNROLL_N are each defined twice below. The two
 * UNROLL_M sets are identical; the second UNROLL_N set uses larger values
 * for S/D/C/Z. The directives choosing between the sets (presumably
 * #if/#else) are not visible here.
 */
1354 #define SGEMM_DEFAULT_UNROLL_M 4
1355 #define DGEMM_DEFAULT_UNROLL_M 2
1356 #define QGEMM_DEFAULT_UNROLL_M 2
1357 #define CGEMM_DEFAULT_UNROLL_M 2
1358 #define ZGEMM_DEFAULT_UNROLL_M 1
1359 #define XGEMM_DEFAULT_UNROLL_M 1
1361 #define SGEMM_DEFAULT_UNROLL_N 4
1362 #define DGEMM_DEFAULT_UNROLL_N 4
1363 #define QGEMM_DEFAULT_UNROLL_N 2
1364 #define CGEMM_DEFAULT_UNROLL_N 2
1365 #define ZGEMM_DEFAULT_UNROLL_N 2
1366 #define XGEMM_DEFAULT_UNROLL_N 1
1368 #define SGEMM_DEFAULT_UNROLL_M 4
1369 #define DGEMM_DEFAULT_UNROLL_M 2
1370 #define QGEMM_DEFAULT_UNROLL_M 2
1371 #define CGEMM_DEFAULT_UNROLL_M 2
1372 #define ZGEMM_DEFAULT_UNROLL_M 1
1373 #define XGEMM_DEFAULT_UNROLL_M 1
1375 #define SGEMM_DEFAULT_UNROLL_N 8
1376 #define DGEMM_DEFAULT_UNROLL_N 8
1377 #define QGEMM_DEFAULT_UNROLL_N 2
1378 #define CGEMM_DEFAULT_UNROLL_N 4
1379 #define ZGEMM_DEFAULT_UNROLL_N 4
1380 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P defaults are literal here (504 for S/D/Q, 252 for C/Z/X); R defaults
 * expand to the identifiers sgemm_r, dgemm_r, ... declared outside this chunk.
 */
1383 #define SGEMM_DEFAULT_P 504
1384 #define SGEMM_DEFAULT_R sgemm_r
1386 #define DGEMM_DEFAULT_P 504
1387 #define DGEMM_DEFAULT_R dgemm_r
1389 #define QGEMM_DEFAULT_P 504
1390 #define QGEMM_DEFAULT_R qgemm_r
1392 #define CGEMM_DEFAULT_P 252
1393 #define CGEMM_DEFAULT_R cgemm_r
1395 #define ZGEMM_DEFAULT_P 252
1396 #define ZGEMM_DEFAULT_R zgemm_r
1398 #define XGEMM_DEFAULT_P 252
1399 #define XGEMM_DEFAULT_R xgemm_r
1401 #define SGEMM_DEFAULT_Q 512
1402 #define DGEMM_DEFAULT_Q 256
1403 #define QGEMM_DEFAULT_Q 128
1404 #define CGEMM_DEFAULT_Q 512
1405 #define ZGEMM_DEFAULT_Q 256
1406 #define XGEMM_DEFAULT_Q 128
1408 #define GETRF_FACTOR 0.72
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, SWITCH_RATIO, per-precision unroll factors, P/Q/R defaults,
 * the separate GEMM3M parameter set and GETRF_FACTOR.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1418 #define GEMM_DEFAULT_OFFSET_A 0
1419 #define GEMM_DEFAULT_OFFSET_B 0
1420 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1424 #define SWITCH_RATIO 4
/*
 * NOTE(review): UNROLL_M and UNROLL_N are each defined twice below with
 * differing values. The directives choosing between the two sets
 * (presumably #if/#else) are not visible here.
 */
1427 #define SGEMM_DEFAULT_UNROLL_M 4
1428 #define DGEMM_DEFAULT_UNROLL_M 2
1429 #define QGEMM_DEFAULT_UNROLL_M 2
1430 #define CGEMM_DEFAULT_UNROLL_M 2
1431 #define ZGEMM_DEFAULT_UNROLL_M 1
1432 #define XGEMM_DEFAULT_UNROLL_M 1
1434 #define SGEMM_DEFAULT_UNROLL_N 4
1435 #define DGEMM_DEFAULT_UNROLL_N 4
1436 #define QGEMM_DEFAULT_UNROLL_N 2
1437 #define CGEMM_DEFAULT_UNROLL_N 2
1438 #define ZGEMM_DEFAULT_UNROLL_N 2
1439 #define XGEMM_DEFAULT_UNROLL_N 1
1441 #define SGEMM_DEFAULT_UNROLL_M 16
1442 #define DGEMM_DEFAULT_UNROLL_M 8
1443 #define QGEMM_DEFAULT_UNROLL_M 2
1444 #define CGEMM_DEFAULT_UNROLL_M 8
1445 #define ZGEMM_DEFAULT_UNROLL_M 1
1446 #define XGEMM_DEFAULT_UNROLL_M 1
1448 #define SGEMM_DEFAULT_UNROLL_N 4
1449 #define DGEMM_DEFAULT_UNROLL_N 4
1450 #define QGEMM_DEFAULT_UNROLL_N 2
1451 #define CGEMM_DEFAULT_UNROLL_N 2
1452 #define ZGEMM_DEFAULT_UNROLL_N 4
1453 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P defaults are literal; R defaults expand to the identifiers sgemm_r,
 * dgemm_r, ... declared outside this chunk. The commented-out lines keep a
 * fixed R of 1024 as a disabled alternative.
 */
1456 #define SGEMM_DEFAULT_P 768
1457 #define SGEMM_DEFAULT_R sgemm_r
1458 /*#define SGEMM_DEFAULT_R 1024*/
1460 #define DGEMM_DEFAULT_P 512
1461 #define DGEMM_DEFAULT_R dgemm_r
1462 /*#define DGEMM_DEFAULT_R 1024*/
1464 #define QGEMM_DEFAULT_P 504
1465 #define QGEMM_DEFAULT_R qgemm_r
1467 #define CGEMM_DEFAULT_P 768
1468 #define CGEMM_DEFAULT_R cgemm_r
1469 /*#define CGEMM_DEFAULT_R 1024*/
1471 #define ZGEMM_DEFAULT_P 512
1472 #define ZGEMM_DEFAULT_R zgemm_r
1473 /*#define ZGEMM_DEFAULT_R 1024*/
1475 #define XGEMM_DEFAULT_P 252
1476 #define XGEMM_DEFAULT_R xgemm_r
1478 #define SGEMM_DEFAULT_Q 384
1479 #define DGEMM_DEFAULT_Q 256
1480 #define QGEMM_DEFAULT_Q 128
1481 #define CGEMM_DEFAULT_Q 512
1482 #define ZGEMM_DEFAULT_Q 192
1483 #define XGEMM_DEFAULT_Q 128
/* GEMM3M variants get their own unroll factors and literal P/Q/R values. */
1485 #define CGEMM3M_DEFAULT_UNROLL_N 8
1486 #define CGEMM3M_DEFAULT_UNROLL_M 4
1487 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1488 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1490 #define CGEMM3M_DEFAULT_P 448
1491 #define ZGEMM3M_DEFAULT_P 224
1492 #define XGEMM3M_DEFAULT_P 112
1493 #define CGEMM3M_DEFAULT_Q 224
1494 #define ZGEMM3M_DEFAULT_Q 224
1495 #define XGEMM3M_DEFAULT_Q 224
1496 #define CGEMM3M_DEFAULT_R 12288
1497 #define ZGEMM3M_DEFAULT_R 12288
1498 #define XGEMM3M_DEFAULT_R 12288
1502 #define GETRF_FACTOR 0.72
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE,
 * per-precision unroll factors, P/Q/R defaults and the GEMM3M parameter set.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1511 #define GEMM_DEFAULT_OFFSET_A 0
1512 #define GEMM_DEFAULT_OFFSET_B 0
1513 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * With XDOUBLE or DOUBLE defined the ratio/preferred size is 4. The second
 * pair (8) presumably sits in an #else branch that is not visible here.
 */
1517 #if defined(XDOUBLE) || defined(DOUBLE)
1518 #define SWITCH_RATIO 4
1519 #define GEMM_PREFERED_SIZE 4
1521 #define SWITCH_RATIO 8
1522 #define GEMM_PREFERED_SIZE 8
/*
 * NOTE(review): UNROLL_M and UNROLL_N are each defined twice below with
 * differing values. The directives choosing between the two sets
 * (presumably #if/#else) are not visible here.
 */
1527 #define SGEMM_DEFAULT_UNROLL_M 4
1528 #define DGEMM_DEFAULT_UNROLL_M 2
1529 #define QGEMM_DEFAULT_UNROLL_M 2
1530 #define CGEMM_DEFAULT_UNROLL_M 2
1531 #define ZGEMM_DEFAULT_UNROLL_M 1
1532 #define XGEMM_DEFAULT_UNROLL_M 1
1534 #define SGEMM_DEFAULT_UNROLL_N 4
1535 #define DGEMM_DEFAULT_UNROLL_N 4
1536 #define QGEMM_DEFAULT_UNROLL_N 2
1537 #define CGEMM_DEFAULT_UNROLL_N 2
1538 #define ZGEMM_DEFAULT_UNROLL_N 2
1539 #define XGEMM_DEFAULT_UNROLL_N 1
1543 #define SGEMM_DEFAULT_UNROLL_M 8
1544 #define DGEMM_DEFAULT_UNROLL_M 4
1545 #define QGEMM_DEFAULT_UNROLL_M 2
1546 #define CGEMM_DEFAULT_UNROLL_M 8
1547 #define ZGEMM_DEFAULT_UNROLL_M 4
1548 #define XGEMM_DEFAULT_UNROLL_M 1
1550 #define SGEMM_DEFAULT_UNROLL_N 4
1551 #define DGEMM_DEFAULT_UNROLL_N 8
1552 #define QGEMM_DEFAULT_UNROLL_N 2
1553 #define CGEMM_DEFAULT_UNROLL_N 2
1554 #define ZGEMM_DEFAULT_UNROLL_N 2
1555 #define XGEMM_DEFAULT_UNROLL_N 1
1557 #define SGEMM_DEFAULT_UNROLL_MN 32
1558 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): two alternative P/Q/R sets follow; the first ends at
 * XGEMM_DEFAULT_Q 128 and the second starts at SGEMM_DEFAULT_P 320. Within
 * the second set S/D Q is itself defined twice. The selecting directives
 * are not visible here.
 */
1564 #define SGEMM_DEFAULT_P 512
1565 #define SGEMM_DEFAULT_R sgemm_r
1566 #define DGEMM_DEFAULT_P 512
1567 #define DGEMM_DEFAULT_R dgemm_r
1568 #define QGEMM_DEFAULT_P 504
1569 #define QGEMM_DEFAULT_R qgemm_r
1570 #define CGEMM_DEFAULT_P 128
1571 #define CGEMM_DEFAULT_R 1024
1572 #define ZGEMM_DEFAULT_P 512
1573 #define ZGEMM_DEFAULT_R zgemm_r
1574 #define XGEMM_DEFAULT_P 252
1575 #define XGEMM_DEFAULT_R xgemm_r
1576 #define SGEMM_DEFAULT_Q 256
1577 #define DGEMM_DEFAULT_Q 256
1578 #define QGEMM_DEFAULT_Q 128
1579 #define CGEMM_DEFAULT_Q 256
1580 #define ZGEMM_DEFAULT_Q 192
1581 #define XGEMM_DEFAULT_Q 128
1585 #define SGEMM_DEFAULT_P 320
1586 #define DGEMM_DEFAULT_P 512
1587 #define CGEMM_DEFAULT_P 256
1588 #define ZGEMM_DEFAULT_P 192
1591 #define SGEMM_DEFAULT_Q 320
1592 #define DGEMM_DEFAULT_Q 128
1594 #define SGEMM_DEFAULT_Q 320
1595 #define DGEMM_DEFAULT_Q 256
1597 #define CGEMM_DEFAULT_Q 256
1598 #define ZGEMM_DEFAULT_Q 192
1600 #define SGEMM_DEFAULT_R sgemm_r
1601 #define DGEMM_DEFAULT_R 13824
1602 #define CGEMM_DEFAULT_R cgemm_r
1603 #define ZGEMM_DEFAULT_R zgemm_r
1605 #define QGEMM_DEFAULT_Q 128
1606 #define QGEMM_DEFAULT_P 504
1607 #define QGEMM_DEFAULT_R qgemm_r
1608 #define XGEMM_DEFAULT_P 252
1609 #define XGEMM_DEFAULT_R xgemm_r
1610 #define XGEMM_DEFAULT_Q 128
/* GEMM3M variants get their own unroll factors and literal P/Q/R values. */
1612 #define CGEMM3M_DEFAULT_UNROLL_N 4
1613 #define CGEMM3M_DEFAULT_UNROLL_M 8
1614 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1615 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1617 #define CGEMM3M_DEFAULT_P 320
1618 #define ZGEMM3M_DEFAULT_P 256
1619 #define XGEMM3M_DEFAULT_P 112
1620 #define CGEMM3M_DEFAULT_Q 320
1621 #define ZGEMM3M_DEFAULT_Q 256
1622 #define XGEMM3M_DEFAULT_Q 224
1623 #define CGEMM3M_DEFAULT_R 12288
1624 #define ZGEMM3M_DEFAULT_R 12288
1625 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE, the
 * USE_SGEMM_KERNEL_DIRECT switch, per-precision unroll factors, P/Q/R
 * defaults and the GEMM3M parameter set.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1637 #define GEMM_DEFAULT_OFFSET_A 0
1638 #define GEMM_DEFAULT_OFFSET_B 0
1639 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * With XDOUBLE or DOUBLE defined the ratio/preferred size is 8. The second
 * pair (16) presumably sits in an #else branch that is not visible here.
 */
1643 #if defined(XDOUBLE) || defined(DOUBLE)
1644 #define SWITCH_RATIO 8
1645 #define GEMM_PREFERED_SIZE 8
1647 #define SWITCH_RATIO 16
1648 #define GEMM_PREFERED_SIZE 16
1650 #define USE_SGEMM_KERNEL_DIRECT 1
/*
 * NOTE(review): UNROLL_M and UNROLL_N are each defined twice below with
 * differing values. The directives choosing between the two sets
 * (presumably #if/#else) are not visible here.
 */
1654 #define SGEMM_DEFAULT_UNROLL_M 4
1655 #define DGEMM_DEFAULT_UNROLL_M 2
1656 #define QGEMM_DEFAULT_UNROLL_M 2
1657 #define CGEMM_DEFAULT_UNROLL_M 2
1658 #define ZGEMM_DEFAULT_UNROLL_M 1
1659 #define XGEMM_DEFAULT_UNROLL_M 1
1661 #define SGEMM_DEFAULT_UNROLL_N 4
1662 #define DGEMM_DEFAULT_UNROLL_N 4
1663 #define QGEMM_DEFAULT_UNROLL_N 2
1664 #define CGEMM_DEFAULT_UNROLL_N 2
1665 #define ZGEMM_DEFAULT_UNROLL_N 2
1666 #define XGEMM_DEFAULT_UNROLL_N 1
1670 #define SGEMM_DEFAULT_UNROLL_M 16
1671 #define DGEMM_DEFAULT_UNROLL_M 16
1672 #define QGEMM_DEFAULT_UNROLL_M 2
1673 #define CGEMM_DEFAULT_UNROLL_M 8
1674 #define ZGEMM_DEFAULT_UNROLL_M 4
1675 #define XGEMM_DEFAULT_UNROLL_M 1
1677 #define SGEMM_DEFAULT_UNROLL_N 4
1678 #define DGEMM_DEFAULT_UNROLL_N 2
1679 #define QGEMM_DEFAULT_UNROLL_N 2
1680 #define CGEMM_DEFAULT_UNROLL_N 2
1681 #define ZGEMM_DEFAULT_UNROLL_N 2
1682 #define XGEMM_DEFAULT_UNROLL_N 1
1684 #define SGEMM_DEFAULT_UNROLL_MN 32
1685 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): two alternative P/Q/R sets follow; the first ends at
 * XGEMM_DEFAULT_Q 128 and the second starts at SGEMM_DEFAULT_P 448. The
 * selecting directives are not visible here.
 */
1690 #define SGEMM_DEFAULT_P 512
1691 #define SGEMM_DEFAULT_R sgemm_r
1692 #define DGEMM_DEFAULT_P 512
1693 #define DGEMM_DEFAULT_R dgemm_r
1694 #define QGEMM_DEFAULT_P 504
1695 #define QGEMM_DEFAULT_R qgemm_r
1696 #define CGEMM_DEFAULT_P 128
1697 #define CGEMM_DEFAULT_R 1024
1698 #define ZGEMM_DEFAULT_P 512
1699 #define ZGEMM_DEFAULT_R zgemm_r
1700 #define XGEMM_DEFAULT_P 252
1701 #define XGEMM_DEFAULT_R xgemm_r
1702 #define SGEMM_DEFAULT_Q 256
1703 #define DGEMM_DEFAULT_Q 256
1704 #define QGEMM_DEFAULT_Q 128
1705 #define CGEMM_DEFAULT_Q 256
1706 #define ZGEMM_DEFAULT_Q 192
1707 #define XGEMM_DEFAULT_Q 128
1711 #define SGEMM_DEFAULT_P 448
1712 #define DGEMM_DEFAULT_P 192
1713 #define CGEMM_DEFAULT_P 384
1714 #define ZGEMM_DEFAULT_P 256
1716 #define SGEMM_DEFAULT_Q 448
1717 #define DGEMM_DEFAULT_Q 384
1718 #define CGEMM_DEFAULT_Q 192
1719 #define ZGEMM_DEFAULT_Q 128
1721 #define SGEMM_DEFAULT_R sgemm_r
1722 #define DGEMM_DEFAULT_R 8640
1723 #define CGEMM_DEFAULT_R cgemm_r
1724 #define ZGEMM_DEFAULT_R zgemm_r
1726 #define QGEMM_DEFAULT_Q 128
1727 #define QGEMM_DEFAULT_P 504
1728 #define QGEMM_DEFAULT_R qgemm_r
1729 #define XGEMM_DEFAULT_P 252
1730 #define XGEMM_DEFAULT_R xgemm_r
1731 #define XGEMM_DEFAULT_Q 128
/* GEMM3M variants get their own unroll factors and literal P/Q/R values. */
1733 #define CGEMM3M_DEFAULT_UNROLL_N 4
1734 #define CGEMM3M_DEFAULT_UNROLL_M 8
1735 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1736 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1738 #define CGEMM3M_DEFAULT_P 320
1739 #define ZGEMM3M_DEFAULT_P 256
1740 #define XGEMM3M_DEFAULT_P 112
1741 #define CGEMM3M_DEFAULT_Q 320
1742 #define ZGEMM3M_DEFAULT_Q 256
1743 #define XGEMM3M_DEFAULT_Q 224
1744 #define CGEMM3M_DEFAULT_R 12288
1745 #define ZGEMM3M_DEFAULT_R 12288
1746 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE, the
 * USE_SGEMM_KERNEL_DIRECT switch, per-precision unroll factors, P/Q/R
 * defaults and the GEMM3M parameter set.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1758 #define GEMM_DEFAULT_OFFSET_A 0
1759 #define GEMM_DEFAULT_OFFSET_B 0
1760 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * With XDOUBLE or DOUBLE defined the ratio/preferred size is 8. The second
 * pair (16) presumably sits in an #else branch that is not visible here.
 */
1764 #if defined(XDOUBLE) || defined(DOUBLE)
1765 #define SWITCH_RATIO 8
1766 #define GEMM_PREFERED_SIZE 8
1768 #define SWITCH_RATIO 16
1769 #define GEMM_PREFERED_SIZE 16
1771 #define USE_SGEMM_KERNEL_DIRECT 1
/*
 * NOTE(review): UNROLL_M and UNROLL_N are each defined twice below with
 * differing values. The directives choosing between the two sets
 * (presumably #if/#else) are not visible here.
 */
1775 #define SGEMM_DEFAULT_UNROLL_M 4
1776 #define DGEMM_DEFAULT_UNROLL_M 2
1777 #define QGEMM_DEFAULT_UNROLL_M 2
1778 #define CGEMM_DEFAULT_UNROLL_M 2
1779 #define ZGEMM_DEFAULT_UNROLL_M 1
1780 #define XGEMM_DEFAULT_UNROLL_M 1
1782 #define SGEMM_DEFAULT_UNROLL_N 4
1783 #define DGEMM_DEFAULT_UNROLL_N 4
1784 #define QGEMM_DEFAULT_UNROLL_N 2
1785 #define CGEMM_DEFAULT_UNROLL_N 2
1786 #define ZGEMM_DEFAULT_UNROLL_N 2
1787 #define XGEMM_DEFAULT_UNROLL_N 1
1791 #define SGEMM_DEFAULT_UNROLL_M 16
1792 #define DGEMM_DEFAULT_UNROLL_M 16
1793 #define QGEMM_DEFAULT_UNROLL_M 2
1794 #define CGEMM_DEFAULT_UNROLL_M 8
1795 #define ZGEMM_DEFAULT_UNROLL_M 4
1796 #define XGEMM_DEFAULT_UNROLL_M 1
1798 #define SGEMM_DEFAULT_UNROLL_N 4
1799 #define DGEMM_DEFAULT_UNROLL_N 2
1800 #define QGEMM_DEFAULT_UNROLL_N 2
1801 #define CGEMM_DEFAULT_UNROLL_N 2
1802 #define ZGEMM_DEFAULT_UNROLL_N 2
1803 #define XGEMM_DEFAULT_UNROLL_N 1
1805 #define SGEMM_DEFAULT_UNROLL_MN 32
1806 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): two alternative P/Q/R sets follow; the first ends at
 * XGEMM_DEFAULT_Q 128 and the second starts at SGEMM_DEFAULT_P 640. The
 * selecting directives are not visible here.
 */
1811 #define SGEMM_DEFAULT_P 512
1812 #define SGEMM_DEFAULT_R sgemm_r
1813 #define DGEMM_DEFAULT_P 512
1814 #define DGEMM_DEFAULT_R dgemm_r
1815 #define QGEMM_DEFAULT_P 504
1816 #define QGEMM_DEFAULT_R qgemm_r
1817 #define CGEMM_DEFAULT_P 128
1818 #define CGEMM_DEFAULT_R 1024
1819 #define ZGEMM_DEFAULT_P 512
1820 #define ZGEMM_DEFAULT_R zgemm_r
1821 #define XGEMM_DEFAULT_P 252
1822 #define XGEMM_DEFAULT_R xgemm_r
1823 #define SGEMM_DEFAULT_Q 256
1824 #define DGEMM_DEFAULT_Q 256
1825 #define QGEMM_DEFAULT_Q 128
1826 #define CGEMM_DEFAULT_Q 256
1827 #define ZGEMM_DEFAULT_Q 192
1828 #define XGEMM_DEFAULT_Q 128
1832 #define SGEMM_DEFAULT_P 640
1833 #define DGEMM_DEFAULT_P 192
1834 #define CGEMM_DEFAULT_P 384
1835 #define ZGEMM_DEFAULT_P 256
1837 #define SGEMM_DEFAULT_Q 320
1838 #define DGEMM_DEFAULT_Q 384
1839 #define CGEMM_DEFAULT_Q 192
1840 #define ZGEMM_DEFAULT_Q 128
1842 #define SGEMM_DEFAULT_R sgemm_r
1843 #define DGEMM_DEFAULT_R 8640
1844 #define CGEMM_DEFAULT_R cgemm_r
1845 #define ZGEMM_DEFAULT_R zgemm_r
1847 #define QGEMM_DEFAULT_Q 128
1848 #define QGEMM_DEFAULT_P 504
1849 #define QGEMM_DEFAULT_R qgemm_r
1850 #define XGEMM_DEFAULT_P 252
1851 #define XGEMM_DEFAULT_R xgemm_r
1852 #define XGEMM_DEFAULT_Q 128
/* GEMM3M variants get their own unroll factors and literal P/Q/R values. */
1854 #define CGEMM3M_DEFAULT_UNROLL_N 4
1855 #define CGEMM3M_DEFAULT_UNROLL_M 8
1856 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1857 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1859 #define CGEMM3M_DEFAULT_P 320
1860 #define ZGEMM3M_DEFAULT_P 256
1861 #define XGEMM3M_DEFAULT_P 112
1862 #define CGEMM3M_DEFAULT_Q 320
1863 #define ZGEMM3M_DEFAULT_Q 256
1864 #define XGEMM3M_DEFAULT_Q 224
1865 #define CGEMM3M_DEFAULT_R 12288
1866 #define ZGEMM3M_DEFAULT_R 12288
1867 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, per-precision unroll factors and P/Q/R defaults.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1878 #define GEMM_DEFAULT_OFFSET_A 64
1879 #define GEMM_DEFAULT_OFFSET_B 0
1880 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * NOTE(review): UNROLL_M is defined twice below with different values,
 * while UNROLL_N appears once. The directives choosing between the two
 * UNROLL_M sets (presumably #if/#else) are not visible here.
 */
1885 #define SGEMM_DEFAULT_UNROLL_M 4
1886 #define DGEMM_DEFAULT_UNROLL_M 2
1887 #define QGEMM_DEFAULT_UNROLL_M 2
1888 #define CGEMM_DEFAULT_UNROLL_M 2
1889 #define ZGEMM_DEFAULT_UNROLL_M 1
1890 #define XGEMM_DEFAULT_UNROLL_M 1
1892 #define SGEMM_DEFAULT_UNROLL_M 8
1893 #define DGEMM_DEFAULT_UNROLL_M 4
1894 #define QGEMM_DEFAULT_UNROLL_M 2
1895 #define CGEMM_DEFAULT_UNROLL_M 4
1896 #define ZGEMM_DEFAULT_UNROLL_M 2
1897 #define XGEMM_DEFAULT_UNROLL_M 1
1900 #define SGEMM_DEFAULT_UNROLL_N 4
1901 #define DGEMM_DEFAULT_UNROLL_N 2
1902 #define QGEMM_DEFAULT_UNROLL_N 2
1903 #define CGEMM_DEFAULT_UNROLL_N 2
1904 #define ZGEMM_DEFAULT_UNROLL_N 1
1905 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R defaults expand to the identifiers sgemm_p, sgemm_r, ... which
 * are declared outside this chunk; every Q default is the literal 256.
 */
1907 #define SGEMM_DEFAULT_P sgemm_p
1908 #define SGEMM_DEFAULT_R sgemm_r
1910 #define DGEMM_DEFAULT_P dgemm_p
1911 #define DGEMM_DEFAULT_R dgemm_r
1913 #define QGEMM_DEFAULT_P qgemm_p
1914 #define QGEMM_DEFAULT_R qgemm_r
1916 #define CGEMM_DEFAULT_P cgemm_p
1917 #define CGEMM_DEFAULT_R cgemm_r
1919 #define ZGEMM_DEFAULT_P zgemm_p
1920 #define ZGEMM_DEFAULT_R zgemm_r
1922 #define XGEMM_DEFAULT_P xgemm_p
1923 #define XGEMM_DEFAULT_R xgemm_r
1925 #define SGEMM_DEFAULT_Q 256
1926 #define DGEMM_DEFAULT_Q 256
1927 #define QGEMM_DEFAULT_Q 256
1928 #define CGEMM_DEFAULT_Q 256
1929 #define ZGEMM_DEFAULT_Q 256
1930 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, per-precision unroll factors, P/Q/R defaults and GETRF_FACTOR.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
1940 #define GEMM_DEFAULT_OFFSET_A 0
1941 #define GEMM_DEFAULT_OFFSET_B 128
1942 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 8x8 for the S/D/Q variants, 4x4 for the C/Z/X variants. */
1944 #define SGEMM_DEFAULT_UNROLL_M 8
1945 #define SGEMM_DEFAULT_UNROLL_N 8
1946 #define DGEMM_DEFAULT_UNROLL_M 8
1947 #define DGEMM_DEFAULT_UNROLL_N 8
1948 #define QGEMM_DEFAULT_UNROLL_M 8
1949 #define QGEMM_DEFAULT_UNROLL_N 8
1950 #define CGEMM_DEFAULT_UNROLL_M 4
1951 #define CGEMM_DEFAULT_UNROLL_N 4
1952 #define ZGEMM_DEFAULT_UNROLL_M 4
1953 #define ZGEMM_DEFAULT_UNROLL_N 4
1954 #define XGEMM_DEFAULT_UNROLL_M 4
1955 #define XGEMM_DEFAULT_UNROLL_N 4
/*
 * P and R defaults expand to the identifiers sgemm_p, sgemm_r, ... which
 * are declared outside this chunk; every Q default is the literal 1024.
 */
1957 #define SGEMM_DEFAULT_P sgemm_p
1958 #define DGEMM_DEFAULT_P dgemm_p
1959 #define QGEMM_DEFAULT_P qgemm_p
1960 #define CGEMM_DEFAULT_P cgemm_p
1961 #define ZGEMM_DEFAULT_P zgemm_p
1962 #define XGEMM_DEFAULT_P xgemm_p
1964 #define SGEMM_DEFAULT_Q 1024
1965 #define DGEMM_DEFAULT_Q 1024
1966 #define QGEMM_DEFAULT_Q 1024
1967 #define CGEMM_DEFAULT_Q 1024
1968 #define ZGEMM_DEFAULT_Q 1024
1969 #define XGEMM_DEFAULT_Q 1024
1971 #define SGEMM_DEFAULT_R sgemm_r
1972 #define DGEMM_DEFAULT_R dgemm_r
1973 #define QGEMM_DEFAULT_R qgemm_r
1974 #define CGEMM_DEFAULT_R cgemm_r
1975 #define ZGEMM_DEFAULT_R zgemm_r
1976 #define XGEMM_DEFAULT_R xgemm_r
1980 #define GETRF_FACTOR 0.65
/*
 * GEMM defaults used when EV4, EV5 or EV6 is defined: A/B offsets,
 * alignment constant, unroll factors and P/Q/R defaults for the S/D/C/Z
 * variants. The closing #endif is not visible in this view.
 */
1984 #if defined(EV4) || defined(EV5) || defined(EV6)
1994 #define GEMM_DEFAULT_OFFSET_A 512
1995 #define GEMM_DEFAULT_OFFSET_B 512
1996 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1998 #define SGEMM_DEFAULT_UNROLL_M 4
1999 #define SGEMM_DEFAULT_UNROLL_N 4
2000 #define DGEMM_DEFAULT_UNROLL_M 4
2001 #define DGEMM_DEFAULT_UNROLL_N 4
2002 #define CGEMM_DEFAULT_UNROLL_M 2
2003 #define CGEMM_DEFAULT_UNROLL_N 2
2004 #define ZGEMM_DEFAULT_UNROLL_M 2
2005 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * NOTE(review): three alternative P/Q sets follow (P = 32, then 64, then
 * 256/128); only the first also defines R. They are presumably selected per
 * EV4/EV5/EV6 by directives that are not visible here -- confirm.
 */
2010 #define SGEMM_DEFAULT_P 32
2011 #define SGEMM_DEFAULT_Q 112
2012 #define SGEMM_DEFAULT_R 256
2014 #define DGEMM_DEFAULT_P 32
2015 #define DGEMM_DEFAULT_Q 56
2016 #define DGEMM_DEFAULT_R 256
2018 #define CGEMM_DEFAULT_P 32
2019 #define CGEMM_DEFAULT_Q 64
2020 #define CGEMM_DEFAULT_R 240
2022 #define ZGEMM_DEFAULT_P 32
2023 #define ZGEMM_DEFAULT_Q 32
2024 #define ZGEMM_DEFAULT_R 240
2028 #define SGEMM_DEFAULT_P 64
2029 #define SGEMM_DEFAULT_Q 256
2031 #define DGEMM_DEFAULT_P 64
2032 #define DGEMM_DEFAULT_Q 128
2034 #define CGEMM_DEFAULT_P 64
2035 #define CGEMM_DEFAULT_Q 128
2037 #define ZGEMM_DEFAULT_P 64
2038 #define ZGEMM_DEFAULT_Q 64
2042 #define SGEMM_DEFAULT_P 256
2043 #define SGEMM_DEFAULT_Q 512
2045 #define DGEMM_DEFAULT_P 256
2046 #define DGEMM_DEFAULT_Q 256
2048 #define CGEMM_DEFAULT_P 256
2049 #define CGEMM_DEFAULT_Q 256
2051 #define ZGEMM_DEFAULT_P 128
2052 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, unroll factors and literal P/Q defaults for the S/D/C/Z
 * variants. No R defaults are defined in this group.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2062 #define GEMM_DEFAULT_OFFSET_A 0
2063 #define GEMM_DEFAULT_OFFSET_B 8192
2064 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2066 #define SGEMM_DEFAULT_UNROLL_M 16
2067 #define SGEMM_DEFAULT_UNROLL_N 4
2068 #define DGEMM_DEFAULT_UNROLL_M 4
2069 #define DGEMM_DEFAULT_UNROLL_N 4
2070 #define CGEMM_DEFAULT_UNROLL_M 8
2071 #define CGEMM_DEFAULT_UNROLL_N 2
2072 #define ZGEMM_DEFAULT_UNROLL_M 2
2073 #define ZGEMM_DEFAULT_UNROLL_N 2
2075 #define SGEMM_DEFAULT_P 128
2076 #define DGEMM_DEFAULT_P 128
2077 #define CGEMM_DEFAULT_P 128
2078 #define ZGEMM_DEFAULT_P 128
2080 #define SGEMM_DEFAULT_Q 512
2081 #define DGEMM_DEFAULT_Q 256
2082 #define CGEMM_DEFAULT_Q 256
2083 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, unroll factors and literal P/Q defaults for the S/D/C/Z
 * variants. No R defaults are defined in this group.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2089 #define GEMM_DEFAULT_OFFSET_A 0
2090 #define GEMM_DEFAULT_OFFSET_B 1024
2091 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2093 #define SGEMM_DEFAULT_UNROLL_M 16
2094 #define SGEMM_DEFAULT_UNROLL_N 4
2095 #define DGEMM_DEFAULT_UNROLL_M 4
2096 #define DGEMM_DEFAULT_UNROLL_N 4
2097 #define CGEMM_DEFAULT_UNROLL_M 2
2098 #define CGEMM_DEFAULT_UNROLL_N 2
2099 #define ZGEMM_DEFAULT_UNROLL_M 2
2100 #define ZGEMM_DEFAULT_UNROLL_N 2
2102 #define SGEMM_DEFAULT_P 256
2103 #define DGEMM_DEFAULT_P 128
2104 #define CGEMM_DEFAULT_P 128
2105 #define ZGEMM_DEFAULT_P 64
2107 #define SGEMM_DEFAULT_Q 256
2108 #define DGEMM_DEFAULT_Q 256
2109 #define CGEMM_DEFAULT_Q 256
2110 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, unroll factors (two of which depend on byte order) and P/Q
 * defaults that depend on the OS and on L2_SIZE.
 * NOTE(review): the conditional that selects this group, and the
 * #else/#endif lines of the conditionals below, are not visible in this
 * view.
 */
2120 #define GEMM_DEFAULT_OFFSET_A 2688
2121 #define GEMM_DEFAULT_OFFSET_B 3072
2122 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * On big-endian builds SGEMM UNROLL_M is 4; the value 16 that follows
 * presumably belongs to the (not visible) #else branch.
 */
2124 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2125 #define SGEMM_DEFAULT_UNROLL_M 4
2127 #define SGEMM_DEFAULT_UNROLL_M 16
2129 #define SGEMM_DEFAULT_UNROLL_N 4
2130 #define DGEMM_DEFAULT_UNROLL_M 4
2131 #define DGEMM_DEFAULT_UNROLL_N 4
/* Same byte-order split for CGEMM UNROLL_M: 2 on big-endian, then 8. */
2132 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2133 #define CGEMM_DEFAULT_UNROLL_M 2
2135 #define CGEMM_DEFAULT_UNROLL_M 8
2137 #define CGEMM_DEFAULT_UNROLL_N 2
2138 #define ZGEMM_DEFAULT_UNROLL_M 2
2139 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * NOTE(review): 1024976 is not 1 MiB (1048576). It presumably matches the
 * exact L2_SIZE value supplied by the build configuration for one CPU
 * variant -- verify against the definition of L2_SIZE before changing it.
 */
2141 #if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
2142 #if L2_SIZE == 1024976
2143 #define SGEMM_DEFAULT_P 320
2144 #define DGEMM_DEFAULT_P 256
2145 #define CGEMM_DEFAULT_P 256
2146 #define ZGEMM_DEFAULT_P 256
2148 #define SGEMM_DEFAULT_P 176
2149 #define DGEMM_DEFAULT_P 176
2150 #define CGEMM_DEFAULT_P 176
2151 #define ZGEMM_DEFAULT_P 176
2155 #define SGEMM_DEFAULT_Q 512
2156 #define DGEMM_DEFAULT_Q 256
2157 #define CGEMM_DEFAULT_Q 256
2158 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM defaults for one target configuration: A/B offsets (both written as
 * 32 * 0, i.e. zero), alignment constant, unroll factors and P/Q/R defaults
 * for the S/D/C/Z variants.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2169 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2170 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
2171 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2173 #define SGEMM_DEFAULT_UNROLL_M 4
2174 #define SGEMM_DEFAULT_UNROLL_N 4
2175 #define DGEMM_DEFAULT_UNROLL_M 4
2176 #define DGEMM_DEFAULT_UNROLL_N 4
2177 #define CGEMM_DEFAULT_UNROLL_M 2
2178 #define CGEMM_DEFAULT_UNROLL_N 2
2179 #define ZGEMM_DEFAULT_UNROLL_M 2
2180 #define ZGEMM_DEFAULT_UNROLL_N 2
2182 #define SGEMM_DEFAULT_P 512
2183 #define DGEMM_DEFAULT_P 512
2184 #define CGEMM_DEFAULT_P 512
2185 #define ZGEMM_DEFAULT_P 512
2187 #define SGEMM_DEFAULT_Q 1024
2188 #define DGEMM_DEFAULT_Q 512
2189 #define CGEMM_DEFAULT_Q 512
2190 #define ZGEMM_DEFAULT_Q 256
/* Each R default expands to the matching P default defined above. */
2192 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2193 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2194 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2195 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * GEMM defaults for one target configuration: A/B offsets (both written as
 * 32 * 0, i.e. zero), alignment constant, unroll factors and literal P/Q
 * defaults for the S/D/C/Z variants. No R defaults are defined here.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2205 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2206 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
2207 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2209 #define SGEMM_DEFAULT_UNROLL_M 8
2210 #define SGEMM_DEFAULT_UNROLL_N 4
2211 #define DGEMM_DEFAULT_UNROLL_M 8
2212 #define DGEMM_DEFAULT_UNROLL_N 4
2213 #define CGEMM_DEFAULT_UNROLL_M 4
2214 #define CGEMM_DEFAULT_UNROLL_N 2
2215 #define ZGEMM_DEFAULT_UNROLL_M 4
2216 #define ZGEMM_DEFAULT_UNROLL_N 2
2218 #define SGEMM_DEFAULT_P 128
2219 #define DGEMM_DEFAULT_P 128
2220 #define CGEMM_DEFAULT_P 128
2221 #define ZGEMM_DEFAULT_P 128
/*
 * NOTE(review): Q is defined twice below (a large set, then a small set).
 * The directives choosing between them are not visible here.
 */
2223 #define SGEMM_DEFAULT_Q 4096
2224 #define DGEMM_DEFAULT_Q 3072
2225 #define CGEMM_DEFAULT_Q 2048
2226 #define ZGEMM_DEFAULT_Q 1024
2228 #define SGEMM_DEFAULT_Q 512
2229 #define DGEMM_DEFAULT_Q 256
2230 #define CGEMM_DEFAULT_Q 256
2231 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM defaults used when POWER3, POWER4 or POWER5 is defined: A/B offsets,
 * alignment constant, unroll factors and P/Q/R defaults. The closing #endif
 * is not visible in this view.
 */
2239 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
2240 #define GEMM_DEFAULT_OFFSET_A 0
2241 #define GEMM_DEFAULT_OFFSET_B 2048
2242 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2244 #define SGEMM_DEFAULT_UNROLL_M 4
2245 #define SGEMM_DEFAULT_UNROLL_N 4
2246 #define DGEMM_DEFAULT_UNROLL_M 4
2247 #define DGEMM_DEFAULT_UNROLL_N 4
2248 #define CGEMM_DEFAULT_UNROLL_M 2
2249 #define CGEMM_DEFAULT_UNROLL_N 2
2250 #define ZGEMM_DEFAULT_UNROLL_M 2
2251 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * NOTE(review): three alternative P sets follow. The first defines P/Q/R
 * for S/D/Z only (no CGEMM entries are visible); the next two each split on
 * ALLOC_HUGETLB. The selecting directives and the #else/#endif lines are
 * not visible here.
 */
2258 #define SGEMM_DEFAULT_P 256
2259 #define SGEMM_DEFAULT_Q 432
2260 #define SGEMM_DEFAULT_R 1012
2262 #define DGEMM_DEFAULT_P 256
2263 #define DGEMM_DEFAULT_Q 216
2264 #define DGEMM_DEFAULT_R 1012
2266 #define ZGEMM_DEFAULT_P 256
2267 #define ZGEMM_DEFAULT_Q 104
2268 #define ZGEMM_DEFAULT_R 1012
2272 #ifdef ALLOC_HUGETLB
2273 #define SGEMM_DEFAULT_P 184
2274 #define DGEMM_DEFAULT_P 184
2275 #define CGEMM_DEFAULT_P 184
2276 #define ZGEMM_DEFAULT_P 184
2278 #define SGEMM_DEFAULT_P 144
2279 #define DGEMM_DEFAULT_P 144
2280 #define CGEMM_DEFAULT_P 144
2281 #define ZGEMM_DEFAULT_P 144
2286 #ifdef ALLOC_HUGETLB
2287 #define SGEMM_DEFAULT_P 512
2288 #define DGEMM_DEFAULT_P 256
2289 #define CGEMM_DEFAULT_P 256
2290 #define ZGEMM_DEFAULT_P 128
2292 #define SGEMM_DEFAULT_P 320
2293 #define DGEMM_DEFAULT_P 160
2294 #define CGEMM_DEFAULT_P 160
2295 #define ZGEMM_DEFAULT_P 80
2298 #define SGEMM_DEFAULT_Q 256
2299 #define CGEMM_DEFAULT_Q 256
2300 #define DGEMM_DEFAULT_Q 256
2301 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, unroll factors and literal P/Q defaults for the S/D/C/Z
 * variants. No R defaults are defined in this group.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2313 #define GEMM_DEFAULT_OFFSET_A 384
2314 #define GEMM_DEFAULT_OFFSET_B 1024
2315 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2317 #define SGEMM_DEFAULT_UNROLL_M 4
2318 #define SGEMM_DEFAULT_UNROLL_N 4
2319 #define DGEMM_DEFAULT_UNROLL_M 4
2320 #define DGEMM_DEFAULT_UNROLL_N 4
2321 #define CGEMM_DEFAULT_UNROLL_M 2
2322 #define CGEMM_DEFAULT_UNROLL_N 4
2323 #define ZGEMM_DEFAULT_UNROLL_M 2
2324 #define ZGEMM_DEFAULT_UNROLL_N 4
2326 #define SGEMM_DEFAULT_P 992
2327 #define DGEMM_DEFAULT_P 480
2328 #define CGEMM_DEFAULT_P 488
2329 #define ZGEMM_DEFAULT_P 248
2331 #define SGEMM_DEFAULT_Q 504
2332 #define DGEMM_DEFAULT_Q 504
2333 #define CGEMM_DEFAULT_Q 400
2334 #define ZGEMM_DEFAULT_Q 400
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, unroll factors that depend on __32BIT__, and P/Q/R defaults.
 * NOTE(review): the conditional that selects this group, and the
 * #else/#endif lines of the conditionals below, are not visible in this
 * view.
 */
2345 #define GEMM_DEFAULT_OFFSET_A 0
2346 #define GEMM_DEFAULT_OFFSET_B 65536
2347 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * When __32BIT__ is defined a #warning is emitted and the smaller unroll
 * factors are used; the larger set that follows presumably belongs to the
 * (not visible) #else branch.
 */
2348 #if defined(__32BIT__)
2349 #warning using BINARY32==POWER6
2350 #define SGEMM_DEFAULT_UNROLL_M 4
2351 #define SGEMM_DEFAULT_UNROLL_N 4
2352 #define DGEMM_DEFAULT_UNROLL_M 4
2353 #define DGEMM_DEFAULT_UNROLL_N 4
2354 #define CGEMM_DEFAULT_UNROLL_M 2
2355 #define CGEMM_DEFAULT_UNROLL_N 4
2356 #define ZGEMM_DEFAULT_UNROLL_M 2
2357 #define ZGEMM_DEFAULT_UNROLL_N 4
2359 #define SGEMM_DEFAULT_UNROLL_M 16
2360 #define SGEMM_DEFAULT_UNROLL_N 8
2361 #define DGEMM_DEFAULT_UNROLL_M 16
2362 #define DGEMM_DEFAULT_UNROLL_N 4
2363 #define CGEMM_DEFAULT_UNROLL_M 8
2364 #define CGEMM_DEFAULT_UNROLL_N 4
2365 #define ZGEMM_DEFAULT_UNROLL_M 8
2366 #define ZGEMM_DEFAULT_UNROLL_N 2
/* The UL suffix makes these P/Q defaults unsigned long constants. */
2368 #define SGEMM_DEFAULT_P 1280UL
2369 #define DGEMM_DEFAULT_P 640UL
2370 #define CGEMM_DEFAULT_P 640UL
2371 #define ZGEMM_DEFAULT_P 320UL
2373 #define SGEMM_DEFAULT_Q 640UL
2374 #define DGEMM_DEFAULT_Q 720UL
2375 #define CGEMM_DEFAULT_Q 640UL
2376 #define ZGEMM_DEFAULT_Q 640UL
/*
 * NOTE(review): R is defined twice below -- first as the matching P
 * default, then as the literal 4096. The directives choosing between the
 * two sets are not visible here.
 */
2379 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2380 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2381 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2382 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2384 #define SGEMM_DEFAULT_R 4096
2385 #define DGEMM_DEFAULT_R 4096
2386 #define CGEMM_DEFAULT_R 4096
2387 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, SWITCH_RATIO / GEMM_PREFERED_SIZE, unroll factors and literal
 * P/Q/R defaults for the S/D/C/Z variants.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2398 #define GEMM_DEFAULT_OFFSET_A 0
2399 #define GEMM_DEFAULT_OFFSET_B 65536
2400 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2402 #define SWITCH_RATIO 16
2403 #define GEMM_PREFERED_SIZE 16
2405 #define SGEMM_DEFAULT_UNROLL_M 16
2406 #define SGEMM_DEFAULT_UNROLL_N 8
2407 #define DGEMM_DEFAULT_UNROLL_M 16
2408 #define DGEMM_DEFAULT_UNROLL_N 4
2409 #define CGEMM_DEFAULT_UNROLL_M 8
2410 #define CGEMM_DEFAULT_UNROLL_N 4
2411 #define ZGEMM_DEFAULT_UNROLL_M 8
2412 #define ZGEMM_DEFAULT_UNROLL_N 2
2414 #define SGEMM_DEFAULT_P 832
2415 #define DGEMM_DEFAULT_P 128
2416 #define CGEMM_DEFAULT_P 512
2417 #define ZGEMM_DEFAULT_P 256
2419 #define SGEMM_DEFAULT_Q 1026
2420 #define DGEMM_DEFAULT_Q 384
2421 #define CGEMM_DEFAULT_Q 1026
2422 #define ZGEMM_DEFAULT_Q 1026
2424 #define SGEMM_DEFAULT_R 4096
2425 #define DGEMM_DEFAULT_R 4096
2426 #define CGEMM_DEFAULT_R 4096
2427 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM defaults used when POWER10 is defined: A/B offsets, alignment
 * constant, SWITCH_RATIO / GEMM_PREFERED_SIZE, unroll factors, literal
 * P/Q/R defaults and an SBGEMM override. The closing #endif is not visible
 * in this view.
 */
2433 #if defined(POWER10)
2437 #define GEMM_DEFAULT_OFFSET_A 0
2438 #define GEMM_DEFAULT_OFFSET_B 65536
2439 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2441 #define SWITCH_RATIO 16
2442 #define GEMM_PREFERED_SIZE 16
2444 #define SGEMM_DEFAULT_UNROLL_M 16
2445 #define SGEMM_DEFAULT_UNROLL_N 8
2446 #define DGEMM_DEFAULT_UNROLL_M 8
2447 #define DGEMM_DEFAULT_UNROLL_N 8
2448 #define CGEMM_DEFAULT_UNROLL_M 8
2449 #define CGEMM_DEFAULT_UNROLL_N 4
2450 #define ZGEMM_DEFAULT_UNROLL_M 8
2451 #define ZGEMM_DEFAULT_UNROLL_N 2
2453 #define SGEMM_DEFAULT_P 832
2454 #define DGEMM_DEFAULT_P 320
2455 #define CGEMM_DEFAULT_P 512
2456 #define ZGEMM_DEFAULT_P 256
2458 #define SGEMM_DEFAULT_Q 1026
2459 #define DGEMM_DEFAULT_Q 960
2460 #define CGEMM_DEFAULT_Q 1026
2461 #define ZGEMM_DEFAULT_Q 1026
2463 #define SGEMM_DEFAULT_R 4096
2464 #define DGEMM_DEFAULT_R 4096
2465 #define CGEMM_DEFAULT_R 4096
2466 #define ZGEMM_DEFAULT_R 4096
/*
 * Drop any SBGEMM defaults defined earlier and replace them with the values
 * below, which equal the SGEMM values in this group.
 * NOTE(review): a guard may precede these lines in the full file; it is not
 * visible here.
 */
2470 #undef SBGEMM_DEFAULT_UNROLL_N
2471 #undef SBGEMM_DEFAULT_UNROLL_M
2472 #undef SBGEMM_DEFAULT_P
2473 #undef SBGEMM_DEFAULT_R
2474 #undef SBGEMM_DEFAULT_Q
2475 #define SBGEMM_DEFAULT_UNROLL_M 16
2476 #define SBGEMM_DEFAULT_UNROLL_N 8
2477 #define SBGEMM_DEFAULT_P 832
2478 #define SBGEMM_DEFAULT_Q 1026
2479 #define SBGEMM_DEFAULT_R 4096
/*
 * GEMM defaults used when both SPARC and V7 are defined: A/B offsets,
 * alignment constant, unroll factors, literal P/Q defaults and the
 * GEMM_THREAD selection. The closing #endif is not visible in this view.
 */
2482 #if defined(SPARC) && defined(V7)
2487 #define GEMM_DEFAULT_OFFSET_A 0
2488 #define GEMM_DEFAULT_OFFSET_B 2048
2489 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2491 #define SGEMM_DEFAULT_UNROLL_M 2
2492 #define SGEMM_DEFAULT_UNROLL_N 8
2493 #define DGEMM_DEFAULT_UNROLL_M 2
2494 #define DGEMM_DEFAULT_UNROLL_N 8
2495 #define CGEMM_DEFAULT_UNROLL_M 1
2496 #define CGEMM_DEFAULT_UNROLL_N 4
2497 #define ZGEMM_DEFAULT_UNROLL_M 1
2498 #define ZGEMM_DEFAULT_UNROLL_N 4
2500 #define SGEMM_DEFAULT_P 256
2501 #define DGEMM_DEFAULT_P 256
2502 #define CGEMM_DEFAULT_P 256
2503 #define ZGEMM_DEFAULT_P 256
2505 #define SGEMM_DEFAULT_Q 512
2506 #define DGEMM_DEFAULT_Q 256
2507 #define CGEMM_DEFAULT_Q 256
2508 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD expands to gemm_thread_mn, which is declared outside this chunk. */
2511 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults used when SPARC and V9 are both defined, or when
 * __sparc_v9__ is defined: A/B offsets, alignment constant, unroll factors
 * and literal P/Q defaults. The closing #endif is not visible in this view.
 */
2514 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2519 #define GEMM_DEFAULT_OFFSET_A 0
2520 #define GEMM_DEFAULT_OFFSET_B 2048
2521 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2523 #define SGEMM_DEFAULT_UNROLL_M 4
2524 #define SGEMM_DEFAULT_UNROLL_N 4
2525 #define DGEMM_DEFAULT_UNROLL_M 4
2526 #define DGEMM_DEFAULT_UNROLL_N 4
2527 #define CGEMM_DEFAULT_UNROLL_M 2
2528 #define CGEMM_DEFAULT_UNROLL_N 2
2529 #define ZGEMM_DEFAULT_UNROLL_M 2
2530 #define ZGEMM_DEFAULT_UNROLL_N 2
2532 #define SGEMM_DEFAULT_P 512
2533 #define DGEMM_DEFAULT_P 512
2534 #define CGEMM_DEFAULT_P 512
2535 #define ZGEMM_DEFAULT_P 512
2537 #define SGEMM_DEFAULT_Q 1024
2538 #define DGEMM_DEFAULT_Q 512
2539 #define CGEMM_DEFAULT_Q 512
2540 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, unroll factors and literal P/Q/R defaults for the S/D/C/Z
 * variants.
 * NOTE(review): the conditional that selects this group is not visible in
 * this view -- confirm which target it applies to.
 */
2550 #define GEMM_DEFAULT_OFFSET_A 0
2551 #define GEMM_DEFAULT_OFFSET_B 0
2552 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2554 #define SGEMM_DEFAULT_UNROLL_M 2
2555 #define SGEMM_DEFAULT_UNROLL_N 8
2556 #define DGEMM_DEFAULT_UNROLL_M 2
2557 #define DGEMM_DEFAULT_UNROLL_N 8
2558 #define CGEMM_DEFAULT_UNROLL_M 1
2559 #define CGEMM_DEFAULT_UNROLL_N 4
2560 #define ZGEMM_DEFAULT_UNROLL_M 1
2561 #define ZGEMM_DEFAULT_UNROLL_N 4
2563 #define SGEMM_DEFAULT_P 108
2564 #define DGEMM_DEFAULT_P 112
2565 #define CGEMM_DEFAULT_P 108
2566 #define ZGEMM_DEFAULT_P 112
2568 #define SGEMM_DEFAULT_Q 288
2569 #define DGEMM_DEFAULT_Q 144
2570 #define CGEMM_DEFAULT_Q 144
2571 #define ZGEMM_DEFAULT_Q 72
2573 #define SGEMM_DEFAULT_R 2000
2574 #define DGEMM_DEFAULT_R 2000
2575 #define CGEMM_DEFAULT_R 2000
2576 #define ZGEMM_DEFAULT_R 2000
/*
 * GEMM defaults used when LOONGSON3R4 is defined: A/B offsets, alignment
 * constant, unroll factors, P/Q/R defaults and the secondary offsets
 * GEMM_OFFSET_A1 / GEMM_OFFSET_B1. The closing #endif is not visible in
 * this view.
 */
2581 #if defined(LOONGSON3R4)
2585 #define GEMM_DEFAULT_OFFSET_A 0
2586 #define GEMM_DEFAULT_OFFSET_B 0
2587 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * NOTE(review): the unroll factors are defined twice below (a larger set,
 * then a smaller set). The directives choosing between them (presumably
 * #if/#else) are not visible here.
 */
2590 #define SGEMM_DEFAULT_UNROLL_M 8
2591 #define SGEMM_DEFAULT_UNROLL_N 8
2593 #define DGEMM_DEFAULT_UNROLL_M 8
2594 #define DGEMM_DEFAULT_UNROLL_N 4
2596 #define CGEMM_DEFAULT_UNROLL_M 8
2597 #define CGEMM_DEFAULT_UNROLL_N 4
2599 #define ZGEMM_DEFAULT_UNROLL_M 4
2600 #define ZGEMM_DEFAULT_UNROLL_N 4
2602 #define SGEMM_DEFAULT_UNROLL_M 8
2603 #define SGEMM_DEFAULT_UNROLL_N 4
2605 #define DGEMM_DEFAULT_UNROLL_M 4
2606 #define DGEMM_DEFAULT_UNROLL_N 4
2608 #define CGEMM_DEFAULT_UNROLL_M 4
2609 #define CGEMM_DEFAULT_UNROLL_N 2
2611 #define ZGEMM_DEFAULT_UNROLL_M 2
2612 #define ZGEMM_DEFAULT_UNROLL_N 2
2615 #define SGEMM_DEFAULT_P 64
2616 #define DGEMM_DEFAULT_P 44
2617 #define CGEMM_DEFAULT_P 64
2618 #define ZGEMM_DEFAULT_P 32
2620 #define SGEMM_DEFAULT_Q 192
2621 #define DGEMM_DEFAULT_Q 92
2622 #define CGEMM_DEFAULT_Q 128
2623 #define ZGEMM_DEFAULT_Q 80
/*
 * R is the literal 640 for S/C/Z; DGEMM's R expands to the identifier
 * dgemm_r, which is declared outside this chunk.
 */
2625 #define SGEMM_DEFAULT_R 640
2626 #define DGEMM_DEFAULT_R dgemm_r
2627 #define CGEMM_DEFAULT_R 640
2628 #define ZGEMM_DEFAULT_R 640
2630 #define GEMM_OFFSET_A1 0x10000
2631 #define GEMM_OFFSET_B1 0x100000
/* GEMM defaults selected when LOONGSON3R3 is defined. */
2636 #if defined(LOONGSON3R3)
2637 ////Copy from SICORTEX
/* Offset and alignment settings. */
2641 #define GEMM_DEFAULT_OFFSET_A 0
2642 #define GEMM_DEFAULT_OFFSET_B 0
2643 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
2645 #define SGEMM_DEFAULT_UNROLL_M 8
2646 #define SGEMM_DEFAULT_UNROLL_N 4
2648 #define DGEMM_DEFAULT_UNROLL_M 4
2649 #define DGEMM_DEFAULT_UNROLL_N 4
2651 #define CGEMM_DEFAULT_UNROLL_M 4
2652 #define CGEMM_DEFAULT_UNROLL_N 2
2654 #define ZGEMM_DEFAULT_UNROLL_M 2
2655 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2657 #define SGEMM_DEFAULT_P 64
2658 #define DGEMM_DEFAULT_P 44
2659 #define CGEMM_DEFAULT_P 64
2660 #define ZGEMM_DEFAULT_P 32
/* Q values per GEMM variant. */
2662 #define SGEMM_DEFAULT_Q 192
2663 #define DGEMM_DEFAULT_Q 92
2664 #define CGEMM_DEFAULT_Q 128
2665 #define ZGEMM_DEFAULT_Q 80
/* R values; DGEMM expands to the identifier dgemm_r instead of a literal. */
2667 #define SGEMM_DEFAULT_R 640
2668 #define DGEMM_DEFAULT_R dgemm_r
2669 #define CGEMM_DEFAULT_R 640
2670 #define ZGEMM_DEFAULT_R 640
/* Additional A1/B1 offsets. */
2672 #define GEMM_OFFSET_A1 0x10000
2673 #define GEMM_OFFSET_B1 0x100000
/* GEMM defaults shared by the targets P5600, MIPS1004K, MIPS24K, I6400,
   P6600 and I6500 (any one of them being defined selects this group). */
2678 #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
/* Offset and alignment settings. */
2682 #define GEMM_DEFAULT_OFFSET_A 0
2683 #define GEMM_DEFAULT_OFFSET_B 0
2684 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* First set of unroll factors. NOTE(review): a second set with the same
   macro names follows; the directives choosing between the two sets are not
   visible here and must be confirmed. */
2687 #define SGEMM_DEFAULT_UNROLL_M 8
2688 #define SGEMM_DEFAULT_UNROLL_N 8
2690 #define DGEMM_DEFAULT_UNROLL_M 8
2691 #define DGEMM_DEFAULT_UNROLL_N 4
2693 #define CGEMM_DEFAULT_UNROLL_M 8
2694 #define CGEMM_DEFAULT_UNROLL_N 4
2696 #define ZGEMM_DEFAULT_UNROLL_M 4
2697 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Second set of unroll factors (2 for every variant). */
2699 #define SGEMM_DEFAULT_UNROLL_M 2
2700 #define SGEMM_DEFAULT_UNROLL_N 2
2702 #define DGEMM_DEFAULT_UNROLL_M 2
2703 #define DGEMM_DEFAULT_UNROLL_N 2
2705 #define CGEMM_DEFAULT_UNROLL_M 2
2706 #define CGEMM_DEFAULT_UNROLL_N 2
2708 #define ZGEMM_DEFAULT_UNROLL_M 2
2709 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2712 #define SGEMM_DEFAULT_P 128
2713 #define DGEMM_DEFAULT_P 128
2714 #define CGEMM_DEFAULT_P 96
2715 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
2717 #define SGEMM_DEFAULT_Q 240
2718 #define DGEMM_DEFAULT_Q 120
2719 #define CGEMM_DEFAULT_Q 120
2720 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
2722 #define SGEMM_DEFAULT_R 12288
2723 #define DGEMM_DEFAULT_R 8192
2724 #define CGEMM_DEFAULT_R 4096
2725 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults selected when RISCV64_GENERIC is defined. */
2730 #ifdef RISCV64_GENERIC
/* Offset and alignment settings. */
2731 #define GEMM_DEFAULT_OFFSET_A 0
2732 #define GEMM_DEFAULT_OFFSET_B 0
2733 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2 in both M and N for every GEMM variant. */
2735 #define SGEMM_DEFAULT_UNROLL_M 2
2736 #define SGEMM_DEFAULT_UNROLL_N 2
2738 #define DGEMM_DEFAULT_UNROLL_M 2
2739 #define DGEMM_DEFAULT_UNROLL_N 2
2741 #define CGEMM_DEFAULT_UNROLL_M 2
2742 #define CGEMM_DEFAULT_UNROLL_N 2
2744 #define ZGEMM_DEFAULT_UNROLL_M 2
2745 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2747 #define SGEMM_DEFAULT_P 128
2748 #define DGEMM_DEFAULT_P 128
2749 #define CGEMM_DEFAULT_P 96
2750 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
2752 #define SGEMM_DEFAULT_Q 240
2753 #define DGEMM_DEFAULT_Q 120
2754 #define CGEMM_DEFAULT_Q 120
2755 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
2757 #define SGEMM_DEFAULT_R 12288
2758 #define DGEMM_DEFAULT_R 8192
2759 #define CGEMM_DEFAULT_R 4096
2760 #define ZGEMM_DEFAULT_R 4096
/* NOTE(review): OFFSET_A/OFFSET_B are defined here and again directly below;
   the conditional directives that presumably separate the two occurrences
   are not visible here. */
2764 #define GEMM_DEFAULT_OFFSET_A 0
2765 #define GEMM_DEFAULT_OFFSET_B 0
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
2770 #define GEMM_DEFAULT_OFFSET_A 0
2771 #define GEMM_DEFAULT_OFFSET_B 0
2772 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
2774 #define SGEMM_DEFAULT_UNROLL_M 16
2775 #define SGEMM_DEFAULT_UNROLL_N 4
2777 #define DGEMM_DEFAULT_UNROLL_M 8
2778 #define DGEMM_DEFAULT_UNROLL_N 4
2780 #define CGEMM_DEFAULT_UNROLL_M 2
2781 #define CGEMM_DEFAULT_UNROLL_N 2
2783 #define ZGEMM_DEFAULT_UNROLL_M 2
2784 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2786 #define SGEMM_DEFAULT_P 160
2787 #define DGEMM_DEFAULT_P 160
2788 #define CGEMM_DEFAULT_P 96
2789 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
2791 #define SGEMM_DEFAULT_Q 240
2792 #define DGEMM_DEFAULT_Q 128
2793 #define CGEMM_DEFAULT_Q 120
2794 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
2796 #define SGEMM_DEFAULT_R 12288
2797 #define DGEMM_DEFAULT_R 8192
2798 #define CGEMM_DEFAULT_R 4096
2799 #define ZGEMM_DEFAULT_R 4096
/* NOTE(review): OFFSET_A/OFFSET_B are defined here and again directly below;
   the conditional directives that presumably separate the two occurrences
   are not visible here. */
2803 #define GEMM_DEFAULT_OFFSET_A 0
2804 #define GEMM_DEFAULT_OFFSET_B 0
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
2812 #define GEMM_DEFAULT_OFFSET_A 0
2813 #define GEMM_DEFAULT_OFFSET_B 0
2814 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
2816 #define SGEMM_DEFAULT_UNROLL_M 4
2817 #define SGEMM_DEFAULT_UNROLL_N 4
2819 #define DGEMM_DEFAULT_UNROLL_M 4
2820 #define DGEMM_DEFAULT_UNROLL_N 4
2822 #define CGEMM_DEFAULT_UNROLL_M 2
2823 #define CGEMM_DEFAULT_UNROLL_N 2
2825 #define ZGEMM_DEFAULT_UNROLL_M 2
2826 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2828 #define SGEMM_DEFAULT_P 128
2829 #define DGEMM_DEFAULT_P 128
2830 #define CGEMM_DEFAULT_P 96
2831 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
2833 #define SGEMM_DEFAULT_Q 240
2834 #define DGEMM_DEFAULT_Q 120
2835 #define CGEMM_DEFAULT_Q 120
2836 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
2838 #define SGEMM_DEFAULT_R 12288
2839 #define DGEMM_DEFAULT_R 8192
2840 #define CGEMM_DEFAULT_R 4096
2841 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
2853 #define GEMM_DEFAULT_OFFSET_A 0
2854 #define GEMM_DEFAULT_OFFSET_B 0
2855 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
2857 #define SGEMM_DEFAULT_UNROLL_M 4
2858 #define SGEMM_DEFAULT_UNROLL_N 2
2860 #define DGEMM_DEFAULT_UNROLL_M 4
2861 #define DGEMM_DEFAULT_UNROLL_N 2
2863 #define CGEMM_DEFAULT_UNROLL_M 2
2864 #define CGEMM_DEFAULT_UNROLL_N 2
2866 #define ZGEMM_DEFAULT_UNROLL_M 2
2867 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2869 #define SGEMM_DEFAULT_P 128
2870 #define DGEMM_DEFAULT_P 128
2871 #define CGEMM_DEFAULT_P 96
2872 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
2874 #define SGEMM_DEFAULT_Q 240
2875 #define DGEMM_DEFAULT_Q 120
2876 #define CGEMM_DEFAULT_Q 120
2877 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
2879 #define SGEMM_DEFAULT_R 12288
2880 #define DGEMM_DEFAULT_R 8192
2881 #define CGEMM_DEFAULT_R 4096
2882 #define ZGEMM_DEFAULT_R 4096
2888 /* Common ARMv8 parameters */
/* Offset and alignment settings defined once, ahead of the per-core
   #if/#elif chain below. */
2894 #define GEMM_DEFAULT_OFFSET_A 0
2895 #define GEMM_DEFAULT_OFFSET_B 0
2896 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* First branch of the per-core chain: taken when any of CORTEXA57,
   CORTEXA72, CORTEXA73, FALKOR, TSV110 or EMAG8180 is defined. */
2900 #if defined(CORTEXA57) || \
2901 defined(CORTEXA72) || defined(CORTEXA73) || \
2902 defined(FALKOR) || defined(TSV110) || defined(EMAG8180)
/* Unroll factors (M and N) per GEMM variant. */
2904 #define SGEMM_DEFAULT_UNROLL_M 16
2905 #define SGEMM_DEFAULT_UNROLL_N 4
2907 #define DGEMM_DEFAULT_UNROLL_M 8
2908 #define DGEMM_DEFAULT_UNROLL_N 4
2910 #define CGEMM_DEFAULT_UNROLL_M 8
2911 #define CGEMM_DEFAULT_UNROLL_N 4
2913 #define ZGEMM_DEFAULT_UNROLL_M 4
2914 #define ZGEMM_DEFAULT_UNROLL_N 4
2916 /*FIXME: this should be using the cache size, but there is currently no easy way to
2917 query that on ARM. So if getarch counted more than 8 cores we simply assume the host
2918 is a big desktop or server with abundant cache rather than a phone or embedded device */
/* Larger P/Q values, used when NUM_CORES exceeds 8 or when TSV110 or
   EMAG8180 is defined. */
2919 #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180)
2920 #define SGEMM_DEFAULT_P 512
2921 #define DGEMM_DEFAULT_P 256
2922 #define CGEMM_DEFAULT_P 256
2923 #define ZGEMM_DEFAULT_P 128
2925 #define SGEMM_DEFAULT_Q 1024
2926 #define DGEMM_DEFAULT_Q 512
2927 #define CGEMM_DEFAULT_Q 512
2928 #define ZGEMM_DEFAULT_Q 512
/* Alternative, smaller P/Q values. NOTE(review): the directive introducing
   this alternative and the one closing the NUM_CORES conditional are not
   visible here; confirm the structure. */
2930 #define SGEMM_DEFAULT_P 128
2931 #define DGEMM_DEFAULT_P 160
2932 #define CGEMM_DEFAULT_P 128
2933 #define ZGEMM_DEFAULT_P 128
2935 #define SGEMM_DEFAULT_Q 352
2936 #define DGEMM_DEFAULT_Q 128
2937 #define CGEMM_DEFAULT_Q 224
2938 #define ZGEMM_DEFAULT_Q 112
/* R values per GEMM variant; ZGEMM uses half the value of the others. */
2941 #define SGEMM_DEFAULT_R 4096
2942 #define DGEMM_DEFAULT_R 4096
2943 #define CGEMM_DEFAULT_R 4096
2944 #define ZGEMM_DEFAULT_R 2048
/* Branch of the per-core chain taken when CORTEXA53 is defined. */
2946 #elif defined(CORTEXA53)
/* Unroll factors (M and N) per GEMM variant. */
2948 #define SGEMM_DEFAULT_UNROLL_M 8
2949 #define SGEMM_DEFAULT_UNROLL_N 8
2951 #define DGEMM_DEFAULT_UNROLL_M 8
2952 #define DGEMM_DEFAULT_UNROLL_N 4
2954 #define CGEMM_DEFAULT_UNROLL_M 8
2955 #define CGEMM_DEFAULT_UNROLL_N 4
2957 #define ZGEMM_DEFAULT_UNROLL_M 4
2958 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
2960 #define SGEMM_DEFAULT_P 256
2961 #define DGEMM_DEFAULT_P 160
2962 #define CGEMM_DEFAULT_P 128
2963 #define ZGEMM_DEFAULT_P 128
/* Q values per GEMM variant. */
2965 #define SGEMM_DEFAULT_Q 256
2966 #define DGEMM_DEFAULT_Q 128
2967 #define CGEMM_DEFAULT_Q 224
2968 #define ZGEMM_DEFAULT_Q 112
/* R values per GEMM variant; ZGEMM uses half the value of the others. */
2970 #define SGEMM_DEFAULT_R 4096
2971 #define DGEMM_DEFAULT_R 4096
2972 #define CGEMM_DEFAULT_R 4096
2973 #define ZGEMM_DEFAULT_R 2048
/* Branch of the per-core chain taken when THUNDERX is defined. */
2975 #elif defined(THUNDERX)
/* Unroll factors (M and N) per GEMM variant. */
2977 #define SGEMM_DEFAULT_UNROLL_M 4
2978 #define SGEMM_DEFAULT_UNROLL_N 4
2980 #define DGEMM_DEFAULT_UNROLL_M 2
2981 #define DGEMM_DEFAULT_UNROLL_N 2
2983 #define CGEMM_DEFAULT_UNROLL_M 2
2984 #define CGEMM_DEFAULT_UNROLL_N 2
2986 #define ZGEMM_DEFAULT_UNROLL_M 2
2987 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
2989 #define SGEMM_DEFAULT_P 128
2990 #define DGEMM_DEFAULT_P 128
2991 #define CGEMM_DEFAULT_P 96
2992 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
2994 #define SGEMM_DEFAULT_Q 240
2995 #define DGEMM_DEFAULT_Q 120
2996 #define CGEMM_DEFAULT_Q 120
2997 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
2999 #define SGEMM_DEFAULT_R 12288
3000 #define DGEMM_DEFAULT_R 8192
3001 #define CGEMM_DEFAULT_R 4096
3002 #define ZGEMM_DEFAULT_R 4096
/* Branch of the per-core chain taken when THUNDERX2T99 is defined. */
3004 #elif defined(THUNDERX2T99)
/* Unroll factors (M and N) per GEMM variant. */
3006 #define SGEMM_DEFAULT_UNROLL_M 16
3007 #define SGEMM_DEFAULT_UNROLL_N 4
3009 #define DGEMM_DEFAULT_UNROLL_M 8
3010 #define DGEMM_DEFAULT_UNROLL_N 4
3012 #define CGEMM_DEFAULT_UNROLL_M 8
3013 #define CGEMM_DEFAULT_UNROLL_N 4
3015 #define ZGEMM_DEFAULT_UNROLL_M 4
3016 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
3018 #define SGEMM_DEFAULT_P 128
3019 #define DGEMM_DEFAULT_P 160
3020 #define CGEMM_DEFAULT_P 128
3021 #define ZGEMM_DEFAULT_P 128
/* Q values per GEMM variant. */
3023 #define SGEMM_DEFAULT_Q 352
3024 #define DGEMM_DEFAULT_Q 128
3025 #define CGEMM_DEFAULT_Q 224
3026 #define ZGEMM_DEFAULT_Q 112
/* R values: 4096 for every GEMM variant. */
3028 #define SGEMM_DEFAULT_R 4096
3029 #define DGEMM_DEFAULT_R 4096
3030 #define CGEMM_DEFAULT_R 4096
3031 #define ZGEMM_DEFAULT_R 4096
/* Branch of the per-core chain taken when THUNDERX3T110 is defined. The
   visible values match the THUNDERX2T99 branch except DGEMM_DEFAULT_P
   (320 here, 160 there). */
3033 #elif defined(THUNDERX3T110)
/* Unroll factors (M and N) per GEMM variant. */
3035 #define SGEMM_DEFAULT_UNROLL_M 16
3036 #define SGEMM_DEFAULT_UNROLL_N 4
3038 #define DGEMM_DEFAULT_UNROLL_M 8
3039 #define DGEMM_DEFAULT_UNROLL_N 4
3041 #define CGEMM_DEFAULT_UNROLL_M 8
3042 #define CGEMM_DEFAULT_UNROLL_N 4
3044 #define ZGEMM_DEFAULT_UNROLL_M 4
3045 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
3047 #define SGEMM_DEFAULT_P 128
3048 #define DGEMM_DEFAULT_P 320
3049 #define CGEMM_DEFAULT_P 128
3050 #define ZGEMM_DEFAULT_P 128
/* Q values per GEMM variant. */
3052 #define SGEMM_DEFAULT_Q 352
3053 #define DGEMM_DEFAULT_Q 128
3054 #define CGEMM_DEFAULT_Q 224
3055 #define ZGEMM_DEFAULT_Q 112
/* R values: 4096 for every GEMM variant. */
3057 #define SGEMM_DEFAULT_R 4096
3058 #define DGEMM_DEFAULT_R 4096
3059 #define CGEMM_DEFAULT_R 4096
3060 #define ZGEMM_DEFAULT_R 4096
/* Branch of the per-core chain taken when NEOVERSEN1 is defined. */
3062 #elif defined(NEOVERSEN1)
/* Unroll factors (M and N) per GEMM variant. */
3064 #define SGEMM_DEFAULT_UNROLL_M 16
3065 #define SGEMM_DEFAULT_UNROLL_N 4
3067 #define DGEMM_DEFAULT_UNROLL_M 8
3068 #define DGEMM_DEFAULT_UNROLL_N 4
3070 #define CGEMM_DEFAULT_UNROLL_M 8
3071 #define CGEMM_DEFAULT_UNROLL_N 4
3073 #define ZGEMM_DEFAULT_UNROLL_M 4
3074 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
3076 #define SGEMM_DEFAULT_P 128
3077 #define DGEMM_DEFAULT_P 160
3078 #define CGEMM_DEFAULT_P 128
3079 #define ZGEMM_DEFAULT_P 128
/* Q values per GEMM variant. */
3081 #define SGEMM_DEFAULT_Q 352
3082 #define DGEMM_DEFAULT_Q 128
3083 #define CGEMM_DEFAULT_Q 224
3084 #define ZGEMM_DEFAULT_Q 112
/* R values: 4096 for every GEMM variant. */
3086 #define SGEMM_DEFAULT_R 4096
3087 #define DGEMM_DEFAULT_R 4096
3088 #define CGEMM_DEFAULT_R 4096
3089 #define ZGEMM_DEFAULT_R 4096
/* Final branch of the per-core chain: applies when none of the preceding
   core macros matched. */
3091 #else /* Other/undetected ARMv8 cores */
/* Unroll factors (M and N) per GEMM variant. */
3093 #define SGEMM_DEFAULT_UNROLL_M 16
3094 #define SGEMM_DEFAULT_UNROLL_N 4
3096 #define DGEMM_DEFAULT_UNROLL_M 8
3097 #define DGEMM_DEFAULT_UNROLL_N 4
3099 #define CGEMM_DEFAULT_UNROLL_M 8
3100 #define CGEMM_DEFAULT_UNROLL_N 4
3102 #define ZGEMM_DEFAULT_UNROLL_M 4
3103 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
3105 #define SGEMM_DEFAULT_P 128
3106 #define DGEMM_DEFAULT_P 160
3107 #define CGEMM_DEFAULT_P 128
3108 #define ZGEMM_DEFAULT_P 128
/* Q values per GEMM variant. */
3110 #define SGEMM_DEFAULT_Q 352
3111 #define DGEMM_DEFAULT_Q 128
3112 #define CGEMM_DEFAULT_Q 224
3113 #define ZGEMM_DEFAULT_Q 112
/* R values: 4096 for every GEMM variant. */
3115 #define SGEMM_DEFAULT_R 4096
3116 #define DGEMM_DEFAULT_R 4096
3117 #define CGEMM_DEFAULT_R 4096
3118 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
3128 #define GEMM_DEFAULT_OFFSET_A 0
3129 #define GEMM_DEFAULT_OFFSET_B 0
3130 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2 in both M and N for every GEMM variant. */
3132 #define SGEMM_DEFAULT_UNROLL_M 2
3133 #define SGEMM_DEFAULT_UNROLL_N 2
3135 #define DGEMM_DEFAULT_UNROLL_M 2
3136 #define DGEMM_DEFAULT_UNROLL_N 2
3138 #define CGEMM_DEFAULT_UNROLL_M 2
3139 #define CGEMM_DEFAULT_UNROLL_N 2
3141 #define ZGEMM_DEFAULT_UNROLL_M 2
3142 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
3144 #define SGEMM_DEFAULT_P 128
3145 #define DGEMM_DEFAULT_P 128
3146 #define CGEMM_DEFAULT_P 96
3147 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
3149 #define SGEMM_DEFAULT_Q 240
3150 #define DGEMM_DEFAULT_Q 120
3151 #define CGEMM_DEFAULT_Q 120
3152 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
3154 #define SGEMM_DEFAULT_R 12288
3155 #define DGEMM_DEFAULT_R 8192
3156 #define CGEMM_DEFAULT_R 4096
3157 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
3169 #define GEMM_DEFAULT_OFFSET_A 0
3170 #define GEMM_DEFAULT_OFFSET_B 0
3171 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
3173 #define SGEMM_DEFAULT_UNROLL_M 4
3174 #define SGEMM_DEFAULT_UNROLL_N 4
3176 #define DGEMM_DEFAULT_UNROLL_M 4
3177 #define DGEMM_DEFAULT_UNROLL_N 4
3179 #define CGEMM_DEFAULT_UNROLL_M 2
3180 #define CGEMM_DEFAULT_UNROLL_N 2
3182 #define ZGEMM_DEFAULT_UNROLL_M 2
3183 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
3185 #define SGEMM_DEFAULT_P 128
3186 #define DGEMM_DEFAULT_P 128
3187 #define CGEMM_DEFAULT_P 96
3188 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
3190 #define SGEMM_DEFAULT_Q 240
3191 #define DGEMM_DEFAULT_Q 120
3192 #define CGEMM_DEFAULT_Q 120
3193 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
3195 #define SGEMM_DEFAULT_R 12288
3196 #define DGEMM_DEFAULT_R 8192
3197 #define CGEMM_DEFAULT_R 4096
3198 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
3210 #define GEMM_DEFAULT_OFFSET_A 0
3211 #define GEMM_DEFAULT_OFFSET_B 0
3212 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
3214 #define SGEMM_DEFAULT_UNROLL_M 4
3215 #define SGEMM_DEFAULT_UNROLL_N 4
3217 #define DGEMM_DEFAULT_UNROLL_M 4
3218 #define DGEMM_DEFAULT_UNROLL_N 4
3220 #define CGEMM_DEFAULT_UNROLL_M 2
3221 #define CGEMM_DEFAULT_UNROLL_N 2
3223 #define ZGEMM_DEFAULT_UNROLL_M 2
3224 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
3226 #define SGEMM_DEFAULT_P 128
3227 #define DGEMM_DEFAULT_P 128
3228 #define CGEMM_DEFAULT_P 96
3229 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
3231 #define SGEMM_DEFAULT_Q 240
3232 #define DGEMM_DEFAULT_Q 120
3233 #define CGEMM_DEFAULT_Q 120
3234 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
3236 #define SGEMM_DEFAULT_R 12288
3237 #define DGEMM_DEFAULT_R 8192
3238 #define CGEMM_DEFAULT_R 4096
3239 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults selected when ZARCH_GENERIC is defined. */
3247 #if defined(ZARCH_GENERIC)
/* Offset and alignment settings. */
3251 #define GEMM_DEFAULT_OFFSET_A 0
3252 #define GEMM_DEFAULT_OFFSET_B 0
3253 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2 in both M and N for every GEMM variant. */
3255 #define SGEMM_DEFAULT_UNROLL_M 2
3256 #define SGEMM_DEFAULT_UNROLL_N 2
3258 #define DGEMM_DEFAULT_UNROLL_M 2
3259 #define DGEMM_DEFAULT_UNROLL_N 2
3261 #define CGEMM_DEFAULT_UNROLL_M 2
3262 #define CGEMM_DEFAULT_UNROLL_N 2
3264 #define ZGEMM_DEFAULT_UNROLL_M 2
3265 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values per GEMM variant. */
3267 #define SGEMM_DEFAULT_P 128
3268 #define DGEMM_DEFAULT_P 128
3269 #define CGEMM_DEFAULT_P 96
3270 #define ZGEMM_DEFAULT_P 64
/* Q values per GEMM variant. */
3272 #define SGEMM_DEFAULT_Q 240
3273 #define DGEMM_DEFAULT_Q 120
3274 #define CGEMM_DEFAULT_Q 120
3275 #define ZGEMM_DEFAULT_Q 120
/* R values per GEMM variant. */
3277 #define SGEMM_DEFAULT_R 12288
3278 #define DGEMM_DEFAULT_R 8192
3279 #define CGEMM_DEFAULT_R 4096
3280 #define ZGEMM_DEFAULT_R 4096
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
3290 #define GEMM_DEFAULT_OFFSET_A 0
3291 #define GEMM_DEFAULT_OFFSET_B 0
3292 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
3294 #define SGEMM_DEFAULT_UNROLL_M 8
3295 #define SGEMM_DEFAULT_UNROLL_N 4
3297 #define DGEMM_DEFAULT_UNROLL_M 8
3298 #define DGEMM_DEFAULT_UNROLL_N 4
3300 #define CGEMM_DEFAULT_UNROLL_M 4
3301 #define CGEMM_DEFAULT_UNROLL_N 4
3303 #define ZGEMM_DEFAULT_UNROLL_M 4
3304 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
3306 #define SGEMM_DEFAULT_P 456
3307 #define DGEMM_DEFAULT_P 320
3308 #define CGEMM_DEFAULT_P 480
3309 #define ZGEMM_DEFAULT_P 224
/* Q values per GEMM variant. */
3311 #define SGEMM_DEFAULT_Q 488
3312 #define DGEMM_DEFAULT_Q 384
3313 #define CGEMM_DEFAULT_Q 128
3314 #define ZGEMM_DEFAULT_Q 352
/* R values per GEMM variant. */
3316 #define SGEMM_DEFAULT_R 8192
3317 #define DGEMM_DEFAULT_R 4096
3318 #define CGEMM_DEFAULT_R 4096
3319 #define ZGEMM_DEFAULT_R 2048
/* GEMM defaults for one target. NOTE(review): the selecting directive is not
   visible here; confirm which target this group belongs to. */
/* Offset and alignment settings. */
3330 #define GEMM_DEFAULT_OFFSET_A 0
3331 #define GEMM_DEFAULT_OFFSET_B 0
3332 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M and N) per GEMM variant. */
3334 #define SGEMM_DEFAULT_UNROLL_M 16
3335 #define SGEMM_DEFAULT_UNROLL_N 4
3337 #define DGEMM_DEFAULT_UNROLL_M 8
3338 #define DGEMM_DEFAULT_UNROLL_N 4
3340 #define CGEMM_DEFAULT_UNROLL_M 4
3341 #define CGEMM_DEFAULT_UNROLL_N 4
3343 #define ZGEMM_DEFAULT_UNROLL_M 4
3344 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values per GEMM variant. */
3346 #define SGEMM_DEFAULT_P 480
3347 #define DGEMM_DEFAULT_P 320
3348 #define CGEMM_DEFAULT_P 480
3349 #define ZGEMM_DEFAULT_P 224
/* Q values per GEMM variant. */
3351 #define SGEMM_DEFAULT_Q 512
3352 #define DGEMM_DEFAULT_Q 384
3353 #define CGEMM_DEFAULT_Q 128
3354 #define ZGEMM_DEFAULT_Q 352
/* R values per GEMM variant. */
3356 #define SGEMM_DEFAULT_R 8192
3357 #define DGEMM_DEFAULT_R 4096
3358 #define CGEMM_DEFAULT_R 4096
3359 #define ZGEMM_DEFAULT_R 2048
/* GEMM defaults for one target. Unlike the groups above, this one also sets
   QGEMM and XGEMM values and takes P and R from identifiers instead of
   literals. NOTE(review): the selecting directive is not visible here;
   confirm which target this group belongs to. */
/* Offset and alignment settings; ALIGN is 0x0ffff here, not 0x03fff. */
3372 #define GEMM_DEFAULT_OFFSET_A 0
3373 #define GEMM_DEFAULT_OFFSET_B 0
3374 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors in N for all six variants; XGEMM is the only one set to 1. */
3376 #define SGEMM_DEFAULT_UNROLL_N 2
3377 #define DGEMM_DEFAULT_UNROLL_N 2
3378 #define QGEMM_DEFAULT_UNROLL_N 2
3379 #define CGEMM_DEFAULT_UNROLL_N 2
3380 #define ZGEMM_DEFAULT_UNROLL_N 2
3381 #define XGEMM_DEFAULT_UNROLL_N 1
/* Unroll factors in M. NOTE(review): the same six definitions appear twice
   with identical values; the directives that presumably select one of the
   two copies are not visible here. */
3384 #define SGEMM_DEFAULT_UNROLL_M 2
3385 #define DGEMM_DEFAULT_UNROLL_M 2
3386 #define QGEMM_DEFAULT_UNROLL_M 2
3387 #define CGEMM_DEFAULT_UNROLL_M 2
3388 #define ZGEMM_DEFAULT_UNROLL_M 2
3389 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second copy of the M unroll factors. */
3391 #define SGEMM_DEFAULT_UNROLL_M 2
3392 #define DGEMM_DEFAULT_UNROLL_M 2
3393 #define QGEMM_DEFAULT_UNROLL_M 2
3394 #define CGEMM_DEFAULT_UNROLL_M 2
3395 #define ZGEMM_DEFAULT_UNROLL_M 2
3396 #define XGEMM_DEFAULT_UNROLL_M 1
/* P values expand to the identifiers sgemm_p ... xgemm_p, which are defined elsewhere. */
3399 #define SGEMM_DEFAULT_P sgemm_p
3400 #define DGEMM_DEFAULT_P dgemm_p
3401 #define QGEMM_DEFAULT_P qgemm_p
3402 #define CGEMM_DEFAULT_P cgemm_p
3403 #define ZGEMM_DEFAULT_P zgemm_p
3404 #define XGEMM_DEFAULT_P xgemm_p
/* R values likewise expand to the identifiers sgemm_r ... xgemm_r. */
3406 #define SGEMM_DEFAULT_R sgemm_r
3407 #define DGEMM_DEFAULT_R dgemm_r
3408 #define QGEMM_DEFAULT_R qgemm_r
3409 #define CGEMM_DEFAULT_R cgemm_r
3410 #define ZGEMM_DEFAULT_R zgemm_r
3411 #define XGEMM_DEFAULT_R xgemm_r
/* Q values are the literal 128 for every variant. */
3413 #define SGEMM_DEFAULT_Q 128
3414 #define DGEMM_DEFAULT_Q 128
3415 #define QGEMM_DEFAULT_Q 128
3416 #define CGEMM_DEFAULT_Q 128
3417 #define ZGEMM_DEFAULT_Q 128
3418 #define XGEMM_DEFAULT_Q 128
/* Fallbacks: give each QGEMM/XGEMM unroll factor the value 2 when no earlier
   definition exists. NOTE(review): the #endif closing each #ifndef is not
   visible here. */
3424 #ifndef QGEMM_DEFAULT_UNROLL_M
3425 #define QGEMM_DEFAULT_UNROLL_M 2
3428 #ifndef QGEMM_DEFAULT_UNROLL_N
3429 #define QGEMM_DEFAULT_UNROLL_N 2
3432 #ifndef XGEMM_DEFAULT_UNROLL_M
3433 #define XGEMM_DEFAULT_UNROLL_M 2
3436 #ifndef XGEMM_DEFAULT_UNROLL_N
3437 #define XGEMM_DEFAULT_UNROLL_N 2
/* Instruction-mnemonic macros. Each expands to a mnemonic, an immediate and a
   trailing comma, so the remaining operands are written after the macro at
   the use site. */
/* SHUFPD_0..3 spelled with shufps. NOTE(review): presumably these immediates
   reproduce the shufpd $0..$3 selections for builds that cannot use shufpd;
   the directive choosing between this set and the next is not visible here. */
3441 #define SHUFPD_0 shufps $0x44,
3442 #define SHUFPD_1 shufps $0x4e,
3443 #define SHUFPD_2 shufps $0xe4,
3444 #define SHUFPD_3 shufps $0xee,
/* SHUFPD_0..3 spelled with shufpd itself. NOTE(review): the embedded line
   numbers suggest each definition sits in its own conditional; confirm. */
3448 #define SHUFPD_0 shufpd $0,
3452 #define SHUFPD_1 shufpd $1,
3456 #define SHUFPD_2 shufpd $2,
3460 #define SHUFPD_3 shufpd $3,
/* shufps with immediate 0x39. */
3464 #define SHUFPS_39 shufps $0x39,