1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following disclaimer. */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/* Default tuning values for the SBGEMM routines: the N, M and MN unroll
 * factors followed by the P, R and Q sizes, all given as literal constants.
 * NOTE(review): presumably SBGEMM is the bfloat16 GEMM variant and P/Q/R are
 * cache-blocking sizes - confirm against the code that consumes them. */
75 #define SBGEMM_DEFAULT_UNROLL_N 4
76 #define SBGEMM_DEFAULT_UNROLL_M 8
77 #define SBGEMM_DEFAULT_UNROLL_MN 32
78 #define SBGEMM_DEFAULT_P 256
79 #define SBGEMM_DEFAULT_R 256
80 #define SBGEMM_DEFAULT_Q 256
/* Parameter group: buffer offsets and alignment mask, then per-precision
 * (S, D, Q, C, Z, X) unroll factors and P/R/Q sizes.
 * NOTE(review): the guard that selects this group is not present in this
 * text - confirm which target it belongs to. */
86 #define GEMM_DEFAULT_OFFSET_A 64
87 #define GEMM_DEFAULT_OFFSET_B 256
88 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* N-direction unroll factors, one per precision. */
90 #define SGEMM_DEFAULT_UNROLL_N 4
91 #define DGEMM_DEFAULT_UNROLL_N 4
92 #define QGEMM_DEFAULT_UNROLL_N 2
93 #define CGEMM_DEFAULT_UNROLL_N 2
94 #define ZGEMM_DEFAULT_UNROLL_N 2
95 #define XGEMM_DEFAULT_UNROLL_N 1
/* M-direction unroll factors are listed twice with different values.
 * NOTE(review): presumably the two sets are alternative branches of a
 * conditional whose directives are missing here - confirm. First set: */
98 #define SGEMM_DEFAULT_UNROLL_M 4
99 #define DGEMM_DEFAULT_UNROLL_M 2
100 #define QGEMM_DEFAULT_UNROLL_M 2
101 #define CGEMM_DEFAULT_UNROLL_M 2
102 #define ZGEMM_DEFAULT_UNROLL_M 1
103 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second set: S, D, C and Z are doubled relative to the first set. */
105 #define SGEMM_DEFAULT_UNROLL_M 8
106 #define DGEMM_DEFAULT_UNROLL_M 4
107 #define QGEMM_DEFAULT_UNROLL_M 2
108 #define CGEMM_DEFAULT_UNROLL_M 4
109 #define ZGEMM_DEFAULT_UNROLL_M 2
110 #define XGEMM_DEFAULT_UNROLL_M 1
/* P expands to an identifier (sgemm_p, ...) instead of a literal.
 * NOTE(review): presumably variables set at run time elsewhere - confirm. */
113 #define SGEMM_DEFAULT_P sgemm_p
114 #define DGEMM_DEFAULT_P dgemm_p
115 #define QGEMM_DEFAULT_P qgemm_p
116 #define CGEMM_DEFAULT_P cgemm_p
117 #define ZGEMM_DEFAULT_P zgemm_p
118 #define XGEMM_DEFAULT_P xgemm_p
/* R likewise expands to an identifier per precision. */
120 #define SGEMM_DEFAULT_R sgemm_r
121 #define DGEMM_DEFAULT_R dgemm_r
122 #define QGEMM_DEFAULT_R qgemm_r
123 #define CGEMM_DEFAULT_R cgemm_r
124 #define ZGEMM_DEFAULT_R zgemm_r
125 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal and is listed twice: 248 for every precision here ... */
129 #define SGEMM_DEFAULT_Q 248
130 #define DGEMM_DEFAULT_Q 248
131 #define QGEMM_DEFAULT_Q 248
132 #define CGEMM_DEFAULT_Q 248
133 #define ZGEMM_DEFAULT_Q 248
134 #define XGEMM_DEFAULT_Q 248
/* ... and 240 for every precision here.
 * NOTE(review): the condition choosing between 248 and 240 is not present
 * in this text - confirm. */
138 #define SGEMM_DEFAULT_Q 240
139 #define DGEMM_DEFAULT_Q 240
140 #define QGEMM_DEFAULT_Q 240
141 #define CGEMM_DEFAULT_Q 240
142 #define ZGEMM_DEFAULT_Q 240
143 #define XGEMM_DEFAULT_Q 240
/* Feature flag, defined without a value. */
149 #define HAVE_EXCLUSIVE_CACHE
/* Parameter group used when BARCELONA, SHANGHAI or BOBCAT is defined.
 * NOTE(review): the matching #endif and any inner #if/#else directives are
 * not present in this text; repeated definitions below presumably belong to
 * alternative branches - confirm. */
153 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
/* Buffer offsets and alignment mask. */
158 #define GEMM_DEFAULT_OFFSET_A 64
159 #define GEMM_DEFAULT_OFFSET_B 832
160 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* N-direction unroll factors, one per precision. */
162 #define SGEMM_DEFAULT_UNROLL_N 4
163 #define DGEMM_DEFAULT_UNROLL_N 4
164 #define QGEMM_DEFAULT_UNROLL_N 2
165 #define CGEMM_DEFAULT_UNROLL_N 2
166 #define ZGEMM_DEFAULT_UNROLL_N 2
167 #define XGEMM_DEFAULT_UNROLL_N 1
/* M-direction unroll factors, first set. */
170 #define SGEMM_DEFAULT_UNROLL_M 4
171 #define DGEMM_DEFAULT_UNROLL_M 2
172 #define QGEMM_DEFAULT_UNROLL_M 2
173 #define CGEMM_DEFAULT_UNROLL_M 2
174 #define ZGEMM_DEFAULT_UNROLL_M 1
175 #define XGEMM_DEFAULT_UNROLL_M 1
/* M-direction unroll factors, second set (S, D, C, Z doubled). */
177 #define SGEMM_DEFAULT_UNROLL_M 8
178 #define DGEMM_DEFAULT_UNROLL_M 4
179 #define QGEMM_DEFAULT_UNROLL_M 2
180 #define CGEMM_DEFAULT_UNROLL_M 4
181 #define ZGEMM_DEFAULT_UNROLL_M 2
182 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and Q literals, first set: P from 496 down to 62, Q 248 throughout. */
186 #define SGEMM_DEFAULT_P 496
187 #define DGEMM_DEFAULT_P 248
188 #define QGEMM_DEFAULT_P 124
189 #define CGEMM_DEFAULT_P 248
190 #define ZGEMM_DEFAULT_P 124
191 #define XGEMM_DEFAULT_P 62
193 #define SGEMM_DEFAULT_Q 248
194 #define DGEMM_DEFAULT_Q 248
195 #define QGEMM_DEFAULT_Q 248
196 #define CGEMM_DEFAULT_Q 248
197 #define ZGEMM_DEFAULT_Q 248
198 #define XGEMM_DEFAULT_Q 248
/* P and Q literals, second set: P from 448 down to 56, Q 224 throughout. */
202 #define SGEMM_DEFAULT_P 448
203 #define DGEMM_DEFAULT_P 224
204 #define QGEMM_DEFAULT_P 112
205 #define CGEMM_DEFAULT_P 224
206 #define ZGEMM_DEFAULT_P 112
207 #define XGEMM_DEFAULT_P 56
209 #define SGEMM_DEFAULT_Q 224
210 #define DGEMM_DEFAULT_Q 224
211 #define QGEMM_DEFAULT_Q 224
212 #define CGEMM_DEFAULT_Q 224
213 #define ZGEMM_DEFAULT_Q 224
214 #define XGEMM_DEFAULT_Q 224
/* R expands to an identifier per precision (note Q is listed before D). */
218 #define SGEMM_DEFAULT_R sgemm_r
219 #define QGEMM_DEFAULT_R qgemm_r
220 #define DGEMM_DEFAULT_R dgemm_r
221 #define CGEMM_DEFAULT_R cgemm_r
222 #define ZGEMM_DEFAULT_R zgemm_r
223 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag without a value, and GEMM_THREAD mapped to gemm_thread_mn. */
226 #define HAVE_EXCLUSIVE_CACHE
228 #define GEMM_THREAD gemm_thread_mn
/* Parameter group: offsets/alignment, unroll factors, 3M unrolls, P/Q/R.
 * NOTE(review): the guard that selects this group and the #else/#endif
 * lines for the #if directives below are not present in this text -
 * confirm the branch structure before editing values. */
238 #define GEMM_DEFAULT_OFFSET_A 64
239 #define GEMM_DEFAULT_OFFSET_B 832
240 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* N-direction unroll factors for Q, C, Z and X. */
244 #define QGEMM_DEFAULT_UNROLL_N 2
245 #define CGEMM_DEFAULT_UNROLL_N 2
246 #define ZGEMM_DEFAULT_UNROLL_N 2
247 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D N = 4 together with small M factors. */
250 #define SGEMM_DEFAULT_UNROLL_N 4
251 #define DGEMM_DEFAULT_UNROLL_N 4
252 #define SGEMM_DEFAULT_UNROLL_M 4
253 #define DGEMM_DEFAULT_UNROLL_M 2
254 #define QGEMM_DEFAULT_UNROLL_M 2
255 #define CGEMM_DEFAULT_UNROLL_M 2
256 #define ZGEMM_DEFAULT_UNROLL_M 1
257 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D N = 2 with M = 16/8, plus the 3M unroll factors,
 * a DGEMM MN unroll and GEMV_UNROLL. */
259 #define SGEMM_DEFAULT_UNROLL_N 2
260 #define DGEMM_DEFAULT_UNROLL_N 2
261 #define SGEMM_DEFAULT_UNROLL_M 16
262 #define DGEMM_DEFAULT_UNROLL_M 8
263 #define QGEMM_DEFAULT_UNROLL_M 2
264 #define CGEMM_DEFAULT_UNROLL_M 4
265 #define ZGEMM_DEFAULT_UNROLL_M 2
266 #define XGEMM_DEFAULT_UNROLL_M 1
267 #define CGEMM3M_DEFAULT_UNROLL_N 4
268 #define CGEMM3M_DEFAULT_UNROLL_M 8
269 #define ZGEMM3M_DEFAULT_UNROLL_N 4
270 #define ZGEMM3M_DEFAULT_UNROLL_M 4
272 #define DGEMM_DEFAULT_UNROLL_MN 16
273 #define GEMV_UNROLL 8
/* S/D P values when ARCH_X86_64 is defined ... */
277 #if defined(ARCH_X86_64)
278 #define SGEMM_DEFAULT_P 768
279 #define DGEMM_DEFAULT_P 384
/* ... and smaller S/D P values (presumably the #else branch - confirm). */
281 #define SGEMM_DEFAULT_P 448
282 #define DGEMM_DEFAULT_P 224
/* P for the remaining precisions. */
285 #define QGEMM_DEFAULT_P 112
286 #define CGEMM_DEFAULT_P 224
287 #define ZGEMM_DEFAULT_P 112
288 #define XGEMM_DEFAULT_P 56
/* S/D Q values when ARCH_X86_64 is defined ... */
290 #if defined(ARCH_X86_64)
291 #define SGEMM_DEFAULT_Q 168
292 #define DGEMM_DEFAULT_Q 168
/* ... and the alternative S/D Q values (presumably the #else branch). */
294 #define SGEMM_DEFAULT_Q 224
295 #define DGEMM_DEFAULT_Q 224
/* Q for the remaining precisions. */
298 #define QGEMM_DEFAULT_Q 224
299 #define CGEMM_DEFAULT_Q 224
300 #define ZGEMM_DEFAULT_Q 224
301 #define XGEMM_DEFAULT_Q 224
/* P, Q and R literals for the 3M complex variants. */
303 #define CGEMM3M_DEFAULT_P 448
304 #define ZGEMM3M_DEFAULT_P 224
305 #define XGEMM3M_DEFAULT_P 112
306 #define CGEMM3M_DEFAULT_Q 224
307 #define ZGEMM3M_DEFAULT_Q 224
308 #define XGEMM3M_DEFAULT_Q 224
309 #define CGEMM3M_DEFAULT_R 12288
310 #define ZGEMM3M_DEFAULT_R 12288
311 #define XGEMM3M_DEFAULT_R 12288
/* R expands to an identifier per precision. */
313 #define SGEMM_DEFAULT_R sgemm_r
314 #define QGEMM_DEFAULT_R qgemm_r
315 #define DGEMM_DEFAULT_R dgemm_r
316 #define CGEMM_DEFAULT_R cgemm_r
317 #define ZGEMM_DEFAULT_R zgemm_r
318 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag without a value, and GEMM_THREAD mapped to gemm_thread_mn. */
321 #define HAVE_EXCLUSIVE_CACHE
323 #define GEMM_THREAD gemm_thread_mn
/* Parameter group: offsets/alignment, unroll factors, 3M unrolls, P/Q/R.
 * NOTE(review): the guard that selects this group and the #else/#endif
 * lines for the #if directives below are not present in this text -
 * confirm the branch structure before editing values. */
331 #define GEMM_DEFAULT_OFFSET_A 64
332 #define GEMM_DEFAULT_OFFSET_B 832
333 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* N-direction unroll factors for Q, C, Z and X. */
337 #define QGEMM_DEFAULT_UNROLL_N 2
338 #define CGEMM_DEFAULT_UNROLL_N 2
339 #define ZGEMM_DEFAULT_UNROLL_N 2
340 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D N = 4 together with small M factors. */
343 #define SGEMM_DEFAULT_UNROLL_N 4
344 #define DGEMM_DEFAULT_UNROLL_N 4
345 #define SGEMM_DEFAULT_UNROLL_M 4
346 #define DGEMM_DEFAULT_UNROLL_M 2
347 #define QGEMM_DEFAULT_UNROLL_M 2
348 #define CGEMM_DEFAULT_UNROLL_M 2
349 #define ZGEMM_DEFAULT_UNROLL_M 1
350 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D N = 2 with M = 16/8, plus the 3M unroll factors
 * and GEMV_UNROLL. */
352 #define SGEMM_DEFAULT_UNROLL_N 2
353 #define DGEMM_DEFAULT_UNROLL_N 2
354 #define SGEMM_DEFAULT_UNROLL_M 16
355 #define DGEMM_DEFAULT_UNROLL_M 8
356 #define QGEMM_DEFAULT_UNROLL_M 2
357 #define CGEMM_DEFAULT_UNROLL_M 4
358 #define ZGEMM_DEFAULT_UNROLL_M 2
359 #define XGEMM_DEFAULT_UNROLL_M 1
360 #define CGEMM3M_DEFAULT_UNROLL_N 4
361 #define CGEMM3M_DEFAULT_UNROLL_M 8
362 #define ZGEMM3M_DEFAULT_UNROLL_N 4
363 #define ZGEMM3M_DEFAULT_UNROLL_M 4
364 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined ... */
367 #if defined(ARCH_X86_64)
368 #define SGEMM_DEFAULT_P 768
369 #define DGEMM_DEFAULT_P 768
370 #define ZGEMM_DEFAULT_P 384
371 #define CGEMM_DEFAULT_P 768
/* ... and the alternative values (presumably the #else branch - confirm). */
373 #define SGEMM_DEFAULT_P 448
374 #define DGEMM_DEFAULT_P 480
375 #define ZGEMM_DEFAULT_P 112
376 #define CGEMM_DEFAULT_P 224
/* P for Q and X. */
378 #define QGEMM_DEFAULT_P 112
379 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values when ARCH_X86_64 is defined ... */
381 #if defined(ARCH_X86_64)
382 #define SGEMM_DEFAULT_Q 192
383 #define DGEMM_DEFAULT_Q 168
384 #define ZGEMM_DEFAULT_Q 168
385 #define CGEMM_DEFAULT_Q 168
/* ... and the alternative values (presumably the #else branch - confirm). */
387 #define SGEMM_DEFAULT_Q 224
388 #define DGEMM_DEFAULT_Q 224
389 #define ZGEMM_DEFAULT_Q 224
390 #define CGEMM_DEFAULT_Q 224
/* Q for Q and X. */
392 #define QGEMM_DEFAULT_Q 224
393 #define XGEMM_DEFAULT_Q 224
/* P, Q and R literals for the 3M complex variants. */
395 #define CGEMM3M_DEFAULT_P 448
396 #define ZGEMM3M_DEFAULT_P 224
397 #define XGEMM3M_DEFAULT_P 112
398 #define CGEMM3M_DEFAULT_Q 224
399 #define ZGEMM3M_DEFAULT_Q 224
400 #define XGEMM3M_DEFAULT_Q 224
401 #define CGEMM3M_DEFAULT_R 12288
402 #define ZGEMM3M_DEFAULT_R 12288
403 #define XGEMM3M_DEFAULT_R 12288
/* R: literal 12288 for S and D, an identifier for Q, C, Z and X. */
405 #define SGEMM_DEFAULT_R 12288
406 #define QGEMM_DEFAULT_R qgemm_r
407 #define DGEMM_DEFAULT_R 12288
408 #define CGEMM_DEFAULT_R cgemm_r
409 #define ZGEMM_DEFAULT_R zgemm_r
410 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag without a value, and GEMM_THREAD mapped to gemm_thread_mn. */
413 #define HAVE_EXCLUSIVE_CACHE
415 #define GEMM_THREAD gemm_thread_mn
/* Parameter group: offsets/alignment, unroll factors, 3M unrolls, P/Q/R.
 * NOTE(review): the guard that selects this group and the #else/#endif
 * lines for the #if directives below are not present in this text -
 * confirm the branch structure before editing values. */
423 #define GEMM_DEFAULT_OFFSET_A 64
424 #define GEMM_DEFAULT_OFFSET_B 832
425 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* N-direction unroll factors for Q, C, Z and X. */
429 #define QGEMM_DEFAULT_UNROLL_N 2
430 #define CGEMM_DEFAULT_UNROLL_N 2
431 #define ZGEMM_DEFAULT_UNROLL_N 2
432 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D N = 4 together with small M factors. */
435 #define SGEMM_DEFAULT_UNROLL_N 4
436 #define DGEMM_DEFAULT_UNROLL_N 4
437 #define SGEMM_DEFAULT_UNROLL_M 4
438 #define DGEMM_DEFAULT_UNROLL_M 2
439 #define QGEMM_DEFAULT_UNROLL_M 2
440 #define CGEMM_DEFAULT_UNROLL_M 2
441 #define ZGEMM_DEFAULT_UNROLL_M 1
442 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D N = 2 with M = 16/8, plus the 3M unroll factors
 * and GEMV_UNROLL. */
444 #define SGEMM_DEFAULT_UNROLL_N 2
445 #define DGEMM_DEFAULT_UNROLL_N 2
446 #define SGEMM_DEFAULT_UNROLL_M 16
447 #define DGEMM_DEFAULT_UNROLL_M 8
448 #define QGEMM_DEFAULT_UNROLL_M 2
449 #define CGEMM_DEFAULT_UNROLL_M 4
450 #define ZGEMM_DEFAULT_UNROLL_M 2
451 #define XGEMM_DEFAULT_UNROLL_M 1
452 #define CGEMM3M_DEFAULT_UNROLL_N 4
453 #define CGEMM3M_DEFAULT_UNROLL_M 8
454 #define ZGEMM3M_DEFAULT_UNROLL_N 4
455 #define ZGEMM3M_DEFAULT_UNROLL_M 4
456 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined ... */
459 #if defined(ARCH_X86_64)
460 #define SGEMM_DEFAULT_P 768
461 #define DGEMM_DEFAULT_P 576
462 #define ZGEMM_DEFAULT_P 288
463 #define CGEMM_DEFAULT_P 576
/* ... and the alternative values (presumably the #else branch - confirm). */
465 #define SGEMM_DEFAULT_P 448
466 #define DGEMM_DEFAULT_P 480
467 #define ZGEMM_DEFAULT_P 112
468 #define CGEMM_DEFAULT_P 224
/* P for Q and X. */
470 #define QGEMM_DEFAULT_P 112
471 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values when ARCH_X86_64 is defined ... */
473 #if defined(ARCH_X86_64)
474 #define SGEMM_DEFAULT_Q 192
475 #define DGEMM_DEFAULT_Q 160
476 #define ZGEMM_DEFAULT_Q 160
477 #define CGEMM_DEFAULT_Q 160
/* ... and the alternative values (presumably the #else branch - confirm). */
479 #define SGEMM_DEFAULT_Q 224
480 #define DGEMM_DEFAULT_Q 224
481 #define ZGEMM_DEFAULT_Q 224
482 #define CGEMM_DEFAULT_Q 224
/* Q for Q and X. */
484 #define QGEMM_DEFAULT_Q 224
485 #define XGEMM_DEFAULT_Q 224
/* P, Q and R literals for the 3M complex variants. */
487 #define CGEMM3M_DEFAULT_P 448
488 #define ZGEMM3M_DEFAULT_P 224
489 #define XGEMM3M_DEFAULT_P 112
490 #define CGEMM3M_DEFAULT_Q 224
491 #define ZGEMM3M_DEFAULT_Q 224
492 #define XGEMM3M_DEFAULT_Q 224
493 #define CGEMM3M_DEFAULT_R 12288
494 #define ZGEMM3M_DEFAULT_R 12288
495 #define XGEMM3M_DEFAULT_R 12288
/* R: literal 12288 for S and D, an identifier for Q, C, Z and X. */
497 #define SGEMM_DEFAULT_R 12288
498 #define QGEMM_DEFAULT_R qgemm_r
499 #define DGEMM_DEFAULT_R 12288
500 #define CGEMM_DEFAULT_R cgemm_r
501 #define ZGEMM_DEFAULT_R zgemm_r
502 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag without a value, and GEMM_THREAD mapped to gemm_thread_mn. */
505 #define HAVE_EXCLUSIVE_CACHE
507 #define GEMM_THREAD gemm_thread_mn
/* Parameter group: offsets/alignment, unroll factors, 3M unrolls, P/Q/R.
 * Every value here matches the group that immediately precedes it.
 * NOTE(review): the guard that selects this group and the #else/#endif
 * lines for the #if directives below are not present in this text -
 * confirm the branch structure before editing values. */
516 #define GEMM_DEFAULT_OFFSET_A 64
517 #define GEMM_DEFAULT_OFFSET_B 832
518 #define GEMM_DEFAULT_ALIGN 0x0fffUL
/* N-direction unroll factors for Q, C, Z and X. */
522 #define QGEMM_DEFAULT_UNROLL_N 2
523 #define CGEMM_DEFAULT_UNROLL_N 2
524 #define ZGEMM_DEFAULT_UNROLL_N 2
525 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D N = 4 together with small M factors. */
528 #define SGEMM_DEFAULT_UNROLL_N 4
529 #define DGEMM_DEFAULT_UNROLL_N 4
530 #define SGEMM_DEFAULT_UNROLL_M 4
531 #define DGEMM_DEFAULT_UNROLL_M 2
532 #define QGEMM_DEFAULT_UNROLL_M 2
533 #define CGEMM_DEFAULT_UNROLL_M 2
534 #define ZGEMM_DEFAULT_UNROLL_M 1
535 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D N = 2 with M = 16/8, plus the 3M unroll factors
 * and GEMV_UNROLL. */
537 #define SGEMM_DEFAULT_UNROLL_N 2
538 #define DGEMM_DEFAULT_UNROLL_N 2
539 #define SGEMM_DEFAULT_UNROLL_M 16
540 #define DGEMM_DEFAULT_UNROLL_M 8
541 #define QGEMM_DEFAULT_UNROLL_M 2
542 #define CGEMM_DEFAULT_UNROLL_M 4
543 #define ZGEMM_DEFAULT_UNROLL_M 2
544 #define XGEMM_DEFAULT_UNROLL_M 1
545 #define CGEMM3M_DEFAULT_UNROLL_N 4
546 #define CGEMM3M_DEFAULT_UNROLL_M 8
547 #define ZGEMM3M_DEFAULT_UNROLL_N 4
548 #define ZGEMM3M_DEFAULT_UNROLL_M 4
549 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined ... */
552 #if defined(ARCH_X86_64)
553 #define SGEMM_DEFAULT_P 768
554 #define DGEMM_DEFAULT_P 576
555 #define ZGEMM_DEFAULT_P 288
556 #define CGEMM_DEFAULT_P 576
/* ... and the alternative values (presumably the #else branch - confirm). */
558 #define SGEMM_DEFAULT_P 448
559 #define DGEMM_DEFAULT_P 480
560 #define ZGEMM_DEFAULT_P 112
561 #define CGEMM_DEFAULT_P 224
/* P for Q and X. */
563 #define QGEMM_DEFAULT_P 112
564 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q values when ARCH_X86_64 is defined ... */
566 #if defined(ARCH_X86_64)
567 #define SGEMM_DEFAULT_Q 192
568 #define DGEMM_DEFAULT_Q 160
569 #define ZGEMM_DEFAULT_Q 160
570 #define CGEMM_DEFAULT_Q 160
/* ... and the alternative values (presumably the #else branch - confirm). */
572 #define SGEMM_DEFAULT_Q 224
573 #define DGEMM_DEFAULT_Q 224
574 #define ZGEMM_DEFAULT_Q 224
575 #define CGEMM_DEFAULT_Q 224
/* Q for Q and X. */
577 #define QGEMM_DEFAULT_Q 224
578 #define XGEMM_DEFAULT_Q 224
/* P, Q and R literals for the 3M complex variants. */
580 #define CGEMM3M_DEFAULT_P 448
581 #define ZGEMM3M_DEFAULT_P 224
582 #define XGEMM3M_DEFAULT_P 112
583 #define CGEMM3M_DEFAULT_Q 224
584 #define ZGEMM3M_DEFAULT_Q 224
585 #define XGEMM3M_DEFAULT_Q 224
586 #define CGEMM3M_DEFAULT_R 12288
587 #define ZGEMM3M_DEFAULT_R 12288
588 #define XGEMM3M_DEFAULT_R 12288
/* R: literal 12288 for S and D, an identifier for Q, C, Z and X. */
590 #define SGEMM_DEFAULT_R 12288
591 #define QGEMM_DEFAULT_R qgemm_r
592 #define DGEMM_DEFAULT_R 12288
593 #define CGEMM_DEFAULT_R cgemm_r
594 #define ZGEMM_DEFAULT_R zgemm_r
595 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag without a value, and GEMM_THREAD mapped to gemm_thread_mn. */
598 #define HAVE_EXCLUSIVE_CACHE
600 #define GEMM_THREAD gemm_thread_mn
/* Parameter group with zero buffer offsets and SWITCH_RATIO 16.
 * NOTE(review): the guard that selects this group and the directives that
 * separate the repeated definitions below are not present in this text -
 * confirm the branch structure before editing values. */
608 #define GEMM_DEFAULT_OFFSET_A 0
609 #define GEMM_DEFAULT_OFFSET_B 0
610 #define GEMM_DEFAULT_ALIGN 0x03fffUL
614 #define SWITCH_RATIO 16
/* First unroll set (M then N). */
618 #define SGEMM_DEFAULT_UNROLL_M 4
619 #define DGEMM_DEFAULT_UNROLL_M 2
620 #define QGEMM_DEFAULT_UNROLL_M 2
621 #define CGEMM_DEFAULT_UNROLL_M 2
622 #define ZGEMM_DEFAULT_UNROLL_M 1
623 #define XGEMM_DEFAULT_UNROLL_M 1
625 #define SGEMM_DEFAULT_UNROLL_N 4
626 #define DGEMM_DEFAULT_UNROLL_N 4
627 #define QGEMM_DEFAULT_UNROLL_N 2
628 #define CGEMM_DEFAULT_UNROLL_N 2
629 #define ZGEMM_DEFAULT_UNROLL_N 2
630 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set (M then N), followed by S/D MN unroll factors. */
634 #define SGEMM_DEFAULT_UNROLL_M 8
635 #define DGEMM_DEFAULT_UNROLL_M 4
636 #define QGEMM_DEFAULT_UNROLL_M 2
637 #define CGEMM_DEFAULT_UNROLL_M 8
638 #define ZGEMM_DEFAULT_UNROLL_M 4
639 #define XGEMM_DEFAULT_UNROLL_M 1
641 #define SGEMM_DEFAULT_UNROLL_N 4
642 #define DGEMM_DEFAULT_UNROLL_N 8
643 #define QGEMM_DEFAULT_UNROLL_N 2
644 #define CGEMM_DEFAULT_UNROLL_N 2
645 #define ZGEMM_DEFAULT_UNROLL_N 2
646 #define XGEMM_DEFAULT_UNROLL_N 1
648 #define SGEMM_DEFAULT_UNROLL_MN 32
649 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First P/R/Q set: P and R interleaved per precision (CGEMM R is the
 * literal 1024, the other R values are identifiers), then Q. */
654 #define SGEMM_DEFAULT_P 512
655 #define SGEMM_DEFAULT_R sgemm_r
656 #define DGEMM_DEFAULT_P 512
657 #define DGEMM_DEFAULT_R dgemm_r
658 #define QGEMM_DEFAULT_P 504
659 #define QGEMM_DEFAULT_R qgemm_r
660 #define CGEMM_DEFAULT_P 128
661 #define CGEMM_DEFAULT_R 1024
662 #define ZGEMM_DEFAULT_P 512
663 #define ZGEMM_DEFAULT_R zgemm_r
664 #define XGEMM_DEFAULT_P 252
665 #define XGEMM_DEFAULT_R xgemm_r
666 #define SGEMM_DEFAULT_Q 256
667 #define DGEMM_DEFAULT_Q 256
668 #define QGEMM_DEFAULT_Q 128
669 #define CGEMM_DEFAULT_Q 256
670 #define ZGEMM_DEFAULT_Q 192
671 #define XGEMM_DEFAULT_Q 128
/* Second P/R/Q set. P for S, D, C and Z: */
675 #define SGEMM_DEFAULT_P 320
676 #define DGEMM_DEFAULT_P 512
677 #define CGEMM_DEFAULT_P 256
678 #define ZGEMM_DEFAULT_P 192
/* S/D Q is listed twice in a row; only the DGEMM value differs (128 vs 256).
 * NOTE(review): the condition choosing between the two is not shown. */
681 #define SGEMM_DEFAULT_Q 320
682 #define DGEMM_DEFAULT_Q 128
684 #define SGEMM_DEFAULT_Q 320
685 #define DGEMM_DEFAULT_Q 256
687 #define CGEMM_DEFAULT_Q 256
688 #define ZGEMM_DEFAULT_Q 192
/* R: identifiers except DGEMM, which is the literal 13824. */
690 #define SGEMM_DEFAULT_R sgemm_r
691 #define DGEMM_DEFAULT_R 13824
692 #define CGEMM_DEFAULT_R cgemm_r
693 #define ZGEMM_DEFAULT_R zgemm_r
/* Q/P/R for the Q and X precisions. */
695 #define QGEMM_DEFAULT_Q 128
696 #define QGEMM_DEFAULT_P 504
697 #define QGEMM_DEFAULT_R qgemm_r
698 #define XGEMM_DEFAULT_P 252
699 #define XGEMM_DEFAULT_R xgemm_r
700 #define XGEMM_DEFAULT_Q 128
/* Unroll factors and P/Q/R literals for the 3M complex variants. */
702 #define CGEMM3M_DEFAULT_UNROLL_N 4
703 #define CGEMM3M_DEFAULT_UNROLL_M 8
704 #define ZGEMM3M_DEFAULT_UNROLL_N 4
705 #define ZGEMM3M_DEFAULT_UNROLL_M 4
707 #define CGEMM3M_DEFAULT_P 320
708 #define ZGEMM3M_DEFAULT_P 256
709 #define XGEMM3M_DEFAULT_P 112
710 #define CGEMM3M_DEFAULT_Q 320
711 #define ZGEMM3M_DEFAULT_Q 256
712 #define XGEMM3M_DEFAULT_Q 224
713 #define CGEMM3M_DEFAULT_R 12288
714 #define ZGEMM3M_DEFAULT_R 12288
715 #define XGEMM3M_DEFAULT_R 12288
/* Parameter group: offsets 0/384, then one value per macro and precision
 * (no macro is repeated inside this group).
 * NOTE(review): the guard that selects this group is not present in this
 * text - confirm which target it belongs to. */
726 #define GEMM_DEFAULT_OFFSET_A 0
727 #define GEMM_DEFAULT_OFFSET_B 384
728 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* N-direction unroll factors. */
730 #define SGEMM_DEFAULT_UNROLL_N 4
731 #define DGEMM_DEFAULT_UNROLL_N 4
732 #define QGEMM_DEFAULT_UNROLL_N 2
733 #define CGEMM_DEFAULT_UNROLL_N 2
734 #define ZGEMM_DEFAULT_UNROLL_N 2
735 #define XGEMM_DEFAULT_UNROLL_N 1
/* M-direction unroll factors. */
737 #define SGEMM_DEFAULT_UNROLL_M 2
738 #define DGEMM_DEFAULT_UNROLL_M 1
739 #define QGEMM_DEFAULT_UNROLL_M 2
740 #define CGEMM_DEFAULT_UNROLL_M 1
741 #define ZGEMM_DEFAULT_UNROLL_M 1
742 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to an identifier per precision. */
744 #define SGEMM_DEFAULT_R sgemm_r
745 #define DGEMM_DEFAULT_R dgemm_r
746 #define QGEMM_DEFAULT_R qgemm_r
747 #define CGEMM_DEFAULT_R cgemm_r
748 #define ZGEMM_DEFAULT_R zgemm_r
749 #define XGEMM_DEFAULT_R xgemm_r
/* P literals, from 208 down to 28. */
751 #define SGEMM_DEFAULT_P 208
752 #define DGEMM_DEFAULT_P 104
753 #define QGEMM_DEFAULT_P 56
754 #define CGEMM_DEFAULT_P 104
755 #define ZGEMM_DEFAULT_P 56
756 #define XGEMM_DEFAULT_P 28
/* Q is 208 for every precision. */
758 #define SGEMM_DEFAULT_Q 208
759 #define DGEMM_DEFAULT_Q 208
760 #define QGEMM_DEFAULT_Q 208
761 #define CGEMM_DEFAULT_Q 208
762 #define ZGEMM_DEFAULT_Q 208
763 #define XGEMM_DEFAULT_Q 208
/* Feature flag, defined without a value. */
766 #define HAVE_EXCLUSIVE_CACHE
/* Parameter group: offsets 0/256, then one value per macro and precision
 * (no macro is repeated inside this group).
 * NOTE(review): the guard that selects this group is not present in this
 * text - confirm which target it belongs to. */
774 #define GEMM_DEFAULT_OFFSET_A 0
775 #define GEMM_DEFAULT_OFFSET_B 256
776 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* N-direction unroll factors. */
778 #define SGEMM_DEFAULT_UNROLL_N 4
779 #define DGEMM_DEFAULT_UNROLL_N 4
780 #define QGEMM_DEFAULT_UNROLL_N 2
781 #define CGEMM_DEFAULT_UNROLL_N 2
782 #define ZGEMM_DEFAULT_UNROLL_N 2
783 #define XGEMM_DEFAULT_UNROLL_N 1
/* M-direction unroll factors. */
785 #define SGEMM_DEFAULT_UNROLL_M 2
786 #define DGEMM_DEFAULT_UNROLL_M 1
787 #define QGEMM_DEFAULT_UNROLL_M 2
788 #define CGEMM_DEFAULT_UNROLL_M 1
789 #define ZGEMM_DEFAULT_UNROLL_M 1
790 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to an identifier per precision. */
792 #define SGEMM_DEFAULT_R sgemm_r
793 #define DGEMM_DEFAULT_R dgemm_r
794 #define QGEMM_DEFAULT_R qgemm_r
795 #define CGEMM_DEFAULT_R cgemm_r
796 #define ZGEMM_DEFAULT_R zgemm_r
797 #define XGEMM_DEFAULT_R xgemm_r
/* P is 128 for every precision. */
799 #define SGEMM_DEFAULT_P 128
800 #define DGEMM_DEFAULT_P 128
801 #define QGEMM_DEFAULT_P 128
802 #define CGEMM_DEFAULT_P 128
803 #define ZGEMM_DEFAULT_P 128
804 #define XGEMM_DEFAULT_P 128
/* Q literals, from 512 down to 128. */
806 #define SGEMM_DEFAULT_Q 512
807 #define DGEMM_DEFAULT_Q 256
808 #define QGEMM_DEFAULT_Q 256
809 #define CGEMM_DEFAULT_Q 256
810 #define ZGEMM_DEFAULT_Q 128
811 #define XGEMM_DEFAULT_Q 128
/* Parameter group: offsets 64/256, two complete unroll sets, then P/R/Q.
 * NOTE(review): the guard that selects this group and the directives that
 * separate the two unroll sets are not present in this text - confirm. */
821 #define GEMM_DEFAULT_OFFSET_A 64
822 #define GEMM_DEFAULT_OFFSET_B 256
823 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* First unroll set (N then M). */
826 #define SGEMM_DEFAULT_UNROLL_N 4
827 #define DGEMM_DEFAULT_UNROLL_N 4
828 #define QGEMM_DEFAULT_UNROLL_N 2
829 #define CGEMM_DEFAULT_UNROLL_N 2
830 #define ZGEMM_DEFAULT_UNROLL_N 2
831 #define XGEMM_DEFAULT_UNROLL_N 1
833 #define SGEMM_DEFAULT_UNROLL_M 4
834 #define DGEMM_DEFAULT_UNROLL_M 2
835 #define QGEMM_DEFAULT_UNROLL_M 2
836 #define CGEMM_DEFAULT_UNROLL_M 2
837 #define ZGEMM_DEFAULT_UNROLL_M 1
838 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set (N then M): S/C N and D/Z M are doubled. */
840 #define SGEMM_DEFAULT_UNROLL_N 8
841 #define DGEMM_DEFAULT_UNROLL_N 4
842 #define QGEMM_DEFAULT_UNROLL_N 2
843 #define CGEMM_DEFAULT_UNROLL_N 4
844 #define ZGEMM_DEFAULT_UNROLL_N 2
845 #define XGEMM_DEFAULT_UNROLL_N 1
847 #define SGEMM_DEFAULT_UNROLL_M 4
848 #define DGEMM_DEFAULT_UNROLL_M 4
849 #define QGEMM_DEFAULT_UNROLL_M 2
850 #define CGEMM_DEFAULT_UNROLL_M 2
851 #define ZGEMM_DEFAULT_UNROLL_M 2
852 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every precision. */
855 #define SGEMM_DEFAULT_P 288
856 #define DGEMM_DEFAULT_P 288
857 #define QGEMM_DEFAULT_P 288
858 #define CGEMM_DEFAULT_P 288
859 #define ZGEMM_DEFAULT_P 288
860 #define XGEMM_DEFAULT_P 288
/* R expands to an identifier per precision. */
862 #define SGEMM_DEFAULT_R sgemm_r
863 #define DGEMM_DEFAULT_R dgemm_r
864 #define QGEMM_DEFAULT_R qgemm_r
865 #define CGEMM_DEFAULT_R cgemm_r
866 #define ZGEMM_DEFAULT_R zgemm_r
867 #define XGEMM_DEFAULT_R xgemm_r
/* Q literals, from 256 down to 32. */
869 #define SGEMM_DEFAULT_Q 256
870 #define DGEMM_DEFAULT_Q 128
871 #define QGEMM_DEFAULT_Q 64
872 #define CGEMM_DEFAULT_Q 128
873 #define ZGEMM_DEFAULT_Q 64
874 #define XGEMM_DEFAULT_Q 32
/* Feature flag, defined without a value. */
877 #define HAVE_EXCLUSIVE_CACHE
/* Parameter group used when PENTIUM, PENTIUM2 or PENTIUM3 is defined.
 * NOTE(review): the matching #endif and the directives separating the two
 * S/C UNROLL_M pairs below are not present in this text - confirm. */
881 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
/* Zero buffer offsets and the alignment mask. */
890 #define GEMM_DEFAULT_OFFSET_A 0
891 #define GEMM_DEFAULT_OFFSET_B 0
892 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* S/C M unroll factors are listed twice: 8/4 here ... */
895 #define SGEMM_DEFAULT_UNROLL_M 8
896 #define CGEMM_DEFAULT_UNROLL_M 4
/* ... and 4/2 here. */
898 #define SGEMM_DEFAULT_UNROLL_M 4
899 #define CGEMM_DEFAULT_UNROLL_M 2
/* Remaining unroll factors, defined once each. */
901 #define DGEMM_DEFAULT_UNROLL_M 2
902 #define SGEMM_DEFAULT_UNROLL_N 2
903 #define DGEMM_DEFAULT_UNROLL_N 2
904 #define QGEMM_DEFAULT_UNROLL_M 2
905 #define QGEMM_DEFAULT_UNROLL_N 2
906 #define CGEMM_DEFAULT_UNROLL_N 1
907 #define ZGEMM_DEFAULT_UNROLL_M 1
908 #define ZGEMM_DEFAULT_UNROLL_N 1
909 #define XGEMM_DEFAULT_UNROLL_M 1
910 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per precision: P and R expand to identifiers, Q is the literal 256. */
912 #define SGEMM_DEFAULT_P sgemm_p
913 #define SGEMM_DEFAULT_Q 256
914 #define SGEMM_DEFAULT_R sgemm_r
916 #define DGEMM_DEFAULT_P dgemm_p
917 #define DGEMM_DEFAULT_Q 256
918 #define DGEMM_DEFAULT_R dgemm_r
920 #define QGEMM_DEFAULT_P qgemm_p
921 #define QGEMM_DEFAULT_Q 256
922 #define QGEMM_DEFAULT_R qgemm_r
924 #define CGEMM_DEFAULT_P cgemm_p
925 #define CGEMM_DEFAULT_Q 256
926 #define CGEMM_DEFAULT_R cgemm_r
928 #define ZGEMM_DEFAULT_P zgemm_p
929 #define ZGEMM_DEFAULT_Q 256
930 #define ZGEMM_DEFAULT_R zgemm_r
932 #define XGEMM_DEFAULT_P xgemm_p
933 #define XGEMM_DEFAULT_Q 256
934 #define XGEMM_DEFAULT_R xgemm_r
/* Parameter group: zero offsets, two complete unroll sets, then P/Q/R.
 * NOTE(review): the guard that selects this group and the directives that
 * separate the two unroll sets are not present in this text - confirm. */
945 #define GEMM_DEFAULT_OFFSET_A 0
946 #define GEMM_DEFAULT_OFFSET_B 0
947 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First unroll set, M and N interleaved per precision. */
950 #define SGEMM_DEFAULT_UNROLL_M 4
951 #define SGEMM_DEFAULT_UNROLL_N 4
952 #define DGEMM_DEFAULT_UNROLL_M 2
953 #define DGEMM_DEFAULT_UNROLL_N 4
954 #define QGEMM_DEFAULT_UNROLL_M 2
955 #define QGEMM_DEFAULT_UNROLL_N 2
956 #define CGEMM_DEFAULT_UNROLL_M 2
957 #define CGEMM_DEFAULT_UNROLL_N 2
958 #define ZGEMM_DEFAULT_UNROLL_M 1
959 #define ZGEMM_DEFAULT_UNROLL_N 2
960 #define XGEMM_DEFAULT_UNROLL_M 1
961 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set: larger S/C M factors, smaller S/D/C/Z N factors. */
963 #define SGEMM_DEFAULT_UNROLL_M 8
964 #define SGEMM_DEFAULT_UNROLL_N 2
965 #define DGEMM_DEFAULT_UNROLL_M 2
966 #define DGEMM_DEFAULT_UNROLL_N 2
967 #define QGEMM_DEFAULT_UNROLL_M 2
968 #define QGEMM_DEFAULT_UNROLL_N 2
969 #define CGEMM_DEFAULT_UNROLL_M 4
970 #define CGEMM_DEFAULT_UNROLL_N 1
971 #define ZGEMM_DEFAULT_UNROLL_M 1
972 #define ZGEMM_DEFAULT_UNROLL_N 1
973 #define XGEMM_DEFAULT_UNROLL_M 1
974 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per precision: P and R expand to identifiers, Q is the literal 256. */
978 #define SGEMM_DEFAULT_P sgemm_p
979 #define SGEMM_DEFAULT_Q 256
980 #define SGEMM_DEFAULT_R sgemm_r
982 #define DGEMM_DEFAULT_P dgemm_p
983 #define DGEMM_DEFAULT_Q 256
984 #define DGEMM_DEFAULT_R dgemm_r
986 #define QGEMM_DEFAULT_P qgemm_p
987 #define QGEMM_DEFAULT_Q 256
988 #define QGEMM_DEFAULT_R qgemm_r
990 #define CGEMM_DEFAULT_P cgemm_p
991 #define CGEMM_DEFAULT_Q 256
992 #define CGEMM_DEFAULT_R cgemm_r
994 #define ZGEMM_DEFAULT_P zgemm_p
995 #define ZGEMM_DEFAULT_Q 256
996 #define ZGEMM_DEFAULT_R zgemm_r
998 #define XGEMM_DEFAULT_P xgemm_p
999 #define XGEMM_DEFAULT_Q 256
1000 #define XGEMM_DEFAULT_R xgemm_r
/* Parameter group used when CORE_NORTHWOOD is defined; each macro is
 * defined once.
 * NOTE(review): the matching #endif is not present in this text. */
1005 #ifdef CORE_NORTHWOOD
/* Buffer offsets and alignment mask. */
1010 #define GEMM_DEFAULT_OFFSET_A 0
1011 #define GEMM_DEFAULT_OFFSET_B 32
1013 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* M-direction unroll factors. */
1017 #define SGEMM_DEFAULT_UNROLL_M 8
1018 #define DGEMM_DEFAULT_UNROLL_M 4
1019 #define QGEMM_DEFAULT_UNROLL_M 2
1020 #define CGEMM_DEFAULT_UNROLL_M 4
1021 #define ZGEMM_DEFAULT_UNROLL_M 2
1022 #define XGEMM_DEFAULT_UNROLL_M 1
/* N-direction unroll factors. */
1024 #define SGEMM_DEFAULT_UNROLL_N 2
1025 #define DGEMM_DEFAULT_UNROLL_N 2
1026 #define QGEMM_DEFAULT_UNROLL_N 2
1027 #define CGEMM_DEFAULT_UNROLL_N 1
1028 #define ZGEMM_DEFAULT_UNROLL_N 1
1029 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers per precision. */
1031 #define SGEMM_DEFAULT_P sgemm_p
1032 #define SGEMM_DEFAULT_R sgemm_r
1034 #define DGEMM_DEFAULT_P dgemm_p
1035 #define DGEMM_DEFAULT_R dgemm_r
1037 #define QGEMM_DEFAULT_P qgemm_p
1038 #define QGEMM_DEFAULT_R qgemm_r
1040 #define CGEMM_DEFAULT_P cgemm_p
1041 #define CGEMM_DEFAULT_R cgemm_r
1043 #define ZGEMM_DEFAULT_P zgemm_p
1044 #define ZGEMM_DEFAULT_R zgemm_r
1046 #define XGEMM_DEFAULT_P xgemm_p
1047 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every precision. */
1049 #define SGEMM_DEFAULT_Q 128
1050 #define DGEMM_DEFAULT_Q 128
1051 #define QGEMM_DEFAULT_Q 128
1052 #define CGEMM_DEFAULT_Q 128
1053 #define ZGEMM_DEFAULT_Q 128
1054 #define XGEMM_DEFAULT_Q 128
/* Parameter group used when CORE_PRESCOTT is defined.
 * NOTE(review): the matching #endif and the directives separating the
 * repeated offset and UNROLL_M definitions are not present in this text -
 * confirm the branch structure before editing values. */
1057 #ifdef CORE_PRESCOTT
/* Buffer offsets are listed twice: 128/192 here ... */
1063 #define GEMM_DEFAULT_OFFSET_A 128
1064 #define GEMM_DEFAULT_OFFSET_B 192
/* ... and 0/256 here. */
1066 #define GEMM_DEFAULT_OFFSET_A 0
1067 #define GEMM_DEFAULT_OFFSET_B 256
1070 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* M-direction unroll factors, first set. */
1075 #define SGEMM_DEFAULT_UNROLL_M 4
1076 #define DGEMM_DEFAULT_UNROLL_M 2
1077 #define QGEMM_DEFAULT_UNROLL_M 2
1078 #define CGEMM_DEFAULT_UNROLL_M 2
1079 #define ZGEMM_DEFAULT_UNROLL_M 1
1080 #define XGEMM_DEFAULT_UNROLL_M 1
/* M-direction unroll factors, second set (S, D, C, Z doubled). */
1082 #define SGEMM_DEFAULT_UNROLL_M 8
1083 #define DGEMM_DEFAULT_UNROLL_M 4
1084 #define QGEMM_DEFAULT_UNROLL_M 2
1085 #define CGEMM_DEFAULT_UNROLL_M 4
1086 #define ZGEMM_DEFAULT_UNROLL_M 2
1087 #define XGEMM_DEFAULT_UNROLL_M 1
/* N-direction unroll factors, defined once. */
1090 #define SGEMM_DEFAULT_UNROLL_N 4
1091 #define DGEMM_DEFAULT_UNROLL_N 4
1092 #define QGEMM_DEFAULT_UNROLL_N 2
1093 #define CGEMM_DEFAULT_UNROLL_N 2
1094 #define ZGEMM_DEFAULT_UNROLL_N 2
1095 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers per precision. */
1097 #define SGEMM_DEFAULT_P sgemm_p
1098 #define SGEMM_DEFAULT_R sgemm_r
1100 #define DGEMM_DEFAULT_P dgemm_p
1101 #define DGEMM_DEFAULT_R dgemm_r
1103 #define QGEMM_DEFAULT_P qgemm_p
1104 #define QGEMM_DEFAULT_R qgemm_r
1106 #define CGEMM_DEFAULT_P cgemm_p
1107 #define CGEMM_DEFAULT_R cgemm_r
1109 #define ZGEMM_DEFAULT_P zgemm_p
1110 #define ZGEMM_DEFAULT_R zgemm_r
1112 #define XGEMM_DEFAULT_P xgemm_p
1113 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every precision. */
1115 #define SGEMM_DEFAULT_Q 128
1116 #define DGEMM_DEFAULT_Q 128
1117 #define QGEMM_DEFAULT_Q 128
1118 #define CGEMM_DEFAULT_Q 128
1119 #define ZGEMM_DEFAULT_Q 128
1120 #define XGEMM_DEFAULT_Q 128
/* Parameter group: offsets 448/128, SWITCH_RATIO 4, two unroll sets with the
 * MASK helper macro between them, then P/R/Q.
 * NOTE(review): the guard that selects this group and the directives that
 * separate the two unroll sets are not present in this text - confirm. */
1128 #define GEMM_DEFAULT_OFFSET_A 448
1129 #define GEMM_DEFAULT_OFFSET_B 128
1130 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1134 #define SWITCH_RATIO 4
/* First unroll set (M then N). */
1137 #define SGEMM_DEFAULT_UNROLL_M 8
1138 #define DGEMM_DEFAULT_UNROLL_M 4
1139 #define QGEMM_DEFAULT_UNROLL_M 2
1140 #define CGEMM_DEFAULT_UNROLL_M 4
1141 #define ZGEMM_DEFAULT_UNROLL_M 2
1142 #define XGEMM_DEFAULT_UNROLL_M 1
1144 #define SGEMM_DEFAULT_UNROLL_N 2
1145 #define DGEMM_DEFAULT_UNROLL_N 2
1146 #define QGEMM_DEFAULT_UNROLL_N 2
1147 #define CGEMM_DEFAULT_UNROLL_N 1
1148 #define ZGEMM_DEFAULT_UNROLL_N 1
1149 #define XGEMM_DEFAULT_UNROLL_N 1
/* MASK(a, b): for positive integer operands, rounds a up to the next
 * multiple of b via integer division. The expansion uses b three times and
 * a once, so arguments with side effects should be avoided. */
1151 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/* Second unroll set: same M factors as the first, larger S/D/C/Z N. */
1154 #define SGEMM_DEFAULT_UNROLL_M 8
1155 #define DGEMM_DEFAULT_UNROLL_M 4
1156 #define QGEMM_DEFAULT_UNROLL_M 2
1157 #define CGEMM_DEFAULT_UNROLL_M 4
1158 #define ZGEMM_DEFAULT_UNROLL_M 2
1159 #define XGEMM_DEFAULT_UNROLL_M 1
1161 #define SGEMM_DEFAULT_UNROLL_N 4
1162 #define DGEMM_DEFAULT_UNROLL_N 4
1163 #define QGEMM_DEFAULT_UNROLL_N 2
1164 #define CGEMM_DEFAULT_UNROLL_N 2
1165 #define ZGEMM_DEFAULT_UNROLL_N 2
1166 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers per precision. */
1169 #define SGEMM_DEFAULT_P sgemm_p
1170 #define SGEMM_DEFAULT_R sgemm_r
1172 #define DGEMM_DEFAULT_P dgemm_p
1173 #define DGEMM_DEFAULT_R dgemm_r
1175 #define QGEMM_DEFAULT_P qgemm_p
1176 #define QGEMM_DEFAULT_R qgemm_r
1178 #define CGEMM_DEFAULT_P cgemm_p
1179 #define CGEMM_DEFAULT_R cgemm_r
1181 #define ZGEMM_DEFAULT_P zgemm_p
1182 #define ZGEMM_DEFAULT_R zgemm_r
1184 #define XGEMM_DEFAULT_P xgemm_p
1185 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every precision. */
1187 #define SGEMM_DEFAULT_Q 256
1188 #define DGEMM_DEFAULT_Q 256
1189 #define QGEMM_DEFAULT_Q 256
1190 #define CGEMM_DEFAULT_Q 256
1191 #define ZGEMM_DEFAULT_Q 256
1192 #define XGEMM_DEFAULT_Q 256
/* Parameter group: offsets 128/0, SWITCH_RATIO 4, two unroll sets, P/R/Q and
 * GETRF_FACTOR.
 * NOTE(review): the guard that selects this group and the directives that
 * separate the two unroll sets are not present in this text - confirm. */
1201 #define GEMM_DEFAULT_OFFSET_A 128
1202 #define GEMM_DEFAULT_OFFSET_B 0
1203 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1207 #define SWITCH_RATIO 4
/* First unroll set (M then N). */
1210 #define SGEMM_DEFAULT_UNROLL_M 4
1211 #define DGEMM_DEFAULT_UNROLL_M 2
1212 #define QGEMM_DEFAULT_UNROLL_M 2
1213 #define CGEMM_DEFAULT_UNROLL_M 2
1214 #define ZGEMM_DEFAULT_UNROLL_M 1
1215 #define XGEMM_DEFAULT_UNROLL_M 1
1217 #define SGEMM_DEFAULT_UNROLL_N 4
1218 #define DGEMM_DEFAULT_UNROLL_N 4
1219 #define QGEMM_DEFAULT_UNROLL_N 2
1220 #define CGEMM_DEFAULT_UNROLL_N 2
1221 #define ZGEMM_DEFAULT_UNROLL_N 2
1222 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set: S/D/C/Z M doubled, N factors unchanged. */
1224 #define SGEMM_DEFAULT_UNROLL_M 8
1225 #define DGEMM_DEFAULT_UNROLL_M 4
1226 #define QGEMM_DEFAULT_UNROLL_M 2
1227 #define CGEMM_DEFAULT_UNROLL_M 4
1228 #define ZGEMM_DEFAULT_UNROLL_M 2
1229 #define XGEMM_DEFAULT_UNROLL_M 1
1231 #define SGEMM_DEFAULT_UNROLL_N 4
1232 #define DGEMM_DEFAULT_UNROLL_N 4
1233 #define QGEMM_DEFAULT_UNROLL_N 2
1234 #define CGEMM_DEFAULT_UNROLL_N 2
1235 #define ZGEMM_DEFAULT_UNROLL_N 2
1236 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers per precision. */
1239 #define SGEMM_DEFAULT_P sgemm_p
1240 #define SGEMM_DEFAULT_R sgemm_r
1242 #define DGEMM_DEFAULT_P dgemm_p
1243 #define DGEMM_DEFAULT_R dgemm_r
1245 #define QGEMM_DEFAULT_P qgemm_p
1246 #define QGEMM_DEFAULT_R qgemm_r
1248 #define CGEMM_DEFAULT_P cgemm_p
1249 #define CGEMM_DEFAULT_R cgemm_r
1251 #define ZGEMM_DEFAULT_P zgemm_p
1252 #define ZGEMM_DEFAULT_R zgemm_r
1254 #define XGEMM_DEFAULT_P xgemm_p
1255 #define XGEMM_DEFAULT_R xgemm_r
/* Q literals: 512/256/128 for S/D/Q and again for C/Z/X. */
1257 #define SGEMM_DEFAULT_Q 512
1258 #define DGEMM_DEFAULT_Q 256
1259 #define QGEMM_DEFAULT_Q 128
1260 #define CGEMM_DEFAULT_Q 512
1261 #define ZGEMM_DEFAULT_Q 256
1262 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant.
 * NOTE(review): presumably a tuning ratio for the GETRF (LU factorisation)
 * code - confirm against its users. */
1264 #define GETRF_FACTOR 0.75
/* Tuning group with the same layout as its neighbours: offsets/alignment,
 * SWITCH_RATIO, UNROLL_M/N, P/Q/R, GETRF_FACTOR, plus a GEMM_THREAD
 * selection.  The directive that selects this group is not visible in this
 * excerpt. */
1272 #define GEMM_DEFAULT_OFFSET_A 128
1273 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
1274 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1278 #define SWITCH_RATIO 4
/* Two UNROLL_M/UNROLL_N sets in a row; the directives choosing between them
 * are not shown (presumably #if/#else branches -- confirm). */
1281 #define SGEMM_DEFAULT_UNROLL_M 4
1282 #define DGEMM_DEFAULT_UNROLL_M 2
1283 #define QGEMM_DEFAULT_UNROLL_M 2
1284 #define CGEMM_DEFAULT_UNROLL_M 2
1285 #define ZGEMM_DEFAULT_UNROLL_M 1
1286 #define XGEMM_DEFAULT_UNROLL_M 1
1288 #define SGEMM_DEFAULT_UNROLL_N 4
1289 #define DGEMM_DEFAULT_UNROLL_N 4
1290 #define QGEMM_DEFAULT_UNROLL_N 2
1291 #define CGEMM_DEFAULT_UNROLL_N 2
1292 #define ZGEMM_DEFAULT_UNROLL_N 2
1293 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: only UNROLL_M differs (8/4/2/4/2/1). */
1295 #define SGEMM_DEFAULT_UNROLL_M 8
1296 #define DGEMM_DEFAULT_UNROLL_M 4
1297 #define QGEMM_DEFAULT_UNROLL_M 2
1298 #define CGEMM_DEFAULT_UNROLL_M 4
1299 #define ZGEMM_DEFAULT_UNROLL_M 2
1300 #define XGEMM_DEFAULT_UNROLL_M 1
1302 #define SGEMM_DEFAULT_UNROLL_N 4
1303 #define DGEMM_DEFAULT_UNROLL_N 4
1304 #define QGEMM_DEFAULT_UNROLL_N 2
1305 #define CGEMM_DEFAULT_UNROLL_N 2
1306 #define ZGEMM_DEFAULT_UNROLL_N 2
1307 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers that are not defined in this excerpt. */
1310 #define SGEMM_DEFAULT_P sgemm_p
1311 #define SGEMM_DEFAULT_R sgemm_r
1313 #define DGEMM_DEFAULT_P dgemm_p
1314 #define DGEMM_DEFAULT_R dgemm_r
1316 #define QGEMM_DEFAULT_P qgemm_p
1317 #define QGEMM_DEFAULT_R qgemm_r
1319 #define CGEMM_DEFAULT_P cgemm_p
1320 #define CGEMM_DEFAULT_R cgemm_r
1322 #define ZGEMM_DEFAULT_P zgemm_p
1323 #define ZGEMM_DEFAULT_R zgemm_r
1325 #define XGEMM_DEFAULT_P xgemm_p
1326 #define XGEMM_DEFAULT_R xgemm_r
/* Literal Q values: 768 (S, C), 384 (D, Z), 192 (Q, X). */
1328 #define SGEMM_DEFAULT_Q 768
1329 #define DGEMM_DEFAULT_Q 384
1330 #define QGEMM_DEFAULT_Q 192
1331 #define CGEMM_DEFAULT_Q 768
1332 #define ZGEMM_DEFAULT_Q 384
1333 #define XGEMM_DEFAULT_Q 192
1335 #define GETRF_FACTOR 0.75
/* GEMM_THREAD maps to gemm_thread_mn, an identifier not defined in this
 * excerpt. */
1336 #define GEMM_THREAD gemm_thread_mn
/* Tuning group: offsets/alignment, SWITCH_RATIO, UNROLL_M/N, P/Q/R and
 * GETRF_FACTOR.  Unlike the preceding groups, P is a literal here while R
 * still expands to an identifier.  The selecting directive is not visible in
 * this excerpt. */
1344 #define GEMM_DEFAULT_OFFSET_A 32
1345 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
1346 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1350 #define SWITCH_RATIO 4
/* Two UNROLL sets in a row; the directives choosing between them are not
 * shown (presumably #if/#else branches -- confirm). */
1353 #define SGEMM_DEFAULT_UNROLL_M 4
1354 #define DGEMM_DEFAULT_UNROLL_M 2
1355 #define QGEMM_DEFAULT_UNROLL_M 2
1356 #define CGEMM_DEFAULT_UNROLL_M 2
1357 #define ZGEMM_DEFAULT_UNROLL_M 1
1358 #define XGEMM_DEFAULT_UNROLL_M 1
1360 #define SGEMM_DEFAULT_UNROLL_N 4
1361 #define DGEMM_DEFAULT_UNROLL_N 4
1362 #define QGEMM_DEFAULT_UNROLL_N 2
1363 #define CGEMM_DEFAULT_UNROLL_N 2
1364 #define ZGEMM_DEFAULT_UNROLL_N 2
1365 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: UNROLL_M is identical to the first set; UNROLL_N grows to
 * 8/8/2/4/4/1. */
1367 #define SGEMM_DEFAULT_UNROLL_M 4
1368 #define DGEMM_DEFAULT_UNROLL_M 2
1369 #define QGEMM_DEFAULT_UNROLL_M 2
1370 #define CGEMM_DEFAULT_UNROLL_M 2
1371 #define ZGEMM_DEFAULT_UNROLL_M 1
1372 #define XGEMM_DEFAULT_UNROLL_M 1
1374 #define SGEMM_DEFAULT_UNROLL_N 8
1375 #define DGEMM_DEFAULT_UNROLL_N 8
1376 #define QGEMM_DEFAULT_UNROLL_N 2
1377 #define CGEMM_DEFAULT_UNROLL_N 4
1378 #define ZGEMM_DEFAULT_UNROLL_N 4
1379 #define XGEMM_DEFAULT_UNROLL_N 1
/* P: 504 for S/D/Q, 252 for C/Z/X.  R expands to an identifier not defined
 * in this excerpt. */
1382 #define SGEMM_DEFAULT_P 504
1383 #define SGEMM_DEFAULT_R sgemm_r
1385 #define DGEMM_DEFAULT_P 504
1386 #define DGEMM_DEFAULT_R dgemm_r
1388 #define QGEMM_DEFAULT_P 504
1389 #define QGEMM_DEFAULT_R qgemm_r
1391 #define CGEMM_DEFAULT_P 252
1392 #define CGEMM_DEFAULT_R cgemm_r
1394 #define ZGEMM_DEFAULT_P 252
1395 #define ZGEMM_DEFAULT_R zgemm_r
1397 #define XGEMM_DEFAULT_P 252
1398 #define XGEMM_DEFAULT_R xgemm_r
1400 #define SGEMM_DEFAULT_Q 512
1401 #define DGEMM_DEFAULT_Q 256
1402 #define QGEMM_DEFAULT_Q 128
1403 #define CGEMM_DEFAULT_Q 512
1404 #define ZGEMM_DEFAULT_Q 256
1405 #define XGEMM_DEFAULT_Q 128
1407 #define GETRF_FACTOR 0.72
/* Tuning group: zero offsets, alignment constant, SWITCH_RATIO, UNROLL_M/N,
 * P/Q/R, a set of CGEMM3M/ZGEMM3M/XGEMM3M parameters and GETRF_FACTOR.  The
 * selecting directive is not visible in this excerpt. */
1417 #define GEMM_DEFAULT_OFFSET_A 0
1418 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
1419 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1423 #define SWITCH_RATIO 4
/* Two UNROLL sets in a row; the directives choosing between them are not
 * shown (presumably #if/#else branches -- confirm). */
1426 #define SGEMM_DEFAULT_UNROLL_M 4
1427 #define DGEMM_DEFAULT_UNROLL_M 2
1428 #define QGEMM_DEFAULT_UNROLL_M 2
1429 #define CGEMM_DEFAULT_UNROLL_M 2
1430 #define ZGEMM_DEFAULT_UNROLL_M 1
1431 #define XGEMM_DEFAULT_UNROLL_M 1
1433 #define SGEMM_DEFAULT_UNROLL_N 4
1434 #define DGEMM_DEFAULT_UNROLL_N 4
1435 #define QGEMM_DEFAULT_UNROLL_N 2
1436 #define CGEMM_DEFAULT_UNROLL_N 2
1437 #define ZGEMM_DEFAULT_UNROLL_N 2
1438 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: UNROLL_M 16/8/2/8/1/1, and ZGEMM UNROLL_N becomes 4. */
1440 #define SGEMM_DEFAULT_UNROLL_M 16
1441 #define DGEMM_DEFAULT_UNROLL_M 8
1442 #define QGEMM_DEFAULT_UNROLL_M 2
1443 #define CGEMM_DEFAULT_UNROLL_M 8
1444 #define ZGEMM_DEFAULT_UNROLL_M 1
1445 #define XGEMM_DEFAULT_UNROLL_M 1
1447 #define SGEMM_DEFAULT_UNROLL_N 4
1448 #define DGEMM_DEFAULT_UNROLL_N 4
1449 #define QGEMM_DEFAULT_UNROLL_N 2
1450 #define CGEMM_DEFAULT_UNROLL_N 2
1451 #define ZGEMM_DEFAULT_UNROLL_N 4
1452 #define XGEMM_DEFAULT_UNROLL_N 1
/* P is a literal; R expands to an identifier not defined in this excerpt.
 * The commented-out lines are disabled literal alternatives (1024) for R. */
1455 #define SGEMM_DEFAULT_P 768
1456 #define SGEMM_DEFAULT_R sgemm_r
1457 //#define SGEMM_DEFAULT_R 1024
1459 #define DGEMM_DEFAULT_P 512
1460 #define DGEMM_DEFAULT_R dgemm_r
1461 //#define DGEMM_DEFAULT_R 1024
1463 #define QGEMM_DEFAULT_P 504
1464 #define QGEMM_DEFAULT_R qgemm_r
1466 #define CGEMM_DEFAULT_P 768
1467 #define CGEMM_DEFAULT_R cgemm_r
1468 //#define CGEMM_DEFAULT_R 1024
1470 #define ZGEMM_DEFAULT_P 512
1471 #define ZGEMM_DEFAULT_R zgemm_r
1472 //#define ZGEMM_DEFAULT_R 1024
1474 #define XGEMM_DEFAULT_P 252
1475 #define XGEMM_DEFAULT_R xgemm_r
1477 #define SGEMM_DEFAULT_Q 384
1478 #define DGEMM_DEFAULT_Q 256
1479 #define QGEMM_DEFAULT_Q 128
1480 #define CGEMM_DEFAULT_Q 512
1481 #define ZGEMM_DEFAULT_Q 192
1482 #define XGEMM_DEFAULT_Q 128
/* 3M parameters: unroll factors for C/Z only, then literal P/Q/R for
 * C/Z/X (R is 12288 for all three). */
1484 #define CGEMM3M_DEFAULT_UNROLL_N 8
1485 #define CGEMM3M_DEFAULT_UNROLL_M 4
1486 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1487 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1489 #define CGEMM3M_DEFAULT_P 448
1490 #define ZGEMM3M_DEFAULT_P 224
1491 #define XGEMM3M_DEFAULT_P 112
1492 #define CGEMM3M_DEFAULT_Q 224
1493 #define ZGEMM3M_DEFAULT_Q 224
1494 #define XGEMM3M_DEFAULT_Q 224
1495 #define CGEMM3M_DEFAULT_R 12288
1496 #define ZGEMM3M_DEFAULT_R 12288
1497 #define XGEMM3M_DEFAULT_R 12288
1501 #define GETRF_FACTOR 0.72
/* Tuning group: zero offsets, alignment constant, a SWITCH_RATIO /
 * GEMM_PREFERED_SIZE pair that depends on XDOUBLE/DOUBLE, UNROLL_M/N/MN,
 * two P/Q/R sets and 3M parameters.  The directive that selects this group,
 * and most #else/#endif lines inside it, are not visible in this excerpt. */
1510 #define GEMM_DEFAULT_OFFSET_A 0
1511 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
1512 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* With XDOUBLE or DOUBLE defined both values are 4.  The following 8/8 pair
 * is presumably the #else branch (directive not shown -- confirm). */
1516 #if defined(XDOUBLE) || defined(DOUBLE)
1517 #define SWITCH_RATIO 4
1518 #define GEMM_PREFERED_SIZE 4
1520 #define SWITCH_RATIO 8
1521 #define GEMM_PREFERED_SIZE 8
/* Two UNROLL sets in a row; the directives choosing between them are not
 * shown (presumably #if/#else branches -- confirm). */
1526 #define SGEMM_DEFAULT_UNROLL_M 4
1527 #define DGEMM_DEFAULT_UNROLL_M 2
1528 #define QGEMM_DEFAULT_UNROLL_M 2
1529 #define CGEMM_DEFAULT_UNROLL_M 2
1530 #define ZGEMM_DEFAULT_UNROLL_M 1
1531 #define XGEMM_DEFAULT_UNROLL_M 1
1533 #define SGEMM_DEFAULT_UNROLL_N 4
1534 #define DGEMM_DEFAULT_UNROLL_N 4
1535 #define QGEMM_DEFAULT_UNROLL_N 2
1536 #define CGEMM_DEFAULT_UNROLL_N 2
1537 #define ZGEMM_DEFAULT_UNROLL_N 2
1538 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: UNROLL_M 8/4/2/8/4/1, DGEMM UNROLL_N 8, plus UNROLL_MN 32 for
 * S and D. */
1542 #define SGEMM_DEFAULT_UNROLL_M 8
1543 #define DGEMM_DEFAULT_UNROLL_M 4
1544 #define QGEMM_DEFAULT_UNROLL_M 2
1545 #define CGEMM_DEFAULT_UNROLL_M 8
1546 #define ZGEMM_DEFAULT_UNROLL_M 4
1547 #define XGEMM_DEFAULT_UNROLL_M 1
1549 #define SGEMM_DEFAULT_UNROLL_N 4
1550 #define DGEMM_DEFAULT_UNROLL_N 8
1551 #define QGEMM_DEFAULT_UNROLL_N 2
1552 #define CGEMM_DEFAULT_UNROLL_N 2
1553 #define ZGEMM_DEFAULT_UNROLL_N 2
1554 #define XGEMM_DEFAULT_UNROLL_N 1
1556 #define SGEMM_DEFAULT_UNROLL_MN 32
1557 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First P/Q/R set.  CGEMM's R is the literal 1024 while the other prefixes
 * use identifiers not defined in this excerpt. */
1562 #define SGEMM_DEFAULT_P 512
1563 #define SGEMM_DEFAULT_R sgemm_r
1564 #define DGEMM_DEFAULT_P 512
1565 #define DGEMM_DEFAULT_R dgemm_r
1566 #define QGEMM_DEFAULT_P 504
1567 #define QGEMM_DEFAULT_R qgemm_r
1568 #define CGEMM_DEFAULT_P 128
1569 #define CGEMM_DEFAULT_R 1024
1570 #define ZGEMM_DEFAULT_P 512
1571 #define ZGEMM_DEFAULT_R zgemm_r
1572 #define XGEMM_DEFAULT_P 252
1573 #define XGEMM_DEFAULT_R xgemm_r
1574 #define SGEMM_DEFAULT_Q 256
1575 #define DGEMM_DEFAULT_Q 256
1576 #define QGEMM_DEFAULT_Q 128
1577 #define CGEMM_DEFAULT_Q 256
1578 #define ZGEMM_DEFAULT_Q 192
1579 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set (selecting directive not shown). */
1583 #define SGEMM_DEFAULT_P 320
1584 #define DGEMM_DEFAULT_P 512
1585 #define CGEMM_DEFAULT_P 256
1586 #define ZGEMM_DEFAULT_P 192
/* Two S/D Q pairs that differ only in DGEMM (128 vs 256); the condition
 * separating them is not visible here. */
1589 #define SGEMM_DEFAULT_Q 320
1590 #define DGEMM_DEFAULT_Q 128
1592 #define SGEMM_DEFAULT_Q 320
1593 #define DGEMM_DEFAULT_Q 256
1595 #define CGEMM_DEFAULT_Q 256
1596 #define ZGEMM_DEFAULT_Q 192
/* Here DGEMM's R is the literal 13824; S/C/Z use identifiers. */
1598 #define SGEMM_DEFAULT_R sgemm_r
1599 #define DGEMM_DEFAULT_R 13824
1600 #define CGEMM_DEFAULT_R cgemm_r
1601 #define ZGEMM_DEFAULT_R zgemm_r
1603 #define QGEMM_DEFAULT_Q 128
1604 #define QGEMM_DEFAULT_P 504
1605 #define QGEMM_DEFAULT_R qgemm_r
1606 #define XGEMM_DEFAULT_P 252
1607 #define XGEMM_DEFAULT_R xgemm_r
1608 #define XGEMM_DEFAULT_Q 128
/* 3M parameters: unroll factors for C/Z, literal P/Q/R for C/Z/X. */
1610 #define CGEMM3M_DEFAULT_UNROLL_N 4
1611 #define CGEMM3M_DEFAULT_UNROLL_M 8
1612 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1613 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1615 #define CGEMM3M_DEFAULT_P 320
1616 #define ZGEMM3M_DEFAULT_P 256
1617 #define XGEMM3M_DEFAULT_P 112
1618 #define CGEMM3M_DEFAULT_Q 320
1619 #define ZGEMM3M_DEFAULT_Q 256
1620 #define XGEMM3M_DEFAULT_Q 224
1621 #define CGEMM3M_DEFAULT_R 12288
1622 #define ZGEMM3M_DEFAULT_R 12288
1623 #define XGEMM3M_DEFAULT_R 12288
/* Tuning group: zero offsets, alignment constant, SWITCH_RATIO /
 * GEMM_PREFERED_SIZE depending on XDOUBLE/DOUBLE, USE_SGEMM_KERNEL_DIRECT,
 * UNROLL_M/N/MN, two P/Q/R sets and 3M parameters.  The directive that
 * selects this group, and most #else/#endif lines inside it, are not
 * visible in this excerpt. */
1635 #define GEMM_DEFAULT_OFFSET_A 0
1636 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
1637 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* With XDOUBLE or DOUBLE defined both values are 8.  The following 16/16
 * pair is presumably the #else branch (directive not shown -- confirm). */
1641 #if defined(XDOUBLE) || defined(DOUBLE)
1642 #define SWITCH_RATIO 8
1643 #define GEMM_PREFERED_SIZE 8
1645 #define SWITCH_RATIO 16
1646 #define GEMM_PREFERED_SIZE 16
1648 #define USE_SGEMM_KERNEL_DIRECT 1
/* Two UNROLL sets in a row; the directives choosing between them are not
 * shown (presumably #if/#else branches -- confirm). */
1652 #define SGEMM_DEFAULT_UNROLL_M 4
1653 #define DGEMM_DEFAULT_UNROLL_M 2
1654 #define QGEMM_DEFAULT_UNROLL_M 2
1655 #define CGEMM_DEFAULT_UNROLL_M 2
1656 #define ZGEMM_DEFAULT_UNROLL_M 1
1657 #define XGEMM_DEFAULT_UNROLL_M 1
1659 #define SGEMM_DEFAULT_UNROLL_N 4
1660 #define DGEMM_DEFAULT_UNROLL_N 4
1661 #define QGEMM_DEFAULT_UNROLL_N 2
1662 #define CGEMM_DEFAULT_UNROLL_N 2
1663 #define ZGEMM_DEFAULT_UNROLL_N 2
1664 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: UNROLL_M 16/16/2/8/4/1, DGEMM UNROLL_N 2, UNROLL_MN 32 for
 * S and D. */
1668 #define SGEMM_DEFAULT_UNROLL_M 16
1669 #define DGEMM_DEFAULT_UNROLL_M 16
1670 #define QGEMM_DEFAULT_UNROLL_M 2
1671 #define CGEMM_DEFAULT_UNROLL_M 8
1672 #define ZGEMM_DEFAULT_UNROLL_M 4
1673 #define XGEMM_DEFAULT_UNROLL_M 1
1675 #define SGEMM_DEFAULT_UNROLL_N 4
1676 #define DGEMM_DEFAULT_UNROLL_N 2
1677 #define QGEMM_DEFAULT_UNROLL_N 2
1678 #define CGEMM_DEFAULT_UNROLL_N 2
1679 #define ZGEMM_DEFAULT_UNROLL_N 2
1680 #define XGEMM_DEFAULT_UNROLL_N 1
1682 #define SGEMM_DEFAULT_UNROLL_MN 32
1683 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First P/Q/R set.  CGEMM's R is the literal 1024 while the other prefixes
 * use identifiers not defined in this excerpt. */
1688 #define SGEMM_DEFAULT_P 512
1689 #define SGEMM_DEFAULT_R sgemm_r
1690 #define DGEMM_DEFAULT_P 512
1691 #define DGEMM_DEFAULT_R dgemm_r
1692 #define QGEMM_DEFAULT_P 504
1693 #define QGEMM_DEFAULT_R qgemm_r
1694 #define CGEMM_DEFAULT_P 128
1695 #define CGEMM_DEFAULT_R 1024
1696 #define ZGEMM_DEFAULT_P 512
1697 #define ZGEMM_DEFAULT_R zgemm_r
1698 #define XGEMM_DEFAULT_P 252
1699 #define XGEMM_DEFAULT_R xgemm_r
1700 #define SGEMM_DEFAULT_Q 256
1701 #define DGEMM_DEFAULT_Q 256
1702 #define QGEMM_DEFAULT_Q 128
1703 #define CGEMM_DEFAULT_Q 256
1704 #define ZGEMM_DEFAULT_Q 192
1705 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set (selecting directive not shown). */
1709 #define SGEMM_DEFAULT_P 448
1710 #define DGEMM_DEFAULT_P 192
1711 #define CGEMM_DEFAULT_P 384
1712 #define ZGEMM_DEFAULT_P 256
1714 #define SGEMM_DEFAULT_Q 448
1715 #define DGEMM_DEFAULT_Q 384
1716 #define CGEMM_DEFAULT_Q 192
1717 #define ZGEMM_DEFAULT_Q 128
/* Here DGEMM's R is the literal 8640; S/C/Z use identifiers. */
1719 #define SGEMM_DEFAULT_R sgemm_r
1720 #define DGEMM_DEFAULT_R 8640
1721 #define CGEMM_DEFAULT_R cgemm_r
1722 #define ZGEMM_DEFAULT_R zgemm_r
1724 #define QGEMM_DEFAULT_Q 128
1725 #define QGEMM_DEFAULT_P 504
1726 #define QGEMM_DEFAULT_R qgemm_r
1727 #define XGEMM_DEFAULT_P 252
1728 #define XGEMM_DEFAULT_R xgemm_r
1729 #define XGEMM_DEFAULT_Q 128
/* 3M parameters: unroll factors for C/Z, literal P/Q/R for C/Z/X. */
1731 #define CGEMM3M_DEFAULT_UNROLL_N 4
1732 #define CGEMM3M_DEFAULT_UNROLL_M 8
1733 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1734 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1736 #define CGEMM3M_DEFAULT_P 320
1737 #define ZGEMM3M_DEFAULT_P 256
1738 #define XGEMM3M_DEFAULT_P 112
1739 #define CGEMM3M_DEFAULT_Q 320
1740 #define ZGEMM3M_DEFAULT_Q 256
1741 #define XGEMM3M_DEFAULT_Q 224
1742 #define CGEMM3M_DEFAULT_R 12288
1743 #define ZGEMM3M_DEFAULT_R 12288
1744 #define XGEMM3M_DEFAULT_R 12288
/* Tuning group laid out like the one before it: zero offsets, alignment
 * constant, SWITCH_RATIO / GEMM_PREFERED_SIZE depending on XDOUBLE/DOUBLE,
 * USE_SGEMM_KERNEL_DIRECT, UNROLL_M/N/MN, two P/Q/R sets and 3M parameters.
 * Within this excerpt the values differ from the preceding group only in
 * the second SGEMM P/Q pair (640/320).  The selecting directive and most
 * #else/#endif lines are not visible here. */
1756 #define GEMM_DEFAULT_OFFSET_A 0
1757 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
1758 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* With XDOUBLE or DOUBLE defined both values are 8.  The following 16/16
 * pair is presumably the #else branch (directive not shown -- confirm). */
1762 #if defined(XDOUBLE) || defined(DOUBLE)
1763 #define SWITCH_RATIO 8
1764 #define GEMM_PREFERED_SIZE 8
1766 #define SWITCH_RATIO 16
1767 #define GEMM_PREFERED_SIZE 16
1769 #define USE_SGEMM_KERNEL_DIRECT 1
/* Two UNROLL sets in a row; the directives choosing between them are not
 * shown (presumably #if/#else branches -- confirm). */
1773 #define SGEMM_DEFAULT_UNROLL_M 4
1774 #define DGEMM_DEFAULT_UNROLL_M 2
1775 #define QGEMM_DEFAULT_UNROLL_M 2
1776 #define CGEMM_DEFAULT_UNROLL_M 2
1777 #define ZGEMM_DEFAULT_UNROLL_M 1
1778 #define XGEMM_DEFAULT_UNROLL_M 1
1780 #define SGEMM_DEFAULT_UNROLL_N 4
1781 #define DGEMM_DEFAULT_UNROLL_N 4
1782 #define QGEMM_DEFAULT_UNROLL_N 2
1783 #define CGEMM_DEFAULT_UNROLL_N 2
1784 #define ZGEMM_DEFAULT_UNROLL_N 2
1785 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second set: UNROLL_M 16/16/2/8/4/1, DGEMM UNROLL_N 2, UNROLL_MN 32 for
 * S and D. */
1789 #define SGEMM_DEFAULT_UNROLL_M 16
1790 #define DGEMM_DEFAULT_UNROLL_M 16
1791 #define QGEMM_DEFAULT_UNROLL_M 2
1792 #define CGEMM_DEFAULT_UNROLL_M 8
1793 #define ZGEMM_DEFAULT_UNROLL_M 4
1794 #define XGEMM_DEFAULT_UNROLL_M 1
1796 #define SGEMM_DEFAULT_UNROLL_N 4
1797 #define DGEMM_DEFAULT_UNROLL_N 2
1798 #define QGEMM_DEFAULT_UNROLL_N 2
1799 #define CGEMM_DEFAULT_UNROLL_N 2
1800 #define ZGEMM_DEFAULT_UNROLL_N 2
1801 #define XGEMM_DEFAULT_UNROLL_N 1
1803 #define SGEMM_DEFAULT_UNROLL_MN 32
1804 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First P/Q/R set.  CGEMM's R is the literal 1024 while the other prefixes
 * use identifiers not defined in this excerpt. */
1809 #define SGEMM_DEFAULT_P 512
1810 #define SGEMM_DEFAULT_R sgemm_r
1811 #define DGEMM_DEFAULT_P 512
1812 #define DGEMM_DEFAULT_R dgemm_r
1813 #define QGEMM_DEFAULT_P 504
1814 #define QGEMM_DEFAULT_R qgemm_r
1815 #define CGEMM_DEFAULT_P 128
1816 #define CGEMM_DEFAULT_R 1024
1817 #define ZGEMM_DEFAULT_P 512
1818 #define ZGEMM_DEFAULT_R zgemm_r
1819 #define XGEMM_DEFAULT_P 252
1820 #define XGEMM_DEFAULT_R xgemm_r
1821 #define SGEMM_DEFAULT_Q 256
1822 #define DGEMM_DEFAULT_Q 256
1823 #define QGEMM_DEFAULT_Q 128
1824 #define CGEMM_DEFAULT_Q 256
1825 #define ZGEMM_DEFAULT_Q 192
1826 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set (selecting directive not shown). */
1830 #define SGEMM_DEFAULT_P 640
1831 #define DGEMM_DEFAULT_P 192
1832 #define CGEMM_DEFAULT_P 384
1833 #define ZGEMM_DEFAULT_P 256
1835 #define SGEMM_DEFAULT_Q 320
1836 #define DGEMM_DEFAULT_Q 384
1837 #define CGEMM_DEFAULT_Q 192
1838 #define ZGEMM_DEFAULT_Q 128
/* Here DGEMM's R is the literal 8640; S/C/Z use identifiers. */
1840 #define SGEMM_DEFAULT_R sgemm_r
1841 #define DGEMM_DEFAULT_R 8640
1842 #define CGEMM_DEFAULT_R cgemm_r
1843 #define ZGEMM_DEFAULT_R zgemm_r
1845 #define QGEMM_DEFAULT_Q 128
1846 #define QGEMM_DEFAULT_P 504
1847 #define QGEMM_DEFAULT_R qgemm_r
1848 #define XGEMM_DEFAULT_P 252
1849 #define XGEMM_DEFAULT_R xgemm_r
1850 #define XGEMM_DEFAULT_Q 128
/* 3M parameters: unroll factors for C/Z, literal P/Q/R for C/Z/X. */
1852 #define CGEMM3M_DEFAULT_UNROLL_N 4
1853 #define CGEMM3M_DEFAULT_UNROLL_M 8
1854 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1855 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1857 #define CGEMM3M_DEFAULT_P 320
1858 #define ZGEMM3M_DEFAULT_P 256
1859 #define XGEMM3M_DEFAULT_P 112
1860 #define CGEMM3M_DEFAULT_Q 320
1861 #define ZGEMM3M_DEFAULT_Q 256
1862 #define XGEMM3M_DEFAULT_Q 224
1863 #define CGEMM3M_DEFAULT_R 12288
1864 #define ZGEMM3M_DEFAULT_R 12288
1865 #define XGEMM3M_DEFAULT_R 12288
/* Tuning group: offsets (A=64, B=0), a wider alignment constant, two
 * UNROLL_M sets followed by a single UNROLL_N set, and P/Q/R defaults.  The
 * selecting directive is not visible in this excerpt. */
1876 #define GEMM_DEFAULT_OFFSET_A 64
1877 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x0ffff == 2^16 - 1 */
1878 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Two UNROLL_M sets in a row; the directives choosing between them are not
 * shown (presumably #if/#else branches -- confirm). */
1883 #define SGEMM_DEFAULT_UNROLL_M 4
1884 #define DGEMM_DEFAULT_UNROLL_M 2
1885 #define QGEMM_DEFAULT_UNROLL_M 2
1886 #define CGEMM_DEFAULT_UNROLL_M 2
1887 #define ZGEMM_DEFAULT_UNROLL_M 1
1888 #define XGEMM_DEFAULT_UNROLL_M 1
1890 #define SGEMM_DEFAULT_UNROLL_M 8
1891 #define DGEMM_DEFAULT_UNROLL_M 4
1892 #define QGEMM_DEFAULT_UNROLL_M 2
1893 #define CGEMM_DEFAULT_UNROLL_M 4
1894 #define ZGEMM_DEFAULT_UNROLL_M 2
1895 #define XGEMM_DEFAULT_UNROLL_M 1
/* UNROLL_N appears only once in this group. */
1898 #define SGEMM_DEFAULT_UNROLL_N 4
1899 #define DGEMM_DEFAULT_UNROLL_N 2
1900 #define QGEMM_DEFAULT_UNROLL_N 2
1901 #define CGEMM_DEFAULT_UNROLL_N 2
1902 #define ZGEMM_DEFAULT_UNROLL_N 1
1903 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers that are not defined in this excerpt. */
1905 #define SGEMM_DEFAULT_P sgemm_p
1906 #define SGEMM_DEFAULT_R sgemm_r
1908 #define DGEMM_DEFAULT_P dgemm_p
1909 #define DGEMM_DEFAULT_R dgemm_r
1911 #define QGEMM_DEFAULT_P qgemm_p
1912 #define QGEMM_DEFAULT_R qgemm_r
1914 #define CGEMM_DEFAULT_P cgemm_p
1915 #define CGEMM_DEFAULT_R cgemm_r
1917 #define ZGEMM_DEFAULT_P zgemm_p
1918 #define ZGEMM_DEFAULT_R zgemm_r
1920 #define XGEMM_DEFAULT_P xgemm_p
1921 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every prefix. */
1923 #define SGEMM_DEFAULT_Q 256
1924 #define DGEMM_DEFAULT_Q 256
1925 #define QGEMM_DEFAULT_Q 256
1926 #define CGEMM_DEFAULT_Q 256
1927 #define ZGEMM_DEFAULT_Q 256
1928 #define XGEMM_DEFAULT_Q 256
/* Tuning group: offsets (A=0, B=128), alignment constant, one UNROLL_M/N
 * pair per prefix, P/Q/R defaults and GETRF_FACTOR.  The selecting
 * directive is not visible in this excerpt. */
1938 #define GEMM_DEFAULT_OFFSET_A 0
1939 #define GEMM_DEFAULT_OFFSET_B 128
/* 0x03fff == 2^14 - 1 */
1940 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Square unroll: 8x8 for S/D/Q and 4x4 for C/Z/X. */
1942 #define SGEMM_DEFAULT_UNROLL_M 8
1943 #define SGEMM_DEFAULT_UNROLL_N 8
1944 #define DGEMM_DEFAULT_UNROLL_M 8
1945 #define DGEMM_DEFAULT_UNROLL_N 8
1946 #define QGEMM_DEFAULT_UNROLL_M 8
1947 #define QGEMM_DEFAULT_UNROLL_N 8
1948 #define CGEMM_DEFAULT_UNROLL_M 4
1949 #define CGEMM_DEFAULT_UNROLL_N 4
1950 #define ZGEMM_DEFAULT_UNROLL_M 4
1951 #define ZGEMM_DEFAULT_UNROLL_N 4
1952 #define XGEMM_DEFAULT_UNROLL_M 4
1953 #define XGEMM_DEFAULT_UNROLL_N 4
/* P expands to identifiers that are not defined in this excerpt. */
1955 #define SGEMM_DEFAULT_P sgemm_p
1956 #define DGEMM_DEFAULT_P dgemm_p
1957 #define QGEMM_DEFAULT_P qgemm_p
1958 #define CGEMM_DEFAULT_P cgemm_p
1959 #define ZGEMM_DEFAULT_P zgemm_p
1960 #define XGEMM_DEFAULT_P xgemm_p
/* Q is 1024 for every prefix. */
1962 #define SGEMM_DEFAULT_Q 1024
1963 #define DGEMM_DEFAULT_Q 1024
1964 #define QGEMM_DEFAULT_Q 1024
1965 #define CGEMM_DEFAULT_Q 1024
1966 #define ZGEMM_DEFAULT_Q 1024
1967 #define XGEMM_DEFAULT_Q 1024
/* R expands to identifiers that are not defined in this excerpt. */
1969 #define SGEMM_DEFAULT_R sgemm_r
1970 #define DGEMM_DEFAULT_R dgemm_r
1971 #define QGEMM_DEFAULT_R qgemm_r
1972 #define CGEMM_DEFAULT_R cgemm_r
1973 #define ZGEMM_DEFAULT_R zgemm_r
1974 #define XGEMM_DEFAULT_R xgemm_r
1978 #define GETRF_FACTOR 0.65
/* Tuning group compiled when EV4, EV5 or EV6 is defined: offsets,
 * alignment constant, UNROLL_M/N for S/D/C/Z, and three successive P/Q
 * sets.  The directives that choose between the three sets, and the
 * closing #endif, are not visible in this excerpt. */
1982 #if defined(EV4) || defined(EV5) || defined(EV6)
1992 #define GEMM_DEFAULT_OFFSET_A 512
1993 #define GEMM_DEFAULT_OFFSET_B 512
/* 0x0ffff == 2^16 - 1 */
1994 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1996 #define SGEMM_DEFAULT_UNROLL_M 4
1997 #define SGEMM_DEFAULT_UNROLL_N 4
1998 #define DGEMM_DEFAULT_UNROLL_M 4
1999 #define DGEMM_DEFAULT_UNROLL_N 4
2000 #define CGEMM_DEFAULT_UNROLL_M 2
2001 #define CGEMM_DEFAULT_UNROLL_N 2
2002 #define ZGEMM_DEFAULT_UNROLL_M 2
2003 #define ZGEMM_DEFAULT_UNROLL_N 2
/* First set: the only one of the three that also fixes R to a literal. */
2008 #define SGEMM_DEFAULT_P 32
2009 #define SGEMM_DEFAULT_Q 112
2010 #define SGEMM_DEFAULT_R 256
2012 #define DGEMM_DEFAULT_P 32
2013 #define DGEMM_DEFAULT_Q 56
2014 #define DGEMM_DEFAULT_R 256
2016 #define CGEMM_DEFAULT_P 32
2017 #define CGEMM_DEFAULT_Q 64
2018 #define CGEMM_DEFAULT_R 240
2020 #define ZGEMM_DEFAULT_P 32
2021 #define ZGEMM_DEFAULT_Q 32
2022 #define ZGEMM_DEFAULT_R 240
/* Second set: P is 64 for every prefix; no R given here. */
2026 #define SGEMM_DEFAULT_P 64
2027 #define SGEMM_DEFAULT_Q 256
2029 #define DGEMM_DEFAULT_P 64
2030 #define DGEMM_DEFAULT_Q 128
2032 #define CGEMM_DEFAULT_P 64
2033 #define CGEMM_DEFAULT_Q 128
2035 #define ZGEMM_DEFAULT_P 64
2036 #define ZGEMM_DEFAULT_Q 64
/* Third set: P is 256 except ZGEMM (128); no R given here. */
2040 #define SGEMM_DEFAULT_P 256
2041 #define SGEMM_DEFAULT_Q 512
2043 #define DGEMM_DEFAULT_P 256
2044 #define DGEMM_DEFAULT_Q 256
2046 #define CGEMM_DEFAULT_P 256
2047 #define CGEMM_DEFAULT_Q 256
2049 #define ZGEMM_DEFAULT_P 128
2050 #define ZGEMM_DEFAULT_Q 256
/* Tuning group: offsets (A=0, B=8192), alignment constant, UNROLL_M/N and
 * literal P/Q for S/D/C/Z.  No R values appear in this excerpt for this
 * group, and the selecting directive is not visible. */
2060 #define GEMM_DEFAULT_OFFSET_A 0
2061 #define GEMM_DEFAULT_OFFSET_B 8192
/* 0x0ffff == 2^16 - 1 */
2062 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2064 #define SGEMM_DEFAULT_UNROLL_M 16
2065 #define SGEMM_DEFAULT_UNROLL_N 4
2066 #define DGEMM_DEFAULT_UNROLL_M 4
2067 #define DGEMM_DEFAULT_UNROLL_N 4
2068 #define CGEMM_DEFAULT_UNROLL_M 8
2069 #define CGEMM_DEFAULT_UNROLL_N 2
2070 #define ZGEMM_DEFAULT_UNROLL_M 2
2071 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
2073 #define SGEMM_DEFAULT_P 128
2074 #define DGEMM_DEFAULT_P 128
2075 #define CGEMM_DEFAULT_P 128
2076 #define ZGEMM_DEFAULT_P 128
2078 #define SGEMM_DEFAULT_Q 512
2079 #define DGEMM_DEFAULT_Q 256
2080 #define CGEMM_DEFAULT_Q 256
2081 #define ZGEMM_DEFAULT_Q 128
/* Tuning group: offsets (A=0, B=1024), alignment constant, UNROLL_M/N and
 * literal P/Q for S/D/C/Z.  No R values appear in this excerpt for this
 * group, and the selecting directive is not visible. */
2087 #define GEMM_DEFAULT_OFFSET_A 0
2088 #define GEMM_DEFAULT_OFFSET_B 1024
/* 0x0ffff == 2^16 - 1 */
2089 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2091 #define SGEMM_DEFAULT_UNROLL_M 16
2092 #define SGEMM_DEFAULT_UNROLL_N 4
2093 #define DGEMM_DEFAULT_UNROLL_M 4
2094 #define DGEMM_DEFAULT_UNROLL_N 4
2095 #define CGEMM_DEFAULT_UNROLL_M 2
2096 #define CGEMM_DEFAULT_UNROLL_N 2
2097 #define ZGEMM_DEFAULT_UNROLL_M 2
2098 #define ZGEMM_DEFAULT_UNROLL_N 2
2100 #define SGEMM_DEFAULT_P 256
2101 #define DGEMM_DEFAULT_P 128
2102 #define CGEMM_DEFAULT_P 128
2103 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for every prefix. */
2105 #define SGEMM_DEFAULT_Q 256
2106 #define DGEMM_DEFAULT_Q 256
2107 #define CGEMM_DEFAULT_Q 256
2108 #define ZGEMM_DEFAULT_Q 256
/* Tuning group: non-zero offsets for both A and B, alignment constant,
 * byte-order dependent unroll factors, and P values that depend on the OS
 * and on L2_SIZE.  The directive that selects this group and the
 * #else/#endif lines inside it are not visible in this excerpt. */
2118 #define GEMM_DEFAULT_OFFSET_A 2688
2119 #define GEMM_DEFAULT_OFFSET_B 3072
/* 0x03fff == 2^14 - 1 */
2120 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Big-endian builds get SGEMM UNROLL_M 4; the 16 that follows is presumably
 * the #else (little-endian) value -- directive not shown, confirm. */
2122 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2123 #define SGEMM_DEFAULT_UNROLL_M 4
2125 #define SGEMM_DEFAULT_UNROLL_M 16
2127 #define SGEMM_DEFAULT_UNROLL_N 4
2128 #define DGEMM_DEFAULT_UNROLL_M 4
2129 #define DGEMM_DEFAULT_UNROLL_N 4
/* Same pattern for CGEMM UNROLL_M: 2 on big-endian, then 8. */
2130 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2131 #define CGEMM_DEFAULT_UNROLL_M 2
2133 #define CGEMM_DEFAULT_UNROLL_M 8
2135 #define CGEMM_DEFAULT_UNROLL_N 2
2136 #define ZGEMM_DEFAULT_UNROLL_M 2
2137 #define ZGEMM_DEFAULT_UNROLL_N 2
/* On Linux/Darwin/FreeBSD, P depends on L2_SIZE.
 * NOTE(review): 1024976 is not a power of two; 1 MiB would be 1048576.
 * If L2_SIZE is reported in bytes this comparison may never be true, so
 * the 176 set would always be the one used -- possible typo, confirm
 * against where L2_SIZE is defined before changing anything. */
2139 #if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
2140 #if L2_SIZE == 1024976
2141 #define SGEMM_DEFAULT_P 320
2142 #define DGEMM_DEFAULT_P 256
2143 #define CGEMM_DEFAULT_P 256
2144 #define ZGEMM_DEFAULT_P 256
/* Alternative P set: 176 for every prefix (presumably the #else branch). */
2146 #define SGEMM_DEFAULT_P 176
2147 #define DGEMM_DEFAULT_P 176
2148 #define CGEMM_DEFAULT_P 176
2149 #define ZGEMM_DEFAULT_P 176
2153 #define SGEMM_DEFAULT_Q 512
2154 #define DGEMM_DEFAULT_Q 256
2155 #define CGEMM_DEFAULT_Q 256
2156 #define ZGEMM_DEFAULT_Q 128
/* Tuning group: offsets written as (32 * 0), i.e. zero; alignment constant;
 * UNROLL_M/N; literal P/Q; and R defined in terms of P.  The selecting
 * directive is not visible in this excerpt. */
2167 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2168 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
/* 0x0ffff == 2^16 - 1 */
2169 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2171 #define SGEMM_DEFAULT_UNROLL_M 4
2172 #define SGEMM_DEFAULT_UNROLL_N 4
2173 #define DGEMM_DEFAULT_UNROLL_M 4
2174 #define DGEMM_DEFAULT_UNROLL_N 4
2175 #define CGEMM_DEFAULT_UNROLL_M 2
2176 #define CGEMM_DEFAULT_UNROLL_N 2
2177 #define ZGEMM_DEFAULT_UNROLL_M 2
2178 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
2180 #define SGEMM_DEFAULT_P 512
2181 #define DGEMM_DEFAULT_P 512
2182 #define CGEMM_DEFAULT_P 512
2183 #define ZGEMM_DEFAULT_P 512
2185 #define SGEMM_DEFAULT_Q 1024
2186 #define DGEMM_DEFAULT_Q 512
2187 #define CGEMM_DEFAULT_Q 512
2188 #define ZGEMM_DEFAULT_Q 256
/* R aliases the matching P macro, so it follows whatever P expands to at
 * the point of use. */
2190 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2191 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2192 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2193 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/* Tuning group: offsets written as (32 * 0), i.e. zero; alignment constant;
 * UNROLL_M/N; literal P; and two alternative Q sets.  The selecting
 * directive and the condition between the Q sets are not visible in this
 * excerpt. */
2203 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2204 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
/* 0x0ffff == 2^16 - 1 */
2205 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2207 #define SGEMM_DEFAULT_UNROLL_M 8
2208 #define SGEMM_DEFAULT_UNROLL_N 4
2209 #define DGEMM_DEFAULT_UNROLL_M 8
2210 #define DGEMM_DEFAULT_UNROLL_N 4
2211 #define CGEMM_DEFAULT_UNROLL_M 4
2212 #define CGEMM_DEFAULT_UNROLL_N 2
2213 #define ZGEMM_DEFAULT_UNROLL_M 4
2214 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
2216 #define SGEMM_DEFAULT_P 128
2217 #define DGEMM_DEFAULT_P 128
2218 #define CGEMM_DEFAULT_P 128
2219 #define ZGEMM_DEFAULT_P 128
/* First Q set: large values (4096 down to 1024). */
2221 #define SGEMM_DEFAULT_Q 4096
2222 #define DGEMM_DEFAULT_Q 3072
2223 #define CGEMM_DEFAULT_Q 2048
2224 #define ZGEMM_DEFAULT_Q 1024
/* Second Q set: small values (512 down to 128); presumably the alternative
 * branch of a conditional that is not shown -- confirm. */
2226 #define SGEMM_DEFAULT_Q 512
2227 #define DGEMM_DEFAULT_Q 256
2228 #define CGEMM_DEFAULT_Q 256
2229 #define ZGEMM_DEFAULT_Q 128
/* Tuning group compiled when POWER3, POWER4 or POWER5 is defined: offsets,
 * alignment constant, UNROLL_M/N, then several alternative P/Q/R sets, two
 * of which depend on ALLOC_HUGETLB.  The directives separating the sets and
 * the closing #endif lines are not visible in this excerpt. */
2237 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
2238 #define GEMM_DEFAULT_OFFSET_A 0
2239 #define GEMM_DEFAULT_OFFSET_B 2048
/* 0x0ffff == 2^16 - 1 */
2240 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2242 #define SGEMM_DEFAULT_UNROLL_M 4
2243 #define SGEMM_DEFAULT_UNROLL_N 4
2244 #define DGEMM_DEFAULT_UNROLL_M 4
2245 #define DGEMM_DEFAULT_UNROLL_N 4
2246 #define CGEMM_DEFAULT_UNROLL_M 2
2247 #define CGEMM_DEFAULT_UNROLL_N 2
2248 #define ZGEMM_DEFAULT_UNROLL_M 2
2249 #define ZGEMM_DEFAULT_UNROLL_N 2
/* First set: literal P/Q/R for S, D and Z.  No CGEMM triple is visible in
 * this excerpt for this set. */
2256 #define SGEMM_DEFAULT_P 256
2257 #define SGEMM_DEFAULT_Q 432
2258 #define SGEMM_DEFAULT_R 1012
2260 #define DGEMM_DEFAULT_P 256
2261 #define DGEMM_DEFAULT_Q 216
2262 #define DGEMM_DEFAULT_R 1012
2264 #define ZGEMM_DEFAULT_P 256
2265 #define ZGEMM_DEFAULT_Q 104
2266 #define ZGEMM_DEFAULT_R 1012
/* Second set: P is 184 with ALLOC_HUGETLB; the 144 values that follow are
 * presumably the #else branch (directive not shown -- confirm). */
2270 #ifdef ALLOC_HUGETLB
2271 #define SGEMM_DEFAULT_P 184
2272 #define DGEMM_DEFAULT_P 184
2273 #define CGEMM_DEFAULT_P 184
2274 #define ZGEMM_DEFAULT_P 184
2276 #define SGEMM_DEFAULT_P 144
2277 #define DGEMM_DEFAULT_P 144
2278 #define CGEMM_DEFAULT_P 144
2279 #define ZGEMM_DEFAULT_P 144
/* Third set: again split on ALLOC_HUGETLB, with per-prefix values. */
2284 #ifdef ALLOC_HUGETLB
2285 #define SGEMM_DEFAULT_P 512
2286 #define DGEMM_DEFAULT_P 256
2287 #define CGEMM_DEFAULT_P 256
2288 #define ZGEMM_DEFAULT_P 128
2290 #define SGEMM_DEFAULT_P 320
2291 #define DGEMM_DEFAULT_P 160
2292 #define CGEMM_DEFAULT_P 160
2293 #define ZGEMM_DEFAULT_P 80
/* Q is 256 for every prefix. */
2296 #define SGEMM_DEFAULT_Q 256
2297 #define CGEMM_DEFAULT_Q 256
2298 #define DGEMM_DEFAULT_Q 256
2299 #define ZGEMM_DEFAULT_Q 256
/* Tuning group: offsets (A=384, B=1024), alignment constant, UNROLL_M/N and
 * literal P/Q for S/D/C/Z.  No R values appear in this excerpt for this
 * group, and the selecting directive is not visible. */
2311 #define GEMM_DEFAULT_OFFSET_A 384
2312 #define GEMM_DEFAULT_OFFSET_B 1024
/* 0x03fff == 2^14 - 1 */
2313 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* C and Z use UNROLL_M 2 with UNROLL_N 4. */
2315 #define SGEMM_DEFAULT_UNROLL_M 4
2316 #define SGEMM_DEFAULT_UNROLL_N 4
2317 #define DGEMM_DEFAULT_UNROLL_M 4
2318 #define DGEMM_DEFAULT_UNROLL_N 4
2319 #define CGEMM_DEFAULT_UNROLL_M 2
2320 #define CGEMM_DEFAULT_UNROLL_N 4
2321 #define ZGEMM_DEFAULT_UNROLL_M 2
2322 #define ZGEMM_DEFAULT_UNROLL_N 4
2324 #define SGEMM_DEFAULT_P 992
2325 #define DGEMM_DEFAULT_P 480
2326 #define CGEMM_DEFAULT_P 488
2327 #define ZGEMM_DEFAULT_P 248
/* Q: 504 for S/D, 400 for C/Z. */
2329 #define SGEMM_DEFAULT_Q 504
2330 #define DGEMM_DEFAULT_Q 504
2331 #define CGEMM_DEFAULT_Q 400
2332 #define ZGEMM_DEFAULT_Q 400
/* Tuning group: offsets (A=0, B=65536), alignment constant, unroll factors
 * that depend on __32BIT__, UL-suffixed P/Q, and two alternative R sets.
 * The directive selecting this group and the #else/#endif lines inside it
 * are not visible in this excerpt. */
2343 #define GEMM_DEFAULT_OFFSET_A 0
2344 #define GEMM_DEFAULT_OFFSET_B 65536
/* 0x0ffff == 2^16 - 1 */
2345 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* When __32BIT__ is defined a compile-time warning is emitted and the
 * smaller unroll set below is used. */
2346 #if defined(__32BIT__)
2347 #warning using BINARY32==POWER6
2348 #define SGEMM_DEFAULT_UNROLL_M 4
2349 #define SGEMM_DEFAULT_UNROLL_N 4
2350 #define DGEMM_DEFAULT_UNROLL_M 4
2351 #define DGEMM_DEFAULT_UNROLL_N 4
2352 #define CGEMM_DEFAULT_UNROLL_M 2
2353 #define CGEMM_DEFAULT_UNROLL_N 4
2354 #define ZGEMM_DEFAULT_UNROLL_M 2
2355 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Larger unroll set; presumably the #else branch of the __32BIT__ test
 * (directive not shown -- confirm). */
2357 #define SGEMM_DEFAULT_UNROLL_M 16
2358 #define SGEMM_DEFAULT_UNROLL_N 8
2359 #define DGEMM_DEFAULT_UNROLL_M 16
2360 #define DGEMM_DEFAULT_UNROLL_N 4
2361 #define CGEMM_DEFAULT_UNROLL_M 8
2362 #define CGEMM_DEFAULT_UNROLL_N 4
2363 #define ZGEMM_DEFAULT_UNROLL_M 8
2364 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q carry a UL suffix here, so they are unsigned long constants,
 * unlike the unsuffixed literals used by the other groups in this
 * excerpt. */
2366 #define SGEMM_DEFAULT_P 1280UL
2367 #define DGEMM_DEFAULT_P 640UL
2368 #define CGEMM_DEFAULT_P 640UL
2369 #define ZGEMM_DEFAULT_P 320UL
2371 #define SGEMM_DEFAULT_Q 640UL
2372 #define DGEMM_DEFAULT_Q 720UL
2373 #define CGEMM_DEFAULT_Q 640UL
2374 #define ZGEMM_DEFAULT_Q 640UL
/* Two alternative R sets: R aliasing P, then the literal 4096.  The
 * condition choosing between them is not visible here. */
2377 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2378 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2379 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2380 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2382 #define SGEMM_DEFAULT_R 4096
2383 #define DGEMM_DEFAULT_R 4096
2384 #define CGEMM_DEFAULT_R 4096
2385 #define ZGEMM_DEFAULT_R 4096
/* Tuning group compiled when POWER9 or POWER10 is defined: offsets,
 * alignment constant, UNROLL_M/N and literal P/Q/R for S/D/C/Z.  The
 * closing #endif is not visible in this excerpt. */
2391 #if defined(POWER9) || defined(POWER10)
2396 #define GEMM_DEFAULT_OFFSET_A 0
2397 #define GEMM_DEFAULT_OFFSET_B 65536
/* 0x0ffff == 2^16 - 1 */
2398 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2400 #define SGEMM_DEFAULT_UNROLL_M 16
2401 #define SGEMM_DEFAULT_UNROLL_N 8
2402 #define DGEMM_DEFAULT_UNROLL_M 16
2403 #define DGEMM_DEFAULT_UNROLL_N 4
2404 #define CGEMM_DEFAULT_UNROLL_M 8
2405 #define CGEMM_DEFAULT_UNROLL_N 4
2406 #define ZGEMM_DEFAULT_UNROLL_M 8
2407 #define ZGEMM_DEFAULT_UNROLL_N 2
2409 #define SGEMM_DEFAULT_P 832
2410 #define DGEMM_DEFAULT_P 128
2411 #define CGEMM_DEFAULT_P 512
2412 #define ZGEMM_DEFAULT_P 256
/* Q is 1026 for S/C/Z and 384 for D. */
2414 #define SGEMM_DEFAULT_Q 1026
2415 #define DGEMM_DEFAULT_Q 384
2416 #define CGEMM_DEFAULT_Q 1026
2417 #define ZGEMM_DEFAULT_Q 1026
/* R is 4096 for every prefix. */
2419 #define SGEMM_DEFAULT_R 4096
2420 #define DGEMM_DEFAULT_R 4096
2421 #define CGEMM_DEFAULT_R 4096
2422 #define ZGEMM_DEFAULT_R 4096
/* Overrides applied when POWER10 is defined.  Any earlier SBGEMM defaults
 * are dropped and replaced, and the DGEMM unroll factors are redefined to
 * 8x8.  The closing #endif is not visible in this excerpt. */
2428 #if defined(POWER10)
/* #undef first so the redefinitions below cannot clash with earlier
 * definitions of the same names. */
2429 #undef SBGEMM_DEFAULT_UNROLL_N
2430 #undef SBGEMM_DEFAULT_UNROLL_M
2431 #undef SBGEMM_DEFAULT_P
2432 #undef SBGEMM_DEFAULT_R
2433 #undef SBGEMM_DEFAULT_Q
2434 #define SBGEMM_DEFAULT_UNROLL_M 16
2435 #define SBGEMM_DEFAULT_UNROLL_N 8
2436 #define SBGEMM_DEFAULT_P 832
2437 #define SBGEMM_DEFAULT_Q 1026
2438 #define SBGEMM_DEFAULT_R 4096
/* Replace the DGEMM unroll factors with 8x8. */
2439 #undef DGEMM_DEFAULT_UNROLL_M
2440 #undef DGEMM_DEFAULT_UNROLL_N
2441 #define DGEMM_DEFAULT_UNROLL_M 8
2442 #define DGEMM_DEFAULT_UNROLL_N 8
/* Tuning group compiled when both SPARC and V7 are defined: offsets,
 * alignment constant, UNROLL_M/N, literal P/Q and a GEMM_THREAD selection.
 * No R values appear in this excerpt for this group, and the closing
 * #endif is not visible. */
2445 #if defined(SPARC) && defined(V7)
2450 #define GEMM_DEFAULT_OFFSET_A 0
2451 #define GEMM_DEFAULT_OFFSET_B 2048
/* 0x03fff == 2^14 - 1 */
2452 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* UNROLL_N is larger than UNROLL_M for every prefix (2x8 and 1x4). */
2454 #define SGEMM_DEFAULT_UNROLL_M 2
2455 #define SGEMM_DEFAULT_UNROLL_N 8
2456 #define DGEMM_DEFAULT_UNROLL_M 2
2457 #define DGEMM_DEFAULT_UNROLL_N 8
2458 #define CGEMM_DEFAULT_UNROLL_M 1
2459 #define CGEMM_DEFAULT_UNROLL_N 4
2460 #define ZGEMM_DEFAULT_UNROLL_M 1
2461 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 256 for every prefix. */
2463 #define SGEMM_DEFAULT_P 256
2464 #define DGEMM_DEFAULT_P 256
2465 #define CGEMM_DEFAULT_P 256
2466 #define ZGEMM_DEFAULT_P 256
2468 #define SGEMM_DEFAULT_Q 512
2469 #define DGEMM_DEFAULT_Q 256
2470 #define CGEMM_DEFAULT_Q 256
2471 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD maps to gemm_thread_mn, an identifier not defined in this
 * excerpt. */
2474 #define GEMM_THREAD gemm_thread_mn
/* Tuning group compiled when SPARC and V9 are both defined, or when the
 * compiler defines __sparc_v9__: offsets, alignment constant, UNROLL_M/N
 * and literal P/Q.  No R values appear in this excerpt for this group, and
 * the closing #endif is not visible. */
2477 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2482 #define GEMM_DEFAULT_OFFSET_A 0
2483 #define GEMM_DEFAULT_OFFSET_B 2048
/* 0x03fff == 2^14 - 1 */
2484 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2486 #define SGEMM_DEFAULT_UNROLL_M 4
2487 #define SGEMM_DEFAULT_UNROLL_N 4
2488 #define DGEMM_DEFAULT_UNROLL_M 4
2489 #define DGEMM_DEFAULT_UNROLL_N 4
2490 #define CGEMM_DEFAULT_UNROLL_M 2
2491 #define CGEMM_DEFAULT_UNROLL_N 2
2492 #define ZGEMM_DEFAULT_UNROLL_M 2
2493 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
2495 #define SGEMM_DEFAULT_P 512
2496 #define DGEMM_DEFAULT_P 512
2497 #define CGEMM_DEFAULT_P 512
2498 #define ZGEMM_DEFAULT_P 512
2500 #define SGEMM_DEFAULT_Q 1024
2501 #define DGEMM_DEFAULT_Q 512
2502 #define CGEMM_DEFAULT_Q 512
2503 #define ZGEMM_DEFAULT_Q 256
/* Tuning group: zero offsets, alignment constant, UNROLL_M/N and fully
 * literal P/Q/R for S/D/C/Z.  The selecting directive is not visible in
 * this excerpt. */
2513 #define GEMM_DEFAULT_OFFSET_A 0
2514 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
2515 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2517 #define SGEMM_DEFAULT_UNROLL_M 2
2518 #define SGEMM_DEFAULT_UNROLL_N 8
2519 #define DGEMM_DEFAULT_UNROLL_M 2
2520 #define DGEMM_DEFAULT_UNROLL_N 8
2521 #define CGEMM_DEFAULT_UNROLL_M 1
2522 #define CGEMM_DEFAULT_UNROLL_N 4
2523 #define ZGEMM_DEFAULT_UNROLL_M 1
2524 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P alternates 108 (S, C) and 112 (D, Z). */
2526 #define SGEMM_DEFAULT_P 108
2527 #define DGEMM_DEFAULT_P 112
2528 #define CGEMM_DEFAULT_P 108
2529 #define ZGEMM_DEFAULT_P 112
2531 #define SGEMM_DEFAULT_Q 288
2532 #define DGEMM_DEFAULT_Q 144
2533 #define CGEMM_DEFAULT_Q 144
2534 #define ZGEMM_DEFAULT_Q 72
/* R is 2000 for every prefix. */
2536 #define SGEMM_DEFAULT_R 2000
2537 #define DGEMM_DEFAULT_R 2000
2538 #define CGEMM_DEFAULT_R 2000
2539 #define ZGEMM_DEFAULT_R 2000
/* Tuning group: zero offsets, alignment constant, UNROLL_M/N, P/Q/R and
 * two additional GEMM_OFFSET_*1 constants.  The selecting directive is not
 * visible in this excerpt. */
2545 //// Copied from SICORTEX
2549 #define GEMM_DEFAULT_OFFSET_A 0
2550 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1 */
2551 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2553 #define SGEMM_DEFAULT_UNROLL_M 8
2554 #define SGEMM_DEFAULT_UNROLL_N 4
2556 #define DGEMM_DEFAULT_UNROLL_M 4
2557 #define DGEMM_DEFAULT_UNROLL_N 4
2559 #define CGEMM_DEFAULT_UNROLL_M 4
2560 #define CGEMM_DEFAULT_UNROLL_N 2
2562 #define ZGEMM_DEFAULT_UNROLL_M 2
2563 #define ZGEMM_DEFAULT_UNROLL_N 2
2565 #define SGEMM_DEFAULT_P 64
2566 #define DGEMM_DEFAULT_P 44
2567 #define CGEMM_DEFAULT_P 64
2568 #define ZGEMM_DEFAULT_P 32
2570 #define SGEMM_DEFAULT_Q 192
2571 #define DGEMM_DEFAULT_Q 92
2572 #define CGEMM_DEFAULT_Q 128
2573 #define ZGEMM_DEFAULT_Q 80
/* NOTE(review): DGEMM's R expands to the identifier dgemm_r (not defined in
 * this excerpt) while S/C/Z use the literal 640 -- confirm this asymmetry
 * is intentional. */
2575 #define SGEMM_DEFAULT_R 640
2576 #define DGEMM_DEFAULT_R dgemm_r
2577 #define CGEMM_DEFAULT_R 640
2578 #define ZGEMM_DEFAULT_R 640
/* 0x10000 == 2^16, 0x100000 == 2^20 */
2580 #define GEMM_OFFSET_A1 0x10000
2581 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM default parameter group with a uniform 2x2 unroll
 * (UNROLL_M x UNROLL_N) for SGEMM, DGEMM, CGEMM and ZGEMM.
 * P and Q are literal per-variant values; R is 512 for all four variants.
 * GEMM_OFFSET_A1 / GEMM_OFFSET_B1 are additional offsets of 0x10000 and
 * 0x100000; their consumers are not visible here.
 * NOTE(review): the target selecting this group is not named next to these
 * lines -- confirm which guard applies.
 */
2590 #define GEMM_DEFAULT_OFFSET_A 0
2591 #define GEMM_DEFAULT_OFFSET_B 0
2592 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2594 #define SGEMM_DEFAULT_UNROLL_M 2
2595 #define SGEMM_DEFAULT_UNROLL_N 2
2597 #define DGEMM_DEFAULT_UNROLL_M 2
2598 #define DGEMM_DEFAULT_UNROLL_N 2
2600 #define CGEMM_DEFAULT_UNROLL_M 2
2601 #define CGEMM_DEFAULT_UNROLL_N 2
2603 #define ZGEMM_DEFAULT_UNROLL_M 2
2604 #define ZGEMM_DEFAULT_UNROLL_N 2
2606 #define SGEMM_DEFAULT_P 64
2607 #define DGEMM_DEFAULT_P 24
2608 #define CGEMM_DEFAULT_P 24
2609 #define ZGEMM_DEFAULT_P 20
2611 #define SGEMM_DEFAULT_Q 192
2612 #define DGEMM_DEFAULT_Q 128
2613 #define CGEMM_DEFAULT_Q 128
2614 #define ZGEMM_DEFAULT_Q 64
2616 #define SGEMM_DEFAULT_R 512
2617 #define DGEMM_DEFAULT_R 512
2618 #define CGEMM_DEFAULT_R 512
2619 #define ZGEMM_DEFAULT_R 512
2621 #define GEMM_OFFSET_A1 0x10000
2622 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM defaults shared by the P5600, MIPS1004K, MIPS24K, I6400, P6600 and
 * I6500 targets.
 * Two complete sets of unroll factors follow the offset/alignment macros:
 * a larger one (SGEMM 8x8, DGEMM 8x4, CGEMM 8x4, ZGEMM 4x4) and then an
 * all-2x2 one. P, Q and R are given once and apply to both.
 * NOTE(review): no directive is visible between the two unroll sets; they
 * are presumably alternatives chosen by a further condition -- confirm,
 * since otherwise the second set redefines the first.
 */
2627 #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
2631 #define GEMM_DEFAULT_OFFSET_A 0
2632 #define GEMM_DEFAULT_OFFSET_B 0
2633 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* First unroll set (larger register blocking). */
2636 #define SGEMM_DEFAULT_UNROLL_M 8
2637 #define SGEMM_DEFAULT_UNROLL_N 8
2639 #define DGEMM_DEFAULT_UNROLL_M 8
2640 #define DGEMM_DEFAULT_UNROLL_N 4
2642 #define CGEMM_DEFAULT_UNROLL_M 8
2643 #define CGEMM_DEFAULT_UNROLL_N 4
2645 #define ZGEMM_DEFAULT_UNROLL_M 4
2646 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Second unroll set (2x2 for every variant). */
2648 #define SGEMM_DEFAULT_UNROLL_M 2
2649 #define SGEMM_DEFAULT_UNROLL_N 2
2651 #define DGEMM_DEFAULT_UNROLL_M 2
2652 #define DGEMM_DEFAULT_UNROLL_N 2
2654 #define CGEMM_DEFAULT_UNROLL_M 2
2655 #define CGEMM_DEFAULT_UNROLL_N 2
2657 #define ZGEMM_DEFAULT_UNROLL_M 2
2658 #define ZGEMM_DEFAULT_UNROLL_N 2
2661 #define SGEMM_DEFAULT_P 128
2662 #define DGEMM_DEFAULT_P 128
2663 #define CGEMM_DEFAULT_P 96
2664 #define ZGEMM_DEFAULT_P 64
2666 #define SGEMM_DEFAULT_Q 240
2667 #define DGEMM_DEFAULT_Q 120
2668 #define CGEMM_DEFAULT_Q 120
2669 #define ZGEMM_DEFAULT_Q 120
2671 #define SGEMM_DEFAULT_R 12288
2672 #define DGEMM_DEFAULT_R 8192
2673 #define CGEMM_DEFAULT_R 4096
2674 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM defaults used when RISCV64_GENERIC is defined.
 * Every variant uses a 2x2 unroll (UNROLL_M x UNROLL_N); P, Q and R are
 * literal per-variant values.
 * NOTE(review): P/Q/R are presumably the blocking sizes consumed by the
 * GEMM drivers -- confirm against the code that reads them.
 */
2679 #ifdef RISCV64_GENERIC
2680 #define GEMM_DEFAULT_OFFSET_A 0
2681 #define GEMM_DEFAULT_OFFSET_B 0
2682 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2684 #define SGEMM_DEFAULT_UNROLL_M 2
2685 #define SGEMM_DEFAULT_UNROLL_N 2
2687 #define DGEMM_DEFAULT_UNROLL_M 2
2688 #define DGEMM_DEFAULT_UNROLL_N 2
2690 #define CGEMM_DEFAULT_UNROLL_M 2
2691 #define CGEMM_DEFAULT_UNROLL_N 2
2693 #define ZGEMM_DEFAULT_UNROLL_M 2
2694 #define ZGEMM_DEFAULT_UNROLL_N 2
2696 #define SGEMM_DEFAULT_P 128
2697 #define DGEMM_DEFAULT_P 128
2698 #define CGEMM_DEFAULT_P 96
2699 #define ZGEMM_DEFAULT_P 64
2701 #define SGEMM_DEFAULT_Q 240
2702 #define DGEMM_DEFAULT_Q 120
2703 #define CGEMM_DEFAULT_Q 120
2704 #define ZGEMM_DEFAULT_Q 120
2706 #define SGEMM_DEFAULT_R 12288
2707 #define DGEMM_DEFAULT_R 8192
2708 #define CGEMM_DEFAULT_R 4096
2709 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with SGEMM 16x4, DGEMM 8x4 and CGEMM/ZGEMM
 * 2x2 unroll factors (UNROLL_M x UNROLL_N).
 * GEMM_DEFAULT_OFFSET_A/B are defined twice in a row with the same value (0).
 * NOTE(review): the first pair presumably belongs to a separately guarded
 * group whose guard is not visible next to these lines -- confirm.
 */
2713 #define GEMM_DEFAULT_OFFSET_A 0
2714 #define GEMM_DEFAULT_OFFSET_B 0
2719 #define GEMM_DEFAULT_OFFSET_A 0
2720 #define GEMM_DEFAULT_OFFSET_B 0
2721 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2723 #define SGEMM_DEFAULT_UNROLL_M 16
2724 #define SGEMM_DEFAULT_UNROLL_N 4
2726 #define DGEMM_DEFAULT_UNROLL_M 8
2727 #define DGEMM_DEFAULT_UNROLL_N 4
2729 #define CGEMM_DEFAULT_UNROLL_M 2
2730 #define CGEMM_DEFAULT_UNROLL_N 2
2732 #define ZGEMM_DEFAULT_UNROLL_M 2
2733 #define ZGEMM_DEFAULT_UNROLL_N 2
2735 #define SGEMM_DEFAULT_P 160
2736 #define DGEMM_DEFAULT_P 160
2737 #define CGEMM_DEFAULT_P 96
2738 #define ZGEMM_DEFAULT_P 64
2740 #define SGEMM_DEFAULT_Q 240
2741 #define DGEMM_DEFAULT_Q 128
2742 #define CGEMM_DEFAULT_Q 120
2743 #define ZGEMM_DEFAULT_Q 120
2745 #define SGEMM_DEFAULT_R 12288
2746 #define DGEMM_DEFAULT_R 8192
2747 #define CGEMM_DEFAULT_R 4096
2748 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with SGEMM/DGEMM 4x4 and CGEMM/ZGEMM 2x2
 * unroll factors (UNROLL_M x UNROLL_N).
 * GEMM_DEFAULT_OFFSET_A/B are defined twice in a row with the same value (0).
 * NOTE(review): the first pair presumably belongs to a separately guarded
 * group whose guard is not visible next to these lines -- confirm.
 */
2752 #define GEMM_DEFAULT_OFFSET_A 0
2753 #define GEMM_DEFAULT_OFFSET_B 0
2761 #define GEMM_DEFAULT_OFFSET_A 0
2762 #define GEMM_DEFAULT_OFFSET_B 0
2763 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2765 #define SGEMM_DEFAULT_UNROLL_M 4
2766 #define SGEMM_DEFAULT_UNROLL_N 4
2768 #define DGEMM_DEFAULT_UNROLL_M 4
2769 #define DGEMM_DEFAULT_UNROLL_N 4
2771 #define CGEMM_DEFAULT_UNROLL_M 2
2772 #define CGEMM_DEFAULT_UNROLL_N 2
2774 #define ZGEMM_DEFAULT_UNROLL_M 2
2775 #define ZGEMM_DEFAULT_UNROLL_N 2
2777 #define SGEMM_DEFAULT_P 128
2778 #define DGEMM_DEFAULT_P 128
2779 #define CGEMM_DEFAULT_P 96
2780 #define ZGEMM_DEFAULT_P 64
2782 #define SGEMM_DEFAULT_Q 240
2783 #define DGEMM_DEFAULT_Q 120
2784 #define CGEMM_DEFAULT_Q 120
2785 #define ZGEMM_DEFAULT_Q 120
2787 #define SGEMM_DEFAULT_R 12288
2788 #define DGEMM_DEFAULT_R 8192
2789 #define CGEMM_DEFAULT_R 4096
2790 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with SGEMM/DGEMM 4x2 and CGEMM/ZGEMM 2x2
 * unroll factors (UNROLL_M x UNROLL_N); P, Q and R are literal per-variant
 * values.
 * NOTE(review): the target selecting this group is not named next to these
 * lines -- confirm which guard applies.
 */
2802 #define GEMM_DEFAULT_OFFSET_A 0
2803 #define GEMM_DEFAULT_OFFSET_B 0
2804 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2806 #define SGEMM_DEFAULT_UNROLL_M 4
2807 #define SGEMM_DEFAULT_UNROLL_N 2
2809 #define DGEMM_DEFAULT_UNROLL_M 4
2810 #define DGEMM_DEFAULT_UNROLL_N 2
2812 #define CGEMM_DEFAULT_UNROLL_M 2
2813 #define CGEMM_DEFAULT_UNROLL_N 2
2815 #define ZGEMM_DEFAULT_UNROLL_M 2
2816 #define ZGEMM_DEFAULT_UNROLL_N 2
2818 #define SGEMM_DEFAULT_P 128
2819 #define DGEMM_DEFAULT_P 128
2820 #define CGEMM_DEFAULT_P 96
2821 #define ZGEMM_DEFAULT_P 64
2823 #define SGEMM_DEFAULT_Q 240
2824 #define DGEMM_DEFAULT_Q 120
2825 #define CGEMM_DEFAULT_Q 120
2826 #define ZGEMM_DEFAULT_Q 120
2828 #define SGEMM_DEFAULT_R 12288
2829 #define DGEMM_DEFAULT_R 8192
2830 #define CGEMM_DEFAULT_R 4096
2831 #define ZGEMM_DEFAULT_R 4096
2837 // Common ARMv8 parameters
/*
 * Offsets and alignment shared by every ARMv8 core; the per-core unroll
 * factors and P/Q/R values are defined separately in the core-specific
 * #if/#elif chain that follows.
 * GEMM_DEFAULT_ALIGN is 0x03fff (16 KiB - 1).
 */
2843 #define GEMM_DEFAULT_OFFSET_A 0
2844 #define GEMM_DEFAULT_OFFSET_B 0
2845 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * First branch of the per-core ARMv8 selection: CORTEXA57, CORTEXA72,
 * CORTEXA73, FALKOR, TSV110 and EMAG8180.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 16x4, DGEMM 8x4, CGEMM 8x4,
 * ZGEMM 4x4.
 * Two P/Q sets follow: a larger one under the NUM_CORES/TSV110/EMAG8180
 * test and a smaller one after it. R is common to both (ZGEMM uses 2048,
 * the other variants 4096).
 * NOTE(review): no directive is visible between the two P/Q sets; they are
 * presumably the two arms of the NUM_CORES conditional -- confirm, since
 * otherwise the second set redefines the first.
 */
2849 #if defined(CORTEXA57) || \
2850 defined(CORTEXA72) || defined(CORTEXA73) || \
2851 defined(FALKOR) || defined(TSV110) || defined(EMAG8180)
2853 #define SGEMM_DEFAULT_UNROLL_M 16
2854 #define SGEMM_DEFAULT_UNROLL_N 4
2856 #define DGEMM_DEFAULT_UNROLL_M 8
2857 #define DGEMM_DEFAULT_UNROLL_N 4
2859 #define CGEMM_DEFAULT_UNROLL_M 8
2860 #define CGEMM_DEFAULT_UNROLL_N 4
2862 #define ZGEMM_DEFAULT_UNROLL_M 4
2863 #define ZGEMM_DEFAULT_UNROLL_N 4
2865 /*FIXME: this should be using the cache size, but there is currently no easy way to
2866 query that on ARM. So if getarch counted more than 8 cores we simply assume the host
2867 is a big desktop or server with abundant cache rather than a phone or embedded device */
2868 #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180)
/* Larger P/Q set for hosts assumed to have abundant cache (see FIXME above). */
2869 #define SGEMM_DEFAULT_P 512
2870 #define DGEMM_DEFAULT_P 256
2871 #define CGEMM_DEFAULT_P 256
2872 #define ZGEMM_DEFAULT_P 128
2874 #define SGEMM_DEFAULT_Q 1024
2875 #define DGEMM_DEFAULT_Q 512
2876 #define CGEMM_DEFAULT_Q 512
2877 #define ZGEMM_DEFAULT_Q 512
/* Smaller P/Q set. */
2879 #define SGEMM_DEFAULT_P 128
2880 #define DGEMM_DEFAULT_P 160
2881 #define CGEMM_DEFAULT_P 128
2882 #define ZGEMM_DEFAULT_P 128
2884 #define SGEMM_DEFAULT_Q 352
2885 #define DGEMM_DEFAULT_Q 128
2886 #define CGEMM_DEFAULT_Q 224
2887 #define ZGEMM_DEFAULT_Q 112
2890 #define SGEMM_DEFAULT_R 4096
2891 #define DGEMM_DEFAULT_R 4096
2892 #define CGEMM_DEFAULT_R 4096
2893 #define ZGEMM_DEFAULT_R 2048
2895 #elif defined(CORTEXA53)
/*
 * CORTEXA53 branch.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 8x8, DGEMM 8x4, CGEMM 8x4,
 * ZGEMM 4x4. SGEMM uses P = Q = 256; ZGEMM_DEFAULT_R is 2048 while the
 * other variants use 4096.
 */
2897 #define SGEMM_DEFAULT_UNROLL_M 8
2898 #define SGEMM_DEFAULT_UNROLL_N 8
2900 #define DGEMM_DEFAULT_UNROLL_M 8
2901 #define DGEMM_DEFAULT_UNROLL_N 4
2903 #define CGEMM_DEFAULT_UNROLL_M 8
2904 #define CGEMM_DEFAULT_UNROLL_N 4
2906 #define ZGEMM_DEFAULT_UNROLL_M 4
2907 #define ZGEMM_DEFAULT_UNROLL_N 4
2909 #define SGEMM_DEFAULT_P 256
2910 #define DGEMM_DEFAULT_P 160
2911 #define CGEMM_DEFAULT_P 128
2912 #define ZGEMM_DEFAULT_P 128
2914 #define SGEMM_DEFAULT_Q 256
2915 #define DGEMM_DEFAULT_Q 128
2916 #define CGEMM_DEFAULT_Q 224
2917 #define ZGEMM_DEFAULT_Q 112
2919 #define SGEMM_DEFAULT_R 4096
2920 #define DGEMM_DEFAULT_R 4096
2921 #define CGEMM_DEFAULT_R 4096
2922 #define ZGEMM_DEFAULT_R 2048
2924 #elif defined(THUNDERX)
/*
 * THUNDERX branch.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 4x4, and 2x2 for DGEMM,
 * CGEMM and ZGEMM. P, Q and R are literal per-variant values.
 */
2926 #define SGEMM_DEFAULT_UNROLL_M 4
2927 #define SGEMM_DEFAULT_UNROLL_N 4
2929 #define DGEMM_DEFAULT_UNROLL_M 2
2930 #define DGEMM_DEFAULT_UNROLL_N 2
2932 #define CGEMM_DEFAULT_UNROLL_M 2
2933 #define CGEMM_DEFAULT_UNROLL_N 2
2935 #define ZGEMM_DEFAULT_UNROLL_M 2
2936 #define ZGEMM_DEFAULT_UNROLL_N 2
2938 #define SGEMM_DEFAULT_P 128
2939 #define DGEMM_DEFAULT_P 128
2940 #define CGEMM_DEFAULT_P 96
2941 #define ZGEMM_DEFAULT_P 64
2943 #define SGEMM_DEFAULT_Q 240
2944 #define DGEMM_DEFAULT_Q 120
2945 #define CGEMM_DEFAULT_Q 120
2946 #define ZGEMM_DEFAULT_Q 120
2948 #define SGEMM_DEFAULT_R 12288
2949 #define DGEMM_DEFAULT_R 8192
2950 #define CGEMM_DEFAULT_R 4096
2951 #define ZGEMM_DEFAULT_R 4096
2953 #elif defined(THUNDERX2T99)
/*
 * THUNDERX2T99 branch.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 16x4, DGEMM 8x4, CGEMM 8x4,
 * ZGEMM 4x4. R is 4096 for all four variants.
 */
2955 #define SGEMM_DEFAULT_UNROLL_M 16
2956 #define SGEMM_DEFAULT_UNROLL_N 4
2958 #define DGEMM_DEFAULT_UNROLL_M 8
2959 #define DGEMM_DEFAULT_UNROLL_N 4
2961 #define CGEMM_DEFAULT_UNROLL_M 8
2962 #define CGEMM_DEFAULT_UNROLL_N 4
2964 #define ZGEMM_DEFAULT_UNROLL_M 4
2965 #define ZGEMM_DEFAULT_UNROLL_N 4
2967 #define SGEMM_DEFAULT_P 128
2968 #define DGEMM_DEFAULT_P 160
2969 #define CGEMM_DEFAULT_P 128
2970 #define ZGEMM_DEFAULT_P 128
2972 #define SGEMM_DEFAULT_Q 352
2973 #define DGEMM_DEFAULT_Q 128
2974 #define CGEMM_DEFAULT_Q 224
2975 #define ZGEMM_DEFAULT_Q 112
2977 #define SGEMM_DEFAULT_R 4096
2978 #define DGEMM_DEFAULT_R 4096
2979 #define CGEMM_DEFAULT_R 4096
2980 #define ZGEMM_DEFAULT_R 4096
2982 #elif defined(THUNDERX3T110)
/*
 * THUNDERX3T110 branch.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 16x4, DGEMM 8x4, CGEMM 8x4,
 * ZGEMM 4x4. The values match the THUNDERX2T99 branch except for
 * DGEMM_DEFAULT_P, which is 320 here instead of 160.
 */
2984 #define SGEMM_DEFAULT_UNROLL_M 16
2985 #define SGEMM_DEFAULT_UNROLL_N 4
2987 #define DGEMM_DEFAULT_UNROLL_M 8
2988 #define DGEMM_DEFAULT_UNROLL_N 4
2990 #define CGEMM_DEFAULT_UNROLL_M 8
2991 #define CGEMM_DEFAULT_UNROLL_N 4
2993 #define ZGEMM_DEFAULT_UNROLL_M 4
2994 #define ZGEMM_DEFAULT_UNROLL_N 4
2996 #define SGEMM_DEFAULT_P 128
2997 #define DGEMM_DEFAULT_P 320
2998 #define CGEMM_DEFAULT_P 128
2999 #define ZGEMM_DEFAULT_P 128
3001 #define SGEMM_DEFAULT_Q 352
3002 #define DGEMM_DEFAULT_Q 128
3003 #define CGEMM_DEFAULT_Q 224
3004 #define ZGEMM_DEFAULT_Q 112
3006 #define SGEMM_DEFAULT_R 4096
3007 #define DGEMM_DEFAULT_R 4096
3008 #define CGEMM_DEFAULT_R 4096
3009 #define ZGEMM_DEFAULT_R 4096
3011 #elif defined(NEOVERSEN1)
/*
 * NEOVERSEN1 branch.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 16x4, DGEMM 8x4, CGEMM 8x4,
 * ZGEMM 4x4. All values are the same as in the THUNDERX2T99 branch.
 */
3013 #define SGEMM_DEFAULT_UNROLL_M 16
3014 #define SGEMM_DEFAULT_UNROLL_N 4
3016 #define DGEMM_DEFAULT_UNROLL_M 8
3017 #define DGEMM_DEFAULT_UNROLL_N 4
3019 #define CGEMM_DEFAULT_UNROLL_M 8
3020 #define CGEMM_DEFAULT_UNROLL_N 4
3022 #define ZGEMM_DEFAULT_UNROLL_M 4
3023 #define ZGEMM_DEFAULT_UNROLL_N 4
3025 #define SGEMM_DEFAULT_P 128
3026 #define DGEMM_DEFAULT_P 160
3027 #define CGEMM_DEFAULT_P 128
3028 #define ZGEMM_DEFAULT_P 128
3030 #define SGEMM_DEFAULT_Q 352
3031 #define DGEMM_DEFAULT_Q 128
3032 #define CGEMM_DEFAULT_Q 224
3033 #define ZGEMM_DEFAULT_Q 112
3035 #define SGEMM_DEFAULT_R 4096
3036 #define DGEMM_DEFAULT_R 4096
3037 #define CGEMM_DEFAULT_R 4096
3038 #define ZGEMM_DEFAULT_R 4096
3040 #else // Other/undetected ARMv8 cores
/*
 * Fallback for ARMv8 cores matched by none of the branches above.
 * Unroll factors (UNROLL_M x UNROLL_N): SGEMM 16x4, DGEMM 8x4, CGEMM 8x4,
 * ZGEMM 4x4. P, Q and R carry the same values as the THUNDERX2T99 and
 * NEOVERSEN1 branches.
 */
3042 #define SGEMM_DEFAULT_UNROLL_M 16
3043 #define SGEMM_DEFAULT_UNROLL_N 4
3045 #define DGEMM_DEFAULT_UNROLL_M 8
3046 #define DGEMM_DEFAULT_UNROLL_N 4
3048 #define CGEMM_DEFAULT_UNROLL_M 8
3049 #define CGEMM_DEFAULT_UNROLL_N 4
3051 #define ZGEMM_DEFAULT_UNROLL_M 4
3052 #define ZGEMM_DEFAULT_UNROLL_N 4
3054 #define SGEMM_DEFAULT_P 128
3055 #define DGEMM_DEFAULT_P 160
3056 #define CGEMM_DEFAULT_P 128
3057 #define ZGEMM_DEFAULT_P 128
3059 #define SGEMM_DEFAULT_Q 352
3060 #define DGEMM_DEFAULT_Q 128
3061 #define CGEMM_DEFAULT_Q 224
3062 #define ZGEMM_DEFAULT_Q 112
3064 #define SGEMM_DEFAULT_R 4096
3065 #define DGEMM_DEFAULT_R 4096
3066 #define CGEMM_DEFAULT_R 4096
3067 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with a uniform 2x2 unroll
 * (UNROLL_M x UNROLL_N) for SGEMM, DGEMM, CGEMM and ZGEMM; P, Q and R are
 * literal per-variant values.
 * NOTE(review): the target selecting this group is not named next to these
 * lines -- confirm which guard applies.
 */
3077 #define GEMM_DEFAULT_OFFSET_A 0
3078 #define GEMM_DEFAULT_OFFSET_B 0
3079 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3081 #define SGEMM_DEFAULT_UNROLL_M 2
3082 #define SGEMM_DEFAULT_UNROLL_N 2
3084 #define DGEMM_DEFAULT_UNROLL_M 2
3085 #define DGEMM_DEFAULT_UNROLL_N 2
3087 #define CGEMM_DEFAULT_UNROLL_M 2
3088 #define CGEMM_DEFAULT_UNROLL_N 2
3090 #define ZGEMM_DEFAULT_UNROLL_M 2
3091 #define ZGEMM_DEFAULT_UNROLL_N 2
3093 #define SGEMM_DEFAULT_P 128
3094 #define DGEMM_DEFAULT_P 128
3095 #define CGEMM_DEFAULT_P 96
3096 #define ZGEMM_DEFAULT_P 64
3098 #define SGEMM_DEFAULT_Q 240
3099 #define DGEMM_DEFAULT_Q 120
3100 #define CGEMM_DEFAULT_Q 120
3101 #define ZGEMM_DEFAULT_Q 120
3103 #define SGEMM_DEFAULT_R 12288
3104 #define DGEMM_DEFAULT_R 8192
3105 #define CGEMM_DEFAULT_R 4096
3106 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with SGEMM/DGEMM 4x4 and CGEMM/ZGEMM 2x2
 * unroll factors (UNROLL_M x UNROLL_N); P, Q and R are literal per-variant
 * values.
 * NOTE(review): the target selecting this group is not named next to these
 * lines -- confirm which guard applies.
 */
3118 #define GEMM_DEFAULT_OFFSET_A 0
3119 #define GEMM_DEFAULT_OFFSET_B 0
3120 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3122 #define SGEMM_DEFAULT_UNROLL_M 4
3123 #define SGEMM_DEFAULT_UNROLL_N 4
3125 #define DGEMM_DEFAULT_UNROLL_M 4
3126 #define DGEMM_DEFAULT_UNROLL_N 4
3128 #define CGEMM_DEFAULT_UNROLL_M 2
3129 #define CGEMM_DEFAULT_UNROLL_N 2
3131 #define ZGEMM_DEFAULT_UNROLL_M 2
3132 #define ZGEMM_DEFAULT_UNROLL_N 2
3134 #define SGEMM_DEFAULT_P 128
3135 #define DGEMM_DEFAULT_P 128
3136 #define CGEMM_DEFAULT_P 96
3137 #define ZGEMM_DEFAULT_P 64
3139 #define SGEMM_DEFAULT_Q 240
3140 #define DGEMM_DEFAULT_Q 120
3141 #define CGEMM_DEFAULT_Q 120
3142 #define ZGEMM_DEFAULT_Q 120
3144 #define SGEMM_DEFAULT_R 12288
3145 #define DGEMM_DEFAULT_R 8192
3146 #define CGEMM_DEFAULT_R 4096
3147 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with SGEMM/DGEMM 4x4 and CGEMM/ZGEMM 2x2
 * unroll factors (UNROLL_M x UNROLL_N); P, Q and R are literal per-variant
 * values.
 * NOTE(review): the values repeat those of the group defined just before;
 * the target selecting each is not named next to these lines -- confirm
 * which guard applies.
 */
3159 #define GEMM_DEFAULT_OFFSET_A 0
3160 #define GEMM_DEFAULT_OFFSET_B 0
3161 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3163 #define SGEMM_DEFAULT_UNROLL_M 4
3164 #define SGEMM_DEFAULT_UNROLL_N 4
3166 #define DGEMM_DEFAULT_UNROLL_M 4
3167 #define DGEMM_DEFAULT_UNROLL_N 4
3169 #define CGEMM_DEFAULT_UNROLL_M 2
3170 #define CGEMM_DEFAULT_UNROLL_N 2
3172 #define ZGEMM_DEFAULT_UNROLL_M 2
3173 #define ZGEMM_DEFAULT_UNROLL_N 2
3175 #define SGEMM_DEFAULT_P 128
3176 #define DGEMM_DEFAULT_P 128
3177 #define CGEMM_DEFAULT_P 96
3178 #define ZGEMM_DEFAULT_P 64
3180 #define SGEMM_DEFAULT_Q 240
3181 #define DGEMM_DEFAULT_Q 120
3182 #define CGEMM_DEFAULT_Q 120
3183 #define ZGEMM_DEFAULT_Q 120
3185 #define SGEMM_DEFAULT_R 12288
3186 #define DGEMM_DEFAULT_R 8192
3187 #define CGEMM_DEFAULT_R 4096
3188 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM defaults used when ZARCH_GENERIC is defined.
 * Every variant uses a 2x2 unroll (UNROLL_M x UNROLL_N); P, Q and R are
 * literal per-variant values.
 * NOTE(review): P/Q/R are presumably the blocking sizes consumed by the
 * GEMM drivers -- confirm against the code that reads them.
 */
3196 #if defined(ZARCH_GENERIC)
3200 #define GEMM_DEFAULT_OFFSET_A 0
3201 #define GEMM_DEFAULT_OFFSET_B 0
3202 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3204 #define SGEMM_DEFAULT_UNROLL_M 2
3205 #define SGEMM_DEFAULT_UNROLL_N 2
3207 #define DGEMM_DEFAULT_UNROLL_M 2
3208 #define DGEMM_DEFAULT_UNROLL_N 2
3210 #define CGEMM_DEFAULT_UNROLL_M 2
3211 #define CGEMM_DEFAULT_UNROLL_N 2
3213 #define ZGEMM_DEFAULT_UNROLL_M 2
3214 #define ZGEMM_DEFAULT_UNROLL_N 2
3216 #define SGEMM_DEFAULT_P 128
3217 #define DGEMM_DEFAULT_P 128
3218 #define CGEMM_DEFAULT_P 96
3219 #define ZGEMM_DEFAULT_P 64
3221 #define SGEMM_DEFAULT_Q 240
3222 #define DGEMM_DEFAULT_Q 120
3223 #define CGEMM_DEFAULT_Q 120
3224 #define ZGEMM_DEFAULT_Q 120
3226 #define SGEMM_DEFAULT_R 12288
3227 #define DGEMM_DEFAULT_R 8192
3228 #define CGEMM_DEFAULT_R 4096
3229 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter group with SGEMM/DGEMM 8x4 and CGEMM/ZGEMM 4x4
 * unroll factors (UNROLL_M x UNROLL_N).
 * P is 456/320/480/224 and Q is 488/384/128/352 for S/D/C/Z; R is
 * 8192/4096/4096/2048.
 * NOTE(review): the target selecting this group is not named next to these
 * lines -- confirm which guard applies.
 */
3239 #define GEMM_DEFAULT_OFFSET_A 0
3240 #define GEMM_DEFAULT_OFFSET_B 0
3241 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3243 #define SGEMM_DEFAULT_UNROLL_M 8
3244 #define SGEMM_DEFAULT_UNROLL_N 4
3246 #define DGEMM_DEFAULT_UNROLL_M 8
3247 #define DGEMM_DEFAULT_UNROLL_N 4
3249 #define CGEMM_DEFAULT_UNROLL_M 4
3250 #define CGEMM_DEFAULT_UNROLL_N 4
3252 #define ZGEMM_DEFAULT_UNROLL_M 4
3253 #define ZGEMM_DEFAULT_UNROLL_N 4
3255 #define SGEMM_DEFAULT_P 456
3256 #define DGEMM_DEFAULT_P 320
3257 #define CGEMM_DEFAULT_P 480
3258 #define ZGEMM_DEFAULT_P 224
3260 #define SGEMM_DEFAULT_Q 488
3261 #define DGEMM_DEFAULT_Q 384
3262 #define CGEMM_DEFAULT_Q 128
3263 #define ZGEMM_DEFAULT_Q 352
3265 #define SGEMM_DEFAULT_R 8192
3266 #define DGEMM_DEFAULT_R 4096
3267 #define CGEMM_DEFAULT_R 4096
3268 #define ZGEMM_DEFAULT_R 2048
/*
 * GEMM default parameter group with SGEMM 16x4, DGEMM 8x4 and CGEMM/ZGEMM
 * 4x4 unroll factors (UNROLL_M x UNROLL_N).
 * P is 480/320/480/224 and Q is 512/384/128/352 for S/D/C/Z; R is
 * 8192/4096/4096/2048.
 * NOTE(review): the target selecting this group is not named next to these
 * lines -- confirm which guard applies.
 */
3279 #define GEMM_DEFAULT_OFFSET_A 0
3280 #define GEMM_DEFAULT_OFFSET_B 0
3281 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3283 #define SGEMM_DEFAULT_UNROLL_M 16
3284 #define SGEMM_DEFAULT_UNROLL_N 4
3286 #define DGEMM_DEFAULT_UNROLL_M 8
3287 #define DGEMM_DEFAULT_UNROLL_N 4
3289 #define CGEMM_DEFAULT_UNROLL_M 4
3290 #define CGEMM_DEFAULT_UNROLL_N 4
3292 #define ZGEMM_DEFAULT_UNROLL_M 4
3293 #define ZGEMM_DEFAULT_UNROLL_N 4
3295 #define SGEMM_DEFAULT_P 480
3296 #define DGEMM_DEFAULT_P 320
3297 #define CGEMM_DEFAULT_P 480
3298 #define ZGEMM_DEFAULT_P 224
3300 #define SGEMM_DEFAULT_Q 512
3301 #define DGEMM_DEFAULT_Q 384
3302 #define CGEMM_DEFAULT_Q 128
3303 #define ZGEMM_DEFAULT_Q 352
3305 #define SGEMM_DEFAULT_R 8192
3306 #define DGEMM_DEFAULT_R 4096
3307 #define CGEMM_DEFAULT_R 4096
3308 #define ZGEMM_DEFAULT_R 2048
/*
 * GEMM default parameter group that also covers the QGEMM and XGEMM
 * variants.
 * GEMM_DEFAULT_ALIGN is 0x0ffff (64 KiB - 1) here, unlike the 0x03fff used
 * by the other groups.
 * UNROLL_N is 2 for every variant except XGEMM (1). UNROLL_M is listed
 * twice with identical values (2, and 1 for XGEMM).
 * P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than
 * literals; Q is the literal 128 for every variant.
 * NOTE(review): the *_p / *_r identifiers are presumably variables defined
 * elsewhere and filled in at run time, and the two UNROLL_M sets are
 * presumably arms of a conditional not visible next to these lines --
 * confirm both.
 */
3321 #define GEMM_DEFAULT_OFFSET_A 0
3322 #define GEMM_DEFAULT_OFFSET_B 0
3323 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
3325 #define SGEMM_DEFAULT_UNROLL_N 2
3326 #define DGEMM_DEFAULT_UNROLL_N 2
3327 #define QGEMM_DEFAULT_UNROLL_N 2
3328 #define CGEMM_DEFAULT_UNROLL_N 2
3329 #define ZGEMM_DEFAULT_UNROLL_N 2
3330 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M set. */
3333 #define SGEMM_DEFAULT_UNROLL_M 2
3334 #define DGEMM_DEFAULT_UNROLL_M 2
3335 #define QGEMM_DEFAULT_UNROLL_M 2
3336 #define CGEMM_DEFAULT_UNROLL_M 2
3337 #define ZGEMM_DEFAULT_UNROLL_M 2
3338 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set (same values as the first). */
3340 #define SGEMM_DEFAULT_UNROLL_M 2
3341 #define DGEMM_DEFAULT_UNROLL_M 2
3342 #define QGEMM_DEFAULT_UNROLL_M 2
3343 #define CGEMM_DEFAULT_UNROLL_M 2
3344 #define ZGEMM_DEFAULT_UNROLL_M 2
3345 #define XGEMM_DEFAULT_UNROLL_M 1
3348 #define SGEMM_DEFAULT_P sgemm_p
3349 #define DGEMM_DEFAULT_P dgemm_p
3350 #define QGEMM_DEFAULT_P qgemm_p
3351 #define CGEMM_DEFAULT_P cgemm_p
3352 #define ZGEMM_DEFAULT_P zgemm_p
3353 #define XGEMM_DEFAULT_P xgemm_p
3355 #define SGEMM_DEFAULT_R sgemm_r
3356 #define DGEMM_DEFAULT_R dgemm_r
3357 #define QGEMM_DEFAULT_R qgemm_r
3358 #define CGEMM_DEFAULT_R cgemm_r
3359 #define ZGEMM_DEFAULT_R zgemm_r
3360 #define XGEMM_DEFAULT_R xgemm_r
3362 #define SGEMM_DEFAULT_Q 128
3363 #define DGEMM_DEFAULT_Q 128
3364 #define QGEMM_DEFAULT_Q 128
3365 #define CGEMM_DEFAULT_Q 128
3366 #define ZGEMM_DEFAULT_Q 128
3367 #define XGEMM_DEFAULT_Q 128
/*
 * Fallback unroll factors for the QGEMM and XGEMM variants: each macro is
 * given the value 2 only when no earlier parameter group has already
 * defined it.
 */
3373 #ifndef QGEMM_DEFAULT_UNROLL_M
3374 #define QGEMM_DEFAULT_UNROLL_M 2
3377 #ifndef QGEMM_DEFAULT_UNROLL_N
3378 #define QGEMM_DEFAULT_UNROLL_N 2
3381 #ifndef XGEMM_DEFAULT_UNROLL_M
3382 #define XGEMM_DEFAULT_UNROLL_M 2
3385 #ifndef XGEMM_DEFAULT_UNROLL_N
3386 #define XGEMM_DEFAULT_UNROLL_N 2
/*
 * Shuffle-instruction shorthands for assembly sources. Each macro expands
 * to an x86 SSE mnemonic, its immediate and a trailing comma, so the use
 * site supplies only the remaining operands.
 *
 * SHUFPD_0..SHUFPD_3 are given twice. The first set uses shufps with
 * immediates 0x44, 0x4e, 0xe4 and 0xee, which move the same 64-bit halves
 * as shufpd with immediates 0, 1, 2 and 3; the second set uses shufpd
 * directly.
 * NOTE(review): the two sets are presumably alternatives chosen by a
 * conditional not visible next to these lines -- confirm.
 */
3390 #define SHUFPD_0 shufps $0x44,
3391 #define SHUFPD_1 shufps $0x4e,
3392 #define SHUFPD_2 shufps $0xe4,
3393 #define SHUFPD_3 shufps $0xee,
/* Direct shufpd forms of the same four shuffles. */
3397 #define SHUFPD_0 shufpd $0,
3401 #define SHUFPD_1 shufpd $1,
3405 #define SHUFPD_2 shufpd $2,
3409 #define SHUFPD_3 shufpd $3,
/* shufps with immediate 0x39. */
3413 #define SHUFPS_39 shufps $0x39,