1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following disclaimer. */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/* Default SBGEMM tuning constants: unroll factors for N, M and MN, followed
 * by the P, R and Q values (all 256).
 * NOTE(review): no preprocessor guard for this group is visible in this
 * view -- confirm the enclosing condition against the full file. */
76 #define SBGEMM_DEFAULT_UNROLL_N 4
77 #define SBGEMM_DEFAULT_UNROLL_M 8
78 #define SBGEMM_DEFAULT_UNROLL_MN 32
79 #define SBGEMM_DEFAULT_P 256
80 #define SBGEMM_DEFAULT_R 256
81 #define SBGEMM_DEFAULT_Q 256
/* Tuning group: buffer offsets (A = 64, B = 256) and alignment mask 0x01ffff,
 * followed by per-prefix (S/D/Q/C/Z/X) UNROLL_N, UNROLL_M, P, R and Q values.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. UNROLL_M and Q are each defined twice below with different values;
 * presumably the two sets sit in alternative conditional branches whose
 * directives are not visible here -- confirm against the full file. */
87 #define GEMM_DEFAULT_OFFSET_A 64
88 #define GEMM_DEFAULT_OFFSET_B 256
89 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
/* UNROLL_N values (defined once in this group). */
91 #define SGEMM_DEFAULT_UNROLL_N 4
92 #define DGEMM_DEFAULT_UNROLL_N 4
93 #define QGEMM_DEFAULT_UNROLL_N 2
94 #define CGEMM_DEFAULT_UNROLL_N 2
95 #define ZGEMM_DEFAULT_UNROLL_N 2
96 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M set. */
99 #define SGEMM_DEFAULT_UNROLL_M 4
100 #define DGEMM_DEFAULT_UNROLL_M 2
101 #define QGEMM_DEFAULT_UNROLL_M 2
102 #define CGEMM_DEFAULT_UNROLL_M 2
103 #define ZGEMM_DEFAULT_UNROLL_M 1
104 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set (S, D, C and Z values are doubled; Q and X unchanged). */
106 #define SGEMM_DEFAULT_UNROLL_M 8
107 #define DGEMM_DEFAULT_UNROLL_M 4
108 #define QGEMM_DEFAULT_UNROLL_M 2
109 #define CGEMM_DEFAULT_UNROLL_M 4
110 #define ZGEMM_DEFAULT_UNROLL_M 2
111 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to the identifiers sgemm_p ... xgemm_r rather than to
 * literals; those identifiers are not defined anywhere in this view. */
114 #define SGEMM_DEFAULT_P sgemm_p
115 #define DGEMM_DEFAULT_P dgemm_p
116 #define QGEMM_DEFAULT_P qgemm_p
117 #define CGEMM_DEFAULT_P cgemm_p
118 #define ZGEMM_DEFAULT_P zgemm_p
119 #define XGEMM_DEFAULT_P xgemm_p
121 #define SGEMM_DEFAULT_R sgemm_r
122 #define DGEMM_DEFAULT_R dgemm_r
123 #define QGEMM_DEFAULT_R qgemm_r
124 #define CGEMM_DEFAULT_R cgemm_r
125 #define ZGEMM_DEFAULT_R zgemm_r
126 #define XGEMM_DEFAULT_R xgemm_r
/* First Q set: 248 for every prefix. */
130 #define SGEMM_DEFAULT_Q 248
131 #define DGEMM_DEFAULT_Q 248
132 #define QGEMM_DEFAULT_Q 248
133 #define CGEMM_DEFAULT_Q 248
134 #define ZGEMM_DEFAULT_Q 248
135 #define XGEMM_DEFAULT_Q 248
/* Second Q set: 240 for every prefix. */
139 #define SGEMM_DEFAULT_Q 240
140 #define DGEMM_DEFAULT_Q 240
141 #define QGEMM_DEFAULT_Q 240
142 #define CGEMM_DEFAULT_Q 240
143 #define ZGEMM_DEFAULT_Q 240
144 #define XGEMM_DEFAULT_Q 240
/* Flag macro with an empty expansion; it is only defined, not used, in this view. */
150 #define HAVE_EXCLUSIVE_CACHE
/* Tuning group that follows the BARCELONA / SHANGHAI / BOBCAT guard.
 * NOTE(review): the matching #endif and any nested #if/#else directives are
 * not visible in this view; UNROLL_M, P and Q each appear twice below,
 * presumably in alternative branches -- confirm against the full file. */
154 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
159 #define GEMM_DEFAULT_OFFSET_A 64
160 #define GEMM_DEFAULT_OFFSET_B 832
161 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
163 #define SGEMM_DEFAULT_UNROLL_N 4
164 #define DGEMM_DEFAULT_UNROLL_N 4
165 #define QGEMM_DEFAULT_UNROLL_N 2
166 #define CGEMM_DEFAULT_UNROLL_N 2
167 #define ZGEMM_DEFAULT_UNROLL_N 2
168 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M set. */
171 #define SGEMM_DEFAULT_UNROLL_M 4
172 #define DGEMM_DEFAULT_UNROLL_M 2
173 #define QGEMM_DEFAULT_UNROLL_M 2
174 #define CGEMM_DEFAULT_UNROLL_M 2
175 #define ZGEMM_DEFAULT_UNROLL_M 1
176 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set. */
178 #define SGEMM_DEFAULT_UNROLL_M 8
179 #define DGEMM_DEFAULT_UNROLL_M 4
180 #define QGEMM_DEFAULT_UNROLL_M 2
181 #define CGEMM_DEFAULT_UNROLL_M 4
182 #define ZGEMM_DEFAULT_UNROLL_M 2
183 #define XGEMM_DEFAULT_UNROLL_M 1
/* First P/Q pairing: literal P values from 496 down to 62, Q = 248 throughout. */
187 #define SGEMM_DEFAULT_P 496
188 #define DGEMM_DEFAULT_P 248
189 #define QGEMM_DEFAULT_P 124
190 #define CGEMM_DEFAULT_P 248
191 #define ZGEMM_DEFAULT_P 124
192 #define XGEMM_DEFAULT_P 62
194 #define SGEMM_DEFAULT_Q 248
195 #define DGEMM_DEFAULT_Q 248
196 #define QGEMM_DEFAULT_Q 248
197 #define CGEMM_DEFAULT_Q 248
198 #define ZGEMM_DEFAULT_Q 248
199 #define XGEMM_DEFAULT_Q 248
/* Second P/Q pairing: literal P values from 448 down to 56, Q = 224 throughout. */
203 #define SGEMM_DEFAULT_P 448
204 #define DGEMM_DEFAULT_P 224
205 #define QGEMM_DEFAULT_P 112
206 #define CGEMM_DEFAULT_P 224
207 #define ZGEMM_DEFAULT_P 112
208 #define XGEMM_DEFAULT_P 56
210 #define SGEMM_DEFAULT_Q 224
211 #define DGEMM_DEFAULT_Q 224
212 #define QGEMM_DEFAULT_Q 224
213 #define CGEMM_DEFAULT_Q 224
214 #define ZGEMM_DEFAULT_Q 224
215 #define XGEMM_DEFAULT_Q 224
/* R expands to the identifiers sgemm_r ... xgemm_r (not defined in this view). */
219 #define SGEMM_DEFAULT_R sgemm_r
220 #define QGEMM_DEFAULT_R qgemm_r
221 #define DGEMM_DEFAULT_R dgemm_r
222 #define CGEMM_DEFAULT_R cgemm_r
223 #define ZGEMM_DEFAULT_R zgemm_r
224 #define XGEMM_DEFAULT_R xgemm_r
227 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (declared elsewhere). */
229 #define GEMM_THREAD gemm_thread_mn
/* Tuning group: offsets A = 64 / B = 832, alignment mask 0x0fff.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. SGEMM/DGEMM UNROLL_N and all UNROLL_M macros appear twice, and the
 * #else/#endif lines belonging to the two ARCH_X86_64 tests below are
 * missing from the view -- confirm branch structure against the full file. */
239 #define GEMM_DEFAULT_OFFSET_A 64
240 #define GEMM_DEFAULT_OFFSET_B 832
241 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
/* UNROLL_N for Q/C/Z/X is defined once in this group. */
245 #define QGEMM_DEFAULT_UNROLL_N 2
246 #define CGEMM_DEFAULT_UNROLL_N 2
247 #define ZGEMM_DEFAULT_UNROLL_N 2
248 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D UNROLL_N = 4, UNROLL_M = 4/2/2/2/1/1. */
251 #define SGEMM_DEFAULT_UNROLL_N 4
252 #define DGEMM_DEFAULT_UNROLL_N 4
253 #define SGEMM_DEFAULT_UNROLL_M 4
254 #define DGEMM_DEFAULT_UNROLL_M 2
255 #define QGEMM_DEFAULT_UNROLL_M 2
256 #define CGEMM_DEFAULT_UNROLL_M 2
257 #define ZGEMM_DEFAULT_UNROLL_M 1
258 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D UNROLL_N = 2, UNROLL_M = 16/8/2/4/2/1, plus the
 * CGEMM3M/ZGEMM3M unrolls, DGEMM_DEFAULT_UNROLL_MN and GEMV_UNROLL. */
260 #define SGEMM_DEFAULT_UNROLL_N 2
261 #define DGEMM_DEFAULT_UNROLL_N 2
262 #define SGEMM_DEFAULT_UNROLL_M 16
263 #define DGEMM_DEFAULT_UNROLL_M 8
264 #define QGEMM_DEFAULT_UNROLL_M 2
265 #define CGEMM_DEFAULT_UNROLL_M 4
266 #define ZGEMM_DEFAULT_UNROLL_M 2
267 #define XGEMM_DEFAULT_UNROLL_M 1
268 #define CGEMM3M_DEFAULT_UNROLL_N 4
269 #define CGEMM3M_DEFAULT_UNROLL_M 8
270 #define ZGEMM3M_DEFAULT_UNROLL_N 4
271 #define ZGEMM3M_DEFAULT_UNROLL_M 4
273 #define DGEMM_DEFAULT_UNROLL_MN 16
274 #define GEMV_UNROLL 8
/* S/D P values when ARCH_X86_64 is defined ... */
278 #if defined(ARCH_X86_64)
279 #define SGEMM_DEFAULT_P 768
280 #define DGEMM_DEFAULT_P 384
/* ... and a second S/D P pair, presumably the #else branch (directive not visible). */
282 #define SGEMM_DEFAULT_P 448
283 #define DGEMM_DEFAULT_P 224
286 #define QGEMM_DEFAULT_P 112
287 #define CGEMM_DEFAULT_P 224
288 #define ZGEMM_DEFAULT_P 112
289 #define XGEMM_DEFAULT_P 56
/* S/D Q values follow the same two-way pattern as P above. */
291 #if defined(ARCH_X86_64)
292 #define SGEMM_DEFAULT_Q 168
293 #define DGEMM_DEFAULT_Q 168
295 #define SGEMM_DEFAULT_Q 224
296 #define DGEMM_DEFAULT_Q 224
299 #define QGEMM_DEFAULT_Q 224
300 #define CGEMM_DEFAULT_Q 224
301 #define ZGEMM_DEFAULT_Q 224
302 #define XGEMM_DEFAULT_Q 224
/* 3M-variant P/Q/R values are literal constants. */
304 #define CGEMM3M_DEFAULT_P 448
305 #define ZGEMM3M_DEFAULT_P 224
306 #define XGEMM3M_DEFAULT_P 112
307 #define CGEMM3M_DEFAULT_Q 224
308 #define ZGEMM3M_DEFAULT_Q 224
309 #define XGEMM3M_DEFAULT_Q 224
310 #define CGEMM3M_DEFAULT_R 12288
311 #define ZGEMM3M_DEFAULT_R 12288
312 #define XGEMM3M_DEFAULT_R 12288
/* R expands to the identifiers sgemm_r ... xgemm_r (not defined in this view). */
314 #define SGEMM_DEFAULT_R sgemm_r
315 #define QGEMM_DEFAULT_R qgemm_r
316 #define DGEMM_DEFAULT_R dgemm_r
317 #define CGEMM_DEFAULT_R cgemm_r
318 #define ZGEMM_DEFAULT_R zgemm_r
319 #define XGEMM_DEFAULT_R xgemm_r
322 #define HAVE_EXCLUSIVE_CACHE
324 #define GEMM_THREAD gemm_thread_mn
/* Tuning group: offsets A = 64 / B = 832, alignment mask 0x0fff. Unlike the
 * groups whose R values are all identifiers, SGEMM and DGEMM R are the
 * literal 12288 here.
 * NOTE(review): the directive selecting this group and the #else/#endif
 * lines for the ARCH_X86_64 tests are not visible in this view; repeated
 * definitions below presumably belong to alternative branches -- confirm. */
332 #define GEMM_DEFAULT_OFFSET_A 64
333 #define GEMM_DEFAULT_OFFSET_B 832
334 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
338 #define QGEMM_DEFAULT_UNROLL_N 2
339 #define CGEMM_DEFAULT_UNROLL_N 2
340 #define ZGEMM_DEFAULT_UNROLL_N 2
341 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set. */
344 #define SGEMM_DEFAULT_UNROLL_N 4
345 #define DGEMM_DEFAULT_UNROLL_N 4
346 #define SGEMM_DEFAULT_UNROLL_M 4
347 #define DGEMM_DEFAULT_UNROLL_M 2
348 #define QGEMM_DEFAULT_UNROLL_M 2
349 #define CGEMM_DEFAULT_UNROLL_M 2
350 #define ZGEMM_DEFAULT_UNROLL_M 1
351 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set, plus 3M unrolls and GEMV_UNROLL. */
353 #define SGEMM_DEFAULT_UNROLL_N 2
354 #define DGEMM_DEFAULT_UNROLL_N 2
355 #define SGEMM_DEFAULT_UNROLL_M 16
356 #define DGEMM_DEFAULT_UNROLL_M 8
357 #define QGEMM_DEFAULT_UNROLL_M 2
358 #define CGEMM_DEFAULT_UNROLL_M 4
359 #define ZGEMM_DEFAULT_UNROLL_M 2
360 #define XGEMM_DEFAULT_UNROLL_M 1
361 #define CGEMM3M_DEFAULT_UNROLL_N 4
362 #define CGEMM3M_DEFAULT_UNROLL_M 8
363 #define ZGEMM3M_DEFAULT_UNROLL_N 4
364 #define ZGEMM3M_DEFAULT_UNROLL_M 4
365 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined ... */
368 #if defined(ARCH_X86_64)
369 #define SGEMM_DEFAULT_P 768
370 #define DGEMM_DEFAULT_P 768
371 #define ZGEMM_DEFAULT_P 384
372 #define CGEMM_DEFAULT_P 768
/* ... and a second set, presumably the #else branch (directive not visible). */
374 #define SGEMM_DEFAULT_P 448
375 #define DGEMM_DEFAULT_P 480
376 #define ZGEMM_DEFAULT_P 112
377 #define CGEMM_DEFAULT_P 224
379 #define QGEMM_DEFAULT_P 112
380 #define XGEMM_DEFAULT_P 56
/* Q follows the same two-way pattern as P. */
382 #if defined(ARCH_X86_64)
383 #define SGEMM_DEFAULT_Q 192
384 #define DGEMM_DEFAULT_Q 168
385 #define ZGEMM_DEFAULT_Q 168
386 #define CGEMM_DEFAULT_Q 168
388 #define SGEMM_DEFAULT_Q 224
389 #define DGEMM_DEFAULT_Q 224
390 #define ZGEMM_DEFAULT_Q 224
391 #define CGEMM_DEFAULT_Q 224
393 #define QGEMM_DEFAULT_Q 224
394 #define XGEMM_DEFAULT_Q 224
396 #define CGEMM3M_DEFAULT_P 448
397 #define ZGEMM3M_DEFAULT_P 224
398 #define XGEMM3M_DEFAULT_P 112
399 #define CGEMM3M_DEFAULT_Q 224
400 #define ZGEMM3M_DEFAULT_Q 224
401 #define XGEMM3M_DEFAULT_Q 224
402 #define CGEMM3M_DEFAULT_R 12288
403 #define ZGEMM3M_DEFAULT_R 12288
404 #define XGEMM3M_DEFAULT_R 12288
/* SGEMM/DGEMM R are literals; the other prefixes expand to identifiers
 * (qgemm_r, cgemm_r, zgemm_r, xgemm_r) that are not defined in this view. */
406 #define SGEMM_DEFAULT_R 12288
407 #define QGEMM_DEFAULT_R qgemm_r
408 #define DGEMM_DEFAULT_R 12288
409 #define CGEMM_DEFAULT_R cgemm_r
410 #define ZGEMM_DEFAULT_R zgemm_r
411 #define XGEMM_DEFAULT_R xgemm_r
414 #define HAVE_EXCLUSIVE_CACHE
416 #define GEMM_THREAD gemm_thread_mn
/* Tuning group: offsets A = 64 / B = 832, alignment mask 0x0fff. Same layout
 * as the preceding group in this file, but the ARCH_X86_64 values are
 * DGEMM/CGEMM P = 576, ZGEMM P = 288 and D/Z/C Q = 160.
 * NOTE(review): the directive selecting this group and the #else/#endif
 * lines for the ARCH_X86_64 tests are not visible in this view; repeated
 * definitions presumably belong to alternative branches -- confirm. */
424 #define GEMM_DEFAULT_OFFSET_A 64
425 #define GEMM_DEFAULT_OFFSET_B 832
426 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
430 #define QGEMM_DEFAULT_UNROLL_N 2
431 #define CGEMM_DEFAULT_UNROLL_N 2
432 #define ZGEMM_DEFAULT_UNROLL_N 2
433 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set. */
436 #define SGEMM_DEFAULT_UNROLL_N 4
437 #define DGEMM_DEFAULT_UNROLL_N 4
438 #define SGEMM_DEFAULT_UNROLL_M 4
439 #define DGEMM_DEFAULT_UNROLL_M 2
440 #define QGEMM_DEFAULT_UNROLL_M 2
441 #define CGEMM_DEFAULT_UNROLL_M 2
442 #define ZGEMM_DEFAULT_UNROLL_M 1
443 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set, plus 3M unrolls and GEMV_UNROLL. */
445 #define SGEMM_DEFAULT_UNROLL_N 2
446 #define DGEMM_DEFAULT_UNROLL_N 2
447 #define SGEMM_DEFAULT_UNROLL_M 16
448 #define DGEMM_DEFAULT_UNROLL_M 8
449 #define QGEMM_DEFAULT_UNROLL_M 2
450 #define CGEMM_DEFAULT_UNROLL_M 4
451 #define ZGEMM_DEFAULT_UNROLL_M 2
452 #define XGEMM_DEFAULT_UNROLL_M 1
453 #define CGEMM3M_DEFAULT_UNROLL_N 4
454 #define CGEMM3M_DEFAULT_UNROLL_M 8
455 #define ZGEMM3M_DEFAULT_UNROLL_N 4
456 #define ZGEMM3M_DEFAULT_UNROLL_M 4
457 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined ... */
460 #if defined(ARCH_X86_64)
461 #define SGEMM_DEFAULT_P 768
462 #define DGEMM_DEFAULT_P 576
463 #define ZGEMM_DEFAULT_P 288
464 #define CGEMM_DEFAULT_P 576
/* ... and a second set, presumably the #else branch (directive not visible). */
466 #define SGEMM_DEFAULT_P 448
467 #define DGEMM_DEFAULT_P 480
468 #define ZGEMM_DEFAULT_P 112
469 #define CGEMM_DEFAULT_P 224
471 #define QGEMM_DEFAULT_P 112
472 #define XGEMM_DEFAULT_P 56
/* Q follows the same two-way pattern as P. */
474 #if defined(ARCH_X86_64)
475 #define SGEMM_DEFAULT_Q 192
476 #define DGEMM_DEFAULT_Q 160
477 #define ZGEMM_DEFAULT_Q 160
478 #define CGEMM_DEFAULT_Q 160
480 #define SGEMM_DEFAULT_Q 224
481 #define DGEMM_DEFAULT_Q 224
482 #define ZGEMM_DEFAULT_Q 224
483 #define CGEMM_DEFAULT_Q 224
485 #define QGEMM_DEFAULT_Q 224
486 #define XGEMM_DEFAULT_Q 224
488 #define CGEMM3M_DEFAULT_P 448
489 #define ZGEMM3M_DEFAULT_P 224
490 #define XGEMM3M_DEFAULT_P 112
491 #define CGEMM3M_DEFAULT_Q 224
492 #define ZGEMM3M_DEFAULT_Q 224
493 #define XGEMM3M_DEFAULT_Q 224
494 #define CGEMM3M_DEFAULT_R 12288
495 #define ZGEMM3M_DEFAULT_R 12288
496 #define XGEMM3M_DEFAULT_R 12288
/* SGEMM/DGEMM R are literals; the other prefixes expand to identifiers
 * that are not defined in this view. */
498 #define SGEMM_DEFAULT_R 12288
499 #define QGEMM_DEFAULT_R qgemm_r
500 #define DGEMM_DEFAULT_R 12288
501 #define CGEMM_DEFAULT_R cgemm_r
502 #define ZGEMM_DEFAULT_R zgemm_r
503 #define XGEMM_DEFAULT_R xgemm_r
506 #define HAVE_EXCLUSIVE_CACHE
508 #define GEMM_THREAD gemm_thread_mn
/* Tuning group: offsets A = 64 / B = 832, alignment mask 0x0fff. The macro
 * values are identical to those of the preceding group in this file.
 * NOTE(review): the directive selecting this group and the #else/#endif
 * lines for the ARCH_X86_64 tests are not visible in this view; repeated
 * definitions presumably belong to alternative branches -- confirm. */
517 #define GEMM_DEFAULT_OFFSET_A 64
518 #define GEMM_DEFAULT_OFFSET_B 832
519 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
523 #define QGEMM_DEFAULT_UNROLL_N 2
524 #define CGEMM_DEFAULT_UNROLL_N 2
525 #define ZGEMM_DEFAULT_UNROLL_N 2
526 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set. */
529 #define SGEMM_DEFAULT_UNROLL_N 4
530 #define DGEMM_DEFAULT_UNROLL_N 4
531 #define SGEMM_DEFAULT_UNROLL_M 4
532 #define DGEMM_DEFAULT_UNROLL_M 2
533 #define QGEMM_DEFAULT_UNROLL_M 2
534 #define CGEMM_DEFAULT_UNROLL_M 2
535 #define ZGEMM_DEFAULT_UNROLL_M 1
536 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set, plus 3M unrolls and GEMV_UNROLL. */
538 #define SGEMM_DEFAULT_UNROLL_N 2
539 #define DGEMM_DEFAULT_UNROLL_N 2
540 #define SGEMM_DEFAULT_UNROLL_M 16
541 #define DGEMM_DEFAULT_UNROLL_M 8
542 #define QGEMM_DEFAULT_UNROLL_M 2
543 #define CGEMM_DEFAULT_UNROLL_M 4
544 #define ZGEMM_DEFAULT_UNROLL_M 2
545 #define XGEMM_DEFAULT_UNROLL_M 1
546 #define CGEMM3M_DEFAULT_UNROLL_N 4
547 #define CGEMM3M_DEFAULT_UNROLL_M 8
548 #define ZGEMM3M_DEFAULT_UNROLL_N 4
549 #define ZGEMM3M_DEFAULT_UNROLL_M 4
550 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined ... */
553 #if defined(ARCH_X86_64)
554 #define SGEMM_DEFAULT_P 768
555 #define DGEMM_DEFAULT_P 576
556 #define ZGEMM_DEFAULT_P 288
557 #define CGEMM_DEFAULT_P 576
/* ... and a second set, presumably the #else branch (directive not visible). */
559 #define SGEMM_DEFAULT_P 448
560 #define DGEMM_DEFAULT_P 480
561 #define ZGEMM_DEFAULT_P 112
562 #define CGEMM_DEFAULT_P 224
564 #define QGEMM_DEFAULT_P 112
565 #define XGEMM_DEFAULT_P 56
/* Q follows the same two-way pattern as P. */
567 #if defined(ARCH_X86_64)
568 #define SGEMM_DEFAULT_Q 192
569 #define DGEMM_DEFAULT_Q 160
570 #define ZGEMM_DEFAULT_Q 160
571 #define CGEMM_DEFAULT_Q 160
573 #define SGEMM_DEFAULT_Q 224
574 #define DGEMM_DEFAULT_Q 224
575 #define ZGEMM_DEFAULT_Q 224
576 #define CGEMM_DEFAULT_Q 224
578 #define QGEMM_DEFAULT_Q 224
579 #define XGEMM_DEFAULT_Q 224
581 #define CGEMM3M_DEFAULT_P 448
582 #define ZGEMM3M_DEFAULT_P 224
583 #define XGEMM3M_DEFAULT_P 112
584 #define CGEMM3M_DEFAULT_Q 224
585 #define ZGEMM3M_DEFAULT_Q 224
586 #define XGEMM3M_DEFAULT_Q 224
587 #define CGEMM3M_DEFAULT_R 12288
588 #define ZGEMM3M_DEFAULT_R 12288
589 #define XGEMM3M_DEFAULT_R 12288
/* SGEMM/DGEMM R are literals; the other prefixes expand to identifiers
 * that are not defined in this view. */
591 #define SGEMM_DEFAULT_R 12288
592 #define QGEMM_DEFAULT_R qgemm_r
593 #define DGEMM_DEFAULT_R 12288
594 #define CGEMM_DEFAULT_R cgemm_r
595 #define ZGEMM_DEFAULT_R zgemm_r
596 #define XGEMM_DEFAULT_R xgemm_r
599 #define HAVE_EXCLUSIVE_CACHE
601 #define GEMM_THREAD gemm_thread_mn
/* Tuning group: both buffer offsets are 0, alignment mask 0x03fff,
 * SWITCH_RATIO 16.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. Two complete UNROLL_M/UNROLL_N sets and two P/Q/R sets follow;
 * presumably each pair sits in alternative conditional branches whose
 * directives are missing from the view -- confirm against the full file. */
609 #define GEMM_DEFAULT_OFFSET_A 0
610 #define GEMM_DEFAULT_OFFSET_B 0
611 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
615 #define SWITCH_RATIO 16
/* First unroll set. */
619 #define SGEMM_DEFAULT_UNROLL_M 4
620 #define DGEMM_DEFAULT_UNROLL_M 2
621 #define QGEMM_DEFAULT_UNROLL_M 2
622 #define CGEMM_DEFAULT_UNROLL_M 2
623 #define ZGEMM_DEFAULT_UNROLL_M 1
624 #define XGEMM_DEFAULT_UNROLL_M 1
626 #define SGEMM_DEFAULT_UNROLL_N 4
627 #define DGEMM_DEFAULT_UNROLL_N 4
628 #define QGEMM_DEFAULT_UNROLL_N 2
629 #define CGEMM_DEFAULT_UNROLL_N 2
630 #define ZGEMM_DEFAULT_UNROLL_N 2
631 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set; also defines S/D UNROLL_MN. */
635 #define SGEMM_DEFAULT_UNROLL_M 8
636 #define DGEMM_DEFAULT_UNROLL_M 4
637 #define QGEMM_DEFAULT_UNROLL_M 2
638 #define CGEMM_DEFAULT_UNROLL_M 8
639 #define ZGEMM_DEFAULT_UNROLL_M 4
640 #define XGEMM_DEFAULT_UNROLL_M 1
642 #define SGEMM_DEFAULT_UNROLL_N 4
643 #define DGEMM_DEFAULT_UNROLL_N 8
644 #define QGEMM_DEFAULT_UNROLL_N 2
645 #define CGEMM_DEFAULT_UNROLL_N 2
646 #define ZGEMM_DEFAULT_UNROLL_N 2
647 #define XGEMM_DEFAULT_UNROLL_N 1
649 #define SGEMM_DEFAULT_UNROLL_MN 32
650 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First P/R/Q set: P and R interleaved per prefix, then Q. CGEMM R is the
 * literal 1024; the other R values expand to identifiers not defined here. */
656 #define SGEMM_DEFAULT_P 512
657 #define SGEMM_DEFAULT_R sgemm_r
658 #define DGEMM_DEFAULT_P 512
659 #define DGEMM_DEFAULT_R dgemm_r
660 #define QGEMM_DEFAULT_P 504
661 #define QGEMM_DEFAULT_R qgemm_r
662 #define CGEMM_DEFAULT_P 128
663 #define CGEMM_DEFAULT_R 1024
664 #define ZGEMM_DEFAULT_P 512
665 #define ZGEMM_DEFAULT_R zgemm_r
666 #define XGEMM_DEFAULT_P 252
667 #define XGEMM_DEFAULT_R xgemm_r
668 #define SGEMM_DEFAULT_Q 256
669 #define DGEMM_DEFAULT_Q 256
670 #define QGEMM_DEFAULT_Q 128
671 #define CGEMM_DEFAULT_Q 256
672 #define ZGEMM_DEFAULT_Q 192
673 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set. */
677 #define SGEMM_DEFAULT_P 320
678 #define DGEMM_DEFAULT_P 512
679 #define CGEMM_DEFAULT_P 256
680 #define ZGEMM_DEFAULT_P 192
/* S/D Q is itself given twice here (DGEMM 128, then 256); the selecting
 * condition is not visible in this view. */
683 #define SGEMM_DEFAULT_Q 320
684 #define DGEMM_DEFAULT_Q 128
686 #define SGEMM_DEFAULT_Q 320
687 #define DGEMM_DEFAULT_Q 256
689 #define CGEMM_DEFAULT_Q 256
690 #define ZGEMM_DEFAULT_Q 192
/* DGEMM R is the literal 13824; S/C/Z expand to identifiers. */
692 #define SGEMM_DEFAULT_R sgemm_r
693 #define DGEMM_DEFAULT_R 13824
694 #define CGEMM_DEFAULT_R cgemm_r
695 #define ZGEMM_DEFAULT_R zgemm_r
697 #define QGEMM_DEFAULT_Q 128
698 #define QGEMM_DEFAULT_P 504
699 #define QGEMM_DEFAULT_R qgemm_r
700 #define XGEMM_DEFAULT_P 252
701 #define XGEMM_DEFAULT_R xgemm_r
702 #define XGEMM_DEFAULT_Q 128
/* 3M-variant unrolls and literal P/Q/R values. */
704 #define CGEMM3M_DEFAULT_UNROLL_N 4
705 #define CGEMM3M_DEFAULT_UNROLL_M 8
706 #define ZGEMM3M_DEFAULT_UNROLL_N 4
707 #define ZGEMM3M_DEFAULT_UNROLL_M 4
709 #define CGEMM3M_DEFAULT_P 320
710 #define ZGEMM3M_DEFAULT_P 256
711 #define XGEMM3M_DEFAULT_P 112
712 #define CGEMM3M_DEFAULT_Q 320
713 #define ZGEMM3M_DEFAULT_Q 256
714 #define XGEMM3M_DEFAULT_Q 224
715 #define CGEMM3M_DEFAULT_R 12288
716 #define ZGEMM3M_DEFAULT_R 12288
717 #define XGEMM3M_DEFAULT_R 12288
/* Tuning group: offsets A = 0 / B = 384, alignment mask 0x0ffff. Every macro
 * is defined exactly once in this group.
 * NOTE(review): the directive selecting this group is not visible in this
 * view -- confirm the target against the full file. */
728 #define GEMM_DEFAULT_OFFSET_A 0
729 #define GEMM_DEFAULT_OFFSET_B 384
730 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
732 #define SGEMM_DEFAULT_UNROLL_N 4
733 #define DGEMM_DEFAULT_UNROLL_N 4
734 #define QGEMM_DEFAULT_UNROLL_N 2
735 #define CGEMM_DEFAULT_UNROLL_N 2
736 #define ZGEMM_DEFAULT_UNROLL_N 2
737 #define XGEMM_DEFAULT_UNROLL_N 1
739 #define SGEMM_DEFAULT_UNROLL_M 2
740 #define DGEMM_DEFAULT_UNROLL_M 1
741 #define QGEMM_DEFAULT_UNROLL_M 2
742 #define CGEMM_DEFAULT_UNROLL_M 1
743 #define ZGEMM_DEFAULT_UNROLL_M 1
744 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to the identifiers sgemm_r ... xgemm_r (not defined in this view). */
746 #define SGEMM_DEFAULT_R sgemm_r
747 #define DGEMM_DEFAULT_R dgemm_r
748 #define QGEMM_DEFAULT_R qgemm_r
749 #define CGEMM_DEFAULT_R cgemm_r
750 #define ZGEMM_DEFAULT_R zgemm_r
751 #define XGEMM_DEFAULT_R xgemm_r
/* Literal P values per prefix; Q is 208 for every prefix. */
753 #define SGEMM_DEFAULT_P 208
754 #define DGEMM_DEFAULT_P 104
755 #define QGEMM_DEFAULT_P 56
756 #define CGEMM_DEFAULT_P 104
757 #define ZGEMM_DEFAULT_P 56
758 #define XGEMM_DEFAULT_P 28
760 #define SGEMM_DEFAULT_Q 208
761 #define DGEMM_DEFAULT_Q 208
762 #define QGEMM_DEFAULT_Q 208
763 #define CGEMM_DEFAULT_Q 208
764 #define ZGEMM_DEFAULT_Q 208
765 #define XGEMM_DEFAULT_Q 208
768 #define HAVE_EXCLUSIVE_CACHE
/* Tuning group: offsets A = 0 / B = 256, alignment mask 0x0ffff. Every macro
 * is defined exactly once; P is 128 for all prefixes.
 * NOTE(review): the directive selecting this group is not visible in this
 * view -- confirm the target against the full file. */
776 #define GEMM_DEFAULT_OFFSET_A 0
777 #define GEMM_DEFAULT_OFFSET_B 256
778 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
780 #define SGEMM_DEFAULT_UNROLL_N 4
781 #define DGEMM_DEFAULT_UNROLL_N 4
782 #define QGEMM_DEFAULT_UNROLL_N 2
783 #define CGEMM_DEFAULT_UNROLL_N 2
784 #define ZGEMM_DEFAULT_UNROLL_N 2
785 #define XGEMM_DEFAULT_UNROLL_N 1
787 #define SGEMM_DEFAULT_UNROLL_M 2
788 #define DGEMM_DEFAULT_UNROLL_M 1
789 #define QGEMM_DEFAULT_UNROLL_M 2
790 #define CGEMM_DEFAULT_UNROLL_M 1
791 #define ZGEMM_DEFAULT_UNROLL_M 1
792 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to the identifiers sgemm_r ... xgemm_r (not defined in this view). */
794 #define SGEMM_DEFAULT_R sgemm_r
795 #define DGEMM_DEFAULT_R dgemm_r
796 #define QGEMM_DEFAULT_R qgemm_r
797 #define CGEMM_DEFAULT_R cgemm_r
798 #define ZGEMM_DEFAULT_R zgemm_r
799 #define XGEMM_DEFAULT_R xgemm_r
801 #define SGEMM_DEFAULT_P 128
802 #define DGEMM_DEFAULT_P 128
803 #define QGEMM_DEFAULT_P 128
804 #define CGEMM_DEFAULT_P 128
805 #define ZGEMM_DEFAULT_P 128
806 #define XGEMM_DEFAULT_P 128
/* Literal Q values: 512 for S, 256 for D/Q/C, 128 for Z/X. */
808 #define SGEMM_DEFAULT_Q 512
809 #define DGEMM_DEFAULT_Q 256
810 #define QGEMM_DEFAULT_Q 256
811 #define CGEMM_DEFAULT_Q 256
812 #define ZGEMM_DEFAULT_Q 128
813 #define XGEMM_DEFAULT_Q 128
/* Tuning group: offsets A = 64 / B = 256, alignment mask 0x01ffff; P is 288
 * for every prefix.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. UNROLL_N and UNROLL_M are each defined twice below, presumably in
 * alternative conditional branches whose directives are missing -- confirm. */
823 #define GEMM_DEFAULT_OFFSET_A 64
824 #define GEMM_DEFAULT_OFFSET_B 256
825 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
/* First unroll set. */
828 #define SGEMM_DEFAULT_UNROLL_N 4
829 #define DGEMM_DEFAULT_UNROLL_N 4
830 #define QGEMM_DEFAULT_UNROLL_N 2
831 #define CGEMM_DEFAULT_UNROLL_N 2
832 #define ZGEMM_DEFAULT_UNROLL_N 2
833 #define XGEMM_DEFAULT_UNROLL_N 1
835 #define SGEMM_DEFAULT_UNROLL_M 4
836 #define DGEMM_DEFAULT_UNROLL_M 2
837 #define QGEMM_DEFAULT_UNROLL_M 2
838 #define CGEMM_DEFAULT_UNROLL_M 2
839 #define ZGEMM_DEFAULT_UNROLL_M 1
840 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set. */
842 #define SGEMM_DEFAULT_UNROLL_N 8
843 #define DGEMM_DEFAULT_UNROLL_N 4
844 #define QGEMM_DEFAULT_UNROLL_N 2
845 #define CGEMM_DEFAULT_UNROLL_N 4
846 #define ZGEMM_DEFAULT_UNROLL_N 2
847 #define XGEMM_DEFAULT_UNROLL_N 1
849 #define SGEMM_DEFAULT_UNROLL_M 4
850 #define DGEMM_DEFAULT_UNROLL_M 4
851 #define QGEMM_DEFAULT_UNROLL_M 2
852 #define CGEMM_DEFAULT_UNROLL_M 2
853 #define ZGEMM_DEFAULT_UNROLL_M 2
854 #define XGEMM_DEFAULT_UNROLL_M 1
857 #define SGEMM_DEFAULT_P 288
858 #define DGEMM_DEFAULT_P 288
859 #define QGEMM_DEFAULT_P 288
860 #define CGEMM_DEFAULT_P 288
861 #define ZGEMM_DEFAULT_P 288
862 #define XGEMM_DEFAULT_P 288
/* R expands to the identifiers sgemm_r ... xgemm_r (not defined in this view). */
864 #define SGEMM_DEFAULT_R sgemm_r
865 #define DGEMM_DEFAULT_R dgemm_r
866 #define QGEMM_DEFAULT_R qgemm_r
867 #define CGEMM_DEFAULT_R cgemm_r
868 #define ZGEMM_DEFAULT_R zgemm_r
869 #define XGEMM_DEFAULT_R xgemm_r
/* Literal Q values, from 256 (S) down to 32 (X). */
871 #define SGEMM_DEFAULT_Q 256
872 #define DGEMM_DEFAULT_Q 128
873 #define QGEMM_DEFAULT_Q 64
874 #define CGEMM_DEFAULT_Q 128
875 #define ZGEMM_DEFAULT_Q 64
876 #define XGEMM_DEFAULT_Q 32
879 #define HAVE_EXCLUSIVE_CACHE
/* Tuning group that follows the PENTIUM / PENTIUM2 / PENTIUM3 guard: both
 * offsets 0, alignment mask 0x0ffff, Q = 256 for every prefix.
 * NOTE(review): the matching #endif and any nested directives are not
 * visible in this view. SGEMM/CGEMM UNROLL_M are defined twice (8/4, then
 * 4/2), presumably in alternative branches -- confirm against the full file. */
883 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
892 #define GEMM_DEFAULT_OFFSET_A 0
893 #define GEMM_DEFAULT_OFFSET_B 0
894 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* First SGEMM/CGEMM UNROLL_M pair. */
897 #define SGEMM_DEFAULT_UNROLL_M 8
898 #define CGEMM_DEFAULT_UNROLL_M 4
/* Second SGEMM/CGEMM UNROLL_M pair. */
900 #define SGEMM_DEFAULT_UNROLL_M 4
901 #define CGEMM_DEFAULT_UNROLL_M 2
/* Remaining unroll factors are defined once. */
903 #define DGEMM_DEFAULT_UNROLL_M 2
904 #define SGEMM_DEFAULT_UNROLL_N 2
905 #define DGEMM_DEFAULT_UNROLL_N 2
906 #define QGEMM_DEFAULT_UNROLL_M 2
907 #define QGEMM_DEFAULT_UNROLL_N 2
908 #define CGEMM_DEFAULT_UNROLL_N 1
909 #define ZGEMM_DEFAULT_UNROLL_M 1
910 #define ZGEMM_DEFAULT_UNROLL_N 1
911 #define XGEMM_DEFAULT_UNROLL_M 1
912 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per prefix: P and R expand to identifiers (e.g. sgemm_p, sgemm_r) that are
 * not defined in this view; Q is the literal 256. */
914 #define SGEMM_DEFAULT_P sgemm_p
915 #define SGEMM_DEFAULT_Q 256
916 #define SGEMM_DEFAULT_R sgemm_r
918 #define DGEMM_DEFAULT_P dgemm_p
919 #define DGEMM_DEFAULT_Q 256
920 #define DGEMM_DEFAULT_R dgemm_r
922 #define QGEMM_DEFAULT_P qgemm_p
923 #define QGEMM_DEFAULT_Q 256
924 #define QGEMM_DEFAULT_R qgemm_r
926 #define CGEMM_DEFAULT_P cgemm_p
927 #define CGEMM_DEFAULT_Q 256
928 #define CGEMM_DEFAULT_R cgemm_r
930 #define ZGEMM_DEFAULT_P zgemm_p
931 #define ZGEMM_DEFAULT_Q 256
932 #define ZGEMM_DEFAULT_R zgemm_r
934 #define XGEMM_DEFAULT_P xgemm_p
935 #define XGEMM_DEFAULT_Q 256
936 #define XGEMM_DEFAULT_R xgemm_r
/* Tuning group: both offsets 0, alignment mask 0x0ffff, Q = 256 for every
 * prefix.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. Two complete interleaved UNROLL_M/UNROLL_N sets follow, presumably
 * in alternative conditional branches whose directives are missing --
 * confirm against the full file. */
947 #define GEMM_DEFAULT_OFFSET_A 0
948 #define GEMM_DEFAULT_OFFSET_B 0
949 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* First unroll set (M and N interleaved per prefix). */
952 #define SGEMM_DEFAULT_UNROLL_M 4
953 #define SGEMM_DEFAULT_UNROLL_N 4
954 #define DGEMM_DEFAULT_UNROLL_M 2
955 #define DGEMM_DEFAULT_UNROLL_N 4
956 #define QGEMM_DEFAULT_UNROLL_M 2
957 #define QGEMM_DEFAULT_UNROLL_N 2
958 #define CGEMM_DEFAULT_UNROLL_M 2
959 #define CGEMM_DEFAULT_UNROLL_N 2
960 #define ZGEMM_DEFAULT_UNROLL_M 1
961 #define ZGEMM_DEFAULT_UNROLL_N 2
962 #define XGEMM_DEFAULT_UNROLL_M 1
963 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set. */
965 #define SGEMM_DEFAULT_UNROLL_M 8
966 #define SGEMM_DEFAULT_UNROLL_N 2
967 #define DGEMM_DEFAULT_UNROLL_M 2
968 #define DGEMM_DEFAULT_UNROLL_N 2
969 #define QGEMM_DEFAULT_UNROLL_M 2
970 #define QGEMM_DEFAULT_UNROLL_N 2
971 #define CGEMM_DEFAULT_UNROLL_M 4
972 #define CGEMM_DEFAULT_UNROLL_N 1
973 #define ZGEMM_DEFAULT_UNROLL_M 1
974 #define ZGEMM_DEFAULT_UNROLL_N 1
975 #define XGEMM_DEFAULT_UNROLL_M 1
976 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per prefix: P and R expand to identifiers (e.g. sgemm_p, sgemm_r) that are
 * not defined in this view; Q is the literal 256. */
980 #define SGEMM_DEFAULT_P sgemm_p
981 #define SGEMM_DEFAULT_Q 256
982 #define SGEMM_DEFAULT_R sgemm_r
984 #define DGEMM_DEFAULT_P dgemm_p
985 #define DGEMM_DEFAULT_Q 256
986 #define DGEMM_DEFAULT_R dgemm_r
988 #define QGEMM_DEFAULT_P qgemm_p
989 #define QGEMM_DEFAULT_Q 256
990 #define QGEMM_DEFAULT_R qgemm_r
992 #define CGEMM_DEFAULT_P cgemm_p
993 #define CGEMM_DEFAULT_Q 256
994 #define CGEMM_DEFAULT_R cgemm_r
996 #define ZGEMM_DEFAULT_P zgemm_p
997 #define ZGEMM_DEFAULT_Q 256
998 #define ZGEMM_DEFAULT_R zgemm_r
1000 #define XGEMM_DEFAULT_P xgemm_p
1001 #define XGEMM_DEFAULT_Q 256
1002 #define XGEMM_DEFAULT_R xgemm_r
/* Tuning group that follows the CORE_NORTHWOOD guard: offsets A = 0 / B = 32,
 * alignment mask 0x0ffff, Q = 128 for every prefix. Every macro is defined
 * exactly once in this group.
 * NOTE(review): the matching #endif is not visible in this view. */
1007 #ifdef CORE_NORTHWOOD
1012 #define GEMM_DEFAULT_OFFSET_A 0
1013 #define GEMM_DEFAULT_OFFSET_B 32
1015 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
1019 #define SGEMM_DEFAULT_UNROLL_M 8
1020 #define DGEMM_DEFAULT_UNROLL_M 4
1021 #define QGEMM_DEFAULT_UNROLL_M 2
1022 #define CGEMM_DEFAULT_UNROLL_M 4
1023 #define ZGEMM_DEFAULT_UNROLL_M 2
1024 #define XGEMM_DEFAULT_UNROLL_M 1
1026 #define SGEMM_DEFAULT_UNROLL_N 2
1027 #define DGEMM_DEFAULT_UNROLL_N 2
1028 #define QGEMM_DEFAULT_UNROLL_N 2
1029 #define CGEMM_DEFAULT_UNROLL_N 1
1030 #define ZGEMM_DEFAULT_UNROLL_N 1
1031 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) that are not defined
 * in this view. */
1033 #define SGEMM_DEFAULT_P sgemm_p
1034 #define SGEMM_DEFAULT_R sgemm_r
1036 #define DGEMM_DEFAULT_P dgemm_p
1037 #define DGEMM_DEFAULT_R dgemm_r
1039 #define QGEMM_DEFAULT_P qgemm_p
1040 #define QGEMM_DEFAULT_R qgemm_r
1042 #define CGEMM_DEFAULT_P cgemm_p
1043 #define CGEMM_DEFAULT_R cgemm_r
1045 #define ZGEMM_DEFAULT_P zgemm_p
1046 #define ZGEMM_DEFAULT_R zgemm_r
1048 #define XGEMM_DEFAULT_P xgemm_p
1049 #define XGEMM_DEFAULT_R xgemm_r
1051 #define SGEMM_DEFAULT_Q 128
1052 #define DGEMM_DEFAULT_Q 128
1053 #define QGEMM_DEFAULT_Q 128
1054 #define CGEMM_DEFAULT_Q 128
1055 #define ZGEMM_DEFAULT_Q 128
1056 #define XGEMM_DEFAULT_Q 128
/* Tuning group that follows the CORE_PRESCOTT guard: alignment mask 0x0ffff,
 * Q = 128 for every prefix.
 * NOTE(review): the matching #endif and any nested directives are not
 * visible in this view. The buffer offsets and the UNROLL_M macros are each
 * defined twice, presumably in alternative conditional branches -- confirm
 * against the full file. */
1059 #ifdef CORE_PRESCOTT
/* First offset pair. */
1065 #define GEMM_DEFAULT_OFFSET_A 128
1066 #define GEMM_DEFAULT_OFFSET_B 192
/* Second offset pair. */
1068 #define GEMM_DEFAULT_OFFSET_A 0
1069 #define GEMM_DEFAULT_OFFSET_B 256
1072 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* First UNROLL_M set. */
1077 #define SGEMM_DEFAULT_UNROLL_M 4
1078 #define DGEMM_DEFAULT_UNROLL_M 2
1079 #define QGEMM_DEFAULT_UNROLL_M 2
1080 #define CGEMM_DEFAULT_UNROLL_M 2
1081 #define ZGEMM_DEFAULT_UNROLL_M 1
1082 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set. */
1084 #define SGEMM_DEFAULT_UNROLL_M 8
1085 #define DGEMM_DEFAULT_UNROLL_M 4
1086 #define QGEMM_DEFAULT_UNROLL_M 2
1087 #define CGEMM_DEFAULT_UNROLL_M 4
1088 #define ZGEMM_DEFAULT_UNROLL_M 2
1089 #define XGEMM_DEFAULT_UNROLL_M 1
/* UNROLL_N is defined once. */
1092 #define SGEMM_DEFAULT_UNROLL_N 4
1093 #define DGEMM_DEFAULT_UNROLL_N 4
1094 #define QGEMM_DEFAULT_UNROLL_N 2
1095 #define CGEMM_DEFAULT_UNROLL_N 2
1096 #define ZGEMM_DEFAULT_UNROLL_N 2
1097 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) that are not defined
 * in this view. */
1099 #define SGEMM_DEFAULT_P sgemm_p
1100 #define SGEMM_DEFAULT_R sgemm_r
1102 #define DGEMM_DEFAULT_P dgemm_p
1103 #define DGEMM_DEFAULT_R dgemm_r
1105 #define QGEMM_DEFAULT_P qgemm_p
1106 #define QGEMM_DEFAULT_R qgemm_r
1108 #define CGEMM_DEFAULT_P cgemm_p
1109 #define CGEMM_DEFAULT_R cgemm_r
1111 #define ZGEMM_DEFAULT_P zgemm_p
1112 #define ZGEMM_DEFAULT_R zgemm_r
1114 #define XGEMM_DEFAULT_P xgemm_p
1115 #define XGEMM_DEFAULT_R xgemm_r
1117 #define SGEMM_DEFAULT_Q 128
1118 #define DGEMM_DEFAULT_Q 128
1119 #define QGEMM_DEFAULT_Q 128
1120 #define CGEMM_DEFAULT_Q 128
1121 #define ZGEMM_DEFAULT_Q 128
1122 #define XGEMM_DEFAULT_Q 128
/* Tuning group: offsets A = 448 / B = 128, alignment mask 0x03fff,
 * SWITCH_RATIO 4, Q = 256 for every prefix. Also defines the MASK helper.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. UNROLL_M and UNROLL_N are each defined twice (the UNROLL_M values
 * are the same in both sets; UNROLL_N differs), presumably in alternative
 * conditional branches whose directives are missing -- confirm. */
1130 #define GEMM_DEFAULT_OFFSET_A 448
1131 #define GEMM_DEFAULT_OFFSET_B 128
1132 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1136 #define SWITCH_RATIO 4
/* First unroll set. */
1139 #define SGEMM_DEFAULT_UNROLL_M 8
1140 #define DGEMM_DEFAULT_UNROLL_M 4
1141 #define QGEMM_DEFAULT_UNROLL_M 2
1142 #define CGEMM_DEFAULT_UNROLL_M 4
1143 #define ZGEMM_DEFAULT_UNROLL_M 2
1144 #define XGEMM_DEFAULT_UNROLL_M 1
1146 #define SGEMM_DEFAULT_UNROLL_N 2
1147 #define DGEMM_DEFAULT_UNROLL_N 2
1148 #define QGEMM_DEFAULT_UNROLL_N 2
1149 #define CGEMM_DEFAULT_UNROLL_N 1
1150 #define ZGEMM_DEFAULT_UNROLL_N 1
1151 #define XGEMM_DEFAULT_UNROLL_N 1
/* MASK(a, b): computes ((a + b - 1) / b) * b. With positive integer operands
 * this rounds a up to the next multiple of b. Note that b is expanded three
 * times, so arguments with side effects would be evaluated repeatedly.
 * The macro is not used anywhere in this view. */
1153 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/* Second unroll set. */
1156 #define SGEMM_DEFAULT_UNROLL_M 8
1157 #define DGEMM_DEFAULT_UNROLL_M 4
1158 #define QGEMM_DEFAULT_UNROLL_M 2
1159 #define CGEMM_DEFAULT_UNROLL_M 4
1160 #define ZGEMM_DEFAULT_UNROLL_M 2
1161 #define XGEMM_DEFAULT_UNROLL_M 1
1163 #define SGEMM_DEFAULT_UNROLL_N 4
1164 #define DGEMM_DEFAULT_UNROLL_N 4
1165 #define QGEMM_DEFAULT_UNROLL_N 2
1166 #define CGEMM_DEFAULT_UNROLL_N 2
1167 #define ZGEMM_DEFAULT_UNROLL_N 2
1168 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) that are not defined
 * in this view. */
1171 #define SGEMM_DEFAULT_P sgemm_p
1172 #define SGEMM_DEFAULT_R sgemm_r
1174 #define DGEMM_DEFAULT_P dgemm_p
1175 #define DGEMM_DEFAULT_R dgemm_r
1177 #define QGEMM_DEFAULT_P qgemm_p
1178 #define QGEMM_DEFAULT_R qgemm_r
1180 #define CGEMM_DEFAULT_P cgemm_p
1181 #define CGEMM_DEFAULT_R cgemm_r
1183 #define ZGEMM_DEFAULT_P zgemm_p
1184 #define ZGEMM_DEFAULT_R zgemm_r
1186 #define XGEMM_DEFAULT_P xgemm_p
1187 #define XGEMM_DEFAULT_R xgemm_r
1189 #define SGEMM_DEFAULT_Q 256
1190 #define DGEMM_DEFAULT_Q 256
1191 #define QGEMM_DEFAULT_Q 256
1192 #define CGEMM_DEFAULT_Q 256
1193 #define ZGEMM_DEFAULT_Q 256
1194 #define XGEMM_DEFAULT_Q 256
/* Tuning group: offsets A = 128 / B = 0, alignment mask 0x03fff,
 * SWITCH_RATIO 4; ends by defining GETRF_FACTOR as 0.75.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. UNROLL_M and UNROLL_N are each defined twice (UNROLL_N values are
 * the same in both sets; UNROLL_M differs), presumably in alternative
 * conditional branches whose directives are missing -- confirm. */
1203 #define GEMM_DEFAULT_OFFSET_A 128
1204 #define GEMM_DEFAULT_OFFSET_B 0
1205 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1209 #define SWITCH_RATIO 4
/* First unroll set. */
1212 #define SGEMM_DEFAULT_UNROLL_M 4
1213 #define DGEMM_DEFAULT_UNROLL_M 2
1214 #define QGEMM_DEFAULT_UNROLL_M 2
1215 #define CGEMM_DEFAULT_UNROLL_M 2
1216 #define ZGEMM_DEFAULT_UNROLL_M 1
1217 #define XGEMM_DEFAULT_UNROLL_M 1
1219 #define SGEMM_DEFAULT_UNROLL_N 4
1220 #define DGEMM_DEFAULT_UNROLL_N 4
1221 #define QGEMM_DEFAULT_UNROLL_N 2
1222 #define CGEMM_DEFAULT_UNROLL_N 2
1223 #define ZGEMM_DEFAULT_UNROLL_N 2
1224 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set. */
1226 #define SGEMM_DEFAULT_UNROLL_M 8
1227 #define DGEMM_DEFAULT_UNROLL_M 4
1228 #define QGEMM_DEFAULT_UNROLL_M 2
1229 #define CGEMM_DEFAULT_UNROLL_M 4
1230 #define ZGEMM_DEFAULT_UNROLL_M 2
1231 #define XGEMM_DEFAULT_UNROLL_M 1
1233 #define SGEMM_DEFAULT_UNROLL_N 4
1234 #define DGEMM_DEFAULT_UNROLL_N 4
1235 #define QGEMM_DEFAULT_UNROLL_N 2
1236 #define CGEMM_DEFAULT_UNROLL_N 2
1237 #define ZGEMM_DEFAULT_UNROLL_N 2
1238 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) that are not defined
 * in this view. */
1241 #define SGEMM_DEFAULT_P sgemm_p
1242 #define SGEMM_DEFAULT_R sgemm_r
1244 #define DGEMM_DEFAULT_P dgemm_p
1245 #define DGEMM_DEFAULT_R dgemm_r
1247 #define QGEMM_DEFAULT_P qgemm_p
1248 #define QGEMM_DEFAULT_R qgemm_r
1250 #define CGEMM_DEFAULT_P cgemm_p
1251 #define CGEMM_DEFAULT_R cgemm_r
1253 #define ZGEMM_DEFAULT_P zgemm_p
1254 #define ZGEMM_DEFAULT_R zgemm_r
1256 #define XGEMM_DEFAULT_P xgemm_p
1257 #define XGEMM_DEFAULT_R xgemm_r
/* Literal Q values: 512 for S/C, 256 for D/Z, 128 for Q/X. */
1259 #define SGEMM_DEFAULT_Q 512
1260 #define DGEMM_DEFAULT_Q 256
1261 #define QGEMM_DEFAULT_Q 128
1262 #define CGEMM_DEFAULT_Q 512
1263 #define ZGEMM_DEFAULT_Q 256
1264 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; its consumer is not visible in this view. */
1266 #define GETRF_FACTOR 0.75
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, SWITCH_RATIO, unroll settings, P/Q/R, GETRF_FACTOR and
 * the GEMM_THREAD selection.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
1274 #define GEMM_DEFAULT_OFFSET_A 128
1275 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff is 16383 (2^14 - 1); presumably an alignment mask - confirm at the use sites. */
1276 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1280 #define SWITCH_RATIO 4
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1283 #define SGEMM_DEFAULT_UNROLL_M 4
1284 #define DGEMM_DEFAULT_UNROLL_M 2
1285 #define QGEMM_DEFAULT_UNROLL_M 2
1286 #define CGEMM_DEFAULT_UNROLL_M 2
1287 #define ZGEMM_DEFAULT_UNROLL_M 1
1288 #define XGEMM_DEFAULT_UNROLL_M 1
1290 #define SGEMM_DEFAULT_UNROLL_N 4
1291 #define DGEMM_DEFAULT_UNROLL_N 4
1292 #define QGEMM_DEFAULT_UNROLL_N 2
1293 #define CGEMM_DEFAULT_UNROLL_N 2
1294 #define ZGEMM_DEFAULT_UNROLL_N 2
1295 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll table: larger M values for S, D, C and Z; N values match the first table. */
1297 #define SGEMM_DEFAULT_UNROLL_M 8
1298 #define DGEMM_DEFAULT_UNROLL_M 4
1299 #define QGEMM_DEFAULT_UNROLL_M 2
1300 #define CGEMM_DEFAULT_UNROLL_M 4
1301 #define ZGEMM_DEFAULT_UNROLL_M 2
1302 #define XGEMM_DEFAULT_UNROLL_M 1
1304 #define SGEMM_DEFAULT_UNROLL_N 4
1305 #define DGEMM_DEFAULT_UNROLL_N 4
1306 #define QGEMM_DEFAULT_UNROLL_N 2
1307 #define CGEMM_DEFAULT_UNROLL_N 2
1308 #define ZGEMM_DEFAULT_UNROLL_N 2
1309 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...), not literals. */
1312 #define SGEMM_DEFAULT_P sgemm_p
1313 #define SGEMM_DEFAULT_R sgemm_r
1315 #define DGEMM_DEFAULT_P dgemm_p
1316 #define DGEMM_DEFAULT_R dgemm_r
1318 #define QGEMM_DEFAULT_P qgemm_p
1319 #define QGEMM_DEFAULT_R qgemm_r
1321 #define CGEMM_DEFAULT_P cgemm_p
1322 #define CGEMM_DEFAULT_R cgemm_r
1324 #define ZGEMM_DEFAULT_P zgemm_p
1325 #define ZGEMM_DEFAULT_R zgemm_r
1327 #define XGEMM_DEFAULT_P xgemm_p
1328 #define XGEMM_DEFAULT_R xgemm_r
/* Q halves from S to D to Q (768/384/192), and likewise from C to Z to X. */
1330 #define SGEMM_DEFAULT_Q 768
1331 #define DGEMM_DEFAULT_Q 384
1332 #define QGEMM_DEFAULT_Q 192
1333 #define CGEMM_DEFAULT_Q 768
1334 #define ZGEMM_DEFAULT_Q 384
1335 #define XGEMM_DEFAULT_Q 192
1337 #define GETRF_FACTOR 0.75
/* GEMM_THREAD expands to the identifier gemm_thread_mn, which is defined elsewhere. */
1338 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, SWITCH_RATIO, unroll settings, P/Q/R and GETRF_FACTOR.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
1346 #define GEMM_DEFAULT_OFFSET_A 32
1347 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff is 16383 (2^14 - 1); presumably an alignment mask - confirm at the use sites. */
1348 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1352 #define SWITCH_RATIO 4
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1355 #define SGEMM_DEFAULT_UNROLL_M 4
1356 #define DGEMM_DEFAULT_UNROLL_M 2
1357 #define QGEMM_DEFAULT_UNROLL_M 2
1358 #define CGEMM_DEFAULT_UNROLL_M 2
1359 #define ZGEMM_DEFAULT_UNROLL_M 1
1360 #define XGEMM_DEFAULT_UNROLL_M 1
1362 #define SGEMM_DEFAULT_UNROLL_N 4
1363 #define DGEMM_DEFAULT_UNROLL_N 4
1364 #define QGEMM_DEFAULT_UNROLL_N 2
1365 #define CGEMM_DEFAULT_UNROLL_N 2
1366 #define ZGEMM_DEFAULT_UNROLL_N 2
1367 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll table: M values match the first table; N is doubled for S, D, C and Z. */
1369 #define SGEMM_DEFAULT_UNROLL_M 4
1370 #define DGEMM_DEFAULT_UNROLL_M 2
1371 #define QGEMM_DEFAULT_UNROLL_M 2
1372 #define CGEMM_DEFAULT_UNROLL_M 2
1373 #define ZGEMM_DEFAULT_UNROLL_M 1
1374 #define XGEMM_DEFAULT_UNROLL_M 1
1376 #define SGEMM_DEFAULT_UNROLL_N 8
1377 #define DGEMM_DEFAULT_UNROLL_N 8
1378 #define QGEMM_DEFAULT_UNROLL_N 2
1379 #define CGEMM_DEFAULT_UNROLL_N 4
1380 #define ZGEMM_DEFAULT_UNROLL_N 4
1381 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal here (504 for the S/D/Q entries, 252 for C/Z/X), while R
 * still expands to an identifier (sgemm_r, ...) defined elsewhere.
 */
1384 #define SGEMM_DEFAULT_P 504
1385 #define SGEMM_DEFAULT_R sgemm_r
1387 #define DGEMM_DEFAULT_P 504
1388 #define DGEMM_DEFAULT_R dgemm_r
1390 #define QGEMM_DEFAULT_P 504
1391 #define QGEMM_DEFAULT_R qgemm_r
1393 #define CGEMM_DEFAULT_P 252
1394 #define CGEMM_DEFAULT_R cgemm_r
1396 #define ZGEMM_DEFAULT_P 252
1397 #define ZGEMM_DEFAULT_R zgemm_r
1399 #define XGEMM_DEFAULT_P 252
1400 #define XGEMM_DEFAULT_R xgemm_r
/* Q halves from S to D to Q (512/256/128), and likewise from C to Z to X. */
1402 #define SGEMM_DEFAULT_Q 512
1403 #define DGEMM_DEFAULT_Q 256
1404 #define QGEMM_DEFAULT_Q 128
1405 #define CGEMM_DEFAULT_Q 512
1406 #define ZGEMM_DEFAULT_Q 256
1407 #define XGEMM_DEFAULT_Q 128
1409 #define GETRF_FACTOR 0.72
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, SWITCH_RATIO, unroll settings, P/Q/R, the GEMM3M
 * settings and GETRF_FACTOR.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
1419 #define GEMM_DEFAULT_OFFSET_A 0
1420 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff is 16383 (2^14 - 1); presumably an alignment mask - confirm at the use sites. */
1421 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1425 #define SWITCH_RATIO 4
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1428 #define SGEMM_DEFAULT_UNROLL_M 4
1429 #define DGEMM_DEFAULT_UNROLL_M 2
1430 #define QGEMM_DEFAULT_UNROLL_M 2
1431 #define CGEMM_DEFAULT_UNROLL_M 2
1432 #define ZGEMM_DEFAULT_UNROLL_M 1
1433 #define XGEMM_DEFAULT_UNROLL_M 1
1435 #define SGEMM_DEFAULT_UNROLL_N 4
1436 #define DGEMM_DEFAULT_UNROLL_N 4
1437 #define QGEMM_DEFAULT_UNROLL_N 2
1438 #define CGEMM_DEFAULT_UNROLL_N 2
1439 #define ZGEMM_DEFAULT_UNROLL_N 2
1440 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll table: M grows to 16/8/8 for S/D/C and ZGEMM N becomes 4. */
1442 #define SGEMM_DEFAULT_UNROLL_M 16
1443 #define DGEMM_DEFAULT_UNROLL_M 8
1444 #define QGEMM_DEFAULT_UNROLL_M 2
1445 #define CGEMM_DEFAULT_UNROLL_M 8
1446 #define ZGEMM_DEFAULT_UNROLL_M 1
1447 #define XGEMM_DEFAULT_UNROLL_M 1
1449 #define SGEMM_DEFAULT_UNROLL_N 4
1450 #define DGEMM_DEFAULT_UNROLL_N 4
1451 #define QGEMM_DEFAULT_UNROLL_N 2
1452 #define CGEMM_DEFAULT_UNROLL_N 2
1453 #define ZGEMM_DEFAULT_UNROLL_N 4
1454 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal per routine; R expands to an identifier (sgemm_r, ...).
 * The commented-out lines keep a literal 1024 alternative for R that is
 * currently disabled.
 */
1457 #define SGEMM_DEFAULT_P 768
1458 #define SGEMM_DEFAULT_R sgemm_r
1459 /*#define SGEMM_DEFAULT_R 1024*/
1461 #define DGEMM_DEFAULT_P 512
1462 #define DGEMM_DEFAULT_R dgemm_r
1463 /*#define DGEMM_DEFAULT_R 1024*/
1465 #define QGEMM_DEFAULT_P 504
1466 #define QGEMM_DEFAULT_R qgemm_r
1468 #define CGEMM_DEFAULT_P 768
1469 #define CGEMM_DEFAULT_R cgemm_r
1470 /*#define CGEMM_DEFAULT_R 1024*/
1472 #define ZGEMM_DEFAULT_P 512
1473 #define ZGEMM_DEFAULT_R zgemm_r
1474 /*#define ZGEMM_DEFAULT_R 1024*/
1476 #define XGEMM_DEFAULT_P 252
1477 #define XGEMM_DEFAULT_R xgemm_r
/* Literal Q per routine. */
1479 #define SGEMM_DEFAULT_Q 384
1480 #define DGEMM_DEFAULT_Q 256
1481 #define QGEMM_DEFAULT_Q 128
1482 #define CGEMM_DEFAULT_Q 512
1483 #define ZGEMM_DEFAULT_Q 192
1484 #define XGEMM_DEFAULT_Q 128
/*
 * Separate unroll and P/Q/R settings for the *GEMM3M macro family.
 * No XGEMM3M unroll values are set in this group.
 */
1486 #define CGEMM3M_DEFAULT_UNROLL_N 8
1487 #define CGEMM3M_DEFAULT_UNROLL_M 4
1488 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1489 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1491 #define CGEMM3M_DEFAULT_P 448
1492 #define ZGEMM3M_DEFAULT_P 224
1493 #define XGEMM3M_DEFAULT_P 112
1494 #define CGEMM3M_DEFAULT_Q 224
1495 #define ZGEMM3M_DEFAULT_Q 224
1496 #define XGEMM3M_DEFAULT_Q 224
1497 #define CGEMM3M_DEFAULT_R 12288
1498 #define ZGEMM3M_DEFAULT_R 12288
1499 #define XGEMM3M_DEFAULT_R 12288
1503 #define GETRF_FACTOR 0.72
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE,
 * unroll settings, P/Q/R and the GEMM3M settings.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
1512 #define GEMM_DEFAULT_OFFSET_A 0
1513 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff is 16383 (2^14 - 1); presumably an alignment mask - confirm at the use sites. */
1514 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 4 when XDOUBLE or DOUBLE is
 * defined.
 * NOTE(review): the pair of 8s below is presumably the alternative for the
 * remaining builds; the directive separating the two is not shown here.
 */
1518 #if defined(XDOUBLE) || defined(DOUBLE)
1519 #define SWITCH_RATIO 4
1520 #define GEMM_PREFERED_SIZE 4
1522 #define SWITCH_RATIO 8
1523 #define GEMM_PREFERED_SIZE 8
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1528 #define SGEMM_DEFAULT_UNROLL_M 4
1529 #define DGEMM_DEFAULT_UNROLL_M 2
1530 #define QGEMM_DEFAULT_UNROLL_M 2
1531 #define CGEMM_DEFAULT_UNROLL_M 2
1532 #define ZGEMM_DEFAULT_UNROLL_M 1
1533 #define XGEMM_DEFAULT_UNROLL_M 1
1535 #define SGEMM_DEFAULT_UNROLL_N 4
1536 #define DGEMM_DEFAULT_UNROLL_N 4
1537 #define QGEMM_DEFAULT_UNROLL_N 2
1538 #define CGEMM_DEFAULT_UNROLL_N 2
1539 #define ZGEMM_DEFAULT_UNROLL_N 2
1540 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll table, which additionally sets *_DEFAULT_UNROLL_MN for S and D. */
1544 #define SGEMM_DEFAULT_UNROLL_M 8
1545 #define DGEMM_DEFAULT_UNROLL_M 4
1546 #define QGEMM_DEFAULT_UNROLL_M 2
1547 #define CGEMM_DEFAULT_UNROLL_M 8
1548 #define ZGEMM_DEFAULT_UNROLL_M 4
1549 #define XGEMM_DEFAULT_UNROLL_M 1
1551 #define SGEMM_DEFAULT_UNROLL_N 4
1552 #define DGEMM_DEFAULT_UNROLL_N 8
1553 #define QGEMM_DEFAULT_UNROLL_N 2
1554 #define CGEMM_DEFAULT_UNROLL_N 2
1555 #define ZGEMM_DEFAULT_UNROLL_N 2
1556 #define XGEMM_DEFAULT_UNROLL_N 1
1558 #define SGEMM_DEFAULT_UNROLL_MN 32
1559 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * First P/R/Q table. CGEMM uses a literal R (1024); the other R entries
 * expand to identifiers (sgemm_r, ...) defined elsewhere.
 * NOTE(review): a second P/Q/R table follows and assigns the same names;
 * presumably an alternative chosen by a conditional not shown here.
 */
1565 #define SGEMM_DEFAULT_P 512
1566 #define SGEMM_DEFAULT_R sgemm_r
1567 #define DGEMM_DEFAULT_P 512
1568 #define DGEMM_DEFAULT_R dgemm_r
1569 #define QGEMM_DEFAULT_P 504
1570 #define QGEMM_DEFAULT_R qgemm_r
1571 #define CGEMM_DEFAULT_P 128
1572 #define CGEMM_DEFAULT_R 1024
1573 #define ZGEMM_DEFAULT_P 512
1574 #define ZGEMM_DEFAULT_R zgemm_r
1575 #define XGEMM_DEFAULT_P 252
1576 #define XGEMM_DEFAULT_R xgemm_r
1577 #define SGEMM_DEFAULT_Q 256
1578 #define DGEMM_DEFAULT_Q 256
1579 #define QGEMM_DEFAULT_Q 128
1580 #define CGEMM_DEFAULT_Q 256
1581 #define ZGEMM_DEFAULT_Q 192
1582 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R table. */
1586 #define SGEMM_DEFAULT_P 320
1587 #define DGEMM_DEFAULT_P 512
1588 #define CGEMM_DEFAULT_P 256
1589 #define ZGEMM_DEFAULT_P 192
/*
 * SGEMM/DGEMM Q appears twice (DGEMM 128, then 256).
 * NOTE(review): presumably two branches of a conditional that is not shown
 * here - confirm which DGEMM value is in effect.
 */
1592 #define SGEMM_DEFAULT_Q 320
1593 #define DGEMM_DEFAULT_Q 128
1595 #define SGEMM_DEFAULT_Q 320
1596 #define DGEMM_DEFAULT_Q 256
1598 #define CGEMM_DEFAULT_Q 256
1599 #define ZGEMM_DEFAULT_Q 192
/* Only DGEMM gets a literal R (13824) in this table. */
1601 #define SGEMM_DEFAULT_R sgemm_r
1602 #define DGEMM_DEFAULT_R 13824
1603 #define CGEMM_DEFAULT_R cgemm_r
1604 #define ZGEMM_DEFAULT_R zgemm_r
1606 #define QGEMM_DEFAULT_Q 128
1607 #define QGEMM_DEFAULT_P 504
1608 #define QGEMM_DEFAULT_R qgemm_r
1609 #define XGEMM_DEFAULT_P 252
1610 #define XGEMM_DEFAULT_R xgemm_r
1611 #define XGEMM_DEFAULT_Q 128
/* Separate unroll and P/Q/R settings for the *GEMM3M macro family. */
1613 #define CGEMM3M_DEFAULT_UNROLL_N 4
1614 #define CGEMM3M_DEFAULT_UNROLL_M 8
1615 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1616 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1618 #define CGEMM3M_DEFAULT_P 320
1619 #define ZGEMM3M_DEFAULT_P 256
1620 #define XGEMM3M_DEFAULT_P 112
1621 #define CGEMM3M_DEFAULT_Q 320
1622 #define ZGEMM3M_DEFAULT_Q 256
1623 #define XGEMM3M_DEFAULT_Q 224
1624 #define CGEMM3M_DEFAULT_R 12288
1625 #define ZGEMM3M_DEFAULT_R 12288
1626 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE,
 * USE_SGEMM_KERNEL_DIRECT, unroll settings, P/Q/R and GEMM3M settings.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
1638 #define GEMM_DEFAULT_OFFSET_A 0
1639 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff is 16383 (2^14 - 1); presumably an alignment mask - confirm at the use sites. */
1640 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is
 * defined.
 * NOTE(review): the pair of 16s below is presumably the alternative for
 * the remaining builds; the directive separating the two is not shown here.
 */
1644 #if defined(XDOUBLE) || defined(DOUBLE)
1645 #define SWITCH_RATIO 8
1646 #define GEMM_PREFERED_SIZE 8
1648 #define SWITCH_RATIO 16
1649 #define GEMM_PREFERED_SIZE 16
1651 #define USE_SGEMM_KERNEL_DIRECT 1
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1655 #define SGEMM_DEFAULT_UNROLL_M 4
1656 #define DGEMM_DEFAULT_UNROLL_M 2
1657 #define QGEMM_DEFAULT_UNROLL_M 2
1658 #define CGEMM_DEFAULT_UNROLL_M 2
1659 #define ZGEMM_DEFAULT_UNROLL_M 1
1660 #define XGEMM_DEFAULT_UNROLL_M 1
1662 #define SGEMM_DEFAULT_UNROLL_N 4
1663 #define DGEMM_DEFAULT_UNROLL_N 4
1664 #define QGEMM_DEFAULT_UNROLL_N 2
1665 #define CGEMM_DEFAULT_UNROLL_N 2
1666 #define ZGEMM_DEFAULT_UNROLL_N 2
1667 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Second unroll table. When DYNAMIC_ARCH is not defined, DGEMM uses 16 for
 * M and 2 for N.
 * NOTE(review): the 4 (M) and 8 (N) entries that follow are presumably the
 * DYNAMIC_ARCH alternative - confirm.
 */
1671 #define SGEMM_DEFAULT_UNROLL_M 16
1672 #ifndef DYNAMIC_ARCH
1673 #define DGEMM_DEFAULT_UNROLL_M 16
1675 #define DGEMM_DEFAULT_UNROLL_M 4
1677 #define QGEMM_DEFAULT_UNROLL_M 2
1678 #define CGEMM_DEFAULT_UNROLL_M 8
1679 #define ZGEMM_DEFAULT_UNROLL_M 4
1680 #define XGEMM_DEFAULT_UNROLL_M 1
1682 #define SGEMM_DEFAULT_UNROLL_N 4
1683 #ifndef DYNAMIC_ARCH
1684 #define DGEMM_DEFAULT_UNROLL_N 2
1686 #define DGEMM_DEFAULT_UNROLL_N 8
1688 #define QGEMM_DEFAULT_UNROLL_N 2
1689 #define CGEMM_DEFAULT_UNROLL_N 2
1690 #define ZGEMM_DEFAULT_UNROLL_N 2
1691 #define XGEMM_DEFAULT_UNROLL_N 1
1693 #define SGEMM_DEFAULT_UNROLL_MN 32
1694 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * First P/R/Q table. CGEMM uses a literal R (1024); the other R entries
 * expand to identifiers (sgemm_r, ...) defined elsewhere.
 * NOTE(review): a second P/Q/R table follows and assigns the same names;
 * presumably an alternative chosen by a conditional not shown here.
 */
1699 #define SGEMM_DEFAULT_P 512
1700 #define SGEMM_DEFAULT_R sgemm_r
1701 #define DGEMM_DEFAULT_P 512
1702 #define DGEMM_DEFAULT_R dgemm_r
1703 #define QGEMM_DEFAULT_P 504
1704 #define QGEMM_DEFAULT_R qgemm_r
1705 #define CGEMM_DEFAULT_P 128
1706 #define CGEMM_DEFAULT_R 1024
1707 #define ZGEMM_DEFAULT_P 512
1708 #define ZGEMM_DEFAULT_R zgemm_r
1709 #define XGEMM_DEFAULT_P 252
1710 #define XGEMM_DEFAULT_R xgemm_r
1711 #define SGEMM_DEFAULT_Q 256
1712 #define DGEMM_DEFAULT_Q 256
1713 #define QGEMM_DEFAULT_Q 128
1714 #define CGEMM_DEFAULT_Q 256
1715 #define ZGEMM_DEFAULT_Q 192
1716 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R table. Only DGEMM gets a literal R (8640). */
1720 #define SGEMM_DEFAULT_P 448
1721 #define DGEMM_DEFAULT_P 192
1722 #define CGEMM_DEFAULT_P 384
1723 #define ZGEMM_DEFAULT_P 256
1725 #define SGEMM_DEFAULT_Q 448
1726 #define DGEMM_DEFAULT_Q 384
1727 #define CGEMM_DEFAULT_Q 192
1728 #define ZGEMM_DEFAULT_Q 128
1730 #define SGEMM_DEFAULT_R sgemm_r
1731 #define DGEMM_DEFAULT_R 8640
1732 #define CGEMM_DEFAULT_R cgemm_r
1733 #define ZGEMM_DEFAULT_R zgemm_r
1735 #define QGEMM_DEFAULT_Q 128
1736 #define QGEMM_DEFAULT_P 504
1737 #define QGEMM_DEFAULT_R qgemm_r
1738 #define XGEMM_DEFAULT_P 252
1739 #define XGEMM_DEFAULT_R xgemm_r
1740 #define XGEMM_DEFAULT_Q 128
/* Separate unroll and P/Q/R settings for the *GEMM3M macro family. */
1742 #define CGEMM3M_DEFAULT_UNROLL_N 4
1743 #define CGEMM3M_DEFAULT_UNROLL_M 8
1744 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1745 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1747 #define CGEMM3M_DEFAULT_P 320
1748 #define ZGEMM3M_DEFAULT_P 256
1749 #define XGEMM3M_DEFAULT_P 112
1750 #define CGEMM3M_DEFAULT_Q 320
1751 #define ZGEMM3M_DEFAULT_Q 256
1752 #define XGEMM3M_DEFAULT_Q 224
1753 #define CGEMM3M_DEFAULT_R 12288
1754 #define ZGEMM3M_DEFAULT_R 12288
1755 #define XGEMM3M_DEFAULT_R 12288
1762 #ifdef SAPPHIRERAPIDS
/*
 * Buffer offsets and alignment mask for the SAPPHIRERAPIDS configuration.
 *
 * 0x03fff is 16383 (2^14 - 1). The mask is cast to BLASLONG, matching the
 * (BLASLONG)-cast masks used by other configurations in this file. Without
 * the cast the constant has type unsigned long, which is only 32 bits wide
 * on LLP64 targets such as 64-bit Windows; ~GEMM_DEFAULT_ALIGN would then
 * be zero-extended when combined with a 64-bit value and clear its upper
 * 32 bits.
 * NOTE(review): assumes BLASLONG is a signed, pointer-sized integer -
 * confirm against its typedef.
 */
1767 #define GEMM_DEFAULT_OFFSET_A 0
1768 #define GEMM_DEFAULT_OFFSET_B 0
1769 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is
 * defined.
 * NOTE(review): the pair of 16s below is presumably the alternative for
 * the remaining builds; the directive separating the two is not shown here.
 */
1773 #if defined(XDOUBLE) || defined(DOUBLE)
1774 #define SWITCH_RATIO 8
1775 #define GEMM_PREFERED_SIZE 8
1777 #define SWITCH_RATIO 16
1778 #define GEMM_PREFERED_SIZE 16
1780 #define USE_SGEMM_KERNEL_DIRECT 1
/* Drop any SBGEMM defaults set earlier in the file before redefining them below. */
1782 #undef SBGEMM_DEFAULT_UNROLL_N
1783 #undef SBGEMM_DEFAULT_UNROLL_M
1784 #undef SBGEMM_DEFAULT_P
1785 #undef SBGEMM_DEFAULT_R
1786 #undef SBGEMM_DEFAULT_Q
1787 // FIXME: the actual values are UNROLL_M = UNROLL_N = 16.
1788 // If M and N are equal, OpenBLAS will reuse OCOPY as ICOPY.
1789 // For AMX the two copies are not the same, so UNROLL_M is set to 32 as a workaround.
1790 #define SBGEMM_DEFAULT_UNROLL_N 16
1791 #define SBGEMM_DEFAULT_UNROLL_M 32
1792 #define SBGEMM_DEFAULT_P 256
1793 #define SBGEMM_DEFAULT_Q 1024
/* R expands to the identifier sbgemm_r, which is defined elsewhere. */
1794 #define SBGEMM_DEFAULT_R sbgemm_r
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1798 #define SGEMM_DEFAULT_UNROLL_M 4
1799 #define DGEMM_DEFAULT_UNROLL_M 2
1800 #define QGEMM_DEFAULT_UNROLL_M 2
1801 #define CGEMM_DEFAULT_UNROLL_M 2
1802 #define ZGEMM_DEFAULT_UNROLL_M 1
1803 #define XGEMM_DEFAULT_UNROLL_M 1
1805 #define SGEMM_DEFAULT_UNROLL_N 4
1806 #define DGEMM_DEFAULT_UNROLL_N 4
1807 #define QGEMM_DEFAULT_UNROLL_N 2
1808 #define CGEMM_DEFAULT_UNROLL_N 2
1809 #define ZGEMM_DEFAULT_UNROLL_N 2
1810 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll table, which additionally sets *_DEFAULT_UNROLL_MN for S and D. */
1814 #define SGEMM_DEFAULT_UNROLL_M 16
1815 #define DGEMM_DEFAULT_UNROLL_M 16
1816 #define QGEMM_DEFAULT_UNROLL_M 2
1817 #define CGEMM_DEFAULT_UNROLL_M 8
1818 #define ZGEMM_DEFAULT_UNROLL_M 4
1819 #define XGEMM_DEFAULT_UNROLL_M 1
1821 #define SGEMM_DEFAULT_UNROLL_N 4
1822 #define DGEMM_DEFAULT_UNROLL_N 2
1823 #define QGEMM_DEFAULT_UNROLL_N 2
1824 #define CGEMM_DEFAULT_UNROLL_N 2
1825 #define ZGEMM_DEFAULT_UNROLL_N 2
1826 #define XGEMM_DEFAULT_UNROLL_N 1
1828 #define SGEMM_DEFAULT_UNROLL_MN 32
1829 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * First P/R/Q table. CGEMM uses a literal R (1024); the other R entries
 * expand to identifiers (sgemm_r, ...) defined elsewhere.
 * NOTE(review): a second P/Q/R table follows and assigns the same names;
 * presumably an alternative chosen by a conditional not shown here.
 */
1834 #define SGEMM_DEFAULT_P 512
1835 #define SGEMM_DEFAULT_R sgemm_r
1836 #define DGEMM_DEFAULT_P 512
1837 #define DGEMM_DEFAULT_R dgemm_r
1838 #define QGEMM_DEFAULT_P 504
1839 #define QGEMM_DEFAULT_R qgemm_r
1840 #define CGEMM_DEFAULT_P 128
1841 #define CGEMM_DEFAULT_R 1024
1842 #define ZGEMM_DEFAULT_P 512
1843 #define ZGEMM_DEFAULT_R zgemm_r
1844 #define XGEMM_DEFAULT_P 252
1845 #define XGEMM_DEFAULT_R xgemm_r
1846 #define SGEMM_DEFAULT_Q 256
1847 #define DGEMM_DEFAULT_Q 256
1848 #define QGEMM_DEFAULT_Q 128
1849 #define CGEMM_DEFAULT_Q 256
1850 #define ZGEMM_DEFAULT_Q 192
1851 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R table. Only DGEMM gets a literal R (8640). */
1855 #define SGEMM_DEFAULT_P 640
1856 #define DGEMM_DEFAULT_P 192
1857 #define CGEMM_DEFAULT_P 384
1858 #define ZGEMM_DEFAULT_P 256
1860 #define SGEMM_DEFAULT_Q 320
1861 #define DGEMM_DEFAULT_Q 384
1862 #define CGEMM_DEFAULT_Q 192
1863 #define ZGEMM_DEFAULT_Q 128
1865 #define SGEMM_DEFAULT_R sgemm_r
1866 #define DGEMM_DEFAULT_R 8640
1867 #define CGEMM_DEFAULT_R cgemm_r
1868 #define ZGEMM_DEFAULT_R zgemm_r
1870 #define QGEMM_DEFAULT_Q 128
1871 #define QGEMM_DEFAULT_P 504
1872 #define QGEMM_DEFAULT_R qgemm_r
1873 #define XGEMM_DEFAULT_P 252
1874 #define XGEMM_DEFAULT_R xgemm_r
1875 #define XGEMM_DEFAULT_Q 128
/* Separate unroll and P/Q/R settings for the *GEMM3M macro family. */
1877 #define CGEMM3M_DEFAULT_UNROLL_N 4
1878 #define CGEMM3M_DEFAULT_UNROLL_M 8
1879 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1880 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1882 #define CGEMM3M_DEFAULT_P 320
1883 #define ZGEMM3M_DEFAULT_P 256
1884 #define XGEMM3M_DEFAULT_P 112
1885 #define CGEMM3M_DEFAULT_Q 320
1886 #define ZGEMM3M_DEFAULT_Q 256
1887 #define XGEMM3M_DEFAULT_Q 224
1888 #define CGEMM3M_DEFAULT_R 12288
1889 #define ZGEMM3M_DEFAULT_R 12288
1890 #define XGEMM3M_DEFAULT_R 12288
/*
 * Buffer offsets and alignment mask for one target configuration.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 *
 * 0x03fff is 16383 (2^14 - 1). The mask is cast to BLASLONG, matching the
 * (BLASLONG)-cast masks used by other configurations in this file. Without
 * the cast the constant has type unsigned long, which is only 32 bits wide
 * on LLP64 targets such as 64-bit Windows; ~GEMM_DEFAULT_ALIGN would then
 * be zero-extended when combined with a 64-bit value and clear its upper
 * 32 bits.
 * NOTE(review): assumes BLASLONG is a signed, pointer-sized integer -
 * confirm against its typedef.
 */
1900 #define GEMM_DEFAULT_OFFSET_A 0
1901 #define GEMM_DEFAULT_OFFSET_B 0
1902 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is
 * defined.
 * NOTE(review): the pair of 16s below is presumably the alternative for
 * the remaining builds; the directive separating the two is not shown here.
 */
1906 #if defined(XDOUBLE) || defined(DOUBLE)
1907 #define SWITCH_RATIO 8
1908 #define GEMM_PREFERED_SIZE 8
1910 #define SWITCH_RATIO 16
1911 #define GEMM_PREFERED_SIZE 16
1913 #define USE_SGEMM_KERNEL_DIRECT 1
/* Drop any SBGEMM defaults set earlier in the file before redefining them below. */
1915 #undef SBGEMM_DEFAULT_UNROLL_N
1916 #undef SBGEMM_DEFAULT_UNROLL_M
1917 #undef SBGEMM_DEFAULT_P
1918 #undef SBGEMM_DEFAULT_R
1919 #undef SBGEMM_DEFAULT_Q
1920 #define SBGEMM_DEFAULT_UNROLL_N 4
1921 #define SBGEMM_DEFAULT_UNROLL_M 16
1922 #define SBGEMM_DEFAULT_P 384
1923 #define SBGEMM_DEFAULT_Q 768
/* R expands to the identifier sbgemm_r, which is defined elsewhere. */
1924 #define SBGEMM_DEFAULT_R sbgemm_r
/*
 * First unroll table (M then N).
 * NOTE(review): the following table assigns the same names again; they
 * are presumably alternatives chosen by a conditional not shown here.
 */
1928 #define SGEMM_DEFAULT_UNROLL_M 4
1929 #define DGEMM_DEFAULT_UNROLL_M 2
1930 #define QGEMM_DEFAULT_UNROLL_M 2
1931 #define CGEMM_DEFAULT_UNROLL_M 2
1932 #define ZGEMM_DEFAULT_UNROLL_M 1
1933 #define XGEMM_DEFAULT_UNROLL_M 1
1935 #define SGEMM_DEFAULT_UNROLL_N 4
1936 #define DGEMM_DEFAULT_UNROLL_N 4
1937 #define QGEMM_DEFAULT_UNROLL_N 2
1938 #define CGEMM_DEFAULT_UNROLL_N 2
1939 #define ZGEMM_DEFAULT_UNROLL_N 2
1940 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll table, which additionally sets *_DEFAULT_UNROLL_MN for S and D. */
1944 #define SGEMM_DEFAULT_UNROLL_M 16
1945 #define DGEMM_DEFAULT_UNROLL_M 16
1946 #define QGEMM_DEFAULT_UNROLL_M 2
1947 #define CGEMM_DEFAULT_UNROLL_M 8
1948 #define ZGEMM_DEFAULT_UNROLL_M 4
1949 #define XGEMM_DEFAULT_UNROLL_M 1
1951 #define SGEMM_DEFAULT_UNROLL_N 4
1952 #define DGEMM_DEFAULT_UNROLL_N 2
1953 #define QGEMM_DEFAULT_UNROLL_N 2
1954 #define CGEMM_DEFAULT_UNROLL_N 2
1955 #define ZGEMM_DEFAULT_UNROLL_N 2
1956 #define XGEMM_DEFAULT_UNROLL_N 1
1958 #define SGEMM_DEFAULT_UNROLL_MN 32
1959 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * First P/R/Q table. CGEMM uses a literal R (1024); the other R entries
 * expand to identifiers (sgemm_r, ...) defined elsewhere.
 * NOTE(review): a second P/Q/R table follows and assigns the same names;
 * presumably an alternative chosen by a conditional not shown here.
 */
1964 #define SGEMM_DEFAULT_P 512
1965 #define SGEMM_DEFAULT_R sgemm_r
1966 #define DGEMM_DEFAULT_P 512
1967 #define DGEMM_DEFAULT_R dgemm_r
1968 #define QGEMM_DEFAULT_P 504
1969 #define QGEMM_DEFAULT_R qgemm_r
1970 #define CGEMM_DEFAULT_P 128
1971 #define CGEMM_DEFAULT_R 1024
1972 #define ZGEMM_DEFAULT_P 512
1973 #define ZGEMM_DEFAULT_R zgemm_r
1974 #define XGEMM_DEFAULT_P 252
1975 #define XGEMM_DEFAULT_R xgemm_r
1976 #define SGEMM_DEFAULT_Q 256
1977 #define DGEMM_DEFAULT_Q 256
1978 #define QGEMM_DEFAULT_Q 128
1979 #define CGEMM_DEFAULT_Q 256
1980 #define ZGEMM_DEFAULT_Q 192
1981 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R table. Only DGEMM gets a literal R (8640). */
1985 #define SGEMM_DEFAULT_P 640
1986 #define DGEMM_DEFAULT_P 192
1987 #define CGEMM_DEFAULT_P 384
1988 #define ZGEMM_DEFAULT_P 256
1990 #define SGEMM_DEFAULT_Q 320
1991 #define DGEMM_DEFAULT_Q 384
1992 #define CGEMM_DEFAULT_Q 192
1993 #define ZGEMM_DEFAULT_Q 128
1995 #define SGEMM_DEFAULT_R sgemm_r
1996 #define DGEMM_DEFAULT_R 8640
1997 #define CGEMM_DEFAULT_R cgemm_r
1998 #define ZGEMM_DEFAULT_R zgemm_r
2000 #define QGEMM_DEFAULT_Q 128
2001 #define QGEMM_DEFAULT_P 504
2002 #define QGEMM_DEFAULT_R qgemm_r
2003 #define XGEMM_DEFAULT_P 252
2004 #define XGEMM_DEFAULT_R xgemm_r
2005 #define XGEMM_DEFAULT_Q 128
/* Separate unroll and P/Q/R settings for the *GEMM3M macro family. */
2007 #define CGEMM3M_DEFAULT_UNROLL_N 4
2008 #define CGEMM3M_DEFAULT_UNROLL_M 8
2009 #define ZGEMM3M_DEFAULT_UNROLL_N 4
2010 #define ZGEMM3M_DEFAULT_UNROLL_M 4
2012 #define CGEMM3M_DEFAULT_P 320
2013 #define ZGEMM3M_DEFAULT_P 256
2014 #define XGEMM3M_DEFAULT_P 112
2015 #define CGEMM3M_DEFAULT_Q 320
2016 #define ZGEMM3M_DEFAULT_Q 256
2017 #define XGEMM3M_DEFAULT_Q 224
2018 #define CGEMM3M_DEFAULT_R 12288
2019 #define ZGEMM3M_DEFAULT_R 12288
2020 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, unroll settings and P/Q/R.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2031 #define GEMM_DEFAULT_OFFSET_A 64
2032 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x0ffff is 65535 (2^16 - 1); presumably an alignment mask - confirm at the use sites. */
2033 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/*
 * Two UNROLL_M tables assign the same names (4/2/2/2/1/1, then 8/4/2/4/2/1).
 * NOTE(review): presumably alternatives chosen by a conditional not shown
 * here; only one UNROLL_N table follows them.
 */
2038 #define SGEMM_DEFAULT_UNROLL_M 4
2039 #define DGEMM_DEFAULT_UNROLL_M 2
2040 #define QGEMM_DEFAULT_UNROLL_M 2
2041 #define CGEMM_DEFAULT_UNROLL_M 2
2042 #define ZGEMM_DEFAULT_UNROLL_M 1
2043 #define XGEMM_DEFAULT_UNROLL_M 1
2045 #define SGEMM_DEFAULT_UNROLL_M 8
2046 #define DGEMM_DEFAULT_UNROLL_M 4
2047 #define QGEMM_DEFAULT_UNROLL_M 2
2048 #define CGEMM_DEFAULT_UNROLL_M 4
2049 #define ZGEMM_DEFAULT_UNROLL_M 2
2050 #define XGEMM_DEFAULT_UNROLL_M 1
2053 #define SGEMM_DEFAULT_UNROLL_N 4
2054 #define DGEMM_DEFAULT_UNROLL_N 2
2055 #define QGEMM_DEFAULT_UNROLL_N 2
2056 #define CGEMM_DEFAULT_UNROLL_N 2
2057 #define ZGEMM_DEFAULT_UNROLL_N 1
2058 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...), not literals. */
2060 #define SGEMM_DEFAULT_P sgemm_p
2061 #define SGEMM_DEFAULT_R sgemm_r
2063 #define DGEMM_DEFAULT_P dgemm_p
2064 #define DGEMM_DEFAULT_R dgemm_r
2066 #define QGEMM_DEFAULT_P qgemm_p
2067 #define QGEMM_DEFAULT_R qgemm_r
2069 #define CGEMM_DEFAULT_P cgemm_p
2070 #define CGEMM_DEFAULT_R cgemm_r
2072 #define ZGEMM_DEFAULT_P zgemm_p
2073 #define ZGEMM_DEFAULT_R zgemm_r
2075 #define XGEMM_DEFAULT_P xgemm_p
2076 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 256 for every routine prefix. */
2078 #define SGEMM_DEFAULT_Q 256
2079 #define DGEMM_DEFAULT_Q 256
2080 #define QGEMM_DEFAULT_Q 256
2081 #define CGEMM_DEFAULT_Q 256
2082 #define ZGEMM_DEFAULT_Q 256
2083 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one target configuration: buffer offsets and
 * alignment mask, unroll settings, P/Q/R and GETRF_FACTOR.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2093 #define GEMM_DEFAULT_OFFSET_A 0
2094 #define GEMM_DEFAULT_OFFSET_B 128
/* 0x03fff is 16383 (2^14 - 1); presumably an alignment mask - confirm at the use sites. */
2095 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll is square: 8x8 for the S/D/Q entries and 4x4 for C/Z/X. */
2097 #define SGEMM_DEFAULT_UNROLL_M 8
2098 #define SGEMM_DEFAULT_UNROLL_N 8
2099 #define DGEMM_DEFAULT_UNROLL_M 8
2100 #define DGEMM_DEFAULT_UNROLL_N 8
2101 #define QGEMM_DEFAULT_UNROLL_M 8
2102 #define QGEMM_DEFAULT_UNROLL_N 8
2103 #define CGEMM_DEFAULT_UNROLL_M 4
2104 #define CGEMM_DEFAULT_UNROLL_N 4
2105 #define ZGEMM_DEFAULT_UNROLL_M 4
2106 #define ZGEMM_DEFAULT_UNROLL_N 4
2107 #define XGEMM_DEFAULT_UNROLL_M 4
2108 #define XGEMM_DEFAULT_UNROLL_N 4
/* P expands to identifiers (sgemm_p, ...) defined elsewhere. */
2110 #define SGEMM_DEFAULT_P sgemm_p
2111 #define DGEMM_DEFAULT_P dgemm_p
2112 #define QGEMM_DEFAULT_P qgemm_p
2113 #define CGEMM_DEFAULT_P cgemm_p
2114 #define ZGEMM_DEFAULT_P zgemm_p
2115 #define XGEMM_DEFAULT_P xgemm_p
/* Q is the literal 1024 for every routine prefix. */
2117 #define SGEMM_DEFAULT_Q 1024
2118 #define DGEMM_DEFAULT_Q 1024
2119 #define QGEMM_DEFAULT_Q 1024
2120 #define CGEMM_DEFAULT_Q 1024
2121 #define ZGEMM_DEFAULT_Q 1024
2122 #define XGEMM_DEFAULT_Q 1024
/* R expands to identifiers (sgemm_r, ...) defined elsewhere. */
2124 #define SGEMM_DEFAULT_R sgemm_r
2125 #define DGEMM_DEFAULT_R dgemm_r
2126 #define QGEMM_DEFAULT_R qgemm_r
2127 #define CGEMM_DEFAULT_R cgemm_r
2128 #define ZGEMM_DEFAULT_R zgemm_r
2129 #define XGEMM_DEFAULT_R xgemm_r
2133 #define GETRF_FACTOR 0.65
/*
 * GEMM tuning defaults used when EV4, EV5 or EV6 is defined: buffer
 * offsets, alignment mask, unroll settings and P/Q/R. Only the S, D, C and
 * Z routine prefixes are configured here.
 */
2137 #if defined(EV4) || defined(EV5) || defined(EV6)
2147 #define GEMM_DEFAULT_OFFSET_A 512
2148 #define GEMM_DEFAULT_OFFSET_B 512
/* 0x0ffff is 65535 (2^16 - 1); presumably an alignment mask - confirm at the use sites. */
2149 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* Unroll is square: 4x4 for S and D, 2x2 for C and Z. */
2151 #define SGEMM_DEFAULT_UNROLL_M 4
2152 #define SGEMM_DEFAULT_UNROLL_N 4
2153 #define DGEMM_DEFAULT_UNROLL_M 4
2154 #define DGEMM_DEFAULT_UNROLL_N 4
2155 #define CGEMM_DEFAULT_UNROLL_M 2
2156 #define CGEMM_DEFAULT_UNROLL_N 2
2157 #define ZGEMM_DEFAULT_UNROLL_M 2
2158 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * Three P/Q tables follow and assign the same names; only the first also
 * sets a literal R.
 * NOTE(review): presumably one table per EV4/EV5/EV6, chosen by
 * conditionals not shown here - confirm the mapping before editing.
 */
2163 #define SGEMM_DEFAULT_P 32
2164 #define SGEMM_DEFAULT_Q 112
2165 #define SGEMM_DEFAULT_R 256
2167 #define DGEMM_DEFAULT_P 32
2168 #define DGEMM_DEFAULT_Q 56
2169 #define DGEMM_DEFAULT_R 256
2171 #define CGEMM_DEFAULT_P 32
2172 #define CGEMM_DEFAULT_Q 64
2173 #define CGEMM_DEFAULT_R 240
2175 #define ZGEMM_DEFAULT_P 32
2176 #define ZGEMM_DEFAULT_Q 32
2177 #define ZGEMM_DEFAULT_R 240
/* Second P/Q table (P = 64). */
2181 #define SGEMM_DEFAULT_P 64
2182 #define SGEMM_DEFAULT_Q 256
2184 #define DGEMM_DEFAULT_P 64
2185 #define DGEMM_DEFAULT_Q 128
2187 #define CGEMM_DEFAULT_P 64
2188 #define CGEMM_DEFAULT_Q 128
2190 #define ZGEMM_DEFAULT_P 64
2191 #define ZGEMM_DEFAULT_Q 64
/* Third P/Q table (P = 256, except ZGEMM at 128). */
2195 #define SGEMM_DEFAULT_P 256
2196 #define SGEMM_DEFAULT_Q 512
2198 #define DGEMM_DEFAULT_P 256
2199 #define DGEMM_DEFAULT_Q 256
2201 #define CGEMM_DEFAULT_P 256
2202 #define CGEMM_DEFAULT_Q 256
2204 #define ZGEMM_DEFAULT_P 128
2205 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, unroll settings, P and Q for the S, D, C and Z prefixes.
 * No R values are set in this group.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2215 #define GEMM_DEFAULT_OFFSET_A 0
2216 #define GEMM_DEFAULT_OFFSET_B 8192
/* 0x0ffff is 65535 (2^16 - 1); presumably an alignment mask - confirm at the use sites. */
2217 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
2219 #define SGEMM_DEFAULT_UNROLL_M 16
2220 #define SGEMM_DEFAULT_UNROLL_N 4
2221 #define DGEMM_DEFAULT_UNROLL_M 4
2222 #define DGEMM_DEFAULT_UNROLL_N 4
2223 #define CGEMM_DEFAULT_UNROLL_M 8
2224 #define CGEMM_DEFAULT_UNROLL_N 2
2225 #define ZGEMM_DEFAULT_UNROLL_M 2
2226 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every configured routine prefix. */
2228 #define SGEMM_DEFAULT_P 128
2229 #define DGEMM_DEFAULT_P 128
2230 #define CGEMM_DEFAULT_P 128
2231 #define ZGEMM_DEFAULT_P 128
2233 #define SGEMM_DEFAULT_Q 512
2234 #define DGEMM_DEFAULT_Q 256
2235 #define CGEMM_DEFAULT_Q 256
2236 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, unroll settings, P and Q for the S, D, C and Z prefixes.
 * No R values are set in this group.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2242 #define GEMM_DEFAULT_OFFSET_A 0
2243 #define GEMM_DEFAULT_OFFSET_B 1024
/*
 * 0x0ffff is 65535 (2^16 - 1). Unlike some other configurations in this
 * file, the constant carries no (BLASLONG) cast and so has type
 * unsigned long.
 */
2244 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2246 #define SGEMM_DEFAULT_UNROLL_M 16
2247 #define SGEMM_DEFAULT_UNROLL_N 4
2248 #define DGEMM_DEFAULT_UNROLL_M 4
2249 #define DGEMM_DEFAULT_UNROLL_N 4
2250 #define CGEMM_DEFAULT_UNROLL_M 2
2251 #define CGEMM_DEFAULT_UNROLL_N 2
2252 #define ZGEMM_DEFAULT_UNROLL_M 2
2253 #define ZGEMM_DEFAULT_UNROLL_N 2
2255 #define SGEMM_DEFAULT_P 256
2256 #define DGEMM_DEFAULT_P 128
2257 #define CGEMM_DEFAULT_P 128
2258 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for every configured routine prefix. */
2260 #define SGEMM_DEFAULT_Q 256
2261 #define DGEMM_DEFAULT_Q 256
2262 #define CGEMM_DEFAULT_Q 256
2263 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, byte-order-dependent unroll settings, and P/Q values
 * that depend on the operating system and L2_SIZE.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2273 #define GEMM_DEFAULT_OFFSET_A 2688
2274 #define GEMM_DEFAULT_OFFSET_B 3072
/* 0x03fff is 16383 (2^14 - 1); the constant has type unsigned long (no BLASLONG cast). */
2275 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * SGEMM UNROLL_M is 4 when the compiler reports big-endian byte order.
 * NOTE(review): the 16 that follows is presumably the alternative for the
 * remaining builds; the separating directive is not shown here.
 */
2277 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2278 #define SGEMM_DEFAULT_UNROLL_M 4
2280 #define SGEMM_DEFAULT_UNROLL_M 16
2282 #define SGEMM_DEFAULT_UNROLL_N 4
2283 #define DGEMM_DEFAULT_UNROLL_M 4
2284 #define DGEMM_DEFAULT_UNROLL_N 4
/* Same byte-order split for CGEMM UNROLL_M: 2 on big-endian, then 8. */
2285 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2286 #define CGEMM_DEFAULT_UNROLL_M 2
2288 #define CGEMM_DEFAULT_UNROLL_M 8
2290 #define CGEMM_DEFAULT_UNROLL_N 2
2291 #define ZGEMM_DEFAULT_UNROLL_M 2
2292 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * P values for Linux, Darwin and FreeBSD builds, chosen by L2_SIZE.
 * NOTE(review): 1024976 is not a power of two (1 MiB would be 1048576);
 * confirm it matches the exact L2_SIZE value the build configuration
 * emits before "correcting" it.
 */
2294 #if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
2295 #if L2_SIZE == 1024976
2296 #define SGEMM_DEFAULT_P 320
2297 #define DGEMM_DEFAULT_P 256
2298 #define CGEMM_DEFAULT_P 256
2299 #define ZGEMM_DEFAULT_P 256
/* NOTE(review): presumably the P values for every other L2_SIZE - confirm. */
2301 #define SGEMM_DEFAULT_P 176
2302 #define DGEMM_DEFAULT_P 176
2303 #define CGEMM_DEFAULT_P 176
2304 #define ZGEMM_DEFAULT_P 176
2308 #define SGEMM_DEFAULT_Q 512
2309 #define DGEMM_DEFAULT_Q 256
2310 #define CGEMM_DEFAULT_Q 256
2311 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, unroll settings and P/Q/R for the S, D, C and Z prefixes.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
2322 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2323 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
/* 0x0ffff is 65535 (2^16 - 1); the constant has type unsigned long (no BLASLONG cast). */
2324 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2326 #define SGEMM_DEFAULT_UNROLL_M 4
2327 #define SGEMM_DEFAULT_UNROLL_N 4
2328 #define DGEMM_DEFAULT_UNROLL_M 4
2329 #define DGEMM_DEFAULT_UNROLL_N 4
2330 #define CGEMM_DEFAULT_UNROLL_M 2
2331 #define CGEMM_DEFAULT_UNROLL_N 2
2332 #define ZGEMM_DEFAULT_UNROLL_M 2
2333 #define ZGEMM_DEFAULT_UNROLL_N 2
2335 #define SGEMM_DEFAULT_P 512
2336 #define DGEMM_DEFAULT_P 512
2337 #define CGEMM_DEFAULT_P 512
2338 #define ZGEMM_DEFAULT_P 512
2340 #define SGEMM_DEFAULT_Q 1024
2341 #define DGEMM_DEFAULT_Q 512
2342 #define CGEMM_DEFAULT_Q 512
2343 #define ZGEMM_DEFAULT_Q 256
/* R is defined as the matching P macro, so it tracks any change to P above. */
2345 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2346 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2347 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2348 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, unroll settings, P and two Q tables for the S, D, C and
 * Z prefixes.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
2358 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2359 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
/* 0x0ffff is 65535 (2^16 - 1); the constant has type unsigned long (no BLASLONG cast). */
2360 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2362 #define SGEMM_DEFAULT_UNROLL_M 8
2363 #define SGEMM_DEFAULT_UNROLL_N 4
2364 #define DGEMM_DEFAULT_UNROLL_M 8
2365 #define DGEMM_DEFAULT_UNROLL_N 4
2366 #define CGEMM_DEFAULT_UNROLL_M 4
2367 #define CGEMM_DEFAULT_UNROLL_N 2
2368 #define ZGEMM_DEFAULT_UNROLL_M 4
2369 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every configured routine prefix. */
2371 #define SGEMM_DEFAULT_P 128
2372 #define DGEMM_DEFAULT_P 128
2373 #define CGEMM_DEFAULT_P 128
2374 #define ZGEMM_DEFAULT_P 128
/*
 * Two Q tables assign the same names (4096/3072/2048/1024, then
 * 512/256/256/128).
 * NOTE(review): presumably alternatives chosen by a conditional not shown
 * here - confirm which one is in effect.
 */
2376 #define SGEMM_DEFAULT_Q 4096
2377 #define DGEMM_DEFAULT_Q 3072
2378 #define CGEMM_DEFAULT_Q 2048
2379 #define ZGEMM_DEFAULT_Q 1024
2381 #define SGEMM_DEFAULT_Q 512
2382 #define DGEMM_DEFAULT_Q 256
2383 #define CGEMM_DEFAULT_Q 256
2384 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults used when POWER3, POWER4 or POWER5 is defined:
 * buffer offsets, alignment mask, unroll settings and P/Q/R for the S, D,
 * C and Z prefixes.
 */
2392 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
2393 #define GEMM_DEFAULT_OFFSET_A 0
2394 #define GEMM_DEFAULT_OFFSET_B 2048
/* 0x0ffff is 65535 (2^16 - 1); the constant has type unsigned long (no BLASLONG cast). */
2395 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll is square: 4x4 for S and D, 2x2 for C and Z. */
2397 #define SGEMM_DEFAULT_UNROLL_M 4
2398 #define SGEMM_DEFAULT_UNROLL_N 4
2399 #define DGEMM_DEFAULT_UNROLL_M 4
2400 #define DGEMM_DEFAULT_UNROLL_N 4
2401 #define CGEMM_DEFAULT_UNROLL_M 2
2402 #define CGEMM_DEFAULT_UNROLL_N 2
2403 #define ZGEMM_DEFAULT_UNROLL_M 2
2404 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * Three P/Q tables follow and assign the same names; only the first also
 * sets a literal R (1012).
 * NOTE(review): presumably one table per POWER3/POWER4/POWER5, chosen by
 * conditionals not shown here - confirm the mapping before editing.
 */
2411 #define SGEMM_DEFAULT_P 256
2412 #define SGEMM_DEFAULT_Q 432
2413 #define SGEMM_DEFAULT_R 1012
2415 #define DGEMM_DEFAULT_P 256
2416 #define DGEMM_DEFAULT_Q 216
2417 #define DGEMM_DEFAULT_R 1012
2419 #define CGEMM_DEFAULT_P 256
2420 #define CGEMM_DEFAULT_Q 104
2421 #define CGEMM_DEFAULT_R 1012
2423 #define ZGEMM_DEFAULT_P 256
2424 #define ZGEMM_DEFAULT_Q 104
2425 #define ZGEMM_DEFAULT_R 1012
/*
 * Second table: P is 184 when ALLOC_HUGETLB is defined.
 * NOTE(review): the 144 values are presumably the alternative without
 * ALLOC_HUGETLB - confirm.
 */
2429 #ifdef ALLOC_HUGETLB
2430 #define SGEMM_DEFAULT_P 184
2431 #define DGEMM_DEFAULT_P 184
2432 #define CGEMM_DEFAULT_P 184
2433 #define ZGEMM_DEFAULT_P 184
2435 #define SGEMM_DEFAULT_P 144
2436 #define DGEMM_DEFAULT_P 144
2437 #define CGEMM_DEFAULT_P 144
2438 #define ZGEMM_DEFAULT_P 144
2441 #define SGEMM_DEFAULT_Q 256
2442 #define CGEMM_DEFAULT_Q 256
2443 #define DGEMM_DEFAULT_Q 256
2444 #define ZGEMM_DEFAULT_Q 256
/*
 * Third table: P is 512/256/256/128 when ALLOC_HUGETLB is defined.
 * NOTE(review): the 320/160/160/80 values are presumably the alternative
 * without ALLOC_HUGETLB - confirm.
 */
2448 #ifdef ALLOC_HUGETLB
2449 #define SGEMM_DEFAULT_P 512
2450 #define DGEMM_DEFAULT_P 256
2451 #define CGEMM_DEFAULT_P 256
2452 #define ZGEMM_DEFAULT_P 128
2454 #define SGEMM_DEFAULT_P 320
2455 #define DGEMM_DEFAULT_P 160
2456 #define CGEMM_DEFAULT_P 160
2457 #define ZGEMM_DEFAULT_P 80
2460 #define SGEMM_DEFAULT_Q 256
2461 #define CGEMM_DEFAULT_Q 256
2462 #define DGEMM_DEFAULT_Q 256
2463 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, unroll settings, P and Q for the S, D, C and Z prefixes.
 * No R values are set in this group.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2475 #define GEMM_DEFAULT_OFFSET_A 384
2476 #define GEMM_DEFAULT_OFFSET_B 1024
/* 0x03fff is 16383 (2^14 - 1); the constant has type unsigned long (no BLASLONG cast). */
2477 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* The complex routines use a wider N (4) than M (2) here. */
2479 #define SGEMM_DEFAULT_UNROLL_M 4
2480 #define SGEMM_DEFAULT_UNROLL_N 4
2481 #define DGEMM_DEFAULT_UNROLL_M 4
2482 #define DGEMM_DEFAULT_UNROLL_N 4
2483 #define CGEMM_DEFAULT_UNROLL_M 2
2484 #define CGEMM_DEFAULT_UNROLL_N 4
2485 #define ZGEMM_DEFAULT_UNROLL_M 2
2486 #define ZGEMM_DEFAULT_UNROLL_N 4
2488 #define SGEMM_DEFAULT_P 992
2489 #define DGEMM_DEFAULT_P 480
2490 #define CGEMM_DEFAULT_P 488
2491 #define ZGEMM_DEFAULT_P 248
2493 #define SGEMM_DEFAULT_Q 504
2494 #define DGEMM_DEFAULT_Q 504
2495 #define CGEMM_DEFAULT_Q 400
2496 #define ZGEMM_DEFAULT_Q 400
/*
 * GEMM tuning defaults for one target configuration: buffer offsets,
 * alignment mask, unroll settings that depend on __32BIT__, and P/Q/R for
 * the S, D, C and Z prefixes.
 * NOTE(review): the conditional that selects this configuration is not
 * shown next to these lines - confirm the target before changing values.
 */
2507 #define GEMM_DEFAULT_OFFSET_A 0
2508 #define GEMM_DEFAULT_OFFSET_B 65536
/* 0x0ffff is 65535 (2^16 - 1); the constant has type unsigned long (no BLASLONG cast). */
2510 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * When __32BIT__ is defined the build emits a warning and uses the small
 * unroll table (4x4 real, 2x4 complex).
 * NOTE(review): the larger table after it is presumably the alternative
 * for the remaining builds; the separating directive is not shown here.
 */
2511 #if defined(__32BIT__)
2512 #warning using BINARY32==POWER6
2513 #define SGEMM_DEFAULT_UNROLL_M 4
2514 #define SGEMM_DEFAULT_UNROLL_N 4
2515 #define DGEMM_DEFAULT_UNROLL_M 4
2516 #define DGEMM_DEFAULT_UNROLL_N 4
2517 #define CGEMM_DEFAULT_UNROLL_M 2
2518 #define CGEMM_DEFAULT_UNROLL_N 4
2519 #define ZGEMM_DEFAULT_UNROLL_M 2
2520 #define ZGEMM_DEFAULT_UNROLL_N 4
2522 #define SGEMM_DEFAULT_UNROLL_M 16
2523 #define SGEMM_DEFAULT_UNROLL_N 8
2524 #define DGEMM_DEFAULT_UNROLL_M 16
2525 #define DGEMM_DEFAULT_UNROLL_N 4
2526 #define CGEMM_DEFAULT_UNROLL_M 8
2527 #define CGEMM_DEFAULT_UNROLL_N 4
2528 #define ZGEMM_DEFAULT_UNROLL_M 8
2529 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q carry a UL suffix here, so they are unsigned long constants. */
2531 #define SGEMM_DEFAULT_P 1280UL
2532 #define DGEMM_DEFAULT_P 640UL
2533 #define CGEMM_DEFAULT_P 640UL
2534 #define ZGEMM_DEFAULT_P 320UL
2536 #define SGEMM_DEFAULT_Q 640UL
2537 #define DGEMM_DEFAULT_Q 720UL
2538 #define CGEMM_DEFAULT_Q 640UL
2539 #define ZGEMM_DEFAULT_Q 640UL
/*
 * Two R tables assign the same names: R equal to the matching P macro,
 * then a literal 4096.
 * NOTE(review): presumably alternatives chosen by a conditional not shown
 * here - confirm which one is in effect.
 */
2542 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2543 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2544 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2545 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2547 #define SGEMM_DEFAULT_R 4096
2548 #define DGEMM_DEFAULT_R 4096
2549 #define CGEMM_DEFAULT_R 4096
2550 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN, SWITCH_RATIO and GEMM_PREFERED_SIZE,
 * then the UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt;
 * the meaning of SWITCH_RATIO and GEMM_PREFERED_SIZE is defined by their
 * users elsewhere -- confirm before changing.
 */
2561 #define GEMM_DEFAULT_OFFSET_A 0
2562 #define GEMM_DEFAULT_OFFSET_B 65536
2563 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2565 #define SWITCH_RATIO 16
2566 #define GEMM_PREFERED_SIZE 16
/* Unroll factors, listed as M then N for each of S, D, C and Z. */
2568 #define SGEMM_DEFAULT_UNROLL_M 16
2569 #define SGEMM_DEFAULT_UNROLL_N 8
2570 #define DGEMM_DEFAULT_UNROLL_M 16
2571 #define DGEMM_DEFAULT_UNROLL_N 4
2572 #define CGEMM_DEFAULT_UNROLL_M 8
2573 #define CGEMM_DEFAULT_UNROLL_N 4
2574 #define ZGEMM_DEFAULT_UNROLL_M 8
2575 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P values. */
2577 #define SGEMM_DEFAULT_P 832
2578 #define DGEMM_DEFAULT_P 128
2579 #define CGEMM_DEFAULT_P 512
2580 #define ZGEMM_DEFAULT_P 256
/* Q values; S, C and Z share 1026 while D uses 384. */
2582 #define SGEMM_DEFAULT_Q 1026
2583 #define DGEMM_DEFAULT_Q 384
2584 #define CGEMM_DEFAULT_Q 1026
2585 #define ZGEMM_DEFAULT_Q 1026
/* R is 4096 for every precision. */
2587 #define SGEMM_DEFAULT_R 4096
2588 #define DGEMM_DEFAULT_R 4096
2589 #define CGEMM_DEFAULT_R 4096
2590 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults used when POWER10 is defined: OFFSET_A/B, ALIGN,
 * SWITCH_RATIO and GEMM_PREFERED_SIZE, the UNROLL_M/UNROLL_N, P, Q and R
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM, and finally the SBGEMM defaults.
 * NOTE(review): the #else/#endif lines of the conditionals in this group are
 * not visible in this excerpt.
 */
2596 #if defined(POWER10)
2600 #define GEMM_DEFAULT_OFFSET_A 0
2601 #define GEMM_DEFAULT_OFFSET_B 65536
2602 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2604 #define SWITCH_RATIO 16
2605 #define GEMM_PREFERED_SIZE 16
2607 #define SGEMM_DEFAULT_UNROLL_M 16
2608 #define SGEMM_DEFAULT_UNROLL_N 8
/* The DGEMM unroll is 16x4 when HAVE_GAS is defined and equal to 1. */
2609 #if defined(HAVE_GAS) && (HAVE_GAS == 1)
2610 #define DGEMM_DEFAULT_UNROLL_M 16
2611 #define DGEMM_DEFAULT_UNROLL_N 4
/* NOTE(review): presumably the alternative (8x8) branch of the HAVE_GAS test;
 * the separating #else is not visible here -- confirm. */
2613 #define DGEMM_DEFAULT_UNROLL_M 8
2614 #define DGEMM_DEFAULT_UNROLL_N 8
2616 #define CGEMM_DEFAULT_UNROLL_M 8
2617 #define CGEMM_DEFAULT_UNROLL_N 4
2618 #define ZGEMM_DEFAULT_UNROLL_M 8
2619 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
2621 #define SGEMM_DEFAULT_P 512
2622 #define DGEMM_DEFAULT_P 384
2623 #define CGEMM_DEFAULT_P 512
2624 #define ZGEMM_DEFAULT_P 256
2626 #define SGEMM_DEFAULT_Q 512
2627 #define DGEMM_DEFAULT_Q 512
2628 #define CGEMM_DEFAULT_Q 384
2629 #define ZGEMM_DEFAULT_Q 384
2631 #define SGEMM_DEFAULT_R 4096
2632 #define DGEMM_DEFAULT_R 4096
2633 #define CGEMM_DEFAULT_R 4096
2634 #define ZGEMM_DEFAULT_R 4096
/* Drop any SBGEMM defaults defined earlier, then set this group's values.
 * The #undef lines make the redefinition legal even when a different value
 * was already in effect. */
2638 #undef SBGEMM_DEFAULT_UNROLL_N
2639 #undef SBGEMM_DEFAULT_UNROLL_M
2640 #undef SBGEMM_DEFAULT_P
2641 #undef SBGEMM_DEFAULT_R
2642 #undef SBGEMM_DEFAULT_Q
2643 #define SBGEMM_DEFAULT_UNROLL_M 16
2644 #define SBGEMM_DEFAULT_UNROLL_N 8
2645 #define SBGEMM_DEFAULT_P 832
2646 #define SBGEMM_DEFAULT_Q 1026
2647 #define SBGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults used when both SPARC and V7 are defined: OFFSET_A/B,
 * ALIGN, the UNROLL_M/UNROLL_N, P and Q defaults for SGEMM/DGEMM/CGEMM/ZGEMM,
 * and the GEMM_THREAD selection.
 * NOTE(review): no *_DEFAULT_R values are visible for this group in this
 * excerpt, and neither is the closing #endif.
 */
2650 #if defined(SPARC) && defined(V7)
2655 #define GEMM_DEFAULT_OFFSET_A 0
2656 #define GEMM_DEFAULT_OFFSET_B 2048
2657 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2x8 for S and D, 1x4 for C and Z. */
2659 #define SGEMM_DEFAULT_UNROLL_M 2
2660 #define SGEMM_DEFAULT_UNROLL_N 8
2661 #define DGEMM_DEFAULT_UNROLL_M 2
2662 #define DGEMM_DEFAULT_UNROLL_N 8
2663 #define CGEMM_DEFAULT_UNROLL_M 1
2664 #define CGEMM_DEFAULT_UNROLL_N 4
2665 #define ZGEMM_DEFAULT_UNROLL_M 1
2666 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 256 for every precision. */
2668 #define SGEMM_DEFAULT_P 256
2669 #define DGEMM_DEFAULT_P 256
2670 #define CGEMM_DEFAULT_P 256
2671 #define ZGEMM_DEFAULT_P 256
/* Q values. */
2673 #define SGEMM_DEFAULT_Q 512
2674 #define DGEMM_DEFAULT_Q 256
2675 #define CGEMM_DEFAULT_Q 256
2676 #define ZGEMM_DEFAULT_Q 128
/* Maps GEMM_THREAD to gemm_thread_mn, which is declared outside this excerpt. */
2679 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM tuning defaults used when SPARC and V9 are both defined, or when
 * __sparc_v9__ is defined: OFFSET_A/B, ALIGN and the UNROLL_M/UNROLL_N, P and Q
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): no *_DEFAULT_R values are visible for this group in this
 * excerpt, and neither is the closing #endif.
 */
2682 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2687 #define GEMM_DEFAULT_OFFSET_A 0
2688 #define GEMM_DEFAULT_OFFSET_B 2048
2689 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
2691 #define SGEMM_DEFAULT_UNROLL_M 4
2692 #define SGEMM_DEFAULT_UNROLL_N 4
2693 #define DGEMM_DEFAULT_UNROLL_M 4
2694 #define DGEMM_DEFAULT_UNROLL_N 4
2695 #define CGEMM_DEFAULT_UNROLL_M 2
2696 #define CGEMM_DEFAULT_UNROLL_N 2
2697 #define ZGEMM_DEFAULT_UNROLL_M 2
2698 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every precision. */
2700 #define SGEMM_DEFAULT_P 512
2701 #define DGEMM_DEFAULT_P 512
2702 #define CGEMM_DEFAULT_P 512
2703 #define ZGEMM_DEFAULT_P 512
/* Q values. */
2705 #define SGEMM_DEFAULT_Q 1024
2706 #define DGEMM_DEFAULT_Q 512
2707 #define CGEMM_DEFAULT_Q 512
2708 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
2718 #define GEMM_DEFAULT_OFFSET_A 0
2719 #define GEMM_DEFAULT_OFFSET_B 0
2720 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 2x8 for S and D, 1x4 for C and Z. */
2722 #define SGEMM_DEFAULT_UNROLL_M 2
2723 #define SGEMM_DEFAULT_UNROLL_N 8
2724 #define DGEMM_DEFAULT_UNROLL_M 2
2725 #define DGEMM_DEFAULT_UNROLL_N 8
2726 #define CGEMM_DEFAULT_UNROLL_M 1
2727 #define CGEMM_DEFAULT_UNROLL_N 4
2728 #define ZGEMM_DEFAULT_UNROLL_M 1
2729 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values. */
2731 #define SGEMM_DEFAULT_P 108
2732 #define DGEMM_DEFAULT_P 112
2733 #define CGEMM_DEFAULT_P 108
2734 #define ZGEMM_DEFAULT_P 112
/* Q values. */
2736 #define SGEMM_DEFAULT_Q 288
2737 #define DGEMM_DEFAULT_Q 144
2738 #define CGEMM_DEFAULT_Q 144
2739 #define ZGEMM_DEFAULT_Q 72
/* R is 2000 for every precision. */
2741 #define SGEMM_DEFAULT_R 2000
2742 #define DGEMM_DEFAULT_R 2000
2743 #define CGEMM_DEFAULT_R 2000
2744 #define ZGEMM_DEFAULT_R 2000
/*
 * GEMM tuning defaults used when LOONGSON3R4 is defined: OFFSET_A/B, ALIGN, the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM, and the
 * GEMM_OFFSET_A1/B1 constants.
 * NOTE(review): the closing #endif is not visible in this excerpt.
 */
2749 #if defined(LOONGSON3R4)
2753 #define GEMM_DEFAULT_OFFSET_A 0
2754 #define GEMM_DEFAULT_OFFSET_B 0
2755 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* First unroll set: 8x8, 8x4, 8x4, 4x4 for S, D, C, Z. */
2758 #define SGEMM_DEFAULT_UNROLL_M 8
2759 #define SGEMM_DEFAULT_UNROLL_N 8
2761 #define DGEMM_DEFAULT_UNROLL_M 8
2762 #define DGEMM_DEFAULT_UNROLL_N 4
2764 #define CGEMM_DEFAULT_UNROLL_M 8
2765 #define CGEMM_DEFAULT_UNROLL_N 4
2767 #define ZGEMM_DEFAULT_UNROLL_M 4
2768 #define ZGEMM_DEFAULT_UNROLL_N 4
/* NOTE(review): second unroll set for the same macros (8x4, 4x4, 4x2, 2x2);
 * the conditional choosing between the two sets is not visible in this
 * excerpt -- confirm. */
2770 #define SGEMM_DEFAULT_UNROLL_M 8
2771 #define SGEMM_DEFAULT_UNROLL_N 4
2773 #define DGEMM_DEFAULT_UNROLL_M 4
2774 #define DGEMM_DEFAULT_UNROLL_N 4
2776 #define CGEMM_DEFAULT_UNROLL_M 4
2777 #define CGEMM_DEFAULT_UNROLL_N 2
2779 #define ZGEMM_DEFAULT_UNROLL_M 2
2780 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q values. */
2783 #define SGEMM_DEFAULT_P 64
2784 #define DGEMM_DEFAULT_P 44
2785 #define CGEMM_DEFAULT_P 64
2786 #define ZGEMM_DEFAULT_P 32
2788 #define SGEMM_DEFAULT_Q 192
2789 #define DGEMM_DEFAULT_Q 92
2790 #define CGEMM_DEFAULT_Q 128
2791 #define ZGEMM_DEFAULT_Q 80
/* R is the constant 640 except for DGEMM, which expands to the identifier
 * dgemm_r (declared outside this excerpt) instead of a literal. */
2793 #define SGEMM_DEFAULT_R 640
2794 #define DGEMM_DEFAULT_R dgemm_r
2795 #define CGEMM_DEFAULT_R 640
2796 #define ZGEMM_DEFAULT_R 640
/* Additional offsets: 0x10000 for A1 and 0x100000 for B1. */
2798 #define GEMM_OFFSET_A1 0x10000
2799 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM tuning defaults used when LOONGSON3R3 is defined: OFFSET_A/B, ALIGN, the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM, and the
 * GEMM_OFFSET_A1/B1 constants. The original author's remark below records that
 * the group was copied from the SICORTEX settings.
 * NOTE(review): the closing #endif is not visible in this excerpt.
 */
2804 #if defined(LOONGSON3R3)
2805 ////Copy from SICORTEX
2809 #define GEMM_DEFAULT_OFFSET_A 0
2810 #define GEMM_DEFAULT_OFFSET_B 0
2811 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 8x4, 4x4, 4x2, 2x2 for S, D, C, Z. */
2813 #define SGEMM_DEFAULT_UNROLL_M 8
2814 #define SGEMM_DEFAULT_UNROLL_N 4
2816 #define DGEMM_DEFAULT_UNROLL_M 4
2817 #define DGEMM_DEFAULT_UNROLL_N 4
2819 #define CGEMM_DEFAULT_UNROLL_M 4
2820 #define CGEMM_DEFAULT_UNROLL_N 2
2822 #define ZGEMM_DEFAULT_UNROLL_M 2
2823 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q values. */
2825 #define SGEMM_DEFAULT_P 64
2826 #define DGEMM_DEFAULT_P 44
2827 #define CGEMM_DEFAULT_P 64
2828 #define ZGEMM_DEFAULT_P 32
2830 #define SGEMM_DEFAULT_Q 192
2831 #define DGEMM_DEFAULT_Q 92
2832 #define CGEMM_DEFAULT_Q 128
2833 #define ZGEMM_DEFAULT_Q 80
/* R is the constant 640 except for DGEMM, which expands to the identifier
 * dgemm_r (declared outside this excerpt) instead of a literal. */
2835 #define SGEMM_DEFAULT_R 640
2836 #define DGEMM_DEFAULT_R dgemm_r
2837 #define CGEMM_DEFAULT_R 640
2838 #define ZGEMM_DEFAULT_R 640
/* Additional offsets: 0x10000 for A1 and 0x100000 for B1. */
2840 #define GEMM_OFFSET_A1 0x10000
2841 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM tuning defaults used when LOONGSON3R5 is defined. Besides the usual
 * S/D/C/Z entries this group also sets the QGEMM and XGEMM variants.
 * P and R expand to lower-case identifiers (sgemm_p, sgemm_r, ...) declared
 * outside this excerpt rather than to literals; Q is the literal 128.
 * NOTE(review): the closing #endif is not visible in this excerpt.
 */
2846 #if defined (LOONGSON3R5)
2850 #define GEMM_DEFAULT_OFFSET_A 0
2851 #define GEMM_DEFAULT_OFFSET_B 0
2852 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* UNROLL_N values. */
2854 #define SGEMM_DEFAULT_UNROLL_N 8
2855 #define DGEMM_DEFAULT_UNROLL_N 8
2856 #define QGEMM_DEFAULT_UNROLL_N 2
2857 #define CGEMM_DEFAULT_UNROLL_N 4
2858 #define ZGEMM_DEFAULT_UNROLL_N 4
2859 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL_M values. */
2861 #define SGEMM_DEFAULT_UNROLL_M 2
2862 #define DGEMM_DEFAULT_UNROLL_M 2
2863 #define QGEMM_DEFAULT_UNROLL_M 2
2864 #define CGEMM_DEFAULT_UNROLL_M 1
2865 #define ZGEMM_DEFAULT_UNROLL_M 1
2866 #define XGEMM_DEFAULT_UNROLL_M 1
/* P: each macro expands to the correspondingly named lower-case identifier. */
2868 #define SGEMM_DEFAULT_P sgemm_p
2869 #define DGEMM_DEFAULT_P dgemm_p
2870 #define QGEMM_DEFAULT_P qgemm_p
2871 #define CGEMM_DEFAULT_P cgemm_p
2872 #define ZGEMM_DEFAULT_P zgemm_p
2873 #define XGEMM_DEFAULT_P xgemm_p
/* R: same scheme as P. */
2875 #define SGEMM_DEFAULT_R sgemm_r
2876 #define DGEMM_DEFAULT_R dgemm_r
2877 #define QGEMM_DEFAULT_R qgemm_r
2878 #define CGEMM_DEFAULT_R cgemm_r
2879 #define ZGEMM_DEFAULT_R zgemm_r
2880 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every variant. */
2882 #define SGEMM_DEFAULT_Q 128
2883 #define DGEMM_DEFAULT_Q 128
2884 #define QGEMM_DEFAULT_Q 128
2885 #define CGEMM_DEFAULT_Q 128
2886 #define ZGEMM_DEFAULT_Q 128
2887 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM tuning defaults shared by the P5600, MIPS1004K, MIPS24K, I6400, P6600
 * and I6500 targets (any one of these macros being defined selects the group):
 * OFFSET_A/B, ALIGN and the UNROLL_M/UNROLL_N, P, Q and R defaults for
 * SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the #else/#endif lines of the conditionals in this group are
 * not visible in this excerpt.
 */
2892 #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
2896 #define GEMM_DEFAULT_OFFSET_A 0
2897 #define GEMM_DEFAULT_OFFSET_B 0
2898 #define GEMM_DEFAULT_ALIGN (BLASLONG) 0x03fffUL
/* Larger unroll set, taken when HAVE_MSA is defined and NO_MSA is not. */
2900 #if defined(HAVE_MSA) && !defined(NO_MSA)
2901 #define SGEMM_DEFAULT_UNROLL_M 8
2902 #define SGEMM_DEFAULT_UNROLL_N 8
2904 #define DGEMM_DEFAULT_UNROLL_M 8
2905 #define DGEMM_DEFAULT_UNROLL_N 4
2907 #define CGEMM_DEFAULT_UNROLL_M 8
2908 #define CGEMM_DEFAULT_UNROLL_N 4
2910 #define ZGEMM_DEFAULT_UNROLL_M 4
2911 #define ZGEMM_DEFAULT_UNROLL_N 4
/* NOTE(review): 2x2 set for the same macros; presumably the alternative
 * branch of the MSA test, whose #else is not visible here -- confirm. */
2913 #define SGEMM_DEFAULT_UNROLL_M 2
2914 #define SGEMM_DEFAULT_UNROLL_N 2
2916 #define DGEMM_DEFAULT_UNROLL_M 2
2917 #define DGEMM_DEFAULT_UNROLL_N 2
2919 #define CGEMM_DEFAULT_UNROLL_M 2
2920 #define CGEMM_DEFAULT_UNROLL_N 2
2922 #define ZGEMM_DEFAULT_UNROLL_M 2
2923 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
2926 #define SGEMM_DEFAULT_P 128
2927 #define DGEMM_DEFAULT_P 128
2928 #define CGEMM_DEFAULT_P 96
2929 #define ZGEMM_DEFAULT_P 64
2931 #define SGEMM_DEFAULT_Q 240
2932 #define DGEMM_DEFAULT_Q 120
2933 #define CGEMM_DEFAULT_Q 120
2934 #define ZGEMM_DEFAULT_Q 120
2936 #define SGEMM_DEFAULT_R 12288
2937 #define DGEMM_DEFAULT_R 8192
2938 #define CGEMM_DEFAULT_R 4096
2939 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults used when RISCV64_GENERIC is defined: OFFSET_A/B, ALIGN
 * (cast to BLASLONG) and the UNROLL_M/UNROLL_N, P, Q and R defaults for
 * SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the closing #endif is not visible in this excerpt.
 */
2944 #ifdef RISCV64_GENERIC
2945 #define GEMM_DEFAULT_OFFSET_A 0
2946 #define GEMM_DEFAULT_OFFSET_B 0
2947 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors are 2x2 for every precision. */
2949 #define SGEMM_DEFAULT_UNROLL_M 2
2950 #define SGEMM_DEFAULT_UNROLL_N 2
2952 #define DGEMM_DEFAULT_UNROLL_M 2
2953 #define DGEMM_DEFAULT_UNROLL_N 2
2955 #define CGEMM_DEFAULT_UNROLL_M 2
2956 #define CGEMM_DEFAULT_UNROLL_N 2
2958 #define ZGEMM_DEFAULT_UNROLL_M 2
2959 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
2961 #define SGEMM_DEFAULT_P 128
2962 #define DGEMM_DEFAULT_P 128
2963 #define CGEMM_DEFAULT_P 96
2964 #define ZGEMM_DEFAULT_P 64
2966 #define SGEMM_DEFAULT_Q 240
2967 #define DGEMM_DEFAULT_Q 120
2968 #define CGEMM_DEFAULT_Q 120
2969 #define ZGEMM_DEFAULT_Q 120
2971 #define SGEMM_DEFAULT_R 12288
2972 #define DGEMM_DEFAULT_R 8192
2973 #define CGEMM_DEFAULT_R 4096
2974 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN and the UNROLL_M/UNROLL_N, P, Q and R
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): OFFSET_A/B appear twice in a row; presumably the two pairs
 * belong to different conditional groups whose guards (and the rest of the
 * first group) are not visible in this excerpt -- confirm.
 */
2978 #define GEMM_DEFAULT_OFFSET_A 0
2979 #define GEMM_DEFAULT_OFFSET_B 0
2984 #define GEMM_DEFAULT_OFFSET_A 0
2985 #define GEMM_DEFAULT_OFFSET_B 0
2986 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 16x4, 8x4, 2x2, 2x2 for S, D, C, Z. */
2988 #define SGEMM_DEFAULT_UNROLL_M 16
2989 #define SGEMM_DEFAULT_UNROLL_N 4
2991 #define DGEMM_DEFAULT_UNROLL_M 8
2992 #define DGEMM_DEFAULT_UNROLL_N 4
2994 #define CGEMM_DEFAULT_UNROLL_M 2
2995 #define CGEMM_DEFAULT_UNROLL_N 2
2997 #define ZGEMM_DEFAULT_UNROLL_M 2
2998 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3000 #define SGEMM_DEFAULT_P 160
3001 #define DGEMM_DEFAULT_P 160
3002 #define CGEMM_DEFAULT_P 96
3003 #define ZGEMM_DEFAULT_P 64
3005 #define SGEMM_DEFAULT_Q 240
3006 #define DGEMM_DEFAULT_Q 128
3007 #define CGEMM_DEFAULT_Q 120
3008 #define ZGEMM_DEFAULT_Q 120
3010 #define SGEMM_DEFAULT_R 12288
3011 #define DGEMM_DEFAULT_R 8192
3012 #define CGEMM_DEFAULT_R 4096
3013 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): OFFSET_A/B appear twice in a row; presumably the two pairs
 * belong to different conditional groups whose guards (and the rest of the
 * first group) are not visible in this excerpt -- confirm.
 */
3017 #define GEMM_DEFAULT_OFFSET_A 0
3018 #define GEMM_DEFAULT_OFFSET_B 0
3026 #define GEMM_DEFAULT_OFFSET_A 0
3027 #define GEMM_DEFAULT_OFFSET_B 0
3028 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
3030 #define SGEMM_DEFAULT_UNROLL_M 4
3031 #define SGEMM_DEFAULT_UNROLL_N 4
3033 #define DGEMM_DEFAULT_UNROLL_M 4
3034 #define DGEMM_DEFAULT_UNROLL_N 4
3036 #define CGEMM_DEFAULT_UNROLL_M 2
3037 #define CGEMM_DEFAULT_UNROLL_N 2
3039 #define ZGEMM_DEFAULT_UNROLL_M 2
3040 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3042 #define SGEMM_DEFAULT_P 128
3043 #define DGEMM_DEFAULT_P 128
3044 #define CGEMM_DEFAULT_P 96
3045 #define ZGEMM_DEFAULT_P 64
3047 #define SGEMM_DEFAULT_Q 240
3048 #define DGEMM_DEFAULT_Q 120
3049 #define CGEMM_DEFAULT_Q 120
3050 #define ZGEMM_DEFAULT_Q 120
3052 #define SGEMM_DEFAULT_R 12288
3053 #define DGEMM_DEFAULT_R 8192
3054 #define CGEMM_DEFAULT_R 4096
3055 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
3067 #define GEMM_DEFAULT_OFFSET_A 0
3068 #define GEMM_DEFAULT_OFFSET_B 0
3069 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 4x2 for S and D, 2x2 for C and Z. */
3071 #define SGEMM_DEFAULT_UNROLL_M 4
3072 #define SGEMM_DEFAULT_UNROLL_N 2
3074 #define DGEMM_DEFAULT_UNROLL_M 4
3075 #define DGEMM_DEFAULT_UNROLL_N 2
3077 #define CGEMM_DEFAULT_UNROLL_M 2
3078 #define CGEMM_DEFAULT_UNROLL_N 2
3080 #define ZGEMM_DEFAULT_UNROLL_M 2
3081 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3083 #define SGEMM_DEFAULT_P 128
3084 #define DGEMM_DEFAULT_P 128
3085 #define CGEMM_DEFAULT_P 96
3086 #define ZGEMM_DEFAULT_P 64
3088 #define SGEMM_DEFAULT_Q 240
3089 #define DGEMM_DEFAULT_Q 120
3090 #define CGEMM_DEFAULT_Q 120
3091 #define ZGEMM_DEFAULT_Q 120
3093 #define SGEMM_DEFAULT_R 12288
3094 #define DGEMM_DEFAULT_R 8192
3095 #define CGEMM_DEFAULT_R 4096
3096 #define ZGEMM_DEFAULT_R 4096
/*
 * Settings shared by all ARMv8 groups that follow: OFFSET_A/B and the
 * GEMM_DEFAULT_ALIGN value.
 * NOTE(review): GEMM_DEFAULT_ALIGN is given twice, once cast to BLASULONG
 * (for win64, per the remark below) and once as a plain UL literal; the
 * conditional choosing between the two is not visible in this excerpt.
 */
3102 /* Common ARMv8 parameters */
3108 #define GEMM_DEFAULT_OFFSET_A 0
3109 #define GEMM_DEFAULT_OFFSET_B 0
3111 /* Use explicit casting for win64 as LLP64 datamodel is used */
3112 #define GEMM_DEFAULT_ALIGN (BLASULONG)0x03fffUL
3114 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * First branch of the ARMv8 per-core chain: taken when any of CORTEXA57,
 * CORTEXA72, CORTEXA73, FALKOR, TSV110, EMAG8180 or VORTEX is defined.
 * Sets the UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 */
3119 #if defined(CORTEXA57) || \
3120 defined(CORTEXA72) || defined(CORTEXA73) || \
3121 defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
3123 #define SGEMM_DEFAULT_UNROLL_M 16
3124 #define SGEMM_DEFAULT_UNROLL_N 4
3126 #define DGEMM_DEFAULT_UNROLL_M 8
3127 #define DGEMM_DEFAULT_UNROLL_N 4
3129 #define CGEMM_DEFAULT_UNROLL_M 8
3130 #define CGEMM_DEFAULT_UNROLL_N 4
3132 #define ZGEMM_DEFAULT_UNROLL_M 4
3133 #define ZGEMM_DEFAULT_UNROLL_N 4
3135 /*FIXME: this should be using the cache size, but there is currently no easy way to
3136 query that on ARM. So if getarch counted more than 8 cores we simply assume the host
3137 is a big desktop or server with abundant cache rather than a phone or embedded device */
/* Larger P/Q set: taken when NUM_CORES exceeds 8, or unconditionally for
 * TSV110, EMAG8180 and VORTEX. */
3138 #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
3139 #define SGEMM_DEFAULT_P 512
3140 #define DGEMM_DEFAULT_P 256
3141 #define CGEMM_DEFAULT_P 256
3142 #define ZGEMM_DEFAULT_P 128
3144 #define SGEMM_DEFAULT_Q 1024
3145 #define DGEMM_DEFAULT_Q 512
3146 #define CGEMM_DEFAULT_Q 512
3147 #define ZGEMM_DEFAULT_Q 512
/* NOTE(review): smaller P/Q set for the same macros; presumably the
 * alternative branch of the NUM_CORES test, whose #else/#endif lines are not
 * visible in this excerpt -- confirm. */
3149 #define SGEMM_DEFAULT_P 128
3150 #define DGEMM_DEFAULT_P 160
3151 #define CGEMM_DEFAULT_P 128
3152 #define ZGEMM_DEFAULT_P 128
3154 #define SGEMM_DEFAULT_Q 352
3155 #define DGEMM_DEFAULT_Q 128
3156 #define CGEMM_DEFAULT_Q 224
3157 #define ZGEMM_DEFAULT_Q 112
/* R values; ZGEMM uses 2048 while the others use 4096. */
3160 #define SGEMM_DEFAULT_R 4096
3161 #define DGEMM_DEFAULT_R 4096
3162 #define CGEMM_DEFAULT_R 4096
3163 #define ZGEMM_DEFAULT_R 2048
/*
 * ARMv8 per-core chain, branch for CORTEXA53 or CORTEXA55: UNROLL_M/UNROLL_N,
 * P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 */
3165 #elif defined(CORTEXA53) || defined(CORTEXA55)
3167 #define SGEMM_DEFAULT_UNROLL_M 8
3168 #define SGEMM_DEFAULT_UNROLL_N 8
3170 #define DGEMM_DEFAULT_UNROLL_M 4
3171 #define DGEMM_DEFAULT_UNROLL_N 4
3173 #define CGEMM_DEFAULT_UNROLL_M 8
3174 #define CGEMM_DEFAULT_UNROLL_N 4
3176 #define ZGEMM_DEFAULT_UNROLL_M 4
3177 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P and Q values. */
3179 #define SGEMM_DEFAULT_P 256
3180 #define DGEMM_DEFAULT_P 160
3181 #define CGEMM_DEFAULT_P 128
3182 #define ZGEMM_DEFAULT_P 128
3184 #define SGEMM_DEFAULT_Q 256
3185 #define DGEMM_DEFAULT_Q 128
3186 #define CGEMM_DEFAULT_Q 224
3187 #define ZGEMM_DEFAULT_Q 112
/* R values; ZGEMM uses 2048 while the others use 4096. */
3189 #define SGEMM_DEFAULT_R 4096
3190 #define DGEMM_DEFAULT_R 4096
3191 #define CGEMM_DEFAULT_R 4096
3192 #define ZGEMM_DEFAULT_R 2048
/*
 * ARMv8 per-core chain, branch for THUNDERX: UNROLL_M/UNROLL_N, P, Q and R
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 */
3194 #elif defined(THUNDERX)
3196 #define SGEMM_DEFAULT_UNROLL_M 4
3197 #define SGEMM_DEFAULT_UNROLL_N 4
3199 #define DGEMM_DEFAULT_UNROLL_M 2
3200 #define DGEMM_DEFAULT_UNROLL_N 2
3202 #define CGEMM_DEFAULT_UNROLL_M 2
3203 #define CGEMM_DEFAULT_UNROLL_N 2
3205 #define ZGEMM_DEFAULT_UNROLL_M 2
3206 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q values. */
3208 #define SGEMM_DEFAULT_P 128
3209 #define DGEMM_DEFAULT_P 128
3210 #define CGEMM_DEFAULT_P 96
3211 #define ZGEMM_DEFAULT_P 64
3213 #define SGEMM_DEFAULT_Q 240
3214 #define DGEMM_DEFAULT_Q 120
3215 #define CGEMM_DEFAULT_Q 120
3216 #define ZGEMM_DEFAULT_Q 120
/* R values. */
3218 #define SGEMM_DEFAULT_R 12288
3219 #define DGEMM_DEFAULT_R 8192
3220 #define CGEMM_DEFAULT_R 4096
3221 #define ZGEMM_DEFAULT_R 4096
/*
 * ARMv8 per-core chain, branch for THUNDERX2T99: UNROLL_M/UNROLL_N, P, Q and R
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 */
3223 #elif defined(THUNDERX2T99)
3225 #define SGEMM_DEFAULT_UNROLL_M 16
3226 #define SGEMM_DEFAULT_UNROLL_N 4
3228 #define DGEMM_DEFAULT_UNROLL_M 8
3229 #define DGEMM_DEFAULT_UNROLL_N 4
3231 #define CGEMM_DEFAULT_UNROLL_M 8
3232 #define CGEMM_DEFAULT_UNROLL_N 4
3234 #define ZGEMM_DEFAULT_UNROLL_M 4
3235 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P and Q values. */
3237 #define SGEMM_DEFAULT_P 128
3238 #define DGEMM_DEFAULT_P 160
3239 #define CGEMM_DEFAULT_P 128
3240 #define ZGEMM_DEFAULT_P 128
3242 #define SGEMM_DEFAULT_Q 352
3243 #define DGEMM_DEFAULT_Q 128
3244 #define CGEMM_DEFAULT_Q 224
3245 #define ZGEMM_DEFAULT_Q 112
/* R is 4096 for every precision. */
3247 #define SGEMM_DEFAULT_R 4096
3248 #define DGEMM_DEFAULT_R 4096
3249 #define CGEMM_DEFAULT_R 4096
3250 #define ZGEMM_DEFAULT_R 4096
/*
 * ARMv8 per-core chain, branch for THUNDERX3T110: UNROLL_M/UNROLL_N, P, Q and
 * R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 */
3252 #elif defined(THUNDERX3T110)
3254 #define SGEMM_DEFAULT_UNROLL_M 16
3255 #define SGEMM_DEFAULT_UNROLL_N 4
3257 #define DGEMM_DEFAULT_UNROLL_M 8
3258 #define DGEMM_DEFAULT_UNROLL_N 4
3260 #define CGEMM_DEFAULT_UNROLL_M 8
3261 #define CGEMM_DEFAULT_UNROLL_N 4
3263 #define ZGEMM_DEFAULT_UNROLL_M 4
3264 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P values; DGEMM uses 320 here. */
3266 #define SGEMM_DEFAULT_P 128
3267 #define DGEMM_DEFAULT_P 320
3268 #define CGEMM_DEFAULT_P 128
3269 #define ZGEMM_DEFAULT_P 128
/* Q values. */
3271 #define SGEMM_DEFAULT_Q 352
3272 #define DGEMM_DEFAULT_Q 128
3273 #define CGEMM_DEFAULT_Q 224
3274 #define ZGEMM_DEFAULT_Q 112
/* R is 4096 for every precision. */
3276 #define SGEMM_DEFAULT_R 4096
3277 #define DGEMM_DEFAULT_R 4096
3278 #define CGEMM_DEFAULT_R 4096
3279 #define ZGEMM_DEFAULT_R 4096
/*
 * ARMv8 per-core chain, branch for NEOVERSEN1: UNROLL_M/UNROLL_N, P, Q and R
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 */
3281 #elif defined(NEOVERSEN1)
3283 #define SGEMM_DEFAULT_UNROLL_M 16
3284 #define SGEMM_DEFAULT_UNROLL_N 4
3286 #define DGEMM_DEFAULT_UNROLL_M 8
3287 #define DGEMM_DEFAULT_UNROLL_N 4
3289 #define CGEMM_DEFAULT_UNROLL_M 8
3290 #define CGEMM_DEFAULT_UNROLL_N 4
3292 #define ZGEMM_DEFAULT_UNROLL_M 4
3293 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P and Q values. */
3295 #define SGEMM_DEFAULT_P 128
3296 #define DGEMM_DEFAULT_P 160
3297 #define CGEMM_DEFAULT_P 128
3298 #define ZGEMM_DEFAULT_P 128
3300 #define SGEMM_DEFAULT_Q 352
3301 #define DGEMM_DEFAULT_Q 128
3302 #define CGEMM_DEFAULT_Q 224
3303 #define ZGEMM_DEFAULT_Q 112
/* R is 4096 for every precision. */
3305 #define SGEMM_DEFAULT_R 4096
3306 #define DGEMM_DEFAULT_R 4096
3307 #define CGEMM_DEFAULT_R 4096
3308 #define ZGEMM_DEFAULT_R 4096
/*
 * ARMv8 per-core chain, branch for ARMV8SVE or A64FX. In addition to the usual
 * UNROLL_M/UNROLL_N, P, Q and R defaults this branch sets *_DEFAULT_UNROLL_MN
 * explicitly for S, D, C and Z (see the original remarks below for why).
 */
3310 #elif defined(ARMV8SVE) || defined(A64FX)
3312 /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3313 Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
3314 #define SGEMM_DEFAULT_UNROLL_M 4
3315 #define SGEMM_DEFAULT_UNROLL_N 8
3316 /* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
3317 * Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
3318 * If SVE size is ever more than 1024, this should be increased also. */
3319 #define SGEMM_DEFAULT_UNROLL_MN 32
3321 /* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3322 Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
3323 #define DGEMM_DEFAULT_UNROLL_M 2
3324 #define DGEMM_DEFAULT_UNROLL_N 8
/* NOTE(review): presumably set by hand for the same reason given for
 * SGEMM_DEFAULT_UNROLL_MN above -- confirm. */
3326 #define DGEMM_DEFAULT_UNROLL_MN 32
3328 #define CGEMM_DEFAULT_UNROLL_M 2
3329 #define CGEMM_DEFAULT_UNROLL_N 4
3330 #define CGEMM_DEFAULT_UNROLL_MN 16
3332 #define ZGEMM_DEFAULT_UNROLL_M 2
3333 #define ZGEMM_DEFAULT_UNROLL_N 4
3334 #define ZGEMM_DEFAULT_UNROLL_MN 16
/* P and Q values. */
3336 #define SGEMM_DEFAULT_P 128
3337 #define DGEMM_DEFAULT_P 160
3338 #define CGEMM_DEFAULT_P 128
3339 #define ZGEMM_DEFAULT_P 128
3341 #define SGEMM_DEFAULT_Q 352
3342 #define DGEMM_DEFAULT_Q 128
3343 #define CGEMM_DEFAULT_Q 224
3344 #define ZGEMM_DEFAULT_Q 112
/* R is 4096 for every precision. */
3346 #define SGEMM_DEFAULT_R 4096
3347 #define DGEMM_DEFAULT_R 4096
3348 #define CGEMM_DEFAULT_R 4096
3349 #define ZGEMM_DEFAULT_R 4096
/*
 * ARMv8 per-core chain, final #else branch: defaults used when none of the
 * earlier core macros in the chain is defined.
 * NOTE(review): the closing #endif of the chain is not visible in this excerpt.
 */
3351 #else /* Other/undetected ARMv8 cores */
3353 #define SGEMM_DEFAULT_UNROLL_M 16
3354 #define SGEMM_DEFAULT_UNROLL_N 4
3356 #define DGEMM_DEFAULT_UNROLL_M 4
3357 #define DGEMM_DEFAULT_UNROLL_N 8
3359 #define CGEMM_DEFAULT_UNROLL_M 8
3360 #define CGEMM_DEFAULT_UNROLL_N 4
3362 #define ZGEMM_DEFAULT_UNROLL_M 4
3363 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P and Q values. */
3365 #define SGEMM_DEFAULT_P 128
3366 #define DGEMM_DEFAULT_P 160
3367 #define CGEMM_DEFAULT_P 128
3368 #define ZGEMM_DEFAULT_P 128
3370 #define SGEMM_DEFAULT_Q 352
3371 #define DGEMM_DEFAULT_Q 128
3372 #define CGEMM_DEFAULT_Q 224
3373 #define ZGEMM_DEFAULT_Q 112
/* R is 4096 for every precision. */
3375 #define SGEMM_DEFAULT_R 4096
3376 #define DGEMM_DEFAULT_R 4096
3377 #define CGEMM_DEFAULT_R 4096
3378 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
3389 #define GEMM_DEFAULT_OFFSET_A 0
3390 #define GEMM_DEFAULT_OFFSET_B 0
3391 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors are 2x2 for every precision. */
3393 #define SGEMM_DEFAULT_UNROLL_M 2
3394 #define SGEMM_DEFAULT_UNROLL_N 2
3396 #define DGEMM_DEFAULT_UNROLL_M 2
3397 #define DGEMM_DEFAULT_UNROLL_N 2
3399 #define CGEMM_DEFAULT_UNROLL_M 2
3400 #define CGEMM_DEFAULT_UNROLL_N 2
3402 #define ZGEMM_DEFAULT_UNROLL_M 2
3403 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3405 #define SGEMM_DEFAULT_P 128
3406 #define DGEMM_DEFAULT_P 128
3407 #define CGEMM_DEFAULT_P 96
3408 #define ZGEMM_DEFAULT_P 64
3410 #define SGEMM_DEFAULT_Q 240
3411 #define DGEMM_DEFAULT_Q 120
3412 #define CGEMM_DEFAULT_Q 120
3413 #define ZGEMM_DEFAULT_Q 120
3415 #define SGEMM_DEFAULT_R 12288
3416 #define DGEMM_DEFAULT_R 8192
3417 #define CGEMM_DEFAULT_R 4096
3418 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
3430 #define GEMM_DEFAULT_OFFSET_A 0
3431 #define GEMM_DEFAULT_OFFSET_B 0
3432 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
3434 #define SGEMM_DEFAULT_UNROLL_M 4
3435 #define SGEMM_DEFAULT_UNROLL_N 4
3437 #define DGEMM_DEFAULT_UNROLL_M 4
3438 #define DGEMM_DEFAULT_UNROLL_N 4
3440 #define CGEMM_DEFAULT_UNROLL_M 2
3441 #define CGEMM_DEFAULT_UNROLL_N 2
3443 #define ZGEMM_DEFAULT_UNROLL_M 2
3444 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3446 #define SGEMM_DEFAULT_P 128
3447 #define DGEMM_DEFAULT_P 128
3448 #define CGEMM_DEFAULT_P 96
3449 #define ZGEMM_DEFAULT_P 64
3451 #define SGEMM_DEFAULT_Q 240
3452 #define DGEMM_DEFAULT_Q 120
3453 #define CGEMM_DEFAULT_Q 120
3454 #define ZGEMM_DEFAULT_Q 120
3456 #define SGEMM_DEFAULT_R 12288
3457 #define DGEMM_DEFAULT_R 8192
3458 #define CGEMM_DEFAULT_R 4096
3459 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
3471 #define GEMM_DEFAULT_OFFSET_A 0
3472 #define GEMM_DEFAULT_OFFSET_B 0
3473 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 4x4 for S and D, 2x2 for C and Z. */
3475 #define SGEMM_DEFAULT_UNROLL_M 4
3476 #define SGEMM_DEFAULT_UNROLL_N 4
3478 #define DGEMM_DEFAULT_UNROLL_M 4
3479 #define DGEMM_DEFAULT_UNROLL_N 4
3481 #define CGEMM_DEFAULT_UNROLL_M 2
3482 #define CGEMM_DEFAULT_UNROLL_N 2
3484 #define ZGEMM_DEFAULT_UNROLL_M 2
3485 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3487 #define SGEMM_DEFAULT_P 128
3488 #define DGEMM_DEFAULT_P 128
3489 #define CGEMM_DEFAULT_P 96
3490 #define ZGEMM_DEFAULT_P 64
3492 #define SGEMM_DEFAULT_Q 240
3493 #define DGEMM_DEFAULT_Q 120
3494 #define CGEMM_DEFAULT_Q 120
3495 #define ZGEMM_DEFAULT_Q 120
3497 #define SGEMM_DEFAULT_R 12288
3498 #define DGEMM_DEFAULT_R 8192
3499 #define CGEMM_DEFAULT_R 4096
3500 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults used when ZARCH_GENERIC is defined: OFFSET_A/B, ALIGN
 * (cast to BLASLONG) and the UNROLL_M/UNROLL_N, P, Q and R defaults for
 * SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the closing #endif is not visible in this excerpt.
 */
3508 #if defined(ZARCH_GENERIC)
3512 #define GEMM_DEFAULT_OFFSET_A 0
3513 #define GEMM_DEFAULT_OFFSET_B 0
3514 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors are 2x2 for every precision. */
3516 #define SGEMM_DEFAULT_UNROLL_M 2
3517 #define SGEMM_DEFAULT_UNROLL_N 2
3519 #define DGEMM_DEFAULT_UNROLL_M 2
3520 #define DGEMM_DEFAULT_UNROLL_N 2
3522 #define CGEMM_DEFAULT_UNROLL_M 2
3523 #define CGEMM_DEFAULT_UNROLL_N 2
3525 #define ZGEMM_DEFAULT_UNROLL_M 2
3526 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R values. */
3528 #define SGEMM_DEFAULT_P 128
3529 #define DGEMM_DEFAULT_P 128
3530 #define CGEMM_DEFAULT_P 96
3531 #define ZGEMM_DEFAULT_P 64
3533 #define SGEMM_DEFAULT_Q 240
3534 #define DGEMM_DEFAULT_Q 120
3535 #define CGEMM_DEFAULT_Q 120
3536 #define ZGEMM_DEFAULT_Q 120
3538 #define SGEMM_DEFAULT_R 12288
3539 #define DGEMM_DEFAULT_R 8192
3540 #define CGEMM_DEFAULT_R 4096
3541 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN (cast to BLASLONG) and the
 * UNROLL_M/UNROLL_N, P, Q and R defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
3551 #define GEMM_DEFAULT_OFFSET_A 0
3552 #define GEMM_DEFAULT_OFFSET_B 0
3553 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: 8x4 for S and D, 4x4 for C and Z. */
3555 #define SGEMM_DEFAULT_UNROLL_M 8
3556 #define SGEMM_DEFAULT_UNROLL_N 4
3558 #define DGEMM_DEFAULT_UNROLL_M 8
3559 #define DGEMM_DEFAULT_UNROLL_N 4
3561 #define CGEMM_DEFAULT_UNROLL_M 4
3562 #define CGEMM_DEFAULT_UNROLL_N 4
3564 #define ZGEMM_DEFAULT_UNROLL_M 4
3565 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P and Q values. */
3567 #define SGEMM_DEFAULT_P 456
3568 #define DGEMM_DEFAULT_P 320
3569 #define CGEMM_DEFAULT_P 480
3570 #define ZGEMM_DEFAULT_P 224
3572 #define SGEMM_DEFAULT_Q 488
3573 #define DGEMM_DEFAULT_Q 384
3574 #define CGEMM_DEFAULT_Q 128
3575 #define ZGEMM_DEFAULT_Q 352
/* R values. */
3577 #define SGEMM_DEFAULT_R 8192
3578 #define DGEMM_DEFAULT_R 4096
3579 #define CGEMM_DEFAULT_R 4096
3580 #define ZGEMM_DEFAULT_R 2048
/*
 * GEMM tuning defaults: OFFSET_A/B, ALIGN and the UNROLL_M/UNROLL_N, P, Q and R
 * defaults for SGEMM/DGEMM/CGEMM/ZGEMM.
 * NOTE(review): the guard selecting this group is not visible in this excerpt
 * -- confirm which target it belongs to.
 */
3591 #define GEMM_DEFAULT_OFFSET_A 0
3592 #define GEMM_DEFAULT_OFFSET_B 0
3593 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 16x4 for S, 8x4 for D, 4x4 for C and Z. */
3595 #define SGEMM_DEFAULT_UNROLL_M 16
3596 #define SGEMM_DEFAULT_UNROLL_N 4
3598 #define DGEMM_DEFAULT_UNROLL_M 8
3599 #define DGEMM_DEFAULT_UNROLL_N 4
3601 #define CGEMM_DEFAULT_UNROLL_M 4
3602 #define CGEMM_DEFAULT_UNROLL_N 4
3604 #define ZGEMM_DEFAULT_UNROLL_M 4
3605 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P and Q values. */
3607 #define SGEMM_DEFAULT_P 480
3608 #define DGEMM_DEFAULT_P 320
3609 #define CGEMM_DEFAULT_P 480
3610 #define ZGEMM_DEFAULT_P 224
3612 #define SGEMM_DEFAULT_Q 512
3613 #define DGEMM_DEFAULT_Q 384
3614 #define CGEMM_DEFAULT_Q 128
3615 #define ZGEMM_DEFAULT_Q 352
/* R values. */
3617 #define SGEMM_DEFAULT_R 8192
3618 #define DGEMM_DEFAULT_R 4096
3619 #define CGEMM_DEFAULT_R 4096
3620 #define ZGEMM_DEFAULT_R 2048
/*
 * GEMM tuning defaults covering the QGEMM and XGEMM variants as well as
 * S/D/C/Z: OFFSET_A/B, ALIGN (cast to BLASLONG), UNROLL_N, UNROLL_M, and two
 * alternative P/Q/R sets (numeric literals, then lower-case identifiers).
 * NOTE(review): the guard selecting this group and the conditionals inside it
 * are not visible in this excerpt -- confirm before editing.
 */
3633 #define GEMM_DEFAULT_OFFSET_A 0
3634 #define GEMM_DEFAULT_OFFSET_B 0
3635 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* UNROLL_N: 2 for every variant except XGEMM, which uses 1. */
3637 #define SGEMM_DEFAULT_UNROLL_N 2
3638 #define DGEMM_DEFAULT_UNROLL_N 2
3639 #define QGEMM_DEFAULT_UNROLL_N 2
3640 #define CGEMM_DEFAULT_UNROLL_N 2
3641 #define ZGEMM_DEFAULT_UNROLL_N 2
3642 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL_M: 2 for every variant except XGEMM, which uses 1. */
3645 #define SGEMM_DEFAULT_UNROLL_M 2
3646 #define DGEMM_DEFAULT_UNROLL_M 2
3647 #define QGEMM_DEFAULT_UNROLL_M 2
3648 #define CGEMM_DEFAULT_UNROLL_M 2
3649 #define ZGEMM_DEFAULT_UNROLL_M 2
3650 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): UNROLL_M is listed a second time with identical values;
 * presumably the two copies sit in different branches of a conditional that
 * is not visible here -- confirm. */
3652 #define SGEMM_DEFAULT_UNROLL_M 2
3653 #define DGEMM_DEFAULT_UNROLL_M 2
3654 #define QGEMM_DEFAULT_UNROLL_M 2
3655 #define CGEMM_DEFAULT_UNROLL_M 2
3656 #define ZGEMM_DEFAULT_UNROLL_M 2
3657 #define XGEMM_DEFAULT_UNROLL_M 1
/* First P/Q/R set: numeric literals for S, D, C and Z only. */
3661 #define SGEMM_DEFAULT_P 128
3662 #define DGEMM_DEFAULT_P 128
3663 #define CGEMM_DEFAULT_P 96
3664 #define ZGEMM_DEFAULT_P 64
3665 #define SGEMM_DEFAULT_Q 240
3666 #define DGEMM_DEFAULT_Q 120
3667 #define CGEMM_DEFAULT_Q 120
3668 #define ZGEMM_DEFAULT_Q 120
3669 #define SGEMM_DEFAULT_R 12288
3670 #define DGEMM_DEFAULT_R 8192
3671 #define CGEMM_DEFAULT_R 4096
3672 #define ZGEMM_DEFAULT_R 4096
/* NOTE(review): second P/R/Q set for the same macros (plus Q and X variants).
 * P and R expand to lower-case identifiers declared outside this excerpt;
 * Q is the literal 128. The conditional choosing between the two sets is
 * not visible here -- confirm. */
3674 #define SGEMM_DEFAULT_P sgemm_p
3675 #define DGEMM_DEFAULT_P dgemm_p
3676 #define QGEMM_DEFAULT_P qgemm_p
3677 #define CGEMM_DEFAULT_P cgemm_p
3678 #define ZGEMM_DEFAULT_P zgemm_p
3679 #define XGEMM_DEFAULT_P xgemm_p
3681 #define SGEMM_DEFAULT_R sgemm_r
3682 #define DGEMM_DEFAULT_R dgemm_r
3683 #define QGEMM_DEFAULT_R qgemm_r
3684 #define CGEMM_DEFAULT_R cgemm_r
3685 #define ZGEMM_DEFAULT_R zgemm_r
3686 #define XGEMM_DEFAULT_R xgemm_r
3688 #define SGEMM_DEFAULT_Q 128
3689 #define DGEMM_DEFAULT_Q 128
3690 #define QGEMM_DEFAULT_Q 128
3691 #define CGEMM_DEFAULT_Q 128
3692 #define ZGEMM_DEFAULT_Q 128
3693 #define XGEMM_DEFAULT_Q 128
/*
 * Fallbacks: give the QGEMM and XGEMM unroll macros the value 2 whenever no
 * earlier group has defined them.
 * NOTE(review): the #endif closing each #ifndef is not visible in this excerpt.
 */
3700 #ifndef QGEMM_DEFAULT_UNROLL_M
3701 #define QGEMM_DEFAULT_UNROLL_M 2
3704 #ifndef QGEMM_DEFAULT_UNROLL_N
3705 #define QGEMM_DEFAULT_UNROLL_N 2
3708 #ifndef XGEMM_DEFAULT_UNROLL_M
3709 #define XGEMM_DEFAULT_UNROLL_M 2
3712 #ifndef XGEMM_DEFAULT_UNROLL_N
3713 #define XGEMM_DEFAULT_UNROLL_N 2
/*
 * SHUFPD_0..SHUFPD_3 expand to an instruction mnemonic, an immediate and a
 * trailing comma, so the remaining operands are written at the use site.
 * Two alternative definitions are listed: one built on shufps with the
 * immediates 0x44/0x4e/0xe4/0xee and one built on shufpd with $0..$3.
 * Per the x86 SHUFPS/SHUFPD encodings, those shufps immediates select the same
 * 64-bit halves as shufpd $0..$3 respectively.
 * NOTE(review): the conditionals choosing between the two forms are not
 * visible in this excerpt -- confirm.
 */
3717 #define SHUFPD_0 shufps $0x44,
3718 #define SHUFPD_1 shufps $0x4e,
3719 #define SHUFPD_2 shufps $0xe4,
3720 #define SHUFPD_3 shufps $0xee,
/* shufpd-based forms of the same four macros. */
3724 #define SHUFPD_0 shufpd $0,
3728 #define SHUFPD_1 shufpd $1,
3732 #define SHUFPD_2 shufpd $2,
3736 #define SHUFPD_3 shufpd $3,
3740 #define SHUFPS_39 shufps $0x39,