1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following disclaimer. */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/* Compile-time defaults for the SBGEMM_* parameters: the N, M and MN unroll
 * factors and the P, R and Q values (all 256 here).
 * NOTE(review): no enclosing conditional is visible in this view; confirm
 * against the full header whether these apply unconditionally. */
76 #define SBGEMM_DEFAULT_UNROLL_N 4
77 #define SBGEMM_DEFAULT_UNROLL_M 8
78 #define SBGEMM_DEFAULT_UNROLL_MN 32
79 #define SBGEMM_DEFAULT_P 256
80 #define SBGEMM_DEFAULT_R 256
81 #define SBGEMM_DEFAULT_Q 256
/* GEMM tuning defaults for one target configuration: buffer offsets A/B, the
 * alignment value, per-prefix (S/D/Q/C/Z/X) unroll factors and the P/Q/R
 * parameters.
 * NOTE(review): the directive that selects this group is not visible in this
 * view, and some macros below are defined twice with different values --
 * presumably alternative branches of a conditional whose #if/#else/#endif
 * lines are missing here. Confirm against the full header. */
87 #define GEMM_DEFAULT_OFFSET_A 64
88 #define GEMM_DEFAULT_OFFSET_B 256
89 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
91 #define SGEMM_DEFAULT_UNROLL_N 4
92 #define DGEMM_DEFAULT_UNROLL_N 4
93 #define QGEMM_DEFAULT_UNROLL_N 2
94 #define CGEMM_DEFAULT_UNROLL_N 2
95 #define ZGEMM_DEFAULT_UNROLL_N 2
96 #define XGEMM_DEFAULT_UNROLL_N 1
/* First of two UNROLL_M sets (smaller values). */
99 #define SGEMM_DEFAULT_UNROLL_M 4
100 #define DGEMM_DEFAULT_UNROLL_M 2
101 #define QGEMM_DEFAULT_UNROLL_M 2
102 #define CGEMM_DEFAULT_UNROLL_M 2
103 #define ZGEMM_DEFAULT_UNROLL_M 1
104 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set (larger values); presumably the other branch. */
106 #define SGEMM_DEFAULT_UNROLL_M 8
107 #define DGEMM_DEFAULT_UNROLL_M 4
108 #define QGEMM_DEFAULT_UNROLL_M 2
109 #define CGEMM_DEFAULT_UNROLL_M 4
110 #define ZGEMM_DEFAULT_UNROLL_M 2
111 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than literals,
 * so their values come from whatever those names denote where the macro is
 * used. */
114 #define SGEMM_DEFAULT_P sgemm_p
115 #define DGEMM_DEFAULT_P dgemm_p
116 #define QGEMM_DEFAULT_P qgemm_p
117 #define CGEMM_DEFAULT_P cgemm_p
118 #define ZGEMM_DEFAULT_P zgemm_p
119 #define XGEMM_DEFAULT_P xgemm_p
121 #define SGEMM_DEFAULT_R sgemm_r
122 #define DGEMM_DEFAULT_R dgemm_r
123 #define QGEMM_DEFAULT_R qgemm_r
124 #define CGEMM_DEFAULT_R cgemm_r
125 #define ZGEMM_DEFAULT_R zgemm_r
126 #define XGEMM_DEFAULT_R xgemm_r
/* Two Q sets follow (248, then 240); the condition choosing between them is
 * not visible here. */
130 #define SGEMM_DEFAULT_Q 248
131 #define DGEMM_DEFAULT_Q 248
132 #define QGEMM_DEFAULT_Q 248
133 #define CGEMM_DEFAULT_Q 248
134 #define ZGEMM_DEFAULT_Q 248
135 #define XGEMM_DEFAULT_Q 248
139 #define SGEMM_DEFAULT_Q 240
140 #define DGEMM_DEFAULT_Q 240
141 #define QGEMM_DEFAULT_Q 240
142 #define CGEMM_DEFAULT_Q 240
143 #define ZGEMM_DEFAULT_Q 240
144 #define XGEMM_DEFAULT_Q 240
150 #define HAVE_EXCLUSIVE_CACHE
/* GEMM tuning defaults used when BARCELONA, SHANGHAI or BOBCAT is defined.
 * NOTE(review): the matching #endif and any inner #if/#else lines are not
 * visible in this view; the repeated UNROLL_M, P and Q sets below are
 * presumably alternative branches of such inner conditionals. Confirm against
 * the full header. */
154 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
159 #define GEMM_DEFAULT_OFFSET_A 64
160 #define GEMM_DEFAULT_OFFSET_B 832
161 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
163 #define SGEMM_DEFAULT_UNROLL_N 4
164 #define DGEMM_DEFAULT_UNROLL_N 4
165 #define QGEMM_DEFAULT_UNROLL_N 2
166 #define CGEMM_DEFAULT_UNROLL_N 2
167 #define ZGEMM_DEFAULT_UNROLL_N 2
168 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M set (smaller values). */
171 #define SGEMM_DEFAULT_UNROLL_M 4
172 #define DGEMM_DEFAULT_UNROLL_M 2
173 #define QGEMM_DEFAULT_UNROLL_M 2
174 #define CGEMM_DEFAULT_UNROLL_M 2
175 #define ZGEMM_DEFAULT_UNROLL_M 1
176 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set (larger values). */
178 #define SGEMM_DEFAULT_UNROLL_M 8
179 #define DGEMM_DEFAULT_UNROLL_M 4
180 #define QGEMM_DEFAULT_UNROLL_M 2
181 #define CGEMM_DEFAULT_UNROLL_M 4
182 #define ZGEMM_DEFAULT_UNROLL_M 2
183 #define XGEMM_DEFAULT_UNROLL_M 1
/* First P/Q pairing: P from 496 down to 62, Q fixed at 248. */
187 #define SGEMM_DEFAULT_P 496
188 #define DGEMM_DEFAULT_P 248
189 #define QGEMM_DEFAULT_P 124
190 #define CGEMM_DEFAULT_P 248
191 #define ZGEMM_DEFAULT_P 124
192 #define XGEMM_DEFAULT_P 62
194 #define SGEMM_DEFAULT_Q 248
195 #define DGEMM_DEFAULT_Q 248
196 #define QGEMM_DEFAULT_Q 248
197 #define CGEMM_DEFAULT_Q 248
198 #define ZGEMM_DEFAULT_Q 248
199 #define XGEMM_DEFAULT_Q 248
/* Second P/Q pairing: P from 448 down to 56, Q fixed at 224. */
203 #define SGEMM_DEFAULT_P 448
204 #define DGEMM_DEFAULT_P 224
205 #define QGEMM_DEFAULT_P 112
206 #define CGEMM_DEFAULT_P 224
207 #define ZGEMM_DEFAULT_P 112
208 #define XGEMM_DEFAULT_P 56
210 #define SGEMM_DEFAULT_Q 224
211 #define DGEMM_DEFAULT_Q 224
212 #define QGEMM_DEFAULT_Q 224
213 #define CGEMM_DEFAULT_Q 224
214 #define ZGEMM_DEFAULT_Q 224
215 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) rather than literal values. */
219 #define SGEMM_DEFAULT_R sgemm_r
220 #define QGEMM_DEFAULT_R qgemm_r
221 #define DGEMM_DEFAULT_R dgemm_r
222 #define CGEMM_DEFAULT_R cgemm_r
223 #define ZGEMM_DEFAULT_R zgemm_r
224 #define XGEMM_DEFAULT_R xgemm_r
227 #define HAVE_EXCLUSIVE_CACHE
229 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning defaults for one target configuration (offsets 64/832).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. S/D unroll factors and the S/D P and Q values appear twice with
 * different numbers; presumably these are the two arms of conditionals whose
 * #else/#endif lines are missing here. Confirm against the full header. */
239 #define GEMM_DEFAULT_OFFSET_A 64
240 #define GEMM_DEFAULT_OFFSET_B 832
241 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
245 #define QGEMM_DEFAULT_UNROLL_N 2
246 #define CGEMM_DEFAULT_UNROLL_N 2
247 #define ZGEMM_DEFAULT_UNROLL_N 2
248 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D use N=4 with small M values. */
251 #define SGEMM_DEFAULT_UNROLL_N 4
252 #define DGEMM_DEFAULT_UNROLL_N 4
253 #define SGEMM_DEFAULT_UNROLL_M 4
254 #define DGEMM_DEFAULT_UNROLL_M 2
255 #define QGEMM_DEFAULT_UNROLL_M 2
256 #define CGEMM_DEFAULT_UNROLL_M 2
257 #define ZGEMM_DEFAULT_UNROLL_M 1
258 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D use N=2 with M=16/8; also defines the 3M unroll
 * factors, DGEMM_DEFAULT_UNROLL_MN and GEMV_UNROLL. */
260 #define SGEMM_DEFAULT_UNROLL_N 2
261 #define DGEMM_DEFAULT_UNROLL_N 2
262 #define SGEMM_DEFAULT_UNROLL_M 16
263 #define DGEMM_DEFAULT_UNROLL_M 8
264 #define QGEMM_DEFAULT_UNROLL_M 2
265 #define CGEMM_DEFAULT_UNROLL_M 4
266 #define ZGEMM_DEFAULT_UNROLL_M 2
267 #define XGEMM_DEFAULT_UNROLL_M 1
268 #define CGEMM3M_DEFAULT_UNROLL_N 4
269 #define CGEMM3M_DEFAULT_UNROLL_M 8
270 #define ZGEMM3M_DEFAULT_UNROLL_N 4
271 #define ZGEMM3M_DEFAULT_UNROLL_M 4
273 #define DGEMM_DEFAULT_UNROLL_MN 16
274 #define GEMV_UNROLL 8
/* S/D P values when ARCH_X86_64 is defined; the second pair (448/224) is
 * presumably the #else arm, whose directive is not visible here. */
278 #if defined(ARCH_X86_64)
279 #define SGEMM_DEFAULT_P 768
280 #define DGEMM_DEFAULT_P 384
282 #define SGEMM_DEFAULT_P 448
283 #define DGEMM_DEFAULT_P 224
286 #define QGEMM_DEFAULT_P 112
287 #define CGEMM_DEFAULT_P 224
288 #define ZGEMM_DEFAULT_P 112
289 #define XGEMM_DEFAULT_P 56
/* Same pattern for S/D Q: 168 under ARCH_X86_64, then 224. */
291 #if defined(ARCH_X86_64)
292 #define SGEMM_DEFAULT_Q 168
293 #define DGEMM_DEFAULT_Q 168
295 #define SGEMM_DEFAULT_Q 224
296 #define DGEMM_DEFAULT_Q 224
299 #define QGEMM_DEFAULT_Q 224
300 #define CGEMM_DEFAULT_Q 224
301 #define ZGEMM_DEFAULT_Q 224
302 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants are fixed literals. */
304 #define CGEMM3M_DEFAULT_P 448
305 #define ZGEMM3M_DEFAULT_P 224
306 #define XGEMM3M_DEFAULT_P 112
307 #define CGEMM3M_DEFAULT_Q 224
308 #define ZGEMM3M_DEFAULT_Q 224
309 #define XGEMM3M_DEFAULT_Q 224
310 #define CGEMM3M_DEFAULT_R 12288
311 #define ZGEMM3M_DEFAULT_R 12288
312 #define XGEMM3M_DEFAULT_R 12288
/* R expands to identifiers (sgemm_r, ...) rather than literal values. */
314 #define SGEMM_DEFAULT_R sgemm_r
315 #define QGEMM_DEFAULT_R qgemm_r
316 #define DGEMM_DEFAULT_R dgemm_r
317 #define CGEMM_DEFAULT_R cgemm_r
318 #define ZGEMM_DEFAULT_R zgemm_r
319 #define XGEMM_DEFAULT_R xgemm_r
322 #define HAVE_EXCLUSIVE_CACHE
324 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning defaults for one target configuration (offsets 64/832).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. Unroll factors and the S/D/Z/C P and Q values appear twice with
 * different numbers; presumably these are the two arms of conditionals whose
 * #else/#endif lines are missing here. Confirm against the full header. */
332 #define GEMM_DEFAULT_OFFSET_A 64
333 #define GEMM_DEFAULT_OFFSET_B 832
334 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
338 #define QGEMM_DEFAULT_UNROLL_N 2
339 #define CGEMM_DEFAULT_UNROLL_N 2
340 #define ZGEMM_DEFAULT_UNROLL_N 2
341 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D use N=4 with small M values. */
344 #define SGEMM_DEFAULT_UNROLL_N 4
345 #define DGEMM_DEFAULT_UNROLL_N 4
346 #define SGEMM_DEFAULT_UNROLL_M 4
347 #define DGEMM_DEFAULT_UNROLL_M 2
348 #define QGEMM_DEFAULT_UNROLL_M 2
349 #define CGEMM_DEFAULT_UNROLL_M 2
350 #define ZGEMM_DEFAULT_UNROLL_M 1
351 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D use N=2 with M=16/8; also defines the 3M unroll
 * factors and GEMV_UNROLL. */
353 #define SGEMM_DEFAULT_UNROLL_N 2
354 #define DGEMM_DEFAULT_UNROLL_N 2
355 #define SGEMM_DEFAULT_UNROLL_M 16
356 #define DGEMM_DEFAULT_UNROLL_M 8
357 #define QGEMM_DEFAULT_UNROLL_M 2
358 #define CGEMM_DEFAULT_UNROLL_M 4
359 #define ZGEMM_DEFAULT_UNROLL_M 2
360 #define XGEMM_DEFAULT_UNROLL_M 1
361 #define CGEMM3M_DEFAULT_UNROLL_N 4
362 #define CGEMM3M_DEFAULT_UNROLL_M 8
363 #define ZGEMM3M_DEFAULT_UNROLL_N 4
364 #define ZGEMM3M_DEFAULT_UNROLL_M 4
365 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined; the second set is presumably
 * the #else arm, whose directive is not visible here. */
368 #if defined(ARCH_X86_64)
369 #define SGEMM_DEFAULT_P 768
370 #define DGEMM_DEFAULT_P 768
371 #define ZGEMM_DEFAULT_P 384
372 #define CGEMM_DEFAULT_P 768
374 #define SGEMM_DEFAULT_P 448
375 #define DGEMM_DEFAULT_P 480
376 #define ZGEMM_DEFAULT_P 112
377 #define CGEMM_DEFAULT_P 224
379 #define QGEMM_DEFAULT_P 112
380 #define XGEMM_DEFAULT_P 56
/* Same pattern for Q: ARCH_X86_64 values first, then a uniform 224. */
382 #if defined(ARCH_X86_64)
383 #define SGEMM_DEFAULT_Q 192
384 #define DGEMM_DEFAULT_Q 168
385 #define ZGEMM_DEFAULT_Q 168
386 #define CGEMM_DEFAULT_Q 168
388 #define SGEMM_DEFAULT_Q 224
389 #define DGEMM_DEFAULT_Q 224
390 #define ZGEMM_DEFAULT_Q 224
391 #define CGEMM_DEFAULT_Q 224
393 #define QGEMM_DEFAULT_Q 224
394 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants are fixed literals. */
396 #define CGEMM3M_DEFAULT_P 448
397 #define ZGEMM3M_DEFAULT_P 224
398 #define XGEMM3M_DEFAULT_P 112
399 #define CGEMM3M_DEFAULT_Q 224
400 #define ZGEMM3M_DEFAULT_Q 224
401 #define XGEMM3M_DEFAULT_Q 224
402 #define CGEMM3M_DEFAULT_R 12288
403 #define ZGEMM3M_DEFAULT_R 12288
404 #define XGEMM3M_DEFAULT_R 12288
/* S and D use a literal R (12288); Q/C/Z/X expand to identifiers instead. */
406 #define SGEMM_DEFAULT_R 12288
407 #define QGEMM_DEFAULT_R qgemm_r
408 #define DGEMM_DEFAULT_R 12288
409 #define CGEMM_DEFAULT_R cgemm_r
410 #define ZGEMM_DEFAULT_R zgemm_r
411 #define XGEMM_DEFAULT_R xgemm_r
414 #define HAVE_EXCLUSIVE_CACHE
416 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning defaults for one target configuration (offsets 64/832).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. Unroll factors and the S/D/Z/C P and Q values appear twice with
 * different numbers; presumably these are the two arms of conditionals whose
 * #else/#endif lines are missing here. Confirm against the full header. */
424 #define GEMM_DEFAULT_OFFSET_A 64
425 #define GEMM_DEFAULT_OFFSET_B 832
426 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
430 #define QGEMM_DEFAULT_UNROLL_N 2
431 #define CGEMM_DEFAULT_UNROLL_N 2
432 #define ZGEMM_DEFAULT_UNROLL_N 2
433 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D use N=4 with small M values. */
436 #define SGEMM_DEFAULT_UNROLL_N 4
437 #define DGEMM_DEFAULT_UNROLL_N 4
438 #define SGEMM_DEFAULT_UNROLL_M 4
439 #define DGEMM_DEFAULT_UNROLL_M 2
440 #define QGEMM_DEFAULT_UNROLL_M 2
441 #define CGEMM_DEFAULT_UNROLL_M 2
442 #define ZGEMM_DEFAULT_UNROLL_M 1
443 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D use N=2 with M=16/8; also defines the 3M unroll
 * factors and GEMV_UNROLL. */
445 #define SGEMM_DEFAULT_UNROLL_N 2
446 #define DGEMM_DEFAULT_UNROLL_N 2
447 #define SGEMM_DEFAULT_UNROLL_M 16
448 #define DGEMM_DEFAULT_UNROLL_M 8
449 #define QGEMM_DEFAULT_UNROLL_M 2
450 #define CGEMM_DEFAULT_UNROLL_M 4
451 #define ZGEMM_DEFAULT_UNROLL_M 2
452 #define XGEMM_DEFAULT_UNROLL_M 1
453 #define CGEMM3M_DEFAULT_UNROLL_N 4
454 #define CGEMM3M_DEFAULT_UNROLL_M 8
455 #define ZGEMM3M_DEFAULT_UNROLL_N 4
456 #define ZGEMM3M_DEFAULT_UNROLL_M 4
457 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined; the second set is presumably
 * the #else arm, whose directive is not visible here. */
460 #if defined(ARCH_X86_64)
461 #define SGEMM_DEFAULT_P 768
462 #define DGEMM_DEFAULT_P 576
463 #define ZGEMM_DEFAULT_P 288
464 #define CGEMM_DEFAULT_P 576
466 #define SGEMM_DEFAULT_P 448
467 #define DGEMM_DEFAULT_P 480
468 #define ZGEMM_DEFAULT_P 112
469 #define CGEMM_DEFAULT_P 224
471 #define QGEMM_DEFAULT_P 112
472 #define XGEMM_DEFAULT_P 56
/* Same pattern for Q: ARCH_X86_64 values first, then a uniform 224. */
474 #if defined(ARCH_X86_64)
475 #define SGEMM_DEFAULT_Q 192
476 #define DGEMM_DEFAULT_Q 160
477 #define ZGEMM_DEFAULT_Q 160
478 #define CGEMM_DEFAULT_Q 160
480 #define SGEMM_DEFAULT_Q 224
481 #define DGEMM_DEFAULT_Q 224
482 #define ZGEMM_DEFAULT_Q 224
483 #define CGEMM_DEFAULT_Q 224
485 #define QGEMM_DEFAULT_Q 224
486 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants are fixed literals. */
488 #define CGEMM3M_DEFAULT_P 448
489 #define ZGEMM3M_DEFAULT_P 224
490 #define XGEMM3M_DEFAULT_P 112
491 #define CGEMM3M_DEFAULT_Q 224
492 #define ZGEMM3M_DEFAULT_Q 224
493 #define XGEMM3M_DEFAULT_Q 224
494 #define CGEMM3M_DEFAULT_R 12288
495 #define ZGEMM3M_DEFAULT_R 12288
496 #define XGEMM3M_DEFAULT_R 12288
/* S and D use a literal R (12288); Q/C/Z/X expand to identifiers instead. */
498 #define SGEMM_DEFAULT_R 12288
499 #define QGEMM_DEFAULT_R qgemm_r
500 #define DGEMM_DEFAULT_R 12288
501 #define CGEMM_DEFAULT_R cgemm_r
502 #define ZGEMM_DEFAULT_R zgemm_r
503 #define XGEMM_DEFAULT_R xgemm_r
506 #define HAVE_EXCLUSIVE_CACHE
508 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning defaults for one target configuration (offsets 64/832).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. Unroll factors and the S/D/Z/C P and Q values appear twice with
 * different numbers; presumably these are the two arms of conditionals whose
 * #else/#endif lines are missing here. Confirm against the full header. */
517 #define GEMM_DEFAULT_OFFSET_A 64
518 #define GEMM_DEFAULT_OFFSET_B 832
519 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
523 #define QGEMM_DEFAULT_UNROLL_N 2
524 #define CGEMM_DEFAULT_UNROLL_N 2
525 #define ZGEMM_DEFAULT_UNROLL_N 2
526 #define XGEMM_DEFAULT_UNROLL_N 1
/* First unroll set: S/D use N=4 with small M values. */
529 #define SGEMM_DEFAULT_UNROLL_N 4
530 #define DGEMM_DEFAULT_UNROLL_N 4
531 #define SGEMM_DEFAULT_UNROLL_M 4
532 #define DGEMM_DEFAULT_UNROLL_M 2
533 #define QGEMM_DEFAULT_UNROLL_M 2
534 #define CGEMM_DEFAULT_UNROLL_M 2
535 #define ZGEMM_DEFAULT_UNROLL_M 1
536 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set: S/D use N=2 with M=16/8; also defines the 3M unroll
 * factors and GEMV_UNROLL. */
538 #define SGEMM_DEFAULT_UNROLL_N 2
539 #define DGEMM_DEFAULT_UNROLL_N 2
540 #define SGEMM_DEFAULT_UNROLL_M 16
541 #define DGEMM_DEFAULT_UNROLL_M 8
542 #define QGEMM_DEFAULT_UNROLL_M 2
543 #define CGEMM_DEFAULT_UNROLL_M 4
544 #define ZGEMM_DEFAULT_UNROLL_M 2
545 #define XGEMM_DEFAULT_UNROLL_M 1
546 #define CGEMM3M_DEFAULT_UNROLL_N 4
547 #define CGEMM3M_DEFAULT_UNROLL_M 8
548 #define ZGEMM3M_DEFAULT_UNROLL_N 4
549 #define ZGEMM3M_DEFAULT_UNROLL_M 4
550 #define GEMV_UNROLL 8
/* S/D/Z/C P values when ARCH_X86_64 is defined; the second set is presumably
 * the #else arm, whose directive is not visible here. */
553 #if defined(ARCH_X86_64)
554 #define SGEMM_DEFAULT_P 768
555 #define DGEMM_DEFAULT_P 576
556 #define ZGEMM_DEFAULT_P 288
557 #define CGEMM_DEFAULT_P 576
559 #define SGEMM_DEFAULT_P 448
560 #define DGEMM_DEFAULT_P 480
561 #define ZGEMM_DEFAULT_P 112
562 #define CGEMM_DEFAULT_P 224
564 #define QGEMM_DEFAULT_P 112
565 #define XGEMM_DEFAULT_P 56
/* Same pattern for Q: ARCH_X86_64 values first, then a uniform 224. */
567 #if defined(ARCH_X86_64)
568 #define SGEMM_DEFAULT_Q 192
569 #define DGEMM_DEFAULT_Q 160
570 #define ZGEMM_DEFAULT_Q 160
571 #define CGEMM_DEFAULT_Q 160
573 #define SGEMM_DEFAULT_Q 224
574 #define DGEMM_DEFAULT_Q 224
575 #define ZGEMM_DEFAULT_Q 224
576 #define CGEMM_DEFAULT_Q 224
578 #define QGEMM_DEFAULT_Q 224
579 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants are fixed literals. */
581 #define CGEMM3M_DEFAULT_P 448
582 #define ZGEMM3M_DEFAULT_P 224
583 #define XGEMM3M_DEFAULT_P 112
584 #define CGEMM3M_DEFAULT_Q 224
585 #define ZGEMM3M_DEFAULT_Q 224
586 #define XGEMM3M_DEFAULT_Q 224
587 #define CGEMM3M_DEFAULT_R 12288
588 #define ZGEMM3M_DEFAULT_R 12288
589 #define XGEMM3M_DEFAULT_R 12288
/* S and D use a literal R (12288); Q/C/Z/X expand to identifiers instead. */
591 #define SGEMM_DEFAULT_R 12288
592 #define QGEMM_DEFAULT_R qgemm_r
593 #define DGEMM_DEFAULT_R 12288
594 #define CGEMM_DEFAULT_R cgemm_r
595 #define ZGEMM_DEFAULT_R zgemm_r
596 #define XGEMM_DEFAULT_R xgemm_r
599 #define HAVE_EXCLUSIVE_CACHE
601 #define GEMM_THREAD gemm_thread_mn
/* GEMM tuning defaults for one target configuration (offsets 0/0,
 * SWITCH_RATIO 16).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. The unroll factors and the P/Q/R values each appear in two sets with
 * different numbers; presumably these are the two arms of a conditional whose
 * #if/#else/#endif lines are missing here. Confirm against the full header. */
609 #define GEMM_DEFAULT_OFFSET_A 0
610 #define GEMM_DEFAULT_OFFSET_B 0
611 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
615 #define SWITCH_RATIO 16
/* First unroll set (smaller M values, DGEMM N=4). */
619 #define SGEMM_DEFAULT_UNROLL_M 4
620 #define DGEMM_DEFAULT_UNROLL_M 2
621 #define QGEMM_DEFAULT_UNROLL_M 2
622 #define CGEMM_DEFAULT_UNROLL_M 2
623 #define ZGEMM_DEFAULT_UNROLL_M 1
624 #define XGEMM_DEFAULT_UNROLL_M 1
626 #define SGEMM_DEFAULT_UNROLL_N 4
627 #define DGEMM_DEFAULT_UNROLL_N 4
628 #define QGEMM_DEFAULT_UNROLL_N 2
629 #define CGEMM_DEFAULT_UNROLL_N 2
630 #define ZGEMM_DEFAULT_UNROLL_N 2
631 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set (larger M values, DGEMM N=8) plus S/D UNROLL_MN. */
635 #define SGEMM_DEFAULT_UNROLL_M 8
636 #define DGEMM_DEFAULT_UNROLL_M 4
637 #define QGEMM_DEFAULT_UNROLL_M 2
638 #define CGEMM_DEFAULT_UNROLL_M 8
639 #define ZGEMM_DEFAULT_UNROLL_M 4
640 #define XGEMM_DEFAULT_UNROLL_M 1
642 #define SGEMM_DEFAULT_UNROLL_N 4
643 #define DGEMM_DEFAULT_UNROLL_N 8
644 #define QGEMM_DEFAULT_UNROLL_N 2
645 #define CGEMM_DEFAULT_UNROLL_N 2
646 #define ZGEMM_DEFAULT_UNROLL_N 2
647 #define XGEMM_DEFAULT_UNROLL_N 1
649 #define SGEMM_DEFAULT_UNROLL_MN 32
650 #define DGEMM_DEFAULT_UNROLL_MN 32
/* First P/R/Q set: P and Q are literals; R is a literal only for CGEMM and
 * expands to an identifier (sgemm_r, ...) for the others. */
656 #define SGEMM_DEFAULT_P 512
657 #define SGEMM_DEFAULT_R sgemm_r
658 #define DGEMM_DEFAULT_P 512
659 #define DGEMM_DEFAULT_R dgemm_r
660 #define QGEMM_DEFAULT_P 504
661 #define QGEMM_DEFAULT_R qgemm_r
662 #define CGEMM_DEFAULT_P 128
663 #define CGEMM_DEFAULT_R 1024
664 #define ZGEMM_DEFAULT_P 512
665 #define ZGEMM_DEFAULT_R zgemm_r
666 #define XGEMM_DEFAULT_P 252
667 #define XGEMM_DEFAULT_R xgemm_r
668 #define SGEMM_DEFAULT_Q 256
669 #define DGEMM_DEFAULT_Q 256
670 #define QGEMM_DEFAULT_Q 128
671 #define CGEMM_DEFAULT_Q 256
672 #define ZGEMM_DEFAULT_Q 192
673 #define XGEMM_DEFAULT_Q 128
/* Second P/Q/R set. S/D Q is itself given twice (320/128, then 320/256);
 * the condition choosing between those is not visible here. */
677 #define SGEMM_DEFAULT_P 320
678 #define DGEMM_DEFAULT_P 512
679 #define CGEMM_DEFAULT_P 256
680 #define ZGEMM_DEFAULT_P 192
683 #define SGEMM_DEFAULT_Q 320
684 #define DGEMM_DEFAULT_Q 128
686 #define SGEMM_DEFAULT_Q 320
687 #define DGEMM_DEFAULT_Q 256
689 #define CGEMM_DEFAULT_Q 256
690 #define ZGEMM_DEFAULT_Q 192
692 #define SGEMM_DEFAULT_R sgemm_r
693 #define DGEMM_DEFAULT_R 13824
694 #define CGEMM_DEFAULT_R cgemm_r
695 #define ZGEMM_DEFAULT_R zgemm_r
697 #define QGEMM_DEFAULT_Q 128
698 #define QGEMM_DEFAULT_P 504
699 #define QGEMM_DEFAULT_R qgemm_r
700 #define XGEMM_DEFAULT_P 252
701 #define XGEMM_DEFAULT_R xgemm_r
702 #define XGEMM_DEFAULT_Q 128
/* Unroll factors and P/Q/R for the 3M variants, all fixed literals. */
704 #define CGEMM3M_DEFAULT_UNROLL_N 4
705 #define CGEMM3M_DEFAULT_UNROLL_M 8
706 #define ZGEMM3M_DEFAULT_UNROLL_N 4
707 #define ZGEMM3M_DEFAULT_UNROLL_M 4
709 #define CGEMM3M_DEFAULT_P 320
710 #define ZGEMM3M_DEFAULT_P 256
711 #define XGEMM3M_DEFAULT_P 112
712 #define CGEMM3M_DEFAULT_Q 320
713 #define ZGEMM3M_DEFAULT_Q 256
714 #define XGEMM3M_DEFAULT_Q 224
715 #define CGEMM3M_DEFAULT_R 12288
716 #define ZGEMM3M_DEFAULT_R 12288
717 #define XGEMM3M_DEFAULT_R 12288
/* GEMM tuning defaults for one target configuration (offsets 0/384): unroll
 * factors, R as identifiers (sgemm_r, ...), and literal P and Q values, with
 * Q fixed at 208 for every prefix.
 * NOTE(review): the directive selecting this group is not visible in this
 * view; confirm the target against the full header. */
728 #define GEMM_DEFAULT_OFFSET_A 0
729 #define GEMM_DEFAULT_OFFSET_B 384
730 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
732 #define SGEMM_DEFAULT_UNROLL_N 4
733 #define DGEMM_DEFAULT_UNROLL_N 4
734 #define QGEMM_DEFAULT_UNROLL_N 2
735 #define CGEMM_DEFAULT_UNROLL_N 2
736 #define ZGEMM_DEFAULT_UNROLL_N 2
737 #define XGEMM_DEFAULT_UNROLL_N 1
739 #define SGEMM_DEFAULT_UNROLL_M 2
740 #define DGEMM_DEFAULT_UNROLL_M 1
741 #define QGEMM_DEFAULT_UNROLL_M 2
742 #define CGEMM_DEFAULT_UNROLL_M 1
743 #define ZGEMM_DEFAULT_UNROLL_M 1
744 #define XGEMM_DEFAULT_UNROLL_M 1
746 #define SGEMM_DEFAULT_R sgemm_r
747 #define DGEMM_DEFAULT_R dgemm_r
748 #define QGEMM_DEFAULT_R qgemm_r
749 #define CGEMM_DEFAULT_R cgemm_r
750 #define ZGEMM_DEFAULT_R zgemm_r
751 #define XGEMM_DEFAULT_R xgemm_r
753 #define SGEMM_DEFAULT_P 208
754 #define DGEMM_DEFAULT_P 104
755 #define QGEMM_DEFAULT_P 56
756 #define CGEMM_DEFAULT_P 104
757 #define ZGEMM_DEFAULT_P 56
758 #define XGEMM_DEFAULT_P 28
760 #define SGEMM_DEFAULT_Q 208
761 #define DGEMM_DEFAULT_Q 208
762 #define QGEMM_DEFAULT_Q 208
763 #define CGEMM_DEFAULT_Q 208
764 #define ZGEMM_DEFAULT_Q 208
765 #define XGEMM_DEFAULT_Q 208
768 #define HAVE_EXCLUSIVE_CACHE
/* GEMM tuning defaults for one target configuration (offsets 0/256): unroll
 * factors, R as identifiers (sgemm_r, ...), P fixed at 128 for every prefix,
 * and Q ranging from 512 (SGEMM) down to 128 (ZGEMM/XGEMM).
 * NOTE(review): the directive selecting this group is not visible in this
 * view; confirm the target against the full header. */
776 #define GEMM_DEFAULT_OFFSET_A 0
777 #define GEMM_DEFAULT_OFFSET_B 256
778 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
780 #define SGEMM_DEFAULT_UNROLL_N 4
781 #define DGEMM_DEFAULT_UNROLL_N 4
782 #define QGEMM_DEFAULT_UNROLL_N 2
783 #define CGEMM_DEFAULT_UNROLL_N 2
784 #define ZGEMM_DEFAULT_UNROLL_N 2
785 #define XGEMM_DEFAULT_UNROLL_N 1
787 #define SGEMM_DEFAULT_UNROLL_M 2
788 #define DGEMM_DEFAULT_UNROLL_M 1
789 #define QGEMM_DEFAULT_UNROLL_M 2
790 #define CGEMM_DEFAULT_UNROLL_M 1
791 #define ZGEMM_DEFAULT_UNROLL_M 1
792 #define XGEMM_DEFAULT_UNROLL_M 1
794 #define SGEMM_DEFAULT_R sgemm_r
795 #define DGEMM_DEFAULT_R dgemm_r
796 #define QGEMM_DEFAULT_R qgemm_r
797 #define CGEMM_DEFAULT_R cgemm_r
798 #define ZGEMM_DEFAULT_R zgemm_r
799 #define XGEMM_DEFAULT_R xgemm_r
801 #define SGEMM_DEFAULT_P 128
802 #define DGEMM_DEFAULT_P 128
803 #define QGEMM_DEFAULT_P 128
804 #define CGEMM_DEFAULT_P 128
805 #define ZGEMM_DEFAULT_P 128
806 #define XGEMM_DEFAULT_P 128
808 #define SGEMM_DEFAULT_Q 512
809 #define DGEMM_DEFAULT_Q 256
810 #define QGEMM_DEFAULT_Q 256
811 #define CGEMM_DEFAULT_Q 256
812 #define ZGEMM_DEFAULT_Q 128
813 #define XGEMM_DEFAULT_Q 128
/* GEMM tuning defaults for one target configuration (offsets 64/256).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. The N and M unroll factors are each given twice with different
 * values; presumably these are the two arms of a conditional whose
 * #if/#else/#endif lines are missing here. Confirm against the full header. */
823 #define GEMM_DEFAULT_OFFSET_A 64
824 #define GEMM_DEFAULT_OFFSET_B 256
825 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
/* First unroll set. */
828 #define SGEMM_DEFAULT_UNROLL_N 4
829 #define DGEMM_DEFAULT_UNROLL_N 4
830 #define QGEMM_DEFAULT_UNROLL_N 2
831 #define CGEMM_DEFAULT_UNROLL_N 2
832 #define ZGEMM_DEFAULT_UNROLL_N 2
833 #define XGEMM_DEFAULT_UNROLL_N 1
835 #define SGEMM_DEFAULT_UNROLL_M 4
836 #define DGEMM_DEFAULT_UNROLL_M 2
837 #define QGEMM_DEFAULT_UNROLL_M 2
838 #define CGEMM_DEFAULT_UNROLL_M 2
839 #define ZGEMM_DEFAULT_UNROLL_M 1
840 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll set (SGEMM N=8, CGEMM N=4, larger D/Z M values). */
842 #define SGEMM_DEFAULT_UNROLL_N 8
843 #define DGEMM_DEFAULT_UNROLL_N 4
844 #define QGEMM_DEFAULT_UNROLL_N 2
845 #define CGEMM_DEFAULT_UNROLL_N 4
846 #define ZGEMM_DEFAULT_UNROLL_N 2
847 #define XGEMM_DEFAULT_UNROLL_N 1
849 #define SGEMM_DEFAULT_UNROLL_M 4
850 #define DGEMM_DEFAULT_UNROLL_M 4
851 #define QGEMM_DEFAULT_UNROLL_M 2
852 #define CGEMM_DEFAULT_UNROLL_M 2
853 #define ZGEMM_DEFAULT_UNROLL_M 2
854 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every prefix; R expands to identifiers; Q halves from 256
 * (SGEMM) down to 32 (XGEMM). */
857 #define SGEMM_DEFAULT_P 288
858 #define DGEMM_DEFAULT_P 288
859 #define QGEMM_DEFAULT_P 288
860 #define CGEMM_DEFAULT_P 288
861 #define ZGEMM_DEFAULT_P 288
862 #define XGEMM_DEFAULT_P 288
864 #define SGEMM_DEFAULT_R sgemm_r
865 #define DGEMM_DEFAULT_R dgemm_r
866 #define QGEMM_DEFAULT_R qgemm_r
867 #define CGEMM_DEFAULT_R cgemm_r
868 #define ZGEMM_DEFAULT_R zgemm_r
869 #define XGEMM_DEFAULT_R xgemm_r
871 #define SGEMM_DEFAULT_Q 256
872 #define DGEMM_DEFAULT_Q 128
873 #define QGEMM_DEFAULT_Q 64
874 #define CGEMM_DEFAULT_Q 128
875 #define ZGEMM_DEFAULT_Q 64
876 #define XGEMM_DEFAULT_Q 32
879 #define HAVE_EXCLUSIVE_CACHE
/* GEMM tuning defaults used when PENTIUM, PENTIUM2 or PENTIUM3 is defined.
 * NOTE(review): the matching #endif and any inner #if/#else lines are not
 * visible in this view; SGEMM/CGEMM UNROLL_M are given twice (8/4, then 4/2),
 * presumably as the two arms of such an inner conditional. Confirm against
 * the full header. */
883 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
892 #define GEMM_DEFAULT_OFFSET_A 0
893 #define GEMM_DEFAULT_OFFSET_B 0
894 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
897 #define SGEMM_DEFAULT_UNROLL_M 8
898 #define CGEMM_DEFAULT_UNROLL_M 4
900 #define SGEMM_DEFAULT_UNROLL_M 4
901 #define CGEMM_DEFAULT_UNROLL_M 2
903 #define DGEMM_DEFAULT_UNROLL_M 2
904 #define SGEMM_DEFAULT_UNROLL_N 2
905 #define DGEMM_DEFAULT_UNROLL_N 2
906 #define QGEMM_DEFAULT_UNROLL_M 2
907 #define QGEMM_DEFAULT_UNROLL_N 2
908 #define CGEMM_DEFAULT_UNROLL_N 1
909 #define ZGEMM_DEFAULT_UNROLL_M 1
910 #define ZGEMM_DEFAULT_UNROLL_N 1
911 #define XGEMM_DEFAULT_UNROLL_M 1
912 #define XGEMM_DEFAULT_UNROLL_N 1
/* For every prefix: P and R expand to identifiers (sgemm_p, sgemm_r, ...),
 * while Q is the literal 256. */
914 #define SGEMM_DEFAULT_P sgemm_p
915 #define SGEMM_DEFAULT_Q 256
916 #define SGEMM_DEFAULT_R sgemm_r
918 #define DGEMM_DEFAULT_P dgemm_p
919 #define DGEMM_DEFAULT_Q 256
920 #define DGEMM_DEFAULT_R dgemm_r
922 #define QGEMM_DEFAULT_P qgemm_p
923 #define QGEMM_DEFAULT_Q 256
924 #define QGEMM_DEFAULT_R qgemm_r
926 #define CGEMM_DEFAULT_P cgemm_p
927 #define CGEMM_DEFAULT_Q 256
928 #define CGEMM_DEFAULT_R cgemm_r
930 #define ZGEMM_DEFAULT_P zgemm_p
931 #define ZGEMM_DEFAULT_Q 256
932 #define ZGEMM_DEFAULT_R zgemm_r
934 #define XGEMM_DEFAULT_P xgemm_p
935 #define XGEMM_DEFAULT_Q 256
936 #define XGEMM_DEFAULT_R xgemm_r
/* GEMM tuning defaults for one target configuration (offsets 0/0).
 * NOTE(review): the directive selecting this group is not visible in this
 * view. The interleaved M/N unroll factors are given twice with different
 * values; presumably these are the two arms of a conditional whose
 * #if/#else/#endif lines are missing here. Confirm against the full header. */
947 #define GEMM_DEFAULT_OFFSET_A 0
948 #define GEMM_DEFAULT_OFFSET_B 0
949 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* First unroll set. */
952 #define SGEMM_DEFAULT_UNROLL_M 4
953 #define SGEMM_DEFAULT_UNROLL_N 4
954 #define DGEMM_DEFAULT_UNROLL_M 2
955 #define DGEMM_DEFAULT_UNROLL_N 4
956 #define QGEMM_DEFAULT_UNROLL_M 2
957 #define QGEMM_DEFAULT_UNROLL_N 2
958 #define CGEMM_DEFAULT_UNROLL_M 2
959 #define CGEMM_DEFAULT_UNROLL_N 2
960 #define ZGEMM_DEFAULT_UNROLL_M 1
961 #define ZGEMM_DEFAULT_UNROLL_N 2
962 #define XGEMM_DEFAULT_UNROLL_M 1
963 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set (larger S/C M values, smaller N values). */
965 #define SGEMM_DEFAULT_UNROLL_M 8
966 #define SGEMM_DEFAULT_UNROLL_N 2
967 #define DGEMM_DEFAULT_UNROLL_M 2
968 #define DGEMM_DEFAULT_UNROLL_N 2
969 #define QGEMM_DEFAULT_UNROLL_M 2
970 #define QGEMM_DEFAULT_UNROLL_N 2
971 #define CGEMM_DEFAULT_UNROLL_M 4
972 #define CGEMM_DEFAULT_UNROLL_N 1
973 #define ZGEMM_DEFAULT_UNROLL_M 1
974 #define ZGEMM_DEFAULT_UNROLL_N 1
975 #define XGEMM_DEFAULT_UNROLL_M 1
976 #define XGEMM_DEFAULT_UNROLL_N 1
/* For every prefix: P and R expand to identifiers (sgemm_p, sgemm_r, ...),
 * while Q is the literal 256. */
980 #define SGEMM_DEFAULT_P sgemm_p
981 #define SGEMM_DEFAULT_Q 256
982 #define SGEMM_DEFAULT_R sgemm_r
984 #define DGEMM_DEFAULT_P dgemm_p
985 #define DGEMM_DEFAULT_Q 256
986 #define DGEMM_DEFAULT_R dgemm_r
988 #define QGEMM_DEFAULT_P qgemm_p
989 #define QGEMM_DEFAULT_Q 256
990 #define QGEMM_DEFAULT_R qgemm_r
992 #define CGEMM_DEFAULT_P cgemm_p
993 #define CGEMM_DEFAULT_Q 256
994 #define CGEMM_DEFAULT_R cgemm_r
996 #define ZGEMM_DEFAULT_P zgemm_p
997 #define ZGEMM_DEFAULT_Q 256
998 #define ZGEMM_DEFAULT_R zgemm_r
1000 #define XGEMM_DEFAULT_P xgemm_p
1001 #define XGEMM_DEFAULT_Q 256
1002 #define XGEMM_DEFAULT_R xgemm_r
/* GEMM tuning defaults used when CORE_NORTHWOOD is defined: offsets 0/32,
 * unroll factors, P and R as identifiers (sgemm_p, sgemm_r, ...), and Q fixed
 * at 128 for every prefix.
 * NOTE(review): the matching #endif is not visible in this view, and the
 * embedded line numbers jump in places, so some lines may be missing. Confirm
 * against the full header. */
1007 #ifdef CORE_NORTHWOOD
1012 #define GEMM_DEFAULT_OFFSET_A 0
1013 #define GEMM_DEFAULT_OFFSET_B 32
1015 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
1019 #define SGEMM_DEFAULT_UNROLL_M 8
1020 #define DGEMM_DEFAULT_UNROLL_M 4
1021 #define QGEMM_DEFAULT_UNROLL_M 2
1022 #define CGEMM_DEFAULT_UNROLL_M 4
1023 #define ZGEMM_DEFAULT_UNROLL_M 2
1024 #define XGEMM_DEFAULT_UNROLL_M 1
1026 #define SGEMM_DEFAULT_UNROLL_N 2
1027 #define DGEMM_DEFAULT_UNROLL_N 2
1028 #define QGEMM_DEFAULT_UNROLL_N 2
1029 #define CGEMM_DEFAULT_UNROLL_N 1
1030 #define ZGEMM_DEFAULT_UNROLL_N 1
1031 #define XGEMM_DEFAULT_UNROLL_N 1
1033 #define SGEMM_DEFAULT_P sgemm_p
1034 #define SGEMM_DEFAULT_R sgemm_r
1036 #define DGEMM_DEFAULT_P dgemm_p
1037 #define DGEMM_DEFAULT_R dgemm_r
1039 #define QGEMM_DEFAULT_P qgemm_p
1040 #define QGEMM_DEFAULT_R qgemm_r
1042 #define CGEMM_DEFAULT_P cgemm_p
1043 #define CGEMM_DEFAULT_R cgemm_r
1045 #define ZGEMM_DEFAULT_P zgemm_p
1046 #define ZGEMM_DEFAULT_R zgemm_r
1048 #define XGEMM_DEFAULT_P xgemm_p
1049 #define XGEMM_DEFAULT_R xgemm_r
1051 #define SGEMM_DEFAULT_Q 128
1052 #define DGEMM_DEFAULT_Q 128
1053 #define QGEMM_DEFAULT_Q 128
1054 #define CGEMM_DEFAULT_Q 128
1055 #define ZGEMM_DEFAULT_Q 128
1056 #define XGEMM_DEFAULT_Q 128
/* GEMM tuning defaults used when CORE_PRESCOTT is defined.
 * NOTE(review): the matching #endif and any inner #if/#else lines are not
 * visible in this view. The A/B offsets and the UNROLL_M factors are each
 * given twice with different values, presumably as the two arms of inner
 * conditionals. Confirm against the full header. */
1059 #ifdef CORE_PRESCOTT
/* Two offset pairs: 128/192, then 0/256. */
1065 #define GEMM_DEFAULT_OFFSET_A 128
1066 #define GEMM_DEFAULT_OFFSET_B 192
1068 #define GEMM_DEFAULT_OFFSET_A 0
1069 #define GEMM_DEFAULT_OFFSET_B 256
1072 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* First UNROLL_M set (smaller values). */
1077 #define SGEMM_DEFAULT_UNROLL_M 4
1078 #define DGEMM_DEFAULT_UNROLL_M 2
1079 #define QGEMM_DEFAULT_UNROLL_M 2
1080 #define CGEMM_DEFAULT_UNROLL_M 2
1081 #define ZGEMM_DEFAULT_UNROLL_M 1
1082 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set (larger values). */
1084 #define SGEMM_DEFAULT_UNROLL_M 8
1085 #define DGEMM_DEFAULT_UNROLL_M 4
1086 #define QGEMM_DEFAULT_UNROLL_M 2
1087 #define CGEMM_DEFAULT_UNROLL_M 4
1088 #define ZGEMM_DEFAULT_UNROLL_M 2
1089 #define XGEMM_DEFAULT_UNROLL_M 1
1092 #define SGEMM_DEFAULT_UNROLL_N 4
1093 #define DGEMM_DEFAULT_UNROLL_N 4
1094 #define QGEMM_DEFAULT_UNROLL_N 2
1095 #define CGEMM_DEFAULT_UNROLL_N 2
1096 #define ZGEMM_DEFAULT_UNROLL_N 2
1097 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...); Q is the literal 128
 * for every prefix. */
1099 #define SGEMM_DEFAULT_P sgemm_p
1100 #define SGEMM_DEFAULT_R sgemm_r
1102 #define DGEMM_DEFAULT_P dgemm_p
1103 #define DGEMM_DEFAULT_R dgemm_r
1105 #define QGEMM_DEFAULT_P qgemm_p
1106 #define QGEMM_DEFAULT_R qgemm_r
1108 #define CGEMM_DEFAULT_P cgemm_p
1109 #define CGEMM_DEFAULT_R cgemm_r
1111 #define ZGEMM_DEFAULT_P zgemm_p
1112 #define ZGEMM_DEFAULT_R zgemm_r
1114 #define XGEMM_DEFAULT_P xgemm_p
1115 #define XGEMM_DEFAULT_R xgemm_r
1117 #define SGEMM_DEFAULT_Q 128
1118 #define DGEMM_DEFAULT_Q 128
1119 #define QGEMM_DEFAULT_Q 128
1120 #define CGEMM_DEFAULT_Q 128
1121 #define ZGEMM_DEFAULT_Q 128
1122 #define XGEMM_DEFAULT_Q 128
/* GEMM tuning defaults for one target configuration (offsets 448/128,
 * SWITCH_RATIO 4), plus the MASK helper macro.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. The M and N unroll factors are each given twice; presumably these are
 * the two arms of a conditional whose #if/#else/#endif lines are missing
 * here. Confirm against the full header. */
1130 #define GEMM_DEFAULT_OFFSET_A 448
1131 #define GEMM_DEFAULT_OFFSET_B 128
1132 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1136 #define SWITCH_RATIO 4
/* First unroll set (N values 2/2/2/1/1/1). */
1139 #define SGEMM_DEFAULT_UNROLL_M 8
1140 #define DGEMM_DEFAULT_UNROLL_M 4
1141 #define QGEMM_DEFAULT_UNROLL_M 2
1142 #define CGEMM_DEFAULT_UNROLL_M 4
1143 #define ZGEMM_DEFAULT_UNROLL_M 2
1144 #define XGEMM_DEFAULT_UNROLL_M 1
1146 #define SGEMM_DEFAULT_UNROLL_N 2
1147 #define DGEMM_DEFAULT_UNROLL_N 2
1148 #define QGEMM_DEFAULT_UNROLL_N 2
1149 #define CGEMM_DEFAULT_UNROLL_N 1
1150 #define ZGEMM_DEFAULT_UNROLL_N 1
1151 #define XGEMM_DEFAULT_UNROLL_N 1
/* MASK(a, b): with integer operands, a >= 0 and b > 0, rounds a up to the
 * next multiple of b. Note that b is evaluated three times, so arguments
 * with side effects should be avoided. */
1153 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/* Second unroll set (same M values, N values 4/4/2/2/2/1). */
1156 #define SGEMM_DEFAULT_UNROLL_M 8
1157 #define DGEMM_DEFAULT_UNROLL_M 4
1158 #define QGEMM_DEFAULT_UNROLL_M 2
1159 #define CGEMM_DEFAULT_UNROLL_M 4
1160 #define ZGEMM_DEFAULT_UNROLL_M 2
1161 #define XGEMM_DEFAULT_UNROLL_M 1
1163 #define SGEMM_DEFAULT_UNROLL_N 4
1164 #define DGEMM_DEFAULT_UNROLL_N 4
1165 #define QGEMM_DEFAULT_UNROLL_N 2
1166 #define CGEMM_DEFAULT_UNROLL_N 2
1167 #define ZGEMM_DEFAULT_UNROLL_N 2
1168 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...); Q is the literal 256
 * for every prefix. */
1171 #define SGEMM_DEFAULT_P sgemm_p
1172 #define SGEMM_DEFAULT_R sgemm_r
1174 #define DGEMM_DEFAULT_P dgemm_p
1175 #define DGEMM_DEFAULT_R dgemm_r
1177 #define QGEMM_DEFAULT_P qgemm_p
1178 #define QGEMM_DEFAULT_R qgemm_r
1180 #define CGEMM_DEFAULT_P cgemm_p
1181 #define CGEMM_DEFAULT_R cgemm_r
1183 #define ZGEMM_DEFAULT_P zgemm_p
1184 #define ZGEMM_DEFAULT_R zgemm_r
1186 #define XGEMM_DEFAULT_P xgemm_p
1187 #define XGEMM_DEFAULT_R xgemm_r
1189 #define SGEMM_DEFAULT_Q 256
1190 #define DGEMM_DEFAULT_Q 256
1191 #define QGEMM_DEFAULT_Q 256
1192 #define CGEMM_DEFAULT_Q 256
1193 #define ZGEMM_DEFAULT_Q 256
1194 #define XGEMM_DEFAULT_Q 256
/* GEMM tuning defaults for one target configuration (offsets 128/0,
 * SWITCH_RATIO 4), ending with GETRF_FACTOR.
 * NOTE(review): the directive selecting this group is not visible in this
 * view. The M and N unroll factors are each given twice; presumably these are
 * the two arms of a conditional whose #if/#else/#endif lines are missing
 * here. Confirm against the full header. */
1203 #define GEMM_DEFAULT_OFFSET_A 128
1204 #define GEMM_DEFAULT_OFFSET_B 0
1205 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1209 #define SWITCH_RATIO 4
/* First unroll set (smaller M values). */
1212 #define SGEMM_DEFAULT_UNROLL_M 4
1213 #define DGEMM_DEFAULT_UNROLL_M 2
1214 #define QGEMM_DEFAULT_UNROLL_M 2
1215 #define CGEMM_DEFAULT_UNROLL_M 2
1216 #define ZGEMM_DEFAULT_UNROLL_M 1
1217 #define XGEMM_DEFAULT_UNROLL_M 1
1219 #define SGEMM_DEFAULT_UNROLL_N 4
1220 #define DGEMM_DEFAULT_UNROLL_N 4
1221 #define QGEMM_DEFAULT_UNROLL_N 2
1222 #define CGEMM_DEFAULT_UNROLL_N 2
1223 #define ZGEMM_DEFAULT_UNROLL_N 2
1224 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll set (larger M values, same N values). */
1226 #define SGEMM_DEFAULT_UNROLL_M 8
1227 #define DGEMM_DEFAULT_UNROLL_M 4
1228 #define QGEMM_DEFAULT_UNROLL_M 2
1229 #define CGEMM_DEFAULT_UNROLL_M 4
1230 #define ZGEMM_DEFAULT_UNROLL_M 2
1231 #define XGEMM_DEFAULT_UNROLL_M 1
1233 #define SGEMM_DEFAULT_UNROLL_N 4
1234 #define DGEMM_DEFAULT_UNROLL_N 4
1235 #define QGEMM_DEFAULT_UNROLL_N 2
1236 #define CGEMM_DEFAULT_UNROLL_N 2
1237 #define ZGEMM_DEFAULT_UNROLL_N 2
1238 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...); Q values are
 * literals that differ per prefix. */
1241 #define SGEMM_DEFAULT_P sgemm_p
1242 #define SGEMM_DEFAULT_R sgemm_r
1244 #define DGEMM_DEFAULT_P dgemm_p
1245 #define DGEMM_DEFAULT_R dgemm_r
1247 #define QGEMM_DEFAULT_P qgemm_p
1248 #define QGEMM_DEFAULT_R qgemm_r
1250 #define CGEMM_DEFAULT_P cgemm_p
1251 #define CGEMM_DEFAULT_R cgemm_r
1253 #define ZGEMM_DEFAULT_P zgemm_p
1254 #define ZGEMM_DEFAULT_R zgemm_r
1256 #define XGEMM_DEFAULT_P xgemm_p
1257 #define XGEMM_DEFAULT_R xgemm_r
1259 #define SGEMM_DEFAULT_Q 512
1260 #define DGEMM_DEFAULT_Q 256
1261 #define QGEMM_DEFAULT_Q 128
1262 #define CGEMM_DEFAULT_Q 512
1263 #define ZGEMM_DEFAULT_Q 256
1264 #define XGEMM_DEFAULT_Q 128
1266 #define GETRF_FACTOR 0.75
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt). Same layout as a typical
 * table in this file: offsets, alignment, SWITCH_RATIO, two UNROLL sets,
 * P/R as identifiers, literal Q, then GETRF_FACTOR and GEMM_THREAD.
 */
1274 #define GEMM_DEFAULT_OFFSET_A 128
1275 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
1276 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1280 #define SWITCH_RATIO 4
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names with different
 * values. NOTE(review): the conditional that picks one of them is not
 * visible here - confirm before editing either set.
 */
/* UNROLL set 1 */
1283 #define SGEMM_DEFAULT_UNROLL_M 4
1284 #define DGEMM_DEFAULT_UNROLL_M 2
1285 #define QGEMM_DEFAULT_UNROLL_M 2
1286 #define CGEMM_DEFAULT_UNROLL_M 2
1287 #define ZGEMM_DEFAULT_UNROLL_M 1
1288 #define XGEMM_DEFAULT_UNROLL_M 1
1290 #define SGEMM_DEFAULT_UNROLL_N 4
1291 #define DGEMM_DEFAULT_UNROLL_N 4
1292 #define QGEMM_DEFAULT_UNROLL_N 2
1293 #define CGEMM_DEFAULT_UNROLL_N 2
1294 #define ZGEMM_DEFAULT_UNROLL_N 2
1295 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2 (same names as set 1, larger M values for S/D/C/Z) */
1297 #define SGEMM_DEFAULT_UNROLL_M 8
1298 #define DGEMM_DEFAULT_UNROLL_M 4
1299 #define QGEMM_DEFAULT_UNROLL_M 2
1300 #define CGEMM_DEFAULT_UNROLL_M 4
1301 #define ZGEMM_DEFAULT_UNROLL_M 2
1302 #define XGEMM_DEFAULT_UNROLL_M 1
1304 #define SGEMM_DEFAULT_UNROLL_N 4
1305 #define DGEMM_DEFAULT_UNROLL_N 4
1306 #define QGEMM_DEFAULT_UNROLL_N 2
1307 #define CGEMM_DEFAULT_UNROLL_N 2
1308 #define ZGEMM_DEFAULT_UNROLL_N 2
1309 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to the identifiers <prefix>gemm_p / <prefix>gemm_r rather
 * than to literals. NOTE(review): presumably variables declared elsewhere -
 * confirm.
 */
1312 #define SGEMM_DEFAULT_P sgemm_p
1313 #define SGEMM_DEFAULT_R sgemm_r
1315 #define DGEMM_DEFAULT_P dgemm_p
1316 #define DGEMM_DEFAULT_R dgemm_r
1318 #define QGEMM_DEFAULT_P qgemm_p
1319 #define QGEMM_DEFAULT_R qgemm_r
1321 #define CGEMM_DEFAULT_P cgemm_p
1322 #define CGEMM_DEFAULT_R cgemm_r
1324 #define ZGEMM_DEFAULT_P zgemm_p
1325 #define ZGEMM_DEFAULT_R zgemm_r
1327 #define XGEMM_DEFAULT_P xgemm_p
1328 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal for every prefix. */
1330 #define SGEMM_DEFAULT_Q 768
1331 #define DGEMM_DEFAULT_Q 384
1332 #define QGEMM_DEFAULT_Q 192
1333 #define CGEMM_DEFAULT_Q 768
1334 #define ZGEMM_DEFAULT_Q 384
1335 #define XGEMM_DEFAULT_Q 192
1337 #define GETRF_FACTOR 0.75
/* GEMM_THREAD expands to the identifier gemm_thread_mn (defined outside this excerpt). */
1338 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment,
 * SWITCH_RATIO, two UNROLL sets, literal P and Q, R as identifiers, and
 * GETRF_FACTOR.
 */
1346 #define GEMM_DEFAULT_OFFSET_A 32
1347 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
1348 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1352 #define SWITCH_RATIO 4
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names. The M values
 * are identical in both; only the S/D/C/Z UNROLL_N values differ.
 * NOTE(review): the conditional that picks one set is not visible here.
 */
/* UNROLL set 1 */
1355 #define SGEMM_DEFAULT_UNROLL_M 4
1356 #define DGEMM_DEFAULT_UNROLL_M 2
1357 #define QGEMM_DEFAULT_UNROLL_M 2
1358 #define CGEMM_DEFAULT_UNROLL_M 2
1359 #define ZGEMM_DEFAULT_UNROLL_M 1
1360 #define XGEMM_DEFAULT_UNROLL_M 1
1362 #define SGEMM_DEFAULT_UNROLL_N 4
1363 #define DGEMM_DEFAULT_UNROLL_N 4
1364 #define QGEMM_DEFAULT_UNROLL_N 2
1365 #define CGEMM_DEFAULT_UNROLL_N 2
1366 #define ZGEMM_DEFAULT_UNROLL_N 2
1367 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2 */
1369 #define SGEMM_DEFAULT_UNROLL_M 4
1370 #define DGEMM_DEFAULT_UNROLL_M 2
1371 #define QGEMM_DEFAULT_UNROLL_M 2
1372 #define CGEMM_DEFAULT_UNROLL_M 2
1373 #define ZGEMM_DEFAULT_UNROLL_M 1
1374 #define XGEMM_DEFAULT_UNROLL_M 1
1376 #define SGEMM_DEFAULT_UNROLL_N 8
1377 #define DGEMM_DEFAULT_UNROLL_N 8
1378 #define QGEMM_DEFAULT_UNROLL_N 2
1379 #define CGEMM_DEFAULT_UNROLL_N 4
1380 #define ZGEMM_DEFAULT_UNROLL_N 4
1381 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal here (504 for S/D/Q, 252 for C/Z/X); R still expands to
 * the identifiers <prefix>gemm_r.
 */
1384 #define SGEMM_DEFAULT_P 504
1385 #define SGEMM_DEFAULT_R sgemm_r
1387 #define DGEMM_DEFAULT_P 504
1388 #define DGEMM_DEFAULT_R dgemm_r
1390 #define QGEMM_DEFAULT_P 504
1391 #define QGEMM_DEFAULT_R qgemm_r
1393 #define CGEMM_DEFAULT_P 252
1394 #define CGEMM_DEFAULT_R cgemm_r
1396 #define ZGEMM_DEFAULT_P 252
1397 #define ZGEMM_DEFAULT_R zgemm_r
1399 #define XGEMM_DEFAULT_P 252
1400 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal for every prefix. */
1402 #define SGEMM_DEFAULT_Q 512
1403 #define DGEMM_DEFAULT_Q 256
1404 #define QGEMM_DEFAULT_Q 128
1405 #define CGEMM_DEFAULT_Q 512
1406 #define ZGEMM_DEFAULT_Q 256
1407 #define XGEMM_DEFAULT_Q 128
1409 #define GETRF_FACTOR 0.72
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment,
 * SWITCH_RATIO, two UNROLL sets, P/Q/R, the GEMM3M parameters and
 * GETRF_FACTOR.
 */
1419 #define GEMM_DEFAULT_OFFSET_A 0
1420 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
1421 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1425 #define SWITCH_RATIO 4
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names with different
 * values. NOTE(review): the conditional that picks one of them is not
 * visible here - confirm before editing either set.
 */
/* UNROLL set 1 */
1428 #define SGEMM_DEFAULT_UNROLL_M 4
1429 #define DGEMM_DEFAULT_UNROLL_M 2
1430 #define QGEMM_DEFAULT_UNROLL_M 2
1431 #define CGEMM_DEFAULT_UNROLL_M 2
1432 #define ZGEMM_DEFAULT_UNROLL_M 1
1433 #define XGEMM_DEFAULT_UNROLL_M 1
1435 #define SGEMM_DEFAULT_UNROLL_N 4
1436 #define DGEMM_DEFAULT_UNROLL_N 4
1437 #define QGEMM_DEFAULT_UNROLL_N 2
1438 #define CGEMM_DEFAULT_UNROLL_N 2
1439 #define ZGEMM_DEFAULT_UNROLL_N 2
1440 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2 */
1442 #define SGEMM_DEFAULT_UNROLL_M 16
1443 #define DGEMM_DEFAULT_UNROLL_M 8
1444 #define QGEMM_DEFAULT_UNROLL_M 2
1445 #define CGEMM_DEFAULT_UNROLL_M 8
1446 #define ZGEMM_DEFAULT_UNROLL_M 1
1447 #define XGEMM_DEFAULT_UNROLL_M 1
1449 #define SGEMM_DEFAULT_UNROLL_N 4
1450 #define DGEMM_DEFAULT_UNROLL_N 4
1451 #define QGEMM_DEFAULT_UNROLL_N 2
1452 #define CGEMM_DEFAULT_UNROLL_N 2
1453 #define ZGEMM_DEFAULT_UNROLL_N 4
1454 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal; R expands to the identifiers <prefix>gemm_r. The
 * commented-out lines record an alternative literal R of 1024 for S/D/C/Z.
 */
1457 #define SGEMM_DEFAULT_P 768
1458 #define SGEMM_DEFAULT_R sgemm_r
1459 /*#define SGEMM_DEFAULT_R 1024*/
1461 #define DGEMM_DEFAULT_P 512
1462 #define DGEMM_DEFAULT_R dgemm_r
1463 /*#define DGEMM_DEFAULT_R 1024*/
1465 #define QGEMM_DEFAULT_P 504
1466 #define QGEMM_DEFAULT_R qgemm_r
1468 #define CGEMM_DEFAULT_P 768
1469 #define CGEMM_DEFAULT_R cgemm_r
1470 /*#define CGEMM_DEFAULT_R 1024*/
1472 #define ZGEMM_DEFAULT_P 512
1473 #define ZGEMM_DEFAULT_R zgemm_r
1474 /*#define ZGEMM_DEFAULT_R 1024*/
1476 #define XGEMM_DEFAULT_P 252
1477 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal for every prefix. */
1479 #define SGEMM_DEFAULT_Q 384
1480 #define DGEMM_DEFAULT_Q 256
1481 #define QGEMM_DEFAULT_Q 128
1482 #define CGEMM_DEFAULT_Q 512
1483 #define ZGEMM_DEFAULT_Q 192
1484 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z, then literal P, Q and R for C/Z/X. */
1486 #define CGEMM3M_DEFAULT_UNROLL_N 8
1487 #define CGEMM3M_DEFAULT_UNROLL_M 4
1488 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1489 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1491 #define CGEMM3M_DEFAULT_P 448
1492 #define ZGEMM3M_DEFAULT_P 224
1493 #define XGEMM3M_DEFAULT_P 112
1494 #define CGEMM3M_DEFAULT_Q 224
1495 #define ZGEMM3M_DEFAULT_Q 224
1496 #define XGEMM3M_DEFAULT_Q 224
1497 #define CGEMM3M_DEFAULT_R 12288
1498 #define ZGEMM3M_DEFAULT_R 12288
1499 #define XGEMM3M_DEFAULT_R 12288
1503 #define GETRF_FACTOR 0.72
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment,
 * precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE, two UNROLL sets,
 * two P/Q/R sets and the GEMM3M parameters.
 */
1512 #define GEMM_DEFAULT_OFFSET_A 0
1513 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
1514 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 4 when XDOUBLE or DOUBLE is
 * defined. The second pair (8) follows directly; NOTE(review): presumably
 * it sits under an #else that is not visible here - confirm.
 */
1518 #if defined(XDOUBLE) || defined(DOUBLE)
1519 #define SWITCH_RATIO 4
1520 #define GEMM_PREFERED_SIZE 4
1522 #define SWITCH_RATIO 8
1523 #define GEMM_PREFERED_SIZE 8
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names with different
 * values. NOTE(review): the conditional that picks one of them is not
 * visible here - confirm before editing either set.
 */
/* UNROLL set 1 */
1528 #define SGEMM_DEFAULT_UNROLL_M 4
1529 #define DGEMM_DEFAULT_UNROLL_M 2
1530 #define QGEMM_DEFAULT_UNROLL_M 2
1531 #define CGEMM_DEFAULT_UNROLL_M 2
1532 #define ZGEMM_DEFAULT_UNROLL_M 1
1533 #define XGEMM_DEFAULT_UNROLL_M 1
1535 #define SGEMM_DEFAULT_UNROLL_N 4
1536 #define DGEMM_DEFAULT_UNROLL_N 4
1537 #define QGEMM_DEFAULT_UNROLL_N 2
1538 #define CGEMM_DEFAULT_UNROLL_N 2
1539 #define ZGEMM_DEFAULT_UNROLL_N 2
1540 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2; it also defines UNROLL_MN for S and D. */
1544 #define SGEMM_DEFAULT_UNROLL_M 8
1545 #define DGEMM_DEFAULT_UNROLL_M 4
1546 #define QGEMM_DEFAULT_UNROLL_M 2
1547 #define CGEMM_DEFAULT_UNROLL_M 8
1548 #define ZGEMM_DEFAULT_UNROLL_M 4
1549 #define XGEMM_DEFAULT_UNROLL_M 1
1551 #define SGEMM_DEFAULT_UNROLL_N 4
1552 #define DGEMM_DEFAULT_UNROLL_N 8
1553 #define QGEMM_DEFAULT_UNROLL_N 2
1554 #define CGEMM_DEFAULT_UNROLL_N 2
1555 #define ZGEMM_DEFAULT_UNROLL_N 2
1556 #define XGEMM_DEFAULT_UNROLL_N 1
1558 #define SGEMM_DEFAULT_UNROLL_MN 32
1559 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * P/Q/R set 1. CGEMM_DEFAULT_R is the literal 1024; every other R expands to
 * an identifier <prefix>gemm_r.
 */
1565 #define SGEMM_DEFAULT_P 512
1566 #define SGEMM_DEFAULT_R sgemm_r
1567 #define DGEMM_DEFAULT_P 512
1568 #define DGEMM_DEFAULT_R dgemm_r
1569 #define QGEMM_DEFAULT_P 504
1570 #define QGEMM_DEFAULT_R qgemm_r
1571 #define CGEMM_DEFAULT_P 128
1572 #define CGEMM_DEFAULT_R 1024
1573 #define ZGEMM_DEFAULT_P 512
1574 #define ZGEMM_DEFAULT_R zgemm_r
1575 #define XGEMM_DEFAULT_P 252
1576 #define XGEMM_DEFAULT_R xgemm_r
1577 #define SGEMM_DEFAULT_Q 256
1578 #define DGEMM_DEFAULT_Q 256
1579 #define QGEMM_DEFAULT_Q 128
1580 #define CGEMM_DEFAULT_Q 256
1581 #define ZGEMM_DEFAULT_Q 192
1582 #define XGEMM_DEFAULT_Q 128
/*
 * P/Q/R set 2 (redefines the names of set 1). NOTE(review): the conditional
 * separating the two sets is not visible here.
 */
1586 #define SGEMM_DEFAULT_P 320
1587 #define DGEMM_DEFAULT_P 512
1588 #define CGEMM_DEFAULT_P 256
1589 #define ZGEMM_DEFAULT_P 192
/*
 * SGEMM/DGEMM Q appear twice in a row (DGEMM: 128, then 256).
 * NOTE(review): presumably two branches of a nested conditional whose
 * directives are not visible here - confirm.
 */
1592 #define SGEMM_DEFAULT_Q 320
1593 #define DGEMM_DEFAULT_Q 128
1595 #define SGEMM_DEFAULT_Q 320
1596 #define DGEMM_DEFAULT_Q 256
1598 #define CGEMM_DEFAULT_Q 256
1599 #define ZGEMM_DEFAULT_Q 192
/* DGEMM_DEFAULT_R is the literal 13824; S/C/Z expand to identifiers. */
1601 #define SGEMM_DEFAULT_R sgemm_r
1602 #define DGEMM_DEFAULT_R 13824
1603 #define CGEMM_DEFAULT_R cgemm_r
1604 #define ZGEMM_DEFAULT_R zgemm_r
1606 #define QGEMM_DEFAULT_Q 128
1607 #define QGEMM_DEFAULT_P 504
1608 #define QGEMM_DEFAULT_R qgemm_r
1609 #define XGEMM_DEFAULT_P 252
1610 #define XGEMM_DEFAULT_R xgemm_r
1611 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z, then literal P, Q and R for C/Z/X. */
1613 #define CGEMM3M_DEFAULT_UNROLL_N 4
1614 #define CGEMM3M_DEFAULT_UNROLL_M 8
1615 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1616 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1618 #define CGEMM3M_DEFAULT_P 320
1619 #define ZGEMM3M_DEFAULT_P 256
1620 #define XGEMM3M_DEFAULT_P 112
1621 #define CGEMM3M_DEFAULT_Q 320
1622 #define ZGEMM3M_DEFAULT_Q 256
1623 #define XGEMM3M_DEFAULT_Q 224
1624 #define CGEMM3M_DEFAULT_R 12288
1625 #define ZGEMM3M_DEFAULT_R 12288
1626 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment,
 * precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE,
 * USE_SGEMM_KERNEL_DIRECT, two UNROLL sets, two P/Q/R sets and the GEMM3M
 * parameters.
 */
1638 #define GEMM_DEFAULT_OFFSET_A 0
1639 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff == 2^14 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
1640 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is
 * defined. The second pair (16) follows directly; NOTE(review): presumably
 * it sits under an #else that is not visible here - confirm.
 */
1644 #if defined(XDOUBLE) || defined(DOUBLE)
1645 #define SWITCH_RATIO 8
1646 #define GEMM_PREFERED_SIZE 8
1648 #define SWITCH_RATIO 16
1649 #define GEMM_PREFERED_SIZE 16
1651 #define USE_SGEMM_KERNEL_DIRECT 1
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names with different
 * values. NOTE(review): the conditional that picks one of them is not
 * visible here - confirm before editing either set.
 */
/* UNROLL set 1 */
1655 #define SGEMM_DEFAULT_UNROLL_M 4
1656 #define DGEMM_DEFAULT_UNROLL_M 2
1657 #define QGEMM_DEFAULT_UNROLL_M 2
1658 #define CGEMM_DEFAULT_UNROLL_M 2
1659 #define ZGEMM_DEFAULT_UNROLL_M 1
1660 #define XGEMM_DEFAULT_UNROLL_M 1
1662 #define SGEMM_DEFAULT_UNROLL_N 4
1663 #define DGEMM_DEFAULT_UNROLL_N 4
1664 #define QGEMM_DEFAULT_UNROLL_N 2
1665 #define CGEMM_DEFAULT_UNROLL_N 2
1666 #define ZGEMM_DEFAULT_UNROLL_N 2
1667 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2; it also defines UNROLL_MN for S and D. */
1671 #define SGEMM_DEFAULT_UNROLL_M 16
1672 #define DGEMM_DEFAULT_UNROLL_M 16
1673 #define QGEMM_DEFAULT_UNROLL_M 2
1674 #define CGEMM_DEFAULT_UNROLL_M 8
1675 #define ZGEMM_DEFAULT_UNROLL_M 4
1676 #define XGEMM_DEFAULT_UNROLL_M 1
1678 #define SGEMM_DEFAULT_UNROLL_N 4
1679 #define DGEMM_DEFAULT_UNROLL_N 2
1680 #define QGEMM_DEFAULT_UNROLL_N 2
1681 #define CGEMM_DEFAULT_UNROLL_N 2
1682 #define ZGEMM_DEFAULT_UNROLL_N 2
1683 #define XGEMM_DEFAULT_UNROLL_N 1
1685 #define SGEMM_DEFAULT_UNROLL_MN 32
1686 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * P/Q/R set 1. CGEMM_DEFAULT_R is the literal 1024; every other R expands to
 * an identifier <prefix>gemm_r.
 */
1691 #define SGEMM_DEFAULT_P 512
1692 #define SGEMM_DEFAULT_R sgemm_r
1693 #define DGEMM_DEFAULT_P 512
1694 #define DGEMM_DEFAULT_R dgemm_r
1695 #define QGEMM_DEFAULT_P 504
1696 #define QGEMM_DEFAULT_R qgemm_r
1697 #define CGEMM_DEFAULT_P 128
1698 #define CGEMM_DEFAULT_R 1024
1699 #define ZGEMM_DEFAULT_P 512
1700 #define ZGEMM_DEFAULT_R zgemm_r
1701 #define XGEMM_DEFAULT_P 252
1702 #define XGEMM_DEFAULT_R xgemm_r
1703 #define SGEMM_DEFAULT_Q 256
1704 #define DGEMM_DEFAULT_Q 256
1705 #define QGEMM_DEFAULT_Q 128
1706 #define CGEMM_DEFAULT_Q 256
1707 #define ZGEMM_DEFAULT_Q 192
1708 #define XGEMM_DEFAULT_Q 128
/*
 * P/Q/R set 2 (redefines the names of set 1). NOTE(review): the conditional
 * separating the two sets is not visible here.
 */
1712 #define SGEMM_DEFAULT_P 448
1713 #define DGEMM_DEFAULT_P 192
1714 #define CGEMM_DEFAULT_P 384
1715 #define ZGEMM_DEFAULT_P 256
1717 #define SGEMM_DEFAULT_Q 448
1718 #define DGEMM_DEFAULT_Q 384
1719 #define CGEMM_DEFAULT_Q 192
1720 #define ZGEMM_DEFAULT_Q 128
/* DGEMM_DEFAULT_R is the literal 8640; S/C/Z expand to identifiers. */
1722 #define SGEMM_DEFAULT_R sgemm_r
1723 #define DGEMM_DEFAULT_R 8640
1724 #define CGEMM_DEFAULT_R cgemm_r
1725 #define ZGEMM_DEFAULT_R zgemm_r
1727 #define QGEMM_DEFAULT_Q 128
1728 #define QGEMM_DEFAULT_P 504
1729 #define QGEMM_DEFAULT_R qgemm_r
1730 #define XGEMM_DEFAULT_P 252
1731 #define XGEMM_DEFAULT_R xgemm_r
1732 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z, then literal P, Q and R for C/Z/X. */
1734 #define CGEMM3M_DEFAULT_UNROLL_N 4
1735 #define CGEMM3M_DEFAULT_UNROLL_M 8
1736 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1737 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1739 #define CGEMM3M_DEFAULT_P 320
1740 #define ZGEMM3M_DEFAULT_P 256
1741 #define XGEMM3M_DEFAULT_P 112
1742 #define CGEMM3M_DEFAULT_Q 320
1743 #define ZGEMM3M_DEFAULT_Q 256
1744 #define XGEMM3M_DEFAULT_Q 224
1745 #define CGEMM3M_DEFAULT_R 12288
1746 #define ZGEMM3M_DEFAULT_R 12288
1747 #define XGEMM3M_DEFAULT_R 12288
1754 #ifdef SAPPHIRERAPIDS
/* Buffer offsets and alignment constant for the SAPPHIRERAPIDS table. */
1759 #define GEMM_DEFAULT_OFFSET_A 0
1760 #define GEMM_DEFAULT_OFFSET_B 0
/*
 * 0x03fff == 2^14 - 1. The constant is cast to BLASLONG, matching the other
 * tables in this file that use this value with a cast. Without the cast the
 * macro has type unsigned long, which is 32 bits wide on LLP64 targets; a
 * complement such as ~GEMM_DEFAULT_ALIGN would then be computed in 32 bits
 * and zero-extended when combined with a wider BLASLONG value.
 * NOTE(review): the impact depends on how users of this macro apply it -
 * confirm against the call sites.
 */
1761 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * Remainder of a GEMM parameter table: precision-dependent SWITCH_RATIO /
 * GEMM_PREFERED_SIZE, USE_SGEMM_KERNEL_DIRECT, SBGEMM overrides, two UNROLL
 * sets, two P/Q/R sets and the GEMM3M parameters.
 */
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is
 * defined. The second pair (16) follows directly; NOTE(review): presumably
 * it sits under an #else that is not visible here - confirm.
 */
1765 #if defined(XDOUBLE) || defined(DOUBLE)
1766 #define SWITCH_RATIO 8
1767 #define GEMM_PREFERED_SIZE 8
1769 #define SWITCH_RATIO 16
1770 #define GEMM_PREFERED_SIZE 16
1772 #define USE_SGEMM_KERNEL_DIRECT 1
/* Drop any earlier SBGEMM defaults before redefining them for this table. */
1774 #undef SBGEMM_DEFAULT_UNROLL_N
1775 #undef SBGEMM_DEFAULT_UNROLL_M
1776 #undef SBGEMM_DEFAULT_P
1777 #undef SBGEMM_DEFAULT_R
1778 #undef SBGEMM_DEFAULT_Q
1779 // FIXME: actually UNROLL_M = UNROLL_N = 16
1780 // If M and N are equal, OpenBLAS will reuse OCOPY as ICOPY.
1781 // For AMX the two copies are not the same, so UNROLL_M is set to 32 as a workaround.
1782 #define SBGEMM_DEFAULT_UNROLL_N 16
1783 #define SBGEMM_DEFAULT_UNROLL_M 32
1784 #define SBGEMM_DEFAULT_P 256
1785 #define SBGEMM_DEFAULT_Q 1024
1786 #define SBGEMM_DEFAULT_R sbgemm_r
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names with different
 * values. NOTE(review): the conditional that picks one of them is not
 * visible here - confirm before editing either set.
 */
/* UNROLL set 1 */
1790 #define SGEMM_DEFAULT_UNROLL_M 4
1791 #define DGEMM_DEFAULT_UNROLL_M 2
1792 #define QGEMM_DEFAULT_UNROLL_M 2
1793 #define CGEMM_DEFAULT_UNROLL_M 2
1794 #define ZGEMM_DEFAULT_UNROLL_M 1
1795 #define XGEMM_DEFAULT_UNROLL_M 1
1797 #define SGEMM_DEFAULT_UNROLL_N 4
1798 #define DGEMM_DEFAULT_UNROLL_N 4
1799 #define QGEMM_DEFAULT_UNROLL_N 2
1800 #define CGEMM_DEFAULT_UNROLL_N 2
1801 #define ZGEMM_DEFAULT_UNROLL_N 2
1802 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2; it also defines UNROLL_MN for S and D. */
1806 #define SGEMM_DEFAULT_UNROLL_M 16
1807 #define DGEMM_DEFAULT_UNROLL_M 16
1808 #define QGEMM_DEFAULT_UNROLL_M 2
1809 #define CGEMM_DEFAULT_UNROLL_M 8
1810 #define ZGEMM_DEFAULT_UNROLL_M 4
1811 #define XGEMM_DEFAULT_UNROLL_M 1
1813 #define SGEMM_DEFAULT_UNROLL_N 4
1814 #define DGEMM_DEFAULT_UNROLL_N 2
1815 #define QGEMM_DEFAULT_UNROLL_N 2
1816 #define CGEMM_DEFAULT_UNROLL_N 2
1817 #define ZGEMM_DEFAULT_UNROLL_N 2
1818 #define XGEMM_DEFAULT_UNROLL_N 1
1820 #define SGEMM_DEFAULT_UNROLL_MN 32
1821 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * P/Q/R set 1. CGEMM_DEFAULT_R is the literal 1024; every other R expands to
 * an identifier <prefix>gemm_r.
 */
1826 #define SGEMM_DEFAULT_P 512
1827 #define SGEMM_DEFAULT_R sgemm_r
1828 #define DGEMM_DEFAULT_P 512
1829 #define DGEMM_DEFAULT_R dgemm_r
1830 #define QGEMM_DEFAULT_P 504
1831 #define QGEMM_DEFAULT_R qgemm_r
1832 #define CGEMM_DEFAULT_P 128
1833 #define CGEMM_DEFAULT_R 1024
1834 #define ZGEMM_DEFAULT_P 512
1835 #define ZGEMM_DEFAULT_R zgemm_r
1836 #define XGEMM_DEFAULT_P 252
1837 #define XGEMM_DEFAULT_R xgemm_r
1838 #define SGEMM_DEFAULT_Q 256
1839 #define DGEMM_DEFAULT_Q 256
1840 #define QGEMM_DEFAULT_Q 128
1841 #define CGEMM_DEFAULT_Q 256
1842 #define ZGEMM_DEFAULT_Q 192
1843 #define XGEMM_DEFAULT_Q 128
/*
 * P/Q/R set 2 (redefines the names of set 1). NOTE(review): the conditional
 * separating the two sets is not visible here.
 */
1847 #define SGEMM_DEFAULT_P 640
1848 #define DGEMM_DEFAULT_P 192
1849 #define CGEMM_DEFAULT_P 384
1850 #define ZGEMM_DEFAULT_P 256
1852 #define SGEMM_DEFAULT_Q 320
1853 #define DGEMM_DEFAULT_Q 384
1854 #define CGEMM_DEFAULT_Q 192
1855 #define ZGEMM_DEFAULT_Q 128
/* DGEMM_DEFAULT_R is the literal 8640; S/C/Z expand to identifiers. */
1857 #define SGEMM_DEFAULT_R sgemm_r
1858 #define DGEMM_DEFAULT_R 8640
1859 #define CGEMM_DEFAULT_R cgemm_r
1860 #define ZGEMM_DEFAULT_R zgemm_r
1862 #define QGEMM_DEFAULT_Q 128
1863 #define QGEMM_DEFAULT_P 504
1864 #define QGEMM_DEFAULT_R qgemm_r
1865 #define XGEMM_DEFAULT_P 252
1866 #define XGEMM_DEFAULT_R xgemm_r
1867 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z, then literal P, Q and R for C/Z/X. */
1869 #define CGEMM3M_DEFAULT_UNROLL_N 4
1870 #define CGEMM3M_DEFAULT_UNROLL_M 8
1871 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1872 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1874 #define CGEMM3M_DEFAULT_P 320
1875 #define ZGEMM3M_DEFAULT_P 256
1876 #define XGEMM3M_DEFAULT_P 112
1877 #define CGEMM3M_DEFAULT_Q 320
1878 #define ZGEMM3M_DEFAULT_Q 256
1879 #define XGEMM3M_DEFAULT_Q 224
1880 #define CGEMM3M_DEFAULT_R 12288
1881 #define ZGEMM3M_DEFAULT_R 12288
1882 #define XGEMM3M_DEFAULT_R 12288
/* Buffer offsets and alignment constant for the table that follows. */
1892 #define GEMM_DEFAULT_OFFSET_A 0
1893 #define GEMM_DEFAULT_OFFSET_B 0
/*
 * 0x03fff == 2^14 - 1. The constant is cast to BLASLONG, matching the other
 * tables in this file that use this value with a cast. Without the cast the
 * macro has type unsigned long, which is 32 bits wide on LLP64 targets; a
 * complement such as ~GEMM_DEFAULT_ALIGN would then be computed in 32 bits
 * and zero-extended when combined with a wider BLASLONG value.
 * NOTE(review): the impact depends on how users of this macro apply it -
 * confirm against the call sites.
 */
1894 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * Remainder of a GEMM parameter table: precision-dependent SWITCH_RATIO /
 * GEMM_PREFERED_SIZE, USE_SGEMM_KERNEL_DIRECT, SBGEMM overrides, two UNROLL
 * sets, two P/Q/R sets and the GEMM3M parameters.
 */
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is
 * defined. The second pair (16) follows directly; NOTE(review): presumably
 * it sits under an #else that is not visible here - confirm.
 */
1898 #if defined(XDOUBLE) || defined(DOUBLE)
1899 #define SWITCH_RATIO 8
1900 #define GEMM_PREFERED_SIZE 8
1902 #define SWITCH_RATIO 16
1903 #define GEMM_PREFERED_SIZE 16
1905 #define USE_SGEMM_KERNEL_DIRECT 1
/* Drop any earlier SBGEMM defaults before redefining them for this table. */
1907 #undef SBGEMM_DEFAULT_UNROLL_N
1908 #undef SBGEMM_DEFAULT_UNROLL_M
1909 #undef SBGEMM_DEFAULT_P
1910 #undef SBGEMM_DEFAULT_R
1911 #undef SBGEMM_DEFAULT_Q
1912 #define SBGEMM_DEFAULT_UNROLL_N 4
1913 #define SBGEMM_DEFAULT_UNROLL_M 16
1914 #define SBGEMM_DEFAULT_P 384
1915 #define SBGEMM_DEFAULT_Q 768
1916 #define SBGEMM_DEFAULT_R sbgemm_r
/*
 * Two UNROLL_M/UNROLL_N sets follow and define the same names with different
 * values. NOTE(review): the conditional that picks one of them is not
 * visible here - confirm before editing either set.
 */
/* UNROLL set 1 */
1920 #define SGEMM_DEFAULT_UNROLL_M 4
1921 #define DGEMM_DEFAULT_UNROLL_M 2
1922 #define QGEMM_DEFAULT_UNROLL_M 2
1923 #define CGEMM_DEFAULT_UNROLL_M 2
1924 #define ZGEMM_DEFAULT_UNROLL_M 1
1925 #define XGEMM_DEFAULT_UNROLL_M 1
1927 #define SGEMM_DEFAULT_UNROLL_N 4
1928 #define DGEMM_DEFAULT_UNROLL_N 4
1929 #define QGEMM_DEFAULT_UNROLL_N 2
1930 #define CGEMM_DEFAULT_UNROLL_N 2
1931 #define ZGEMM_DEFAULT_UNROLL_N 2
1932 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL set 2; it also defines UNROLL_MN for S and D. */
1936 #define SGEMM_DEFAULT_UNROLL_M 16
1937 #define DGEMM_DEFAULT_UNROLL_M 16
1938 #define QGEMM_DEFAULT_UNROLL_M 2
1939 #define CGEMM_DEFAULT_UNROLL_M 8
1940 #define ZGEMM_DEFAULT_UNROLL_M 4
1941 #define XGEMM_DEFAULT_UNROLL_M 1
1943 #define SGEMM_DEFAULT_UNROLL_N 4
1944 #define DGEMM_DEFAULT_UNROLL_N 2
1945 #define QGEMM_DEFAULT_UNROLL_N 2
1946 #define CGEMM_DEFAULT_UNROLL_N 2
1947 #define ZGEMM_DEFAULT_UNROLL_N 2
1948 #define XGEMM_DEFAULT_UNROLL_N 1
1950 #define SGEMM_DEFAULT_UNROLL_MN 32
1951 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * P/Q/R set 1. CGEMM_DEFAULT_R is the literal 1024; every other R expands to
 * an identifier <prefix>gemm_r.
 */
1956 #define SGEMM_DEFAULT_P 512
1957 #define SGEMM_DEFAULT_R sgemm_r
1958 #define DGEMM_DEFAULT_P 512
1959 #define DGEMM_DEFAULT_R dgemm_r
1960 #define QGEMM_DEFAULT_P 504
1961 #define QGEMM_DEFAULT_R qgemm_r
1962 #define CGEMM_DEFAULT_P 128
1963 #define CGEMM_DEFAULT_R 1024
1964 #define ZGEMM_DEFAULT_P 512
1965 #define ZGEMM_DEFAULT_R zgemm_r
1966 #define XGEMM_DEFAULT_P 252
1967 #define XGEMM_DEFAULT_R xgemm_r
1968 #define SGEMM_DEFAULT_Q 256
1969 #define DGEMM_DEFAULT_Q 256
1970 #define QGEMM_DEFAULT_Q 128
1971 #define CGEMM_DEFAULT_Q 256
1972 #define ZGEMM_DEFAULT_Q 192
1973 #define XGEMM_DEFAULT_Q 128
/*
 * P/Q/R set 2 (redefines the names of set 1). NOTE(review): the conditional
 * separating the two sets is not visible here.
 */
1977 #define SGEMM_DEFAULT_P 640
1978 #define DGEMM_DEFAULT_P 192
1979 #define CGEMM_DEFAULT_P 384
1980 #define ZGEMM_DEFAULT_P 256
1982 #define SGEMM_DEFAULT_Q 320
1983 #define DGEMM_DEFAULT_Q 384
1984 #define CGEMM_DEFAULT_Q 192
1985 #define ZGEMM_DEFAULT_Q 128
/* DGEMM_DEFAULT_R is the literal 8640; S/C/Z expand to identifiers. */
1987 #define SGEMM_DEFAULT_R sgemm_r
1988 #define DGEMM_DEFAULT_R 8640
1989 #define CGEMM_DEFAULT_R cgemm_r
1990 #define ZGEMM_DEFAULT_R zgemm_r
1992 #define QGEMM_DEFAULT_Q 128
1993 #define QGEMM_DEFAULT_P 504
1994 #define QGEMM_DEFAULT_R qgemm_r
1995 #define XGEMM_DEFAULT_P 252
1996 #define XGEMM_DEFAULT_R xgemm_r
1997 #define XGEMM_DEFAULT_Q 128
/* GEMM3M parameters: unroll factors for C/Z, then literal P, Q and R for C/Z/X. */
1999 #define CGEMM3M_DEFAULT_UNROLL_N 4
2000 #define CGEMM3M_DEFAULT_UNROLL_M 8
2001 #define ZGEMM3M_DEFAULT_UNROLL_N 4
2002 #define ZGEMM3M_DEFAULT_UNROLL_M 4
2004 #define CGEMM3M_DEFAULT_P 320
2005 #define ZGEMM3M_DEFAULT_P 256
2006 #define XGEMM3M_DEFAULT_P 112
2007 #define CGEMM3M_DEFAULT_Q 320
2008 #define ZGEMM3M_DEFAULT_Q 256
2009 #define XGEMM3M_DEFAULT_Q 224
2010 #define CGEMM3M_DEFAULT_R 12288
2011 #define ZGEMM3M_DEFAULT_R 12288
2012 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, two
 * UNROLL_M sets sharing one UNROLL_N set, P/R as identifiers and a literal
 * Q of 256 for every prefix.
 */
2023 #define GEMM_DEFAULT_OFFSET_A 64
2024 #define GEMM_DEFAULT_OFFSET_B 0
/* 0x0ffff == 2^16 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
2025 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/*
 * UNROLL_M is defined twice with different values. NOTE(review): the
 * conditional that picks one set is not visible here - confirm.
 */
/* UNROLL_M set 1 */
2030 #define SGEMM_DEFAULT_UNROLL_M 4
2031 #define DGEMM_DEFAULT_UNROLL_M 2
2032 #define QGEMM_DEFAULT_UNROLL_M 2
2033 #define CGEMM_DEFAULT_UNROLL_M 2
2034 #define ZGEMM_DEFAULT_UNROLL_M 1
2035 #define XGEMM_DEFAULT_UNROLL_M 1
/* UNROLL_M set 2 */
2037 #define SGEMM_DEFAULT_UNROLL_M 8
2038 #define DGEMM_DEFAULT_UNROLL_M 4
2039 #define QGEMM_DEFAULT_UNROLL_M 2
2040 #define CGEMM_DEFAULT_UNROLL_M 4
2041 #define ZGEMM_DEFAULT_UNROLL_M 2
2042 #define XGEMM_DEFAULT_UNROLL_M 1
/* UNROLL_N appears once in this table. */
2045 #define SGEMM_DEFAULT_UNROLL_N 4
2046 #define DGEMM_DEFAULT_UNROLL_N 2
2047 #define QGEMM_DEFAULT_UNROLL_N 2
2048 #define CGEMM_DEFAULT_UNROLL_N 2
2049 #define ZGEMM_DEFAULT_UNROLL_N 1
2050 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P and R expand to the identifiers <prefix>gemm_p / <prefix>gemm_r rather
 * than to literals. NOTE(review): presumably variables declared elsewhere -
 * confirm.
 */
2052 #define SGEMM_DEFAULT_P sgemm_p
2053 #define SGEMM_DEFAULT_R sgemm_r
2055 #define DGEMM_DEFAULT_P dgemm_p
2056 #define DGEMM_DEFAULT_R dgemm_r
2058 #define QGEMM_DEFAULT_P qgemm_p
2059 #define QGEMM_DEFAULT_R qgemm_r
2061 #define CGEMM_DEFAULT_P cgemm_p
2062 #define CGEMM_DEFAULT_R cgemm_r
2064 #define ZGEMM_DEFAULT_P zgemm_p
2065 #define ZGEMM_DEFAULT_R zgemm_r
2067 #define XGEMM_DEFAULT_P xgemm_p
2068 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every prefix. */
2070 #define SGEMM_DEFAULT_Q 256
2071 #define DGEMM_DEFAULT_Q 256
2072 #define QGEMM_DEFAULT_Q 256
2073 #define CGEMM_DEFAULT_Q 256
2074 #define ZGEMM_DEFAULT_Q 256
2075 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, a single
 * UNROLL set, P/R as identifiers, a literal Q of 1024 and GETRF_FACTOR.
 */
2085 #define GEMM_DEFAULT_OFFSET_A 0
2086 #define GEMM_DEFAULT_OFFSET_B 128
/* 0x03fff == 2^14 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
2087 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M equals UNROLL_N for every prefix: 8 for S/D/Q, 4 for C/Z/X. */
2089 #define SGEMM_DEFAULT_UNROLL_M 8
2090 #define SGEMM_DEFAULT_UNROLL_N 8
2091 #define DGEMM_DEFAULT_UNROLL_M 8
2092 #define DGEMM_DEFAULT_UNROLL_N 8
2093 #define QGEMM_DEFAULT_UNROLL_M 8
2094 #define QGEMM_DEFAULT_UNROLL_N 8
2095 #define CGEMM_DEFAULT_UNROLL_M 4
2096 #define CGEMM_DEFAULT_UNROLL_N 4
2097 #define ZGEMM_DEFAULT_UNROLL_M 4
2098 #define ZGEMM_DEFAULT_UNROLL_N 4
2099 #define XGEMM_DEFAULT_UNROLL_M 4
2100 #define XGEMM_DEFAULT_UNROLL_N 4
/* P expands to the identifiers <prefix>gemm_p. */
2102 #define SGEMM_DEFAULT_P sgemm_p
2103 #define DGEMM_DEFAULT_P dgemm_p
2104 #define QGEMM_DEFAULT_P qgemm_p
2105 #define CGEMM_DEFAULT_P cgemm_p
2106 #define ZGEMM_DEFAULT_P zgemm_p
2107 #define XGEMM_DEFAULT_P xgemm_p
/* Q is 1024 for every prefix. */
2109 #define SGEMM_DEFAULT_Q 1024
2110 #define DGEMM_DEFAULT_Q 1024
2111 #define QGEMM_DEFAULT_Q 1024
2112 #define CGEMM_DEFAULT_Q 1024
2113 #define ZGEMM_DEFAULT_Q 1024
2114 #define XGEMM_DEFAULT_Q 1024
/* R expands to the identifiers <prefix>gemm_r. */
2116 #define SGEMM_DEFAULT_R sgemm_r
2117 #define DGEMM_DEFAULT_R dgemm_r
2118 #define QGEMM_DEFAULT_R qgemm_r
2119 #define CGEMM_DEFAULT_R cgemm_r
2120 #define ZGEMM_DEFAULT_R zgemm_r
2121 #define XGEMM_DEFAULT_R xgemm_r
2125 #define GETRF_FACTOR 0.65
/*
 * GEMM parameter table used when EV4, EV5 or EV6 is defined: offsets,
 * alignment, one UNROLL set for S/D/C/Z and three successive P/Q sets.
 */
2129 #if defined(EV4) || defined(EV5) || defined(EV6)
2139 #define GEMM_DEFAULT_OFFSET_A 512
2140 #define GEMM_DEFAULT_OFFSET_B 512
/* 0x0ffff == 2^16 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
2141 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
2143 #define SGEMM_DEFAULT_UNROLL_M 4
2144 #define SGEMM_DEFAULT_UNROLL_N 4
2145 #define DGEMM_DEFAULT_UNROLL_M 4
2146 #define DGEMM_DEFAULT_UNROLL_N 4
2147 #define CGEMM_DEFAULT_UNROLL_M 2
2148 #define CGEMM_DEFAULT_UNROLL_N 2
2149 #define ZGEMM_DEFAULT_UNROLL_M 2
2150 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * Three P/Q sets follow and define the same names with increasing values.
 * NOTE(review): presumably one set per EV4/EV5/EV6 variant, selected by
 * conditionals that are not visible here - confirm.
 */
/* Set 1: the only set that also defines literal R values. */
2155 #define SGEMM_DEFAULT_P 32
2156 #define SGEMM_DEFAULT_Q 112
2157 #define SGEMM_DEFAULT_R 256
2159 #define DGEMM_DEFAULT_P 32
2160 #define DGEMM_DEFAULT_Q 56
2161 #define DGEMM_DEFAULT_R 256
2163 #define CGEMM_DEFAULT_P 32
2164 #define CGEMM_DEFAULT_Q 64
2165 #define CGEMM_DEFAULT_R 240
2167 #define ZGEMM_DEFAULT_P 32
2168 #define ZGEMM_DEFAULT_Q 32
2169 #define ZGEMM_DEFAULT_R 240
/* Set 2: P and Q only. */
2173 #define SGEMM_DEFAULT_P 64
2174 #define SGEMM_DEFAULT_Q 256
2176 #define DGEMM_DEFAULT_P 64
2177 #define DGEMM_DEFAULT_Q 128
2179 #define CGEMM_DEFAULT_P 64
2180 #define CGEMM_DEFAULT_Q 128
2182 #define ZGEMM_DEFAULT_P 64
2183 #define ZGEMM_DEFAULT_Q 64
/* Set 3: P and Q only. */
2187 #define SGEMM_DEFAULT_P 256
2188 #define SGEMM_DEFAULT_Q 512
2190 #define DGEMM_DEFAULT_P 256
2191 #define DGEMM_DEFAULT_Q 256
2193 #define CGEMM_DEFAULT_P 256
2194 #define CGEMM_DEFAULT_Q 256
2196 #define ZGEMM_DEFAULT_P 128
2197 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, and
 * UNROLL, P and Q for the S/D/C/Z prefixes. No R values are defined in the
 * visible lines.
 */
2207 #define GEMM_DEFAULT_OFFSET_A 0
2208 #define GEMM_DEFAULT_OFFSET_B 8192
/* 0x0ffff == 2^16 - 1. NOTE(review): presumably used as a low-bit mask - confirm. */
2209 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
2211 #define SGEMM_DEFAULT_UNROLL_M 16
2212 #define SGEMM_DEFAULT_UNROLL_N 4
2213 #define DGEMM_DEFAULT_UNROLL_M 4
2214 #define DGEMM_DEFAULT_UNROLL_N 4
2215 #define CGEMM_DEFAULT_UNROLL_M 8
2216 #define CGEMM_DEFAULT_UNROLL_N 2
2217 #define ZGEMM_DEFAULT_UNROLL_M 2
2218 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
2220 #define SGEMM_DEFAULT_P 128
2221 #define DGEMM_DEFAULT_P 128
2222 #define CGEMM_DEFAULT_P 128
2223 #define ZGEMM_DEFAULT_P 128
2225 #define SGEMM_DEFAULT_Q 512
2226 #define DGEMM_DEFAULT_Q 256
2227 #define CGEMM_DEFAULT_Q 256
2228 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, and
 * UNROLL, P and Q for the S/D/C/Z prefixes. No R values are defined in the
 * visible lines.
 */
2234 #define GEMM_DEFAULT_OFFSET_A 0
2235 #define GEMM_DEFAULT_OFFSET_B 1024
/*
 * 0x0ffff == 2^16 - 1. Unlike some other tables in this file, the constant
 * carries no (BLASLONG) cast, so its type is unsigned long.
 */
2236 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2238 #define SGEMM_DEFAULT_UNROLL_M 16
2239 #define SGEMM_DEFAULT_UNROLL_N 4
2240 #define DGEMM_DEFAULT_UNROLL_M 4
2241 #define DGEMM_DEFAULT_UNROLL_N 4
2242 #define CGEMM_DEFAULT_UNROLL_M 2
2243 #define CGEMM_DEFAULT_UNROLL_N 2
2244 #define ZGEMM_DEFAULT_UNROLL_M 2
2245 #define ZGEMM_DEFAULT_UNROLL_N 2
2247 #define SGEMM_DEFAULT_P 256
2248 #define DGEMM_DEFAULT_P 128
2249 #define CGEMM_DEFAULT_P 128
2250 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for every prefix. */
2252 #define SGEMM_DEFAULT_Q 256
2253 #define DGEMM_DEFAULT_Q 256
2254 #define CGEMM_DEFAULT_Q 256
2255 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment,
 * byte-order-dependent UNROLL_M for S and C, OS- and L2_SIZE-dependent P,
 * and literal Q.
 */
2265 #define GEMM_DEFAULT_OFFSET_A 2688
2266 #define GEMM_DEFAULT_OFFSET_B 3072
/* 0x03fff == 2^14 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2267 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/*
 * SGEMM UNROLL_M is 4 when the compiler reports big-endian byte order. The
 * value 16 that follows is, NOTE(review), presumably the #else branch; the
 * directive itself is not visible here - confirm.
 */
2269 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2270 #define SGEMM_DEFAULT_UNROLL_M 4
2272 #define SGEMM_DEFAULT_UNROLL_M 16
2274 #define SGEMM_DEFAULT_UNROLL_N 4
2275 #define DGEMM_DEFAULT_UNROLL_M 4
2276 #define DGEMM_DEFAULT_UNROLL_N 4
/* Same big-endian test for CGEMM UNROLL_M: 2 when big-endian, then 8. */
2277 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2278 #define CGEMM_DEFAULT_UNROLL_M 2
2280 #define CGEMM_DEFAULT_UNROLL_M 8
2282 #define CGEMM_DEFAULT_UNROLL_N 2
2283 #define ZGEMM_DEFAULT_UNROLL_M 2
2284 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is chosen under an OS test (Linux, Darwin or FreeBSD) and then by L2_SIZE. */
2286 #if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
/*
 * NOTE(review): 1024976 is not a power of two; 1 MiB is 1048576 (2^20). If
 * L2_SIZE is a byte count this comparison would not match a 1 MiB cache -
 * confirm whether the literal is intentional before changing it.
 */
2287 #if L2_SIZE == 1024976
2288 #define SGEMM_DEFAULT_P 320
2289 #define DGEMM_DEFAULT_P 256
2290 #define CGEMM_DEFAULT_P 256
2291 #define ZGEMM_DEFAULT_P 256
/* Second P set (176 for every prefix); its guarding directive is not visible here. */
2293 #define SGEMM_DEFAULT_P 176
2294 #define DGEMM_DEFAULT_P 176
2295 #define CGEMM_DEFAULT_P 176
2296 #define ZGEMM_DEFAULT_P 176
2300 #define SGEMM_DEFAULT_Q 512
2301 #define DGEMM_DEFAULT_Q 256
2302 #define CGEMM_DEFAULT_Q 256
2303 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, and
 * UNROLL, P, Q and R for the S/D/C/Z prefixes.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
2314 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2315 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
/* 0x0ffff == 2^16 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2316 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2318 #define SGEMM_DEFAULT_UNROLL_M 4
2319 #define SGEMM_DEFAULT_UNROLL_N 4
2320 #define DGEMM_DEFAULT_UNROLL_M 4
2321 #define DGEMM_DEFAULT_UNROLL_N 4
2322 #define CGEMM_DEFAULT_UNROLL_M 2
2323 #define CGEMM_DEFAULT_UNROLL_N 2
2324 #define ZGEMM_DEFAULT_UNROLL_M 2
2325 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
2327 #define SGEMM_DEFAULT_P 512
2328 #define DGEMM_DEFAULT_P 512
2329 #define CGEMM_DEFAULT_P 512
2330 #define ZGEMM_DEFAULT_P 512
2332 #define SGEMM_DEFAULT_Q 1024
2333 #define DGEMM_DEFAULT_Q 512
2334 #define CGEMM_DEFAULT_Q 512
2335 #define ZGEMM_DEFAULT_Q 256
/* R is defined as the matching P macro, so it tracks any change to P. */
2337 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2338 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2339 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2340 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, UNROLL
 * and P for the S/D/C/Z prefixes, followed by two Q sets.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
2350 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2351 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
/* 0x0ffff == 2^16 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2352 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2354 #define SGEMM_DEFAULT_UNROLL_M 8
2355 #define SGEMM_DEFAULT_UNROLL_N 4
2356 #define DGEMM_DEFAULT_UNROLL_M 8
2357 #define DGEMM_DEFAULT_UNROLL_N 4
2358 #define CGEMM_DEFAULT_UNROLL_M 4
2359 #define CGEMM_DEFAULT_UNROLL_N 2
2360 #define ZGEMM_DEFAULT_UNROLL_M 4
2361 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
2363 #define SGEMM_DEFAULT_P 128
2364 #define DGEMM_DEFAULT_P 128
2365 #define CGEMM_DEFAULT_P 128
2366 #define ZGEMM_DEFAULT_P 128
/*
 * Q is defined twice with different values. NOTE(review): the conditional
 * that picks one set is not visible here - confirm.
 */
/* Q set 1 */
2368 #define SGEMM_DEFAULT_Q 4096
2369 #define DGEMM_DEFAULT_Q 3072
2370 #define CGEMM_DEFAULT_Q 2048
2371 #define ZGEMM_DEFAULT_Q 1024
/* Q set 2 */
2373 #define SGEMM_DEFAULT_Q 512
2374 #define DGEMM_DEFAULT_Q 256
2375 #define CGEMM_DEFAULT_Q 256
2376 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM parameter table used when POWER3, POWER4 or POWER5 is defined:
 * offsets, alignment, one UNROLL set for S/D/C/Z and three successive
 * P/Q groups.
 */
2384 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
2385 #define GEMM_DEFAULT_OFFSET_A 0
2386 #define GEMM_DEFAULT_OFFSET_B 2048
/* 0x0ffff == 2^16 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2387 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2389 #define SGEMM_DEFAULT_UNROLL_M 4
2390 #define SGEMM_DEFAULT_UNROLL_N 4
2391 #define DGEMM_DEFAULT_UNROLL_M 4
2392 #define DGEMM_DEFAULT_UNROLL_N 4
2393 #define CGEMM_DEFAULT_UNROLL_M 2
2394 #define CGEMM_DEFAULT_UNROLL_N 2
2395 #define ZGEMM_DEFAULT_UNROLL_M 2
2396 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * Three P/Q groups follow and define the same names. NOTE(review):
 * presumably one group per POWER3/POWER4/POWER5 variant, selected by
 * conditionals that are not visible here - confirm.
 */
/* Group 1: the only group that also defines literal R values (1012). */
2403 #define SGEMM_DEFAULT_P 256
2404 #define SGEMM_DEFAULT_Q 432
2405 #define SGEMM_DEFAULT_R 1012
2407 #define DGEMM_DEFAULT_P 256
2408 #define DGEMM_DEFAULT_Q 216
2409 #define DGEMM_DEFAULT_R 1012
2411 #define CGEMM_DEFAULT_P 256
2412 #define CGEMM_DEFAULT_Q 104
2413 #define CGEMM_DEFAULT_R 1012
2415 #define ZGEMM_DEFAULT_P 256
2416 #define ZGEMM_DEFAULT_Q 104
2417 #define ZGEMM_DEFAULT_R 1012
/*
 * Group 2: P is 184 when ALLOC_HUGETLB is defined; the 144 values that
 * follow are, NOTE(review), presumably the #else branch - confirm.
 */
2421 #ifdef ALLOC_HUGETLB
2422 #define SGEMM_DEFAULT_P 184
2423 #define DGEMM_DEFAULT_P 184
2424 #define CGEMM_DEFAULT_P 184
2425 #define ZGEMM_DEFAULT_P 184
2427 #define SGEMM_DEFAULT_P 144
2428 #define DGEMM_DEFAULT_P 144
2429 #define CGEMM_DEFAULT_P 144
2430 #define ZGEMM_DEFAULT_P 144
2433 #define SGEMM_DEFAULT_Q 256
2434 #define CGEMM_DEFAULT_Q 256
2435 #define DGEMM_DEFAULT_Q 256
2436 #define ZGEMM_DEFAULT_Q 256
/*
 * Group 3: same ALLOC_HUGETLB split with per-prefix P values; Q is again
 * 256 for every prefix.
 */
2440 #ifdef ALLOC_HUGETLB
2441 #define SGEMM_DEFAULT_P 512
2442 #define DGEMM_DEFAULT_P 256
2443 #define CGEMM_DEFAULT_P 256
2444 #define ZGEMM_DEFAULT_P 128
2446 #define SGEMM_DEFAULT_P 320
2447 #define DGEMM_DEFAULT_P 160
2448 #define CGEMM_DEFAULT_P 160
2449 #define ZGEMM_DEFAULT_P 80
2452 #define SGEMM_DEFAULT_Q 256
2453 #define CGEMM_DEFAULT_Q 256
2454 #define DGEMM_DEFAULT_Q 256
2455 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, and
 * UNROLL, P and Q for the S/D/C/Z prefixes. No R values are defined in the
 * visible lines.
 */
2467 #define GEMM_DEFAULT_OFFSET_A 384
2468 #define GEMM_DEFAULT_OFFSET_B 1024
/* 0x03fff == 2^14 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2469 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2471 #define SGEMM_DEFAULT_UNROLL_M 4
2472 #define SGEMM_DEFAULT_UNROLL_N 4
2473 #define DGEMM_DEFAULT_UNROLL_M 4
2474 #define DGEMM_DEFAULT_UNROLL_N 4
2475 #define CGEMM_DEFAULT_UNROLL_M 2
2476 #define CGEMM_DEFAULT_UNROLL_N 4
2477 #define ZGEMM_DEFAULT_UNROLL_M 2
2478 #define ZGEMM_DEFAULT_UNROLL_N 4
2480 #define SGEMM_DEFAULT_P 992
2481 #define DGEMM_DEFAULT_P 480
2482 #define CGEMM_DEFAULT_P 488
2483 #define ZGEMM_DEFAULT_P 248
2485 #define SGEMM_DEFAULT_Q 504
2486 #define DGEMM_DEFAULT_Q 504
2487 #define CGEMM_DEFAULT_Q 400
2488 #define ZGEMM_DEFAULT_Q 400
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment, a
 * __32BIT__-dependent UNROLL choice, P and Q with UL suffixes, and two R
 * sets.
 */
2499 #define GEMM_DEFAULT_OFFSET_A 0
2500 #define GEMM_DEFAULT_OFFSET_B 65536
/* 0x0ffff == 2^16 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2502 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/*
 * When __32BIT__ is defined the build emits a #warning and uses the first
 * UNROLL set. The second set that follows is, NOTE(review), presumably the
 * #else branch; the directive is not visible here - confirm.
 */
2503 #if defined(__32BIT__)
2504 #warning using BINARY32==POWER6
2505 #define SGEMM_DEFAULT_UNROLL_M 4
2506 #define SGEMM_DEFAULT_UNROLL_N 4
2507 #define DGEMM_DEFAULT_UNROLL_M 4
2508 #define DGEMM_DEFAULT_UNROLL_N 4
2509 #define CGEMM_DEFAULT_UNROLL_M 2
2510 #define CGEMM_DEFAULT_UNROLL_N 4
2511 #define ZGEMM_DEFAULT_UNROLL_M 2
2512 #define ZGEMM_DEFAULT_UNROLL_N 4
/* UNROLL set 2 */
2514 #define SGEMM_DEFAULT_UNROLL_M 16
2515 #define SGEMM_DEFAULT_UNROLL_N 8
2516 #define DGEMM_DEFAULT_UNROLL_M 16
2517 #define DGEMM_DEFAULT_UNROLL_N 4
2518 #define CGEMM_DEFAULT_UNROLL_M 8
2519 #define CGEMM_DEFAULT_UNROLL_N 4
2520 #define ZGEMM_DEFAULT_UNROLL_M 8
2521 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P and Q carry a UL suffix here, so they are unsigned long constants. */
2523 #define SGEMM_DEFAULT_P 1280UL
2524 #define DGEMM_DEFAULT_P 640UL
2525 #define CGEMM_DEFAULT_P 640UL
2526 #define ZGEMM_DEFAULT_P 320UL
2528 #define SGEMM_DEFAULT_Q 640UL
2529 #define DGEMM_DEFAULT_Q 720UL
2530 #define CGEMM_DEFAULT_Q 640UL
2531 #define ZGEMM_DEFAULT_Q 640UL
/*
 * R is defined twice: first as the matching P macro, then as the literal
 * 4096. NOTE(review): the conditional that picks one set is not visible
 * here - confirm.
 */
2534 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2535 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2536 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2537 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2539 #define SGEMM_DEFAULT_R 4096
2540 #define DGEMM_DEFAULT_R 4096
2541 #define CGEMM_DEFAULT_R 4096
2542 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM parameter table for one build target (the directive that selects
 * this table is not visible in this excerpt): offsets, alignment,
 * SWITCH_RATIO / GEMM_PREFERED_SIZE, and UNROLL, P, Q and R for the S/D/C/Z
 * prefixes.
 */
2553 #define GEMM_DEFAULT_OFFSET_A 0
2554 #define GEMM_DEFAULT_OFFSET_B 65536
/* 0x0ffff == 2^16 - 1; no (BLASLONG) cast, so the type is unsigned long. */
2555 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unlike some other tables in this file, these two are not precision-dependent here. */
2557 #define SWITCH_RATIO 16
2558 #define GEMM_PREFERED_SIZE 16
2560 #define SGEMM_DEFAULT_UNROLL_M 16
2561 #define SGEMM_DEFAULT_UNROLL_N 8
2562 #define DGEMM_DEFAULT_UNROLL_M 16
2563 #define DGEMM_DEFAULT_UNROLL_N 4
2564 #define CGEMM_DEFAULT_UNROLL_M 8
2565 #define CGEMM_DEFAULT_UNROLL_N 4
2566 #define ZGEMM_DEFAULT_UNROLL_M 8
2567 #define ZGEMM_DEFAULT_UNROLL_N 2
2569 #define SGEMM_DEFAULT_P 832
2570 #define DGEMM_DEFAULT_P 128
2571 #define CGEMM_DEFAULT_P 512
2572 #define ZGEMM_DEFAULT_P 256
/* S/C/Z use Q = 1026 (not a power of two); D uses 384. */
2574 #define SGEMM_DEFAULT_Q 1026
2575 #define DGEMM_DEFAULT_Q 384
2576 #define CGEMM_DEFAULT_Q 1026
2577 #define ZGEMM_DEFAULT_Q 1026
/* R is the literal 4096 for every prefix. */
2579 #define SGEMM_DEFAULT_R 4096
2580 #define DGEMM_DEFAULT_R 4096
2581 #define CGEMM_DEFAULT_R 4096
2582 #define ZGEMM_DEFAULT_R 4096
/* Default GEMM parameters selected when POWER10 is defined. */
2588 #if defined(POWER10)
/* Buffer offset and alignment-mask defaults for this target. */
2592 #define GEMM_DEFAULT_OFFSET_A 0
2593 #define GEMM_DEFAULT_OFFSET_B 65536
2594 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
2596 #define SWITCH_RATIO 16
2597 #define GEMM_PREFERED_SIZE 16
/* Unroll factors (M, N) for each of the S/D/C/Z GEMM variants. */
2599 #define SGEMM_DEFAULT_UNROLL_M 16
2600 #define SGEMM_DEFAULT_UNROLL_N 8
/* DGEMM uses a 16x4 unroll when HAVE_GAS is defined and equal to 1. */
2601 #if defined(HAVE_GAS) && (HAVE_GAS == 1)
2602 #define DGEMM_DEFAULT_UNROLL_M 16
2603 #define DGEMM_DEFAULT_UNROLL_N 4
/* NOTE(review): alternate 8x8 DGEMM unroll. The directive that selects it
   (presumably an #else) is not visible in this excerpt - confirm. */
2605 #define DGEMM_DEFAULT_UNROLL_M 8
2606 #define DGEMM_DEFAULT_UNROLL_N 8
2608 #define CGEMM_DEFAULT_UNROLL_M 8
2609 #define CGEMM_DEFAULT_UNROLL_N 4
2610 #define ZGEMM_DEFAULT_UNROLL_M 8
2611 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P, Q and R values per variant (presumably blocking sizes used by
   the GEMM driver - confirm against the code that consumes them). */
2613 #define SGEMM_DEFAULT_P 512
2614 #define DGEMM_DEFAULT_P 384
2615 #define CGEMM_DEFAULT_P 512
2616 #define ZGEMM_DEFAULT_P 256
2618 #define SGEMM_DEFAULT_Q 512
2619 #define DGEMM_DEFAULT_Q 512
2620 #define CGEMM_DEFAULT_Q 384
2621 #define ZGEMM_DEFAULT_Q 384
2623 #define SGEMM_DEFAULT_R 4096
2624 #define DGEMM_DEFAULT_R 4096
2625 #define CGEMM_DEFAULT_R 4096
2626 #define ZGEMM_DEFAULT_R 4096
2630 #undef SBGEMM_DEFAULT_UNROLL_N
2631 #undef SBGEMM_DEFAULT_UNROLL_M
2632 #undef SBGEMM_DEFAULT_P
2633 #undef SBGEMM_DEFAULT_R
2634 #undef SBGEMM_DEFAULT_Q
2635 #define SBGEMM_DEFAULT_UNROLL_M 16
2636 #define SBGEMM_DEFAULT_UNROLL_N 8
2637 #define SBGEMM_DEFAULT_P 832
2638 #define SBGEMM_DEFAULT_Q 1026
2639 #define SBGEMM_DEFAULT_R 4096
/* Default GEMM parameters selected when both SPARC and V7 are defined. */
2642 #if defined(SPARC) && defined(V7)
/* Buffer offset and alignment-mask defaults for this target. */
2647 #define GEMM_DEFAULT_OFFSET_A 0
2648 #define GEMM_DEFAULT_OFFSET_B 2048
2649 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M, N) for each of the S/D/C/Z GEMM variants. */
2651 #define SGEMM_DEFAULT_UNROLL_M 2
2652 #define SGEMM_DEFAULT_UNROLL_N 8
2653 #define DGEMM_DEFAULT_UNROLL_M 2
2654 #define DGEMM_DEFAULT_UNROLL_N 8
2655 #define CGEMM_DEFAULT_UNROLL_M 1
2656 #define CGEMM_DEFAULT_UNROLL_N 4
2657 #define ZGEMM_DEFAULT_UNROLL_M 1
2658 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Default P and Q values per variant (presumably blocking sizes - confirm). */
2660 #define SGEMM_DEFAULT_P 256
2661 #define DGEMM_DEFAULT_P 256
2662 #define CGEMM_DEFAULT_P 256
2663 #define ZGEMM_DEFAULT_P 256
2665 #define SGEMM_DEFAULT_Q 512
2666 #define DGEMM_DEFAULT_Q 256
2667 #define CGEMM_DEFAULT_Q 256
2668 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD expands to the identifier gemm_thread_mn, which is declared
   outside this excerpt. */
2671 #define GEMM_THREAD gemm_thread_mn
/* Default GEMM parameters selected when SPARC and V9 are both defined, or
   when the compiler defines __sparc_v9__. */
2674 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
/* Buffer offset and alignment-mask defaults for this target. */
2679 #define GEMM_DEFAULT_OFFSET_A 0
2680 #define GEMM_DEFAULT_OFFSET_B 2048
2681 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors (M, N) for each of the S/D/C/Z GEMM variants. */
2683 #define SGEMM_DEFAULT_UNROLL_M 4
2684 #define SGEMM_DEFAULT_UNROLL_N 4
2685 #define DGEMM_DEFAULT_UNROLL_M 4
2686 #define DGEMM_DEFAULT_UNROLL_N 4
2687 #define CGEMM_DEFAULT_UNROLL_M 2
2688 #define CGEMM_DEFAULT_UNROLL_N 2
2689 #define ZGEMM_DEFAULT_UNROLL_M 2
2690 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P and Q values per variant (presumably blocking sizes - confirm). */
2692 #define SGEMM_DEFAULT_P 512
2693 #define DGEMM_DEFAULT_P 512
2694 #define CGEMM_DEFAULT_P 512
2695 #define ZGEMM_DEFAULT_P 512
2697 #define SGEMM_DEFAULT_Q 1024
2698 #define DGEMM_DEFAULT_Q 512
2699 #define CGEMM_DEFAULT_Q 512
2700 #define ZGEMM_DEFAULT_Q 256
2710 #define GEMM_DEFAULT_OFFSET_A 0
2711 #define GEMM_DEFAULT_OFFSET_B 0
2712 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2714 #define SGEMM_DEFAULT_UNROLL_M 2
2715 #define SGEMM_DEFAULT_UNROLL_N 8
2716 #define DGEMM_DEFAULT_UNROLL_M 2
2717 #define DGEMM_DEFAULT_UNROLL_N 8
2718 #define CGEMM_DEFAULT_UNROLL_M 1
2719 #define CGEMM_DEFAULT_UNROLL_N 4
2720 #define ZGEMM_DEFAULT_UNROLL_M 1
2721 #define ZGEMM_DEFAULT_UNROLL_N 4
2723 #define SGEMM_DEFAULT_P 108
2724 #define DGEMM_DEFAULT_P 112
2725 #define CGEMM_DEFAULT_P 108
2726 #define ZGEMM_DEFAULT_P 112
2728 #define SGEMM_DEFAULT_Q 288
2729 #define DGEMM_DEFAULT_Q 144
2730 #define CGEMM_DEFAULT_Q 144
2731 #define ZGEMM_DEFAULT_Q 72
2733 #define SGEMM_DEFAULT_R 2000
2734 #define DGEMM_DEFAULT_R 2000
2735 #define CGEMM_DEFAULT_R 2000
2736 #define ZGEMM_DEFAULT_R 2000
/* Default GEMM parameters selected when LOONGSON3R4 is defined. */
2741 #if defined(LOONGSON3R4)
/* Buffer offset and alignment-mask defaults; the mask is cast to BLASLONG. */
2745 #define GEMM_DEFAULT_OFFSET_A 0
2746 #define GEMM_DEFAULT_OFFSET_B 0
2747 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* First set of unroll factors (M, N) for the S/D/C/Z GEMM variants.
   NOTE(review): the conditional choosing between this set and the second set
   below is not visible in this excerpt - confirm which guard applies. */
2750 #define SGEMM_DEFAULT_UNROLL_M 8
2751 #define SGEMM_DEFAULT_UNROLL_N 8
2753 #define DGEMM_DEFAULT_UNROLL_M 8
2754 #define DGEMM_DEFAULT_UNROLL_N 4
2756 #define CGEMM_DEFAULT_UNROLL_M 8
2757 #define CGEMM_DEFAULT_UNROLL_N 4
2759 #define ZGEMM_DEFAULT_UNROLL_M 4
2760 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Second (alternate) set of unroll factors for the same variants. */
2762 #define SGEMM_DEFAULT_UNROLL_M 8
2763 #define SGEMM_DEFAULT_UNROLL_N 4
2765 #define DGEMM_DEFAULT_UNROLL_M 4
2766 #define DGEMM_DEFAULT_UNROLL_N 4
2768 #define CGEMM_DEFAULT_UNROLL_M 4
2769 #define CGEMM_DEFAULT_UNROLL_N 2
2771 #define ZGEMM_DEFAULT_UNROLL_M 2
2772 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P and Q values per variant (presumably blocking sizes - confirm). */
2775 #define SGEMM_DEFAULT_P 64
2776 #define DGEMM_DEFAULT_P 44
2777 #define CGEMM_DEFAULT_P 64
2778 #define ZGEMM_DEFAULT_P 32
2780 #define SGEMM_DEFAULT_Q 192
2781 #define DGEMM_DEFAULT_Q 92
2782 #define CGEMM_DEFAULT_Q 128
2783 #define ZGEMM_DEFAULT_Q 80
/* Default R values. DGEMM expands to the identifier dgemm_r (declared outside
   this excerpt) instead of a literal like the other three. */
2785 #define SGEMM_DEFAULT_R 640
2786 #define DGEMM_DEFAULT_R dgemm_r
2787 #define CGEMM_DEFAULT_R 640
2788 #define ZGEMM_DEFAULT_R 640
/* Additional A/B offsets specific to this section. */
2790 #define GEMM_OFFSET_A1 0x10000
2791 #define GEMM_OFFSET_B1 0x100000
/* Default GEMM parameters selected when LOONGSON3R3 is defined. */
2796 #if defined(LOONGSON3R3)
2797 // Copied from the SICORTEX settings.
/* Buffer offset and alignment-mask defaults; the mask is cast to BLASLONG. */
2801 #define GEMM_DEFAULT_OFFSET_A 0
2802 #define GEMM_DEFAULT_OFFSET_B 0
2803 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors (M, N) for each of the S/D/C/Z GEMM variants. */
2805 #define SGEMM_DEFAULT_UNROLL_M 8
2806 #define SGEMM_DEFAULT_UNROLL_N 4
2808 #define DGEMM_DEFAULT_UNROLL_M 4
2809 #define DGEMM_DEFAULT_UNROLL_N 4
2811 #define CGEMM_DEFAULT_UNROLL_M 4
2812 #define CGEMM_DEFAULT_UNROLL_N 2
2814 #define ZGEMM_DEFAULT_UNROLL_M 2
2815 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P and Q values per variant (presumably blocking sizes - confirm). */
2817 #define SGEMM_DEFAULT_P 64
2818 #define DGEMM_DEFAULT_P 44
2819 #define CGEMM_DEFAULT_P 64
2820 #define ZGEMM_DEFAULT_P 32
2822 #define SGEMM_DEFAULT_Q 192
2823 #define DGEMM_DEFAULT_Q 92
2824 #define CGEMM_DEFAULT_Q 128
2825 #define ZGEMM_DEFAULT_Q 80
/* Default R values. DGEMM expands to the identifier dgemm_r (declared outside
   this excerpt) instead of a literal like the other three. */
2827 #define SGEMM_DEFAULT_R 640
2828 #define DGEMM_DEFAULT_R dgemm_r
2829 #define CGEMM_DEFAULT_R 640
2830 #define ZGEMM_DEFAULT_R 640
/* Additional A/B offsets specific to this section. */
2832 #define GEMM_OFFSET_A1 0x10000
2833 #define GEMM_OFFSET_B1 0x100000
/* Default GEMM parameters selected when LOONGSON3R5 is defined. */
2838 #if defined (LOONGSON3R5)
/* Buffer offset and alignment-mask defaults for this target. */
2842 #define GEMM_DEFAULT_OFFSET_A 0
2843 #define GEMM_DEFAULT_OFFSET_B 0
2844 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* N-direction unroll factors for the S/D/Q/C/Z/X GEMM variants. */
2846 #define SGEMM_DEFAULT_UNROLL_N 8
2847 #define DGEMM_DEFAULT_UNROLL_N 4
2848 #define QGEMM_DEFAULT_UNROLL_N 2
2849 #define CGEMM_DEFAULT_UNROLL_N 4
2850 #define ZGEMM_DEFAULT_UNROLL_N 4
2851 #define XGEMM_DEFAULT_UNROLL_N 1
/* M-direction unroll factors for the same variants. */
2853 #define SGEMM_DEFAULT_UNROLL_M 2
2854 #define DGEMM_DEFAULT_UNROLL_M 16
2855 #define QGEMM_DEFAULT_UNROLL_M 2
2856 #define CGEMM_DEFAULT_UNROLL_M 1
2857 #define ZGEMM_DEFAULT_UNROLL_M 1
2858 #define XGEMM_DEFAULT_UNROLL_M 1
/* Default P values. In the lines shown, only DGEMM is a literal; the other
   variants expand to lower-case identifiers (sgemm_p, qgemm_p, ...) that are
   declared outside this excerpt. */
2860 #define SGEMM_DEFAULT_P sgemm_p
2861 #define DGEMM_DEFAULT_P 32
2862 #define QGEMM_DEFAULT_P qgemm_p
2863 #define CGEMM_DEFAULT_P cgemm_p
2864 #define ZGEMM_DEFAULT_P zgemm_p
2865 #define XGEMM_DEFAULT_P xgemm_p
/* Default R values, following the same literal-vs-identifier split as P. */
2867 #define SGEMM_DEFAULT_R sgemm_r
2868 #define DGEMM_DEFAULT_R 858
2869 #define QGEMM_DEFAULT_R qgemm_r
2870 #define CGEMM_DEFAULT_R cgemm_r
2871 #define ZGEMM_DEFAULT_R zgemm_r
2872 #define XGEMM_DEFAULT_R xgemm_r
/* Default Q values; all are literals. */
2874 #define SGEMM_DEFAULT_Q 128
2875 #define DGEMM_DEFAULT_Q 152
2876 #define QGEMM_DEFAULT_Q 128
2877 #define CGEMM_DEFAULT_Q 128
2878 #define ZGEMM_DEFAULT_Q 128
2879 #define XGEMM_DEFAULT_Q 128
/* Default GEMM parameters shared by the targets named in the guard below
   (P5600, MIPS1004K, MIPS24K, I6400, P6600, I6500). */
2884 #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
/* Buffer offset and alignment-mask defaults; the mask is cast to BLASLONG. */
2888 #define GEMM_DEFAULT_OFFSET_A 0
2889 #define GEMM_DEFAULT_OFFSET_B 0
2890 #define GEMM_DEFAULT_ALIGN (BLASLONG) 0x03fffUL
/* Unroll factors used when HAVE_MSA is defined and NO_MSA is not. */
2892 #if defined(HAVE_MSA) && !defined(NO_MSA)
2893 #define SGEMM_DEFAULT_UNROLL_M 8
2894 #define SGEMM_DEFAULT_UNROLL_N 8
2896 #define DGEMM_DEFAULT_UNROLL_M 8
2897 #define DGEMM_DEFAULT_UNROLL_N 4
2899 #define CGEMM_DEFAULT_UNROLL_M 8
2900 #define CGEMM_DEFAULT_UNROLL_N 4
2902 #define ZGEMM_DEFAULT_UNROLL_M 4
2903 #define ZGEMM_DEFAULT_UNROLL_N 4
/* NOTE(review): alternate 2x2 unroll factors. The directive that selects them
   (presumably an #else for the MSA check) is not visible in this excerpt. */
2905 #define SGEMM_DEFAULT_UNROLL_M 2
2906 #define SGEMM_DEFAULT_UNROLL_N 2
2908 #define DGEMM_DEFAULT_UNROLL_M 2
2909 #define DGEMM_DEFAULT_UNROLL_N 2
2911 #define CGEMM_DEFAULT_UNROLL_M 2
2912 #define CGEMM_DEFAULT_UNROLL_N 2
2914 #define ZGEMM_DEFAULT_UNROLL_M 2
2915 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P, Q and R values per variant (presumably blocking sizes - confirm). */
2918 #define SGEMM_DEFAULT_P 128
2919 #define DGEMM_DEFAULT_P 128
2920 #define CGEMM_DEFAULT_P 96
2921 #define ZGEMM_DEFAULT_P 64
2923 #define SGEMM_DEFAULT_Q 240
2924 #define DGEMM_DEFAULT_Q 120
2925 #define CGEMM_DEFAULT_Q 120
2926 #define ZGEMM_DEFAULT_Q 120
2928 #define SGEMM_DEFAULT_R 12288
2929 #define DGEMM_DEFAULT_R 8192
2930 #define CGEMM_DEFAULT_R 4096
2931 #define ZGEMM_DEFAULT_R 4096
/* Default GEMM parameters selected when RISCV64_GENERIC is defined. */
2936 #ifdef RISCV64_GENERIC
/* Buffer offset and alignment-mask defaults; the mask is cast to BLASLONG. */
2937 #define GEMM_DEFAULT_OFFSET_A 0
2938 #define GEMM_DEFAULT_OFFSET_B 0
2939 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: every S/D/C/Z variant uses 2x2. */
2941 #define SGEMM_DEFAULT_UNROLL_M 2
2942 #define SGEMM_DEFAULT_UNROLL_N 2
2944 #define DGEMM_DEFAULT_UNROLL_M 2
2945 #define DGEMM_DEFAULT_UNROLL_N 2
2947 #define CGEMM_DEFAULT_UNROLL_M 2
2948 #define CGEMM_DEFAULT_UNROLL_N 2
2950 #define ZGEMM_DEFAULT_UNROLL_M 2
2951 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P, Q and R values per variant (presumably blocking sizes - confirm). */
2953 #define SGEMM_DEFAULT_P 128
2954 #define DGEMM_DEFAULT_P 128
2955 #define CGEMM_DEFAULT_P 96
2956 #define ZGEMM_DEFAULT_P 64
2958 #define SGEMM_DEFAULT_Q 240
2959 #define DGEMM_DEFAULT_Q 120
2960 #define CGEMM_DEFAULT_Q 120
2961 #define ZGEMM_DEFAULT_Q 120
2963 #define SGEMM_DEFAULT_R 12288
2964 #define DGEMM_DEFAULT_R 8192
2965 #define CGEMM_DEFAULT_R 4096
2966 #define ZGEMM_DEFAULT_R 4096
2970 #define GEMM_DEFAULT_OFFSET_A 0
2971 #define GEMM_DEFAULT_OFFSET_B 0
2976 #define GEMM_DEFAULT_OFFSET_A 0
2977 #define GEMM_DEFAULT_OFFSET_B 0
2978 #define GEMM_DEFAULT_ALIGN 0x03fffUL
2980 #define SGEMM_DEFAULT_UNROLL_M 16
2981 #define SGEMM_DEFAULT_UNROLL_N 4
2983 #define DGEMM_DEFAULT_UNROLL_M 8
2984 #define DGEMM_DEFAULT_UNROLL_N 4
2986 #define CGEMM_DEFAULT_UNROLL_M 2
2987 #define CGEMM_DEFAULT_UNROLL_N 2
2989 #define ZGEMM_DEFAULT_UNROLL_M 2
2990 #define ZGEMM_DEFAULT_UNROLL_N 2
2992 #define SGEMM_DEFAULT_P 160
2993 #define DGEMM_DEFAULT_P 160
2994 #define CGEMM_DEFAULT_P 96
2995 #define ZGEMM_DEFAULT_P 64
2997 #define SGEMM_DEFAULT_Q 240
2998 #define DGEMM_DEFAULT_Q 128
2999 #define CGEMM_DEFAULT_Q 120
3000 #define ZGEMM_DEFAULT_Q 120
3002 #define SGEMM_DEFAULT_R 12288
3003 #define DGEMM_DEFAULT_R 8192
3004 #define CGEMM_DEFAULT_R 4096
3005 #define ZGEMM_DEFAULT_R 4096
3009 #define GEMM_DEFAULT_OFFSET_A 0
3010 #define GEMM_DEFAULT_OFFSET_B 0
3018 #define GEMM_DEFAULT_OFFSET_A 0
3019 #define GEMM_DEFAULT_OFFSET_B 0
3020 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3022 #define SGEMM_DEFAULT_UNROLL_M 4
3023 #define SGEMM_DEFAULT_UNROLL_N 4
3025 #define DGEMM_DEFAULT_UNROLL_M 4
3026 #define DGEMM_DEFAULT_UNROLL_N 4
3028 #define CGEMM_DEFAULT_UNROLL_M 2
3029 #define CGEMM_DEFAULT_UNROLL_N 2
3031 #define ZGEMM_DEFAULT_UNROLL_M 2
3032 #define ZGEMM_DEFAULT_UNROLL_N 2
3034 #define SGEMM_DEFAULT_P 128
3035 #define DGEMM_DEFAULT_P 128
3036 #define CGEMM_DEFAULT_P 96
3037 #define ZGEMM_DEFAULT_P 64
3039 #define SGEMM_DEFAULT_Q 240
3040 #define DGEMM_DEFAULT_Q 120
3041 #define CGEMM_DEFAULT_Q 120
3042 #define ZGEMM_DEFAULT_Q 120
3044 #define SGEMM_DEFAULT_R 12288
3045 #define DGEMM_DEFAULT_R 8192
3046 #define CGEMM_DEFAULT_R 4096
3047 #define ZGEMM_DEFAULT_R 4096
3059 #define GEMM_DEFAULT_OFFSET_A 0
3060 #define GEMM_DEFAULT_OFFSET_B 0
3061 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3063 #define SGEMM_DEFAULT_UNROLL_M 4
3064 #define SGEMM_DEFAULT_UNROLL_N 2
3066 #define DGEMM_DEFAULT_UNROLL_M 4
3067 #define DGEMM_DEFAULT_UNROLL_N 2
3069 #define CGEMM_DEFAULT_UNROLL_M 2
3070 #define CGEMM_DEFAULT_UNROLL_N 2
3072 #define ZGEMM_DEFAULT_UNROLL_M 2
3073 #define ZGEMM_DEFAULT_UNROLL_N 2
3075 #define SGEMM_DEFAULT_P 128
3076 #define DGEMM_DEFAULT_P 128
3077 #define CGEMM_DEFAULT_P 96
3078 #define ZGEMM_DEFAULT_P 64
3080 #define SGEMM_DEFAULT_Q 240
3081 #define DGEMM_DEFAULT_Q 120
3082 #define CGEMM_DEFAULT_Q 120
3083 #define ZGEMM_DEFAULT_Q 120
3085 #define SGEMM_DEFAULT_R 12288
3086 #define DGEMM_DEFAULT_R 8192
3087 #define CGEMM_DEFAULT_R 4096
3088 #define ZGEMM_DEFAULT_R 4096
3094 /* Common ARMv8 parameters */
3100 #define GEMM_DEFAULT_OFFSET_A 0
3101 #define GEMM_DEFAULT_OFFSET_B 0
3103 /* Use explicit casting for win64 as LLP64 datamodel is used */
3104 #define GEMM_DEFAULT_ALIGN (BLASULONG)0x03fffUL
3106 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3111 #if defined(CORTEXA57) || defined(CORTEXX1) || \
3112 defined(CORTEXA72) || defined(CORTEXA73) || \
3113 defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
3115 #define SGEMM_DEFAULT_UNROLL_M 16
3116 #define SGEMM_DEFAULT_UNROLL_N 4
3118 #define DGEMM_DEFAULT_UNROLL_M 8
3119 #define DGEMM_DEFAULT_UNROLL_N 4
3121 #define CGEMM_DEFAULT_UNROLL_M 8
3122 #define CGEMM_DEFAULT_UNROLL_N 4
3124 #define ZGEMM_DEFAULT_UNROLL_M 4
3125 #define ZGEMM_DEFAULT_UNROLL_N 4
3127 /*FIXME: this should be using the cache size, but there is currently no easy way to
3128 query that on ARM. So if getarch counted more than 8 cores we simply assume the host
3129 is a big desktop or server with abundant cache rather than a phone or embedded device */
3130 #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
3131 #define SGEMM_DEFAULT_P 512
3132 #define DGEMM_DEFAULT_P 256
3133 #define CGEMM_DEFAULT_P 256
3134 #define ZGEMM_DEFAULT_P 128
3136 #define SGEMM_DEFAULT_Q 1024
3137 #define DGEMM_DEFAULT_Q 512
3138 #define CGEMM_DEFAULT_Q 512
3139 #define ZGEMM_DEFAULT_Q 512
3141 #define SGEMM_DEFAULT_P 128
3142 #define DGEMM_DEFAULT_P 160
3143 #define CGEMM_DEFAULT_P 128
3144 #define ZGEMM_DEFAULT_P 128
3146 #define SGEMM_DEFAULT_Q 352
3147 #define DGEMM_DEFAULT_Q 128
3148 #define CGEMM_DEFAULT_Q 224
3149 #define ZGEMM_DEFAULT_Q 112
3152 #define SGEMM_DEFAULT_R 4096
3153 #define DGEMM_DEFAULT_R 4096
3154 #define CGEMM_DEFAULT_R 4096
3155 #define ZGEMM_DEFAULT_R 2048
3157 #elif defined(CORTEXA53) || defined(CORTEXA55)
3159 #define SGEMM_DEFAULT_UNROLL_M 8
3160 #define SGEMM_DEFAULT_UNROLL_N 8
3162 #define DGEMM_DEFAULT_UNROLL_M 4
3163 #define DGEMM_DEFAULT_UNROLL_N 4
3165 #define CGEMM_DEFAULT_UNROLL_M 8
3166 #define CGEMM_DEFAULT_UNROLL_N 4
3168 #define ZGEMM_DEFAULT_UNROLL_M 4
3169 #define ZGEMM_DEFAULT_UNROLL_N 4
3171 #define SGEMM_DEFAULT_P 256
3172 #define DGEMM_DEFAULT_P 160
3173 #define CGEMM_DEFAULT_P 128
3174 #define ZGEMM_DEFAULT_P 128
3176 #define SGEMM_DEFAULT_Q 256
3177 #define DGEMM_DEFAULT_Q 128
3178 #define CGEMM_DEFAULT_Q 224
3179 #define ZGEMM_DEFAULT_Q 112
3181 #define SGEMM_DEFAULT_R 4096
3182 #define DGEMM_DEFAULT_R 4096
3183 #define CGEMM_DEFAULT_R 4096
3184 #define ZGEMM_DEFAULT_R 2048
3186 #elif defined(THUNDERX)
3188 #define SGEMM_DEFAULT_UNROLL_M 4
3189 #define SGEMM_DEFAULT_UNROLL_N 4
3191 #define DGEMM_DEFAULT_UNROLL_M 2
3192 #define DGEMM_DEFAULT_UNROLL_N 2
3194 #define CGEMM_DEFAULT_UNROLL_M 2
3195 #define CGEMM_DEFAULT_UNROLL_N 2
3197 #define ZGEMM_DEFAULT_UNROLL_M 2
3198 #define ZGEMM_DEFAULT_UNROLL_N 2
3200 #define SGEMM_DEFAULT_P 128
3201 #define DGEMM_DEFAULT_P 128
3202 #define CGEMM_DEFAULT_P 96
3203 #define ZGEMM_DEFAULT_P 64
3205 #define SGEMM_DEFAULT_Q 240
3206 #define DGEMM_DEFAULT_Q 120
3207 #define CGEMM_DEFAULT_Q 120
3208 #define ZGEMM_DEFAULT_Q 120
3210 #define SGEMM_DEFAULT_R 12288
3211 #define DGEMM_DEFAULT_R 8192
3212 #define CGEMM_DEFAULT_R 4096
3213 #define ZGEMM_DEFAULT_R 4096
3215 #elif defined(THUNDERX2T99)
3217 #define SGEMM_DEFAULT_UNROLL_M 16
3218 #define SGEMM_DEFAULT_UNROLL_N 4
3220 #define DGEMM_DEFAULT_UNROLL_M 8
3221 #define DGEMM_DEFAULT_UNROLL_N 4
3223 #define CGEMM_DEFAULT_UNROLL_M 8
3224 #define CGEMM_DEFAULT_UNROLL_N 4
3226 #define ZGEMM_DEFAULT_UNROLL_M 4
3227 #define ZGEMM_DEFAULT_UNROLL_N 4
3229 #define SGEMM_DEFAULT_P 128
3230 #define DGEMM_DEFAULT_P 160
3231 #define CGEMM_DEFAULT_P 128
3232 #define ZGEMM_DEFAULT_P 128
3234 #define SGEMM_DEFAULT_Q 352
3235 #define DGEMM_DEFAULT_Q 128
3236 #define CGEMM_DEFAULT_Q 224
3237 #define ZGEMM_DEFAULT_Q 112
3239 #define SGEMM_DEFAULT_R 4096
3240 #define DGEMM_DEFAULT_R 4096
3241 #define CGEMM_DEFAULT_R 4096
3242 #define ZGEMM_DEFAULT_R 4096
3244 #elif defined(THUNDERX3T110)
3246 #define SGEMM_DEFAULT_UNROLL_M 16
3247 #define SGEMM_DEFAULT_UNROLL_N 4
3249 #define DGEMM_DEFAULT_UNROLL_M 8
3250 #define DGEMM_DEFAULT_UNROLL_N 4
3252 #define CGEMM_DEFAULT_UNROLL_M 8
3253 #define CGEMM_DEFAULT_UNROLL_N 4
3255 #define ZGEMM_DEFAULT_UNROLL_M 4
3256 #define ZGEMM_DEFAULT_UNROLL_N 4
3258 #define SGEMM_DEFAULT_P 128
3259 #define DGEMM_DEFAULT_P 320
3260 #define CGEMM_DEFAULT_P 128
3261 #define ZGEMM_DEFAULT_P 128
3263 #define SGEMM_DEFAULT_Q 352
3264 #define DGEMM_DEFAULT_Q 128
3265 #define CGEMM_DEFAULT_Q 224
3266 #define ZGEMM_DEFAULT_Q 112
3268 #define SGEMM_DEFAULT_R 4096
3269 #define DGEMM_DEFAULT_R 4096
3270 #define CGEMM_DEFAULT_R 4096
3271 #define ZGEMM_DEFAULT_R 4096
3273 #elif defined(NEOVERSEN1)
3275 #define SGEMM_DEFAULT_UNROLL_M 16
3276 #define SGEMM_DEFAULT_UNROLL_N 4
3278 #define DGEMM_DEFAULT_UNROLL_M 8
3279 #define DGEMM_DEFAULT_UNROLL_N 4
3281 #define CGEMM_DEFAULT_UNROLL_M 8
3282 #define CGEMM_DEFAULT_UNROLL_N 4
3284 #define ZGEMM_DEFAULT_UNROLL_M 4
3285 #define ZGEMM_DEFAULT_UNROLL_N 4
3287 #define SGEMM_DEFAULT_P 128
3288 #define DGEMM_DEFAULT_P 160
3289 #define CGEMM_DEFAULT_P 128
3290 #define ZGEMM_DEFAULT_P 128
3292 #define SGEMM_DEFAULT_Q 352
3293 #define DGEMM_DEFAULT_Q 128
3294 #define CGEMM_DEFAULT_Q 224
3295 #define ZGEMM_DEFAULT_Q 112
3297 #define SGEMM_DEFAULT_R 4096
3298 #define DGEMM_DEFAULT_R 4096
3299 #define CGEMM_DEFAULT_R 4096
3300 #define ZGEMM_DEFAULT_R 4096
3302 #elif defined(NEOVERSEV1)
3304 #define SGEMM_DEFAULT_UNROLL_M 16
3305 #define SGEMM_DEFAULT_UNROLL_N 4
3307 #define DGEMM_DEFAULT_UNROLL_M 8
3308 #define DGEMM_DEFAULT_UNROLL_N 4
3310 #define CGEMM_DEFAULT_UNROLL_M 8
3311 #define CGEMM_DEFAULT_UNROLL_N 4
3313 #define ZGEMM_DEFAULT_UNROLL_M 4
3314 #define ZGEMM_DEFAULT_UNROLL_N 4
3316 #define SGEMM_DEFAULT_P 128
3317 #define DGEMM_DEFAULT_P 160
3318 #define CGEMM_DEFAULT_P 128
3319 #define ZGEMM_DEFAULT_P 128
3321 #define SGEMM_DEFAULT_Q 352
3322 #define DGEMM_DEFAULT_Q 128
3323 #define CGEMM_DEFAULT_Q 224
3324 #define ZGEMM_DEFAULT_Q 112
3326 #define SGEMM_DEFAULT_R 4096
3327 #define DGEMM_DEFAULT_R 4096
3328 #define CGEMM_DEFAULT_R 4096
3329 #define ZGEMM_DEFAULT_R 4096
3331 #elif defined(NEOVERSEN2)
3333 #undef SBGEMM_DEFAULT_UNROLL_M
3334 #undef SBGEMM_DEFAULT_UNROLL_N
3335 #define SBGEMM_DEFAULT_UNROLL_M 8
3336 #define SBGEMM_DEFAULT_UNROLL_N 4
3338 #define SGEMM_DEFAULT_UNROLL_M 16
3339 #define SGEMM_DEFAULT_UNROLL_N 4
3341 #define DGEMM_DEFAULT_UNROLL_M 8
3342 #define DGEMM_DEFAULT_UNROLL_N 4
3344 #define CGEMM_DEFAULT_UNROLL_M 8
3345 #define CGEMM_DEFAULT_UNROLL_N 4
3347 #define ZGEMM_DEFAULT_UNROLL_M 4
3348 #define ZGEMM_DEFAULT_UNROLL_N 4
3350 #define SGEMM_DEFAULT_P 128
3351 #define DGEMM_DEFAULT_P 160
3352 #define CGEMM_DEFAULT_P 128
3353 #define ZGEMM_DEFAULT_P 128
3355 #define SGEMM_DEFAULT_Q 352
3356 #define DGEMM_DEFAULT_Q 128
3357 #define CGEMM_DEFAULT_Q 224
3358 #define ZGEMM_DEFAULT_Q 112
3360 #define SGEMM_DEFAULT_R 4096
3361 #define DGEMM_DEFAULT_R 4096
3362 #define CGEMM_DEFAULT_R 4096
3363 #define ZGEMM_DEFAULT_R 4096
3365 #elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2)
3367 /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3368 Until then, just keep it different from DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
3369 #define SGEMM_DEFAULT_UNROLL_M 4
3370 #define SGEMM_DEFAULT_UNROLL_N 8
3371 /* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
3372 * Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
3373 * If SVE size is ever more than 1024, this should be increased also. */
3374 #define SGEMM_DEFAULT_UNROLL_MN 32
3376 /* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3377 Until then, just keep it different from DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
3378 #define DGEMM_DEFAULT_UNROLL_M 2
3379 #define DGEMM_DEFAULT_UNROLL_N 8
3381 #define DGEMM_DEFAULT_UNROLL_MN 32
3383 #define CGEMM_DEFAULT_UNROLL_M 2
3384 #define CGEMM_DEFAULT_UNROLL_N 4
3385 #define CGEMM_DEFAULT_UNROLL_MN 16
3387 #define ZGEMM_DEFAULT_UNROLL_M 2
3388 #define ZGEMM_DEFAULT_UNROLL_N 4
3389 #define ZGEMM_DEFAULT_UNROLL_MN 16
3391 #define SGEMM_DEFAULT_P 128
3392 #define DGEMM_DEFAULT_P 160
3393 #define CGEMM_DEFAULT_P 128
3394 #define ZGEMM_DEFAULT_P 128
3396 #define SGEMM_DEFAULT_Q 352
3397 #define DGEMM_DEFAULT_Q 128
3398 #define CGEMM_DEFAULT_Q 224
3399 #define ZGEMM_DEFAULT_Q 112
3401 #define SGEMM_DEFAULT_R 4096
3402 #define DGEMM_DEFAULT_R 4096
3403 #define CGEMM_DEFAULT_R 4096
3404 #define ZGEMM_DEFAULT_R 4096
3406 #else /* Other/undetected ARMv8 cores */
3408 #define SGEMM_DEFAULT_UNROLL_M 16
3409 #define SGEMM_DEFAULT_UNROLL_N 4
3411 #define DGEMM_DEFAULT_UNROLL_M 8
3412 #define DGEMM_DEFAULT_UNROLL_N 4
3414 #define CGEMM_DEFAULT_UNROLL_M 8
3415 #define CGEMM_DEFAULT_UNROLL_N 4
3417 #define ZGEMM_DEFAULT_UNROLL_M 4
3418 #define ZGEMM_DEFAULT_UNROLL_N 4
3420 #define SGEMM_DEFAULT_P 128
3421 #define DGEMM_DEFAULT_P 160
3422 #define CGEMM_DEFAULT_P 128
3423 #define ZGEMM_DEFAULT_P 128
3425 #define SGEMM_DEFAULT_Q 352
3426 #define DGEMM_DEFAULT_Q 128
3427 #define CGEMM_DEFAULT_Q 224
3428 #define ZGEMM_DEFAULT_Q 112
3430 #define SGEMM_DEFAULT_R 4096
3431 #define DGEMM_DEFAULT_R 4096
3432 #define CGEMM_DEFAULT_R 4096
3433 #define ZGEMM_DEFAULT_R 4096
3444 #define GEMM_DEFAULT_OFFSET_A 0
3445 #define GEMM_DEFAULT_OFFSET_B 0
3446 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3448 #define SGEMM_DEFAULT_UNROLL_M 2
3449 #define SGEMM_DEFAULT_UNROLL_N 2
3451 #define DGEMM_DEFAULT_UNROLL_M 2
3452 #define DGEMM_DEFAULT_UNROLL_N 2
3454 #define CGEMM_DEFAULT_UNROLL_M 2
3455 #define CGEMM_DEFAULT_UNROLL_N 2
3457 #define ZGEMM_DEFAULT_UNROLL_M 2
3458 #define ZGEMM_DEFAULT_UNROLL_N 2
3460 #define SGEMM_DEFAULT_P 128
3461 #define DGEMM_DEFAULT_P 128
3462 #define CGEMM_DEFAULT_P 96
3463 #define ZGEMM_DEFAULT_P 64
3465 #define SGEMM_DEFAULT_Q 240
3466 #define DGEMM_DEFAULT_Q 120
3467 #define CGEMM_DEFAULT_Q 120
3468 #define ZGEMM_DEFAULT_Q 120
3470 #define SGEMM_DEFAULT_R 12288
3471 #define DGEMM_DEFAULT_R 8192
3472 #define CGEMM_DEFAULT_R 4096
3473 #define ZGEMM_DEFAULT_R 4096
3485 #define GEMM_DEFAULT_OFFSET_A 0
3486 #define GEMM_DEFAULT_OFFSET_B 0
3487 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3489 #define SGEMM_DEFAULT_UNROLL_M 4
3490 #define SGEMM_DEFAULT_UNROLL_N 4
3492 #define DGEMM_DEFAULT_UNROLL_M 4
3493 #define DGEMM_DEFAULT_UNROLL_N 4
3495 #define CGEMM_DEFAULT_UNROLL_M 2
3496 #define CGEMM_DEFAULT_UNROLL_N 2
3498 #define ZGEMM_DEFAULT_UNROLL_M 2
3499 #define ZGEMM_DEFAULT_UNROLL_N 2
3501 #define SGEMM_DEFAULT_P 128
3502 #define DGEMM_DEFAULT_P 128
3503 #define CGEMM_DEFAULT_P 96
3504 #define ZGEMM_DEFAULT_P 64
3506 #define SGEMM_DEFAULT_Q 240
3507 #define DGEMM_DEFAULT_Q 120
3508 #define CGEMM_DEFAULT_Q 120
3509 #define ZGEMM_DEFAULT_Q 120
3511 #define SGEMM_DEFAULT_R 12288
3512 #define DGEMM_DEFAULT_R 8192
3513 #define CGEMM_DEFAULT_R 4096
3514 #define ZGEMM_DEFAULT_R 4096
3526 #define GEMM_DEFAULT_OFFSET_A 0
3527 #define GEMM_DEFAULT_OFFSET_B 0
3528 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3530 #define SGEMM_DEFAULT_UNROLL_M 4
3531 #define SGEMM_DEFAULT_UNROLL_N 4
3533 #define DGEMM_DEFAULT_UNROLL_M 4
3534 #define DGEMM_DEFAULT_UNROLL_N 4
3536 #define CGEMM_DEFAULT_UNROLL_M 2
3537 #define CGEMM_DEFAULT_UNROLL_N 2
3539 #define ZGEMM_DEFAULT_UNROLL_M 2
3540 #define ZGEMM_DEFAULT_UNROLL_N 2
3542 #define SGEMM_DEFAULT_P 128
3543 #define DGEMM_DEFAULT_P 128
3544 #define CGEMM_DEFAULT_P 96
3545 #define ZGEMM_DEFAULT_P 64
3547 #define SGEMM_DEFAULT_Q 240
3548 #define DGEMM_DEFAULT_Q 120
3549 #define CGEMM_DEFAULT_Q 120
3550 #define ZGEMM_DEFAULT_Q 120
3552 #define SGEMM_DEFAULT_R 12288
3553 #define DGEMM_DEFAULT_R 8192
3554 #define CGEMM_DEFAULT_R 4096
3555 #define ZGEMM_DEFAULT_R 4096
/* Default GEMM parameters selected when ZARCH_GENERIC is defined. */
3563 #if defined(ZARCH_GENERIC)
/* Buffer offset and alignment-mask defaults; the mask is cast to BLASLONG. */
3567 #define GEMM_DEFAULT_OFFSET_A 0
3568 #define GEMM_DEFAULT_OFFSET_B 0
3569 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* Unroll factors: every S/D/C/Z variant uses 2x2. */
3571 #define SGEMM_DEFAULT_UNROLL_M 2
3572 #define SGEMM_DEFAULT_UNROLL_N 2
3574 #define DGEMM_DEFAULT_UNROLL_M 2
3575 #define DGEMM_DEFAULT_UNROLL_N 2
3577 #define CGEMM_DEFAULT_UNROLL_M 2
3578 #define CGEMM_DEFAULT_UNROLL_N 2
3580 #define ZGEMM_DEFAULT_UNROLL_M 2
3581 #define ZGEMM_DEFAULT_UNROLL_N 2
/* Default P, Q and R values per variant (presumably blocking sizes - confirm). */
3583 #define SGEMM_DEFAULT_P 128
3584 #define DGEMM_DEFAULT_P 128
3585 #define CGEMM_DEFAULT_P 96
3586 #define ZGEMM_DEFAULT_P 64
3588 #define SGEMM_DEFAULT_Q 240
3589 #define DGEMM_DEFAULT_Q 120
3590 #define CGEMM_DEFAULT_Q 120
3591 #define ZGEMM_DEFAULT_Q 120
3593 #define SGEMM_DEFAULT_R 12288
3594 #define DGEMM_DEFAULT_R 8192
3595 #define CGEMM_DEFAULT_R 4096
3596 #define ZGEMM_DEFAULT_R 4096
3606 #define GEMM_DEFAULT_OFFSET_A 0
3607 #define GEMM_DEFAULT_OFFSET_B 0
3608 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3610 #define SGEMM_DEFAULT_UNROLL_M 8
3611 #define SGEMM_DEFAULT_UNROLL_N 4
3613 #define DGEMM_DEFAULT_UNROLL_M 8
3614 #define DGEMM_DEFAULT_UNROLL_N 4
3616 #define CGEMM_DEFAULT_UNROLL_M 4
3617 #define CGEMM_DEFAULT_UNROLL_N 4
3619 #define ZGEMM_DEFAULT_UNROLL_M 4
3620 #define ZGEMM_DEFAULT_UNROLL_N 4
3622 #define SGEMM_DEFAULT_P 456
3623 #define DGEMM_DEFAULT_P 320
3624 #define CGEMM_DEFAULT_P 480
3625 #define ZGEMM_DEFAULT_P 224
3627 #define SGEMM_DEFAULT_Q 488
3628 #define DGEMM_DEFAULT_Q 384
3629 #define CGEMM_DEFAULT_Q 128
3630 #define ZGEMM_DEFAULT_Q 352
3632 #define SGEMM_DEFAULT_R 8192
3633 #define DGEMM_DEFAULT_R 4096
3634 #define CGEMM_DEFAULT_R 4096
3635 #define ZGEMM_DEFAULT_R 2048
3646 #define GEMM_DEFAULT_OFFSET_A 0
3647 #define GEMM_DEFAULT_OFFSET_B 0
3648 #define GEMM_DEFAULT_ALIGN 0x03fffUL
3650 #define SGEMM_DEFAULT_UNROLL_M 16
3651 #define SGEMM_DEFAULT_UNROLL_N 4
3653 #define DGEMM_DEFAULT_UNROLL_M 8
3654 #define DGEMM_DEFAULT_UNROLL_N 4
3656 #define CGEMM_DEFAULT_UNROLL_M 4
3657 #define CGEMM_DEFAULT_UNROLL_N 4
3659 #define ZGEMM_DEFAULT_UNROLL_M 4
3660 #define ZGEMM_DEFAULT_UNROLL_N 4
3662 #define SGEMM_DEFAULT_P 480
3663 #define DGEMM_DEFAULT_P 320
3664 #define CGEMM_DEFAULT_P 480
3665 #define ZGEMM_DEFAULT_P 224
3667 #define SGEMM_DEFAULT_Q 512
3668 #define DGEMM_DEFAULT_Q 384
3669 #define CGEMM_DEFAULT_Q 128
3670 #define ZGEMM_DEFAULT_Q 352
3672 #define SGEMM_DEFAULT_R 8192
3673 #define DGEMM_DEFAULT_R 4096
3674 #define CGEMM_DEFAULT_R 4096
3675 #define ZGEMM_DEFAULT_R 2048
3688 #define GEMM_DEFAULT_OFFSET_A 0
3689 #define GEMM_DEFAULT_OFFSET_B 0
3690 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
3692 #define SGEMM_DEFAULT_UNROLL_N 2
3693 #define DGEMM_DEFAULT_UNROLL_N 2
3694 #define QGEMM_DEFAULT_UNROLL_N 2
3695 #define CGEMM_DEFAULT_UNROLL_N 2
3696 #define ZGEMM_DEFAULT_UNROLL_N 2
3697 #define XGEMM_DEFAULT_UNROLL_N 1
3700 #define SGEMM_DEFAULT_UNROLL_M 2
3701 #define DGEMM_DEFAULT_UNROLL_M 2
3702 #define QGEMM_DEFAULT_UNROLL_M 2
3703 #define CGEMM_DEFAULT_UNROLL_M 2
3704 #define ZGEMM_DEFAULT_UNROLL_M 2
3705 #define XGEMM_DEFAULT_UNROLL_M 1
3707 #define SGEMM_DEFAULT_UNROLL_M 2
3708 #define DGEMM_DEFAULT_UNROLL_M 2
3709 #define QGEMM_DEFAULT_UNROLL_M 2
3710 #define CGEMM_DEFAULT_UNROLL_M 2
3711 #define ZGEMM_DEFAULT_UNROLL_M 2
3712 #define XGEMM_DEFAULT_UNROLL_M 1
3716 #define SGEMM_DEFAULT_P 128
3717 #define DGEMM_DEFAULT_P 128
3718 #define CGEMM_DEFAULT_P 96
3719 #define ZGEMM_DEFAULT_P 64
3720 #define SGEMM_DEFAULT_Q 240
3721 #define DGEMM_DEFAULT_Q 120
3722 #define CGEMM_DEFAULT_Q 120
3723 #define ZGEMM_DEFAULT_Q 120
3724 #define SGEMM_DEFAULT_R 12288
3725 #define DGEMM_DEFAULT_R 8192
3726 #define CGEMM_DEFAULT_R 4096
3727 #define ZGEMM_DEFAULT_R 4096
3729 #define SGEMM_DEFAULT_P sgemm_p
3730 #define DGEMM_DEFAULT_P dgemm_p
3731 #define QGEMM_DEFAULT_P qgemm_p
3732 #define CGEMM_DEFAULT_P cgemm_p
3733 #define ZGEMM_DEFAULT_P zgemm_p
3734 #define XGEMM_DEFAULT_P xgemm_p
3736 #define SGEMM_DEFAULT_R sgemm_r
3737 #define DGEMM_DEFAULT_R dgemm_r
3738 #define QGEMM_DEFAULT_R qgemm_r
3739 #define CGEMM_DEFAULT_R cgemm_r
3740 #define ZGEMM_DEFAULT_R zgemm_r
3741 #define XGEMM_DEFAULT_R xgemm_r
3743 #define SGEMM_DEFAULT_Q 128
3744 #define DGEMM_DEFAULT_Q 128
3745 #define QGEMM_DEFAULT_Q 128
3746 #define CGEMM_DEFAULT_Q 128
3747 #define ZGEMM_DEFAULT_Q 128
3748 #define XGEMM_DEFAULT_Q 128
3755 #ifndef QGEMM_DEFAULT_UNROLL_M
3756 #define QGEMM_DEFAULT_UNROLL_M 2
3759 #ifndef QGEMM_DEFAULT_UNROLL_N
3760 #define QGEMM_DEFAULT_UNROLL_N 2
3763 #ifndef XGEMM_DEFAULT_UNROLL_M
3764 #define XGEMM_DEFAULT_UNROLL_M 2
3767 #ifndef XGEMM_DEFAULT_UNROLL_N
3768 #define XGEMM_DEFAULT_UNROLL_N 2
3772 #define SHUFPD_0 shufps $0x44,
3773 #define SHUFPD_1 shufps $0x4e,
3774 #define SHUFPD_2 shufps $0xe4,
3775 #define SHUFPD_3 shufps $0xee,
3779 #define SHUFPD_0 shufpd $0,
3783 #define SHUFPD_1 shufpd $1,
3787 #define SHUFPD_2 shufpd $2,
3791 #define SHUFPD_3 shufpd $3,
3795 #define SHUFPS_39 shufps $0x39,