1 /*****************************************************************************
2 Copyright (c) 2011-2014, The OpenBLAS Project
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the OpenBLAS project nor the names of
17 its contributors may be used to endorse or promote products
18 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 **********************************************************************************/
34 /*********************************************************************/
35 /* Copyright 2009, 2010 The University of Texas at Austin. */
36 /* All rights reserved. */
38 /* Redistribution and use in source and binary forms, with or */
39 /* without modification, are permitted provided that the following */
40 /* conditions are met: */
42 /* 1. Redistributions of source code must retain the above */
43 /* copyright notice, this list of conditions and the following disclaimer. */
46 /* 2. Redistributions in binary form must reproduce the above */
47 /* copyright notice, this list of conditions and the following */
48 /* disclaimer in the documentation and/or other materials */
49 /* provided with the distribution. */
51 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
52 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
56 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
57 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
58 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
59 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
60 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
61 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
62 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
63 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
64 /* POSSIBILITY OF SUCH DAMAGE. */
66 /* The views and conclusions contained in the software and */
67 /* documentation are those of the authors and should not be */
68 /* interpreted as representing official policies, either expressed */
69 /* or implied, of The University of Texas at Austin. */
70 /*********************************************************************/
/* LONGCAST expands to the cast operator (BLASLONG), to be written as a prefix on an expression. */
75 #define LONGCAST (BLASLONG)
/* NOTE(review): the body and closing #endif of this conditional are not visible here - confirm against the full file. */
76 #if defined(__BYTE_ORDER__)
/* SBGEMM defaults: unroll factors (N, M, MN) followed by the P, R and Q parameters, all literal constants. */
83 #define SBGEMM_DEFAULT_UNROLL_N 4
84 #define SBGEMM_DEFAULT_UNROLL_M 8
85 #define SBGEMM_DEFAULT_UNROLL_MN 32
86 #define SBGEMM_DEFAULT_P 256
87 #define SBGEMM_DEFAULT_R 256
88 #define SBGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration: A/B offsets, alignment
 * constant, then unroll factors and P/R/Q parameters for each of the
 * S/D/Q/C/Z/X prefixes.
 * NOTE(review): the guard that selects this group is not visible here -
 * confirm which target it belongs to.
 */
94 #define GEMM_DEFAULT_OFFSET_A 64
95 #define GEMM_DEFAULT_OFFSET_B 256
96 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
/* Unroll factors along N. */
98 #define SGEMM_DEFAULT_UNROLL_N 4
99 #define DGEMM_DEFAULT_UNROLL_N 4
100 #define QGEMM_DEFAULT_UNROLL_N 2
101 #define CGEMM_DEFAULT_UNROLL_N 2
102 #define ZGEMM_DEFAULT_UNROLL_N 2
103 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): UNROLL_M is defined twice below with different values
 * (4/2/2/2/1/1, then 8/4/2/4/2/1). Presumably these are alternate branches
 * of a conditional whose directives are not shown - confirm.
 */
106 #define SGEMM_DEFAULT_UNROLL_M 4
107 #define DGEMM_DEFAULT_UNROLL_M 2
108 #define QGEMM_DEFAULT_UNROLL_M 2
109 #define CGEMM_DEFAULT_UNROLL_M 2
110 #define ZGEMM_DEFAULT_UNROLL_M 1
111 #define XGEMM_DEFAULT_UNROLL_M 1
113 #define SGEMM_DEFAULT_UNROLL_M 8
114 #define DGEMM_DEFAULT_UNROLL_M 4
115 #define QGEMM_DEFAULT_UNROLL_M 2
116 #define CGEMM_DEFAULT_UNROLL_M 4
117 #define ZGEMM_DEFAULT_UNROLL_M 2
118 #define XGEMM_DEFAULT_UNROLL_M 1
/* P expands to the identifiers sgemm_p, dgemm_p, ... rather than to literals. */
121 #define SGEMM_DEFAULT_P sgemm_p
122 #define DGEMM_DEFAULT_P dgemm_p
123 #define QGEMM_DEFAULT_P qgemm_p
124 #define CGEMM_DEFAULT_P cgemm_p
125 #define ZGEMM_DEFAULT_P zgemm_p
126 #define XGEMM_DEFAULT_P xgemm_p
/* R likewise expands to the identifiers sgemm_r, dgemm_r, ... */
128 #define SGEMM_DEFAULT_R sgemm_r
129 #define DGEMM_DEFAULT_R dgemm_r
130 #define QGEMM_DEFAULT_R qgemm_r
131 #define CGEMM_DEFAULT_R cgemm_r
132 #define ZGEMM_DEFAULT_R zgemm_r
133 #define XGEMM_DEFAULT_R xgemm_r
/* NOTE(review): Q is defined twice (248, then 240); presumably alternate branches of a conditional - confirm. */
137 #define SGEMM_DEFAULT_Q 248
138 #define DGEMM_DEFAULT_Q 248
139 #define QGEMM_DEFAULT_Q 248
140 #define CGEMM_DEFAULT_Q 248
141 #define ZGEMM_DEFAULT_Q 248
142 #define XGEMM_DEFAULT_Q 248
146 #define SGEMM_DEFAULT_Q 240
147 #define DGEMM_DEFAULT_Q 240
148 #define QGEMM_DEFAULT_Q 240
149 #define CGEMM_DEFAULT_Q 240
150 #define ZGEMM_DEFAULT_Q 240
151 #define XGEMM_DEFAULT_Q 240
/* Feature flag, defined with no value. */
157 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM defaults used when BARCELONA, SHANGHAI or BOBCAT is defined.
 * NOTE(review): the #else/#endif lines belonging to this group are not
 * visible here - confirm against the full file.
 */
161 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
/* A/B offsets and alignment constant. */
166 #define GEMM_DEFAULT_OFFSET_A 64
167 #define GEMM_DEFAULT_OFFSET_B 832
168 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
/* Unroll factors along N. */
170 #define SGEMM_DEFAULT_UNROLL_N 4
171 #define DGEMM_DEFAULT_UNROLL_N 4
172 #define QGEMM_DEFAULT_UNROLL_N 2
173 #define CGEMM_DEFAULT_UNROLL_N 2
174 #define ZGEMM_DEFAULT_UNROLL_N 2
175 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): UNROLL_M is defined twice (4/2/2/2/1/1, then 8/4/2/4/2/1);
 * presumably alternate branches of a conditional whose directives are not
 * shown - confirm.
 */
178 #define SGEMM_DEFAULT_UNROLL_M 4
179 #define DGEMM_DEFAULT_UNROLL_M 2
180 #define QGEMM_DEFAULT_UNROLL_M 2
181 #define CGEMM_DEFAULT_UNROLL_M 2
182 #define ZGEMM_DEFAULT_UNROLL_M 1
183 #define XGEMM_DEFAULT_UNROLL_M 1
185 #define SGEMM_DEFAULT_UNROLL_M 8
186 #define DGEMM_DEFAULT_UNROLL_M 4
187 #define QGEMM_DEFAULT_UNROLL_M 2
188 #define CGEMM_DEFAULT_UNROLL_M 4
189 #define ZGEMM_DEFAULT_UNROLL_M 2
190 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * NOTE(review): two P/Q sets follow (P 496..62 with Q 248, then P 448..56
 * with Q 224); presumably alternate branches of a conditional - confirm.
 */
194 #define SGEMM_DEFAULT_P 496
195 #define DGEMM_DEFAULT_P 248
196 #define QGEMM_DEFAULT_P 124
197 #define CGEMM_DEFAULT_P 248
198 #define ZGEMM_DEFAULT_P 124
199 #define XGEMM_DEFAULT_P 62
201 #define SGEMM_DEFAULT_Q 248
202 #define DGEMM_DEFAULT_Q 248
203 #define QGEMM_DEFAULT_Q 248
204 #define CGEMM_DEFAULT_Q 248
205 #define ZGEMM_DEFAULT_Q 248
206 #define XGEMM_DEFAULT_Q 248
210 #define SGEMM_DEFAULT_P 448
211 #define DGEMM_DEFAULT_P 224
212 #define QGEMM_DEFAULT_P 112
213 #define CGEMM_DEFAULT_P 224
214 #define ZGEMM_DEFAULT_P 112
215 #define XGEMM_DEFAULT_P 56
217 #define SGEMM_DEFAULT_Q 224
218 #define DGEMM_DEFAULT_Q 224
219 #define QGEMM_DEFAULT_Q 224
220 #define CGEMM_DEFAULT_Q 224
221 #define ZGEMM_DEFAULT_Q 224
222 #define XGEMM_DEFAULT_Q 224
/* R expands to the identifiers sgemm_r, qgemm_r, ... rather than to literals. */
226 #define SGEMM_DEFAULT_R sgemm_r
227 #define QGEMM_DEFAULT_R qgemm_r
228 #define DGEMM_DEFAULT_R dgemm_r
229 #define CGEMM_DEFAULT_R cgemm_r
230 #define ZGEMM_DEFAULT_R zgemm_r
231 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag, defined with no value. */
234 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn. */
236 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group, and the #else/#endif lines
 * of the conditionals inside it, are not visible here - confirm against the
 * full file.
 */
246 #define GEMM_DEFAULT_OFFSET_A 64
247 #define GEMM_DEFAULT_OFFSET_B 832
248 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
/* UNROLL_N for the Q/C/Z/X prefixes. */
252 #define QGEMM_DEFAULT_UNROLL_N 2
253 #define CGEMM_DEFAULT_UNROLL_N 2
254 #define ZGEMM_DEFAULT_UNROLL_N 2
255 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): two alternative sets follow - SGEMM/DGEMM UNROLL_N 4 with
 * UNROLL_M 4/2/2/2/1/1, then UNROLL_N 2 with UNROLL_M 16/8/2/4/2/1.
 * Presumably alternate branches of a conditional - confirm.
 */
258 #define SGEMM_DEFAULT_UNROLL_N 4
259 #define DGEMM_DEFAULT_UNROLL_N 4
260 #define SGEMM_DEFAULT_UNROLL_M 4
261 #define DGEMM_DEFAULT_UNROLL_M 2
262 #define QGEMM_DEFAULT_UNROLL_M 2
263 #define CGEMM_DEFAULT_UNROLL_M 2
264 #define ZGEMM_DEFAULT_UNROLL_M 1
265 #define XGEMM_DEFAULT_UNROLL_M 1
267 #define SGEMM_DEFAULT_UNROLL_N 2
268 #define DGEMM_DEFAULT_UNROLL_N 2
269 #define SGEMM_DEFAULT_UNROLL_M 16
270 #define DGEMM_DEFAULT_UNROLL_M 8
271 #define QGEMM_DEFAULT_UNROLL_M 2
272 #define CGEMM_DEFAULT_UNROLL_M 4
273 #define ZGEMM_DEFAULT_UNROLL_M 2
274 #define XGEMM_DEFAULT_UNROLL_M 1
/* Unroll factors for the 3M variants, DGEMM UNROLL_MN and GEMV. */
275 #define CGEMM3M_DEFAULT_UNROLL_N 4
276 #define CGEMM3M_DEFAULT_UNROLL_M 8
277 #define ZGEMM3M_DEFAULT_UNROLL_N 4
278 #define ZGEMM3M_DEFAULT_UNROLL_M 4
280 #define DGEMM_DEFAULT_UNROLL_MN 16
281 #define GEMV_UNROLL 8
/* SGEMM/DGEMM P: 768/384 under ARCH_X86_64; the 448/224 pair is presumably the #else branch - confirm. */
285 #if defined(ARCH_X86_64)
286 #define SGEMM_DEFAULT_P 768
287 #define DGEMM_DEFAULT_P 384
289 #define SGEMM_DEFAULT_P 448
290 #define DGEMM_DEFAULT_P 224
293 #define QGEMM_DEFAULT_P 112
294 #define CGEMM_DEFAULT_P 224
295 #define ZGEMM_DEFAULT_P 112
296 #define XGEMM_DEFAULT_P 56
/* SGEMM/DGEMM Q: 168 under ARCH_X86_64; the 224 pair is presumably the #else branch - confirm. */
298 #if defined(ARCH_X86_64)
299 #define SGEMM_DEFAULT_Q 168
300 #define DGEMM_DEFAULT_Q 168
302 #define SGEMM_DEFAULT_Q 224
303 #define DGEMM_DEFAULT_Q 224
306 #define QGEMM_DEFAULT_Q 224
307 #define CGEMM_DEFAULT_Q 224
308 #define ZGEMM_DEFAULT_Q 224
309 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants, all literal constants. */
311 #define CGEMM3M_DEFAULT_P 448
312 #define ZGEMM3M_DEFAULT_P 224
313 #define XGEMM3M_DEFAULT_P 112
314 #define CGEMM3M_DEFAULT_Q 224
315 #define ZGEMM3M_DEFAULT_Q 224
316 #define XGEMM3M_DEFAULT_Q 224
317 #define CGEMM3M_DEFAULT_R 12288
318 #define ZGEMM3M_DEFAULT_R 12288
319 #define XGEMM3M_DEFAULT_R 12288
/* R for the plain GEMM variants expands to the identifiers sgemm_r, qgemm_r, ... */
321 #define SGEMM_DEFAULT_R sgemm_r
322 #define QGEMM_DEFAULT_R qgemm_r
323 #define DGEMM_DEFAULT_R dgemm_r
324 #define CGEMM_DEFAULT_R cgemm_r
325 #define ZGEMM_DEFAULT_R zgemm_r
326 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag, defined with no value. */
329 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn. */
331 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group, and the #else/#endif lines
 * of the conditionals inside it, are not visible here - confirm against the
 * full file.
 */
339 #define GEMM_DEFAULT_OFFSET_A 64
340 #define GEMM_DEFAULT_OFFSET_B 832
341 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
/* UNROLL_N for the Q/C/Z/X prefixes. */
345 #define QGEMM_DEFAULT_UNROLL_N 2
346 #define CGEMM_DEFAULT_UNROLL_N 2
347 #define ZGEMM_DEFAULT_UNROLL_N 2
348 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): two alternative sets follow - SGEMM/DGEMM UNROLL_N 4 with
 * UNROLL_M 4/2/2/2/1/1, then UNROLL_N 2 with UNROLL_M 16/8/2/4/2/1.
 * Presumably alternate branches of a conditional - confirm.
 */
351 #define SGEMM_DEFAULT_UNROLL_N 4
352 #define DGEMM_DEFAULT_UNROLL_N 4
353 #define SGEMM_DEFAULT_UNROLL_M 4
354 #define DGEMM_DEFAULT_UNROLL_M 2
355 #define QGEMM_DEFAULT_UNROLL_M 2
356 #define CGEMM_DEFAULT_UNROLL_M 2
357 #define ZGEMM_DEFAULT_UNROLL_M 1
358 #define XGEMM_DEFAULT_UNROLL_M 1
360 #define SGEMM_DEFAULT_UNROLL_N 2
361 #define DGEMM_DEFAULT_UNROLL_N 2
362 #define SGEMM_DEFAULT_UNROLL_M 16
363 #define DGEMM_DEFAULT_UNROLL_M 8
364 #define QGEMM_DEFAULT_UNROLL_M 2
365 #define CGEMM_DEFAULT_UNROLL_M 4
366 #define ZGEMM_DEFAULT_UNROLL_M 2
367 #define XGEMM_DEFAULT_UNROLL_M 1
/* Unroll factors for the 3M variants and GEMV. */
368 #define CGEMM3M_DEFAULT_UNROLL_N 4
369 #define CGEMM3M_DEFAULT_UNROLL_M 8
370 #define ZGEMM3M_DEFAULT_UNROLL_N 4
371 #define ZGEMM3M_DEFAULT_UNROLL_M 4
372 #define GEMV_UNROLL 8
/* S/D/Z/C P: 768/768/384/768 under ARCH_X86_64; the 448/480/112/224 set is presumably the #else branch - confirm. */
375 #if defined(ARCH_X86_64)
376 #define SGEMM_DEFAULT_P 768
377 #define DGEMM_DEFAULT_P 768
378 #define ZGEMM_DEFAULT_P 384
379 #define CGEMM_DEFAULT_P 768
381 #define SGEMM_DEFAULT_P 448
382 #define DGEMM_DEFAULT_P 480
383 #define ZGEMM_DEFAULT_P 112
384 #define CGEMM_DEFAULT_P 224
386 #define QGEMM_DEFAULT_P 112
387 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q: 192/168/168/168 under ARCH_X86_64; the all-224 set is presumably the #else branch - confirm. */
389 #if defined(ARCH_X86_64)
390 #define SGEMM_DEFAULT_Q 192
391 #define DGEMM_DEFAULT_Q 168
392 #define ZGEMM_DEFAULT_Q 168
393 #define CGEMM_DEFAULT_Q 168
395 #define SGEMM_DEFAULT_Q 224
396 #define DGEMM_DEFAULT_Q 224
397 #define ZGEMM_DEFAULT_Q 224
398 #define CGEMM_DEFAULT_Q 224
400 #define QGEMM_DEFAULT_Q 224
401 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants, all literal constants. */
403 #define CGEMM3M_DEFAULT_P 448
404 #define ZGEMM3M_DEFAULT_P 224
405 #define XGEMM3M_DEFAULT_P 112
406 #define CGEMM3M_DEFAULT_Q 224
407 #define ZGEMM3M_DEFAULT_Q 224
408 #define XGEMM3M_DEFAULT_Q 224
409 #define CGEMM3M_DEFAULT_R 12288
410 #define ZGEMM3M_DEFAULT_R 12288
411 #define XGEMM3M_DEFAULT_R 12288
/* R: SGEMM and DGEMM use the literal 12288; the other prefixes expand to identifiers (qgemm_r, cgemm_r, ...). */
413 #define SGEMM_DEFAULT_R 12288
414 #define QGEMM_DEFAULT_R qgemm_r
415 #define DGEMM_DEFAULT_R 12288
416 #define CGEMM_DEFAULT_R cgemm_r
417 #define ZGEMM_DEFAULT_R zgemm_r
418 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag, defined with no value. */
421 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn. */
423 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group, and the #else/#endif lines
 * of the conditionals inside it, are not visible here - confirm against the
 * full file.
 */
431 #define GEMM_DEFAULT_OFFSET_A 64
432 #define GEMM_DEFAULT_OFFSET_B 832
433 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
/* UNROLL_N for the Q/C/Z/X prefixes. */
437 #define QGEMM_DEFAULT_UNROLL_N 2
438 #define CGEMM_DEFAULT_UNROLL_N 2
439 #define ZGEMM_DEFAULT_UNROLL_N 2
440 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): two alternative sets follow - SGEMM/DGEMM UNROLL_N 4 with
 * UNROLL_M 4/2/2/2/1/1, then UNROLL_N 2 with UNROLL_M 16/8/2/4/2/1.
 * Presumably alternate branches of a conditional - confirm.
 */
443 #define SGEMM_DEFAULT_UNROLL_N 4
444 #define DGEMM_DEFAULT_UNROLL_N 4
445 #define SGEMM_DEFAULT_UNROLL_M 4
446 #define DGEMM_DEFAULT_UNROLL_M 2
447 #define QGEMM_DEFAULT_UNROLL_M 2
448 #define CGEMM_DEFAULT_UNROLL_M 2
449 #define ZGEMM_DEFAULT_UNROLL_M 1
450 #define XGEMM_DEFAULT_UNROLL_M 1
452 #define SGEMM_DEFAULT_UNROLL_N 2
453 #define DGEMM_DEFAULT_UNROLL_N 2
454 #define SGEMM_DEFAULT_UNROLL_M 16
455 #define DGEMM_DEFAULT_UNROLL_M 8
456 #define QGEMM_DEFAULT_UNROLL_M 2
457 #define CGEMM_DEFAULT_UNROLL_M 4
458 #define ZGEMM_DEFAULT_UNROLL_M 2
459 #define XGEMM_DEFAULT_UNROLL_M 1
/* Unroll factors for the 3M variants and GEMV. */
460 #define CGEMM3M_DEFAULT_UNROLL_N 4
461 #define CGEMM3M_DEFAULT_UNROLL_M 8
462 #define ZGEMM3M_DEFAULT_UNROLL_N 4
463 #define ZGEMM3M_DEFAULT_UNROLL_M 4
464 #define GEMV_UNROLL 8
/* S/D/Z/C P: 768/576/288/576 under ARCH_X86_64; the 448/480/112/224 set is presumably the #else branch - confirm. */
467 #if defined(ARCH_X86_64)
468 #define SGEMM_DEFAULT_P 768
469 #define DGEMM_DEFAULT_P 576
470 #define ZGEMM_DEFAULT_P 288
471 #define CGEMM_DEFAULT_P 576
473 #define SGEMM_DEFAULT_P 448
474 #define DGEMM_DEFAULT_P 480
475 #define ZGEMM_DEFAULT_P 112
476 #define CGEMM_DEFAULT_P 224
478 #define QGEMM_DEFAULT_P 112
479 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q: 192/160/160/160 under ARCH_X86_64; the all-224 set is presumably the #else branch - confirm. */
481 #if defined(ARCH_X86_64)
482 #define SGEMM_DEFAULT_Q 192
483 #define DGEMM_DEFAULT_Q 160
484 #define ZGEMM_DEFAULT_Q 160
485 #define CGEMM_DEFAULT_Q 160
487 #define SGEMM_DEFAULT_Q 224
488 #define DGEMM_DEFAULT_Q 224
489 #define ZGEMM_DEFAULT_Q 224
490 #define CGEMM_DEFAULT_Q 224
492 #define QGEMM_DEFAULT_Q 224
493 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants, all literal constants. */
495 #define CGEMM3M_DEFAULT_P 448
496 #define ZGEMM3M_DEFAULT_P 224
497 #define XGEMM3M_DEFAULT_P 112
498 #define CGEMM3M_DEFAULT_Q 224
499 #define ZGEMM3M_DEFAULT_Q 224
500 #define XGEMM3M_DEFAULT_Q 224
501 #define CGEMM3M_DEFAULT_R 12288
502 #define ZGEMM3M_DEFAULT_R 12288
503 #define XGEMM3M_DEFAULT_R 12288
/* R: SGEMM and DGEMM use the literal 12288; the other prefixes expand to identifiers (qgemm_r, cgemm_r, ...). */
505 #define SGEMM_DEFAULT_R 12288
506 #define QGEMM_DEFAULT_R qgemm_r
507 #define DGEMM_DEFAULT_R 12288
508 #define CGEMM_DEFAULT_R cgemm_r
509 #define ZGEMM_DEFAULT_R zgemm_r
510 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag, defined with no value. */
513 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn. */
515 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group, and the #else/#endif lines
 * of the conditionals inside it, are not visible here - confirm against the
 * full file.
 */
524 #define GEMM_DEFAULT_OFFSET_A 64
525 #define GEMM_DEFAULT_OFFSET_B 832
526 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
/* UNROLL_N for the Q/C/Z/X prefixes. */
530 #define QGEMM_DEFAULT_UNROLL_N 2
531 #define CGEMM_DEFAULT_UNROLL_N 2
532 #define ZGEMM_DEFAULT_UNROLL_N 2
533 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): two alternative sets follow - SGEMM/DGEMM UNROLL_N 4 with
 * UNROLL_M 4/2/2/2/1/1, then UNROLL_N 2 with UNROLL_M 16/8/2/4/2/1.
 * Presumably alternate branches of a conditional - confirm.
 */
536 #define SGEMM_DEFAULT_UNROLL_N 4
537 #define DGEMM_DEFAULT_UNROLL_N 4
538 #define SGEMM_DEFAULT_UNROLL_M 4
539 #define DGEMM_DEFAULT_UNROLL_M 2
540 #define QGEMM_DEFAULT_UNROLL_M 2
541 #define CGEMM_DEFAULT_UNROLL_M 2
542 #define ZGEMM_DEFAULT_UNROLL_M 1
543 #define XGEMM_DEFAULT_UNROLL_M 1
545 #define SGEMM_DEFAULT_UNROLL_N 2
546 #define DGEMM_DEFAULT_UNROLL_N 2
547 #define SGEMM_DEFAULT_UNROLL_M 16
548 #define DGEMM_DEFAULT_UNROLL_M 8
549 #define QGEMM_DEFAULT_UNROLL_M 2
550 #define CGEMM_DEFAULT_UNROLL_M 4
551 #define ZGEMM_DEFAULT_UNROLL_M 2
552 #define XGEMM_DEFAULT_UNROLL_M 1
/* Unroll factors for the 3M variants and GEMV. */
553 #define CGEMM3M_DEFAULT_UNROLL_N 4
554 #define CGEMM3M_DEFAULT_UNROLL_M 8
555 #define ZGEMM3M_DEFAULT_UNROLL_N 4
556 #define ZGEMM3M_DEFAULT_UNROLL_M 4
557 #define GEMV_UNROLL 8
/* S/D/Z/C P: 768/576/288/576 under ARCH_X86_64; the 448/480/112/224 set is presumably the #else branch - confirm. */
560 #if defined(ARCH_X86_64)
561 #define SGEMM_DEFAULT_P 768
562 #define DGEMM_DEFAULT_P 576
563 #define ZGEMM_DEFAULT_P 288
564 #define CGEMM_DEFAULT_P 576
566 #define SGEMM_DEFAULT_P 448
567 #define DGEMM_DEFAULT_P 480
568 #define ZGEMM_DEFAULT_P 112
569 #define CGEMM_DEFAULT_P 224
571 #define QGEMM_DEFAULT_P 112
572 #define XGEMM_DEFAULT_P 56
/* S/D/Z/C Q: 192/160/160/160 under ARCH_X86_64; the all-224 set is presumably the #else branch - confirm. */
574 #if defined(ARCH_X86_64)
575 #define SGEMM_DEFAULT_Q 192
576 #define DGEMM_DEFAULT_Q 160
577 #define ZGEMM_DEFAULT_Q 160
578 #define CGEMM_DEFAULT_Q 160
580 #define SGEMM_DEFAULT_Q 224
581 #define DGEMM_DEFAULT_Q 224
582 #define ZGEMM_DEFAULT_Q 224
583 #define CGEMM_DEFAULT_Q 224
585 #define QGEMM_DEFAULT_Q 224
586 #define XGEMM_DEFAULT_Q 224
/* P/Q/R for the 3M variants, all literal constants. */
588 #define CGEMM3M_DEFAULT_P 448
589 #define ZGEMM3M_DEFAULT_P 224
590 #define XGEMM3M_DEFAULT_P 112
591 #define CGEMM3M_DEFAULT_Q 224
592 #define ZGEMM3M_DEFAULT_Q 224
593 #define XGEMM3M_DEFAULT_Q 224
594 #define CGEMM3M_DEFAULT_R 12288
595 #define ZGEMM3M_DEFAULT_R 12288
596 #define XGEMM3M_DEFAULT_R 12288
/* R: SGEMM and DGEMM use the literal 12288; the other prefixes expand to identifiers (qgemm_r, cgemm_r, ...). */
598 #define SGEMM_DEFAULT_R 12288
599 #define QGEMM_DEFAULT_R qgemm_r
600 #define DGEMM_DEFAULT_R 12288
601 #define CGEMM_DEFAULT_R cgemm_r
602 #define ZGEMM_DEFAULT_R zgemm_r
603 #define XGEMM_DEFAULT_R xgemm_r
/* Feature flag, defined with no value. */
606 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn. */
608 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM defaults for one target configuration (both offsets are 0 here).
 * NOTE(review): the guard selecting this group and the directives of the
 * conditionals inside it are not visible here - confirm against the full
 * file.
 */
616 #define GEMM_DEFAULT_OFFSET_A 0
617 #define GEMM_DEFAULT_OFFSET_B 0
618 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
622 #define SWITCH_RATIO 16
/*
 * NOTE(review): two complete UNROLL_M/UNROLL_N sets follow (M 4/2/2/2/1/1
 * with N 4/4/2/2/2/1, then M 8/4/2/8/4/1 with N 4/8/2/2/2/1); presumably
 * alternate branches of a conditional - confirm.
 */
626 #define SGEMM_DEFAULT_UNROLL_M 4
627 #define DGEMM_DEFAULT_UNROLL_M 2
628 #define QGEMM_DEFAULT_UNROLL_M 2
629 #define CGEMM_DEFAULT_UNROLL_M 2
630 #define ZGEMM_DEFAULT_UNROLL_M 1
631 #define XGEMM_DEFAULT_UNROLL_M 1
633 #define SGEMM_DEFAULT_UNROLL_N 4
634 #define DGEMM_DEFAULT_UNROLL_N 4
635 #define QGEMM_DEFAULT_UNROLL_N 2
636 #define CGEMM_DEFAULT_UNROLL_N 2
637 #define ZGEMM_DEFAULT_UNROLL_N 2
638 #define XGEMM_DEFAULT_UNROLL_N 1
642 #define SGEMM_DEFAULT_UNROLL_M 8
643 #define DGEMM_DEFAULT_UNROLL_M 4
644 #define QGEMM_DEFAULT_UNROLL_M 2
645 #define CGEMM_DEFAULT_UNROLL_M 8
646 #define ZGEMM_DEFAULT_UNROLL_M 4
647 #define XGEMM_DEFAULT_UNROLL_M 1
649 #define SGEMM_DEFAULT_UNROLL_N 4
650 #define DGEMM_DEFAULT_UNROLL_N 8
651 #define QGEMM_DEFAULT_UNROLL_N 2
652 #define CGEMM_DEFAULT_UNROLL_N 2
653 #define ZGEMM_DEFAULT_UNROLL_N 2
654 #define XGEMM_DEFAULT_UNROLL_N 1
656 #define SGEMM_DEFAULT_UNROLL_MN 32
657 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): P, Q and R are defined in two groups below - the first
 * starts at SGEMM_DEFAULT_P 512, the second at SGEMM_DEFAULT_P 320.
 * Presumably alternate branches of a conditional - confirm.
 */
663 #define SGEMM_DEFAULT_P 512
664 #define SGEMM_DEFAULT_R sgemm_r
665 #define DGEMM_DEFAULT_P 512
666 #define DGEMM_DEFAULT_R dgemm_r
667 #define QGEMM_DEFAULT_P 504
668 #define QGEMM_DEFAULT_R qgemm_r
669 #define CGEMM_DEFAULT_P 128
670 #define CGEMM_DEFAULT_R 1024
671 #define ZGEMM_DEFAULT_P 512
672 #define ZGEMM_DEFAULT_R zgemm_r
673 #define XGEMM_DEFAULT_P 252
674 #define XGEMM_DEFAULT_R xgemm_r
675 #define SGEMM_DEFAULT_Q 256
676 #define DGEMM_DEFAULT_Q 256
677 #define QGEMM_DEFAULT_Q 128
678 #define CGEMM_DEFAULT_Q 256
679 #define ZGEMM_DEFAULT_Q 192
680 #define XGEMM_DEFAULT_Q 128
684 #define SGEMM_DEFAULT_P 320
685 #define DGEMM_DEFAULT_P 512
686 #define CGEMM_DEFAULT_P 256
687 #define ZGEMM_DEFAULT_P 192
/* NOTE(review): SGEMM/DGEMM Q appears twice in a row (320/128, then 320/256); presumably a nested conditional - confirm. */
690 #define SGEMM_DEFAULT_Q 320
691 #define DGEMM_DEFAULT_Q 128
693 #define SGEMM_DEFAULT_Q 320
694 #define DGEMM_DEFAULT_Q 256
696 #define CGEMM_DEFAULT_Q 256
697 #define ZGEMM_DEFAULT_Q 192
/* R: DGEMM uses the literal 13824; S/C/Z expand to identifiers. */
699 #define SGEMM_DEFAULT_R sgemm_r
700 #define DGEMM_DEFAULT_R 13824
701 #define CGEMM_DEFAULT_R cgemm_r
702 #define ZGEMM_DEFAULT_R zgemm_r
/* Q/P/R for the Q and X prefixes. */
704 #define QGEMM_DEFAULT_Q 128
705 #define QGEMM_DEFAULT_P 504
706 #define QGEMM_DEFAULT_R qgemm_r
707 #define XGEMM_DEFAULT_P 252
708 #define XGEMM_DEFAULT_R xgemm_r
709 #define XGEMM_DEFAULT_Q 128
/* Unroll factors and P/Q/R for the 3M variants, all literal constants. */
711 #define CGEMM3M_DEFAULT_UNROLL_N 4
712 #define CGEMM3M_DEFAULT_UNROLL_M 8
713 #define ZGEMM3M_DEFAULT_UNROLL_N 4
714 #define ZGEMM3M_DEFAULT_UNROLL_M 4
716 #define CGEMM3M_DEFAULT_P 320
717 #define ZGEMM3M_DEFAULT_P 256
718 #define XGEMM3M_DEFAULT_P 112
719 #define CGEMM3M_DEFAULT_Q 320
720 #define ZGEMM3M_DEFAULT_Q 256
721 #define XGEMM3M_DEFAULT_Q 224
722 #define CGEMM3M_DEFAULT_R 12288
723 #define ZGEMM3M_DEFAULT_R 12288
724 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM defaults for one target configuration: offsets, alignment constant,
 * unroll factors, then R, P and Q for each of the S/D/Q/C/Z/X prefixes.
 * NOTE(review): the guard that selects this group is not visible here -
 * confirm which target it belongs to.
 */
735 #define GEMM_DEFAULT_OFFSET_A 0
736 #define GEMM_DEFAULT_OFFSET_B 384
737 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* Unroll factors along N. */
739 #define SGEMM_DEFAULT_UNROLL_N 4
740 #define DGEMM_DEFAULT_UNROLL_N 4
741 #define QGEMM_DEFAULT_UNROLL_N 2
742 #define CGEMM_DEFAULT_UNROLL_N 2
743 #define ZGEMM_DEFAULT_UNROLL_N 2
744 #define XGEMM_DEFAULT_UNROLL_N 1
/* Unroll factors along M. */
746 #define SGEMM_DEFAULT_UNROLL_M 2
747 #define DGEMM_DEFAULT_UNROLL_M 1
748 #define QGEMM_DEFAULT_UNROLL_M 2
749 #define CGEMM_DEFAULT_UNROLL_M 1
750 #define ZGEMM_DEFAULT_UNROLL_M 1
751 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to the identifiers sgemm_r, dgemm_r, ... rather than to literals. */
753 #define SGEMM_DEFAULT_R sgemm_r
754 #define DGEMM_DEFAULT_R dgemm_r
755 #define QGEMM_DEFAULT_R qgemm_r
756 #define CGEMM_DEFAULT_R cgemm_r
757 #define ZGEMM_DEFAULT_R zgemm_r
758 #define XGEMM_DEFAULT_R xgemm_r
/* P is a literal per prefix. */
760 #define SGEMM_DEFAULT_P 208
761 #define DGEMM_DEFAULT_P 104
762 #define QGEMM_DEFAULT_P 56
763 #define CGEMM_DEFAULT_P 104
764 #define ZGEMM_DEFAULT_P 56
765 #define XGEMM_DEFAULT_P 28
/* Q is 208 for every prefix. */
767 #define SGEMM_DEFAULT_Q 208
768 #define DGEMM_DEFAULT_Q 208
769 #define QGEMM_DEFAULT_Q 208
770 #define CGEMM_DEFAULT_Q 208
771 #define ZGEMM_DEFAULT_Q 208
772 #define XGEMM_DEFAULT_Q 208
/* Feature flag, defined with no value. */
775 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM defaults for one target configuration: offsets, alignment constant,
 * unroll factors, then R, P and Q for each of the S/D/Q/C/Z/X prefixes.
 * NOTE(review): the guard that selects this group is not visible here -
 * confirm which target it belongs to.
 */
783 #define GEMM_DEFAULT_OFFSET_A 0
784 #define GEMM_DEFAULT_OFFSET_B 256
785 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* Unroll factors along N. */
787 #define SGEMM_DEFAULT_UNROLL_N 4
788 #define DGEMM_DEFAULT_UNROLL_N 4
789 #define QGEMM_DEFAULT_UNROLL_N 2
790 #define CGEMM_DEFAULT_UNROLL_N 2
791 #define ZGEMM_DEFAULT_UNROLL_N 2
792 #define XGEMM_DEFAULT_UNROLL_N 1
/* Unroll factors along M. */
794 #define SGEMM_DEFAULT_UNROLL_M 2
795 #define DGEMM_DEFAULT_UNROLL_M 1
796 #define QGEMM_DEFAULT_UNROLL_M 2
797 #define CGEMM_DEFAULT_UNROLL_M 1
798 #define ZGEMM_DEFAULT_UNROLL_M 1
799 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to the identifiers sgemm_r, dgemm_r, ... rather than to literals. */
801 #define SGEMM_DEFAULT_R sgemm_r
802 #define DGEMM_DEFAULT_R dgemm_r
803 #define QGEMM_DEFAULT_R qgemm_r
804 #define CGEMM_DEFAULT_R cgemm_r
805 #define ZGEMM_DEFAULT_R zgemm_r
806 #define XGEMM_DEFAULT_R xgemm_r
/* P is 128 for every prefix. */
808 #define SGEMM_DEFAULT_P 128
809 #define DGEMM_DEFAULT_P 128
810 #define QGEMM_DEFAULT_P 128
811 #define CGEMM_DEFAULT_P 128
812 #define ZGEMM_DEFAULT_P 128
813 #define XGEMM_DEFAULT_P 128
/* Q is a literal per prefix. */
815 #define SGEMM_DEFAULT_Q 512
816 #define DGEMM_DEFAULT_Q 256
817 #define QGEMM_DEFAULT_Q 256
818 #define CGEMM_DEFAULT_Q 256
819 #define ZGEMM_DEFAULT_Q 128
820 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group and the directives of the
 * conditional inside it are not visible here - confirm against the full
 * file.
 */
830 #define GEMM_DEFAULT_OFFSET_A 64
831 #define GEMM_DEFAULT_OFFSET_B 256
832 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
/*
 * NOTE(review): two complete UNROLL_N/UNROLL_M sets follow (N 4/4/2/2/2/1
 * with M 4/2/2/2/1/1, then N 8/4/2/4/2/1 with M 4/4/2/2/2/1); presumably
 * alternate branches of a conditional - confirm.
 */
835 #define SGEMM_DEFAULT_UNROLL_N 4
836 #define DGEMM_DEFAULT_UNROLL_N 4
837 #define QGEMM_DEFAULT_UNROLL_N 2
838 #define CGEMM_DEFAULT_UNROLL_N 2
839 #define ZGEMM_DEFAULT_UNROLL_N 2
840 #define XGEMM_DEFAULT_UNROLL_N 1
842 #define SGEMM_DEFAULT_UNROLL_M 4
843 #define DGEMM_DEFAULT_UNROLL_M 2
844 #define QGEMM_DEFAULT_UNROLL_M 2
845 #define CGEMM_DEFAULT_UNROLL_M 2
846 #define ZGEMM_DEFAULT_UNROLL_M 1
847 #define XGEMM_DEFAULT_UNROLL_M 1
849 #define SGEMM_DEFAULT_UNROLL_N 8
850 #define DGEMM_DEFAULT_UNROLL_N 4
851 #define QGEMM_DEFAULT_UNROLL_N 2
852 #define CGEMM_DEFAULT_UNROLL_N 4
853 #define ZGEMM_DEFAULT_UNROLL_N 2
854 #define XGEMM_DEFAULT_UNROLL_N 1
856 #define SGEMM_DEFAULT_UNROLL_M 4
857 #define DGEMM_DEFAULT_UNROLL_M 4
858 #define QGEMM_DEFAULT_UNROLL_M 2
859 #define CGEMM_DEFAULT_UNROLL_M 2
860 #define ZGEMM_DEFAULT_UNROLL_M 2
861 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is 288 for every prefix. */
864 #define SGEMM_DEFAULT_P 288
865 #define DGEMM_DEFAULT_P 288
866 #define QGEMM_DEFAULT_P 288
867 #define CGEMM_DEFAULT_P 288
868 #define ZGEMM_DEFAULT_P 288
869 #define XGEMM_DEFAULT_P 288
/* R expands to the identifiers sgemm_r, dgemm_r, ... rather than to literals. */
871 #define SGEMM_DEFAULT_R sgemm_r
872 #define DGEMM_DEFAULT_R dgemm_r
873 #define QGEMM_DEFAULT_R qgemm_r
874 #define CGEMM_DEFAULT_R cgemm_r
875 #define ZGEMM_DEFAULT_R zgemm_r
876 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal per prefix. */
878 #define SGEMM_DEFAULT_Q 256
879 #define DGEMM_DEFAULT_Q 128
880 #define QGEMM_DEFAULT_Q 64
881 #define CGEMM_DEFAULT_Q 128
882 #define ZGEMM_DEFAULT_Q 64
883 #define XGEMM_DEFAULT_Q 32
/* Feature flag, defined with no value. */
886 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM defaults used when PENTIUM, PENTIUM2 or PENTIUM3 is defined.
 * NOTE(review): the #else/#endif lines belonging to this group are not
 * visible here - confirm against the full file.
 */
890 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
899 #define GEMM_DEFAULT_OFFSET_A 0
900 #define GEMM_DEFAULT_OFFSET_B 0
901 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/*
 * NOTE(review): SGEMM/CGEMM UNROLL_M is defined twice (8/4, then 4/2);
 * presumably alternate branches of a conditional whose directives are not
 * shown - confirm.
 */
904 #define SGEMM_DEFAULT_UNROLL_M 8
905 #define CGEMM_DEFAULT_UNROLL_M 4
907 #define SGEMM_DEFAULT_UNROLL_M 4
908 #define CGEMM_DEFAULT_UNROLL_M 2
/* Remaining unroll factors, defined once each. */
910 #define DGEMM_DEFAULT_UNROLL_M 2
911 #define SGEMM_DEFAULT_UNROLL_N 2
912 #define DGEMM_DEFAULT_UNROLL_N 2
913 #define QGEMM_DEFAULT_UNROLL_M 2
914 #define QGEMM_DEFAULT_UNROLL_N 2
915 #define CGEMM_DEFAULT_UNROLL_N 1
916 #define ZGEMM_DEFAULT_UNROLL_M 1
917 #define ZGEMM_DEFAULT_UNROLL_N 1
918 #define XGEMM_DEFAULT_UNROLL_M 1
919 #define XGEMM_DEFAULT_UNROLL_N 1
/* For each prefix: P and R expand to identifiers (e.g. sgemm_p, sgemm_r); Q is the literal 256. */
921 #define SGEMM_DEFAULT_P sgemm_p
922 #define SGEMM_DEFAULT_Q 256
923 #define SGEMM_DEFAULT_R sgemm_r
925 #define DGEMM_DEFAULT_P dgemm_p
926 #define DGEMM_DEFAULT_Q 256
927 #define DGEMM_DEFAULT_R dgemm_r
929 #define QGEMM_DEFAULT_P qgemm_p
930 #define QGEMM_DEFAULT_Q 256
931 #define QGEMM_DEFAULT_R qgemm_r
933 #define CGEMM_DEFAULT_P cgemm_p
934 #define CGEMM_DEFAULT_Q 256
935 #define CGEMM_DEFAULT_R cgemm_r
937 #define ZGEMM_DEFAULT_P zgemm_p
938 #define ZGEMM_DEFAULT_Q 256
939 #define ZGEMM_DEFAULT_R zgemm_r
941 #define XGEMM_DEFAULT_P xgemm_p
942 #define XGEMM_DEFAULT_Q 256
943 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group and the directives of the
 * conditional inside it are not visible here - confirm against the full
 * file.
 */
954 #define GEMM_DEFAULT_OFFSET_A 0
955 #define GEMM_DEFAULT_OFFSET_B 0
956 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/*
 * NOTE(review): two complete unroll sets follow, each listing M then N per
 * prefix (first set starts SGEMM 4x4, second starts SGEMM 8x2); presumably
 * alternate branches of a conditional - confirm.
 */
959 #define SGEMM_DEFAULT_UNROLL_M 4
960 #define SGEMM_DEFAULT_UNROLL_N 4
961 #define DGEMM_DEFAULT_UNROLL_M 2
962 #define DGEMM_DEFAULT_UNROLL_N 4
963 #define QGEMM_DEFAULT_UNROLL_M 2
964 #define QGEMM_DEFAULT_UNROLL_N 2
965 #define CGEMM_DEFAULT_UNROLL_M 2
966 #define CGEMM_DEFAULT_UNROLL_N 2
967 #define ZGEMM_DEFAULT_UNROLL_M 1
968 #define ZGEMM_DEFAULT_UNROLL_N 2
969 #define XGEMM_DEFAULT_UNROLL_M 1
970 #define XGEMM_DEFAULT_UNROLL_N 1
972 #define SGEMM_DEFAULT_UNROLL_M 8
973 #define SGEMM_DEFAULT_UNROLL_N 2
974 #define DGEMM_DEFAULT_UNROLL_M 2
975 #define DGEMM_DEFAULT_UNROLL_N 2
976 #define QGEMM_DEFAULT_UNROLL_M 2
977 #define QGEMM_DEFAULT_UNROLL_N 2
978 #define CGEMM_DEFAULT_UNROLL_M 4
979 #define CGEMM_DEFAULT_UNROLL_N 1
980 #define ZGEMM_DEFAULT_UNROLL_M 1
981 #define ZGEMM_DEFAULT_UNROLL_N 1
982 #define XGEMM_DEFAULT_UNROLL_M 1
983 #define XGEMM_DEFAULT_UNROLL_N 1
/* For each prefix: P and R expand to identifiers (e.g. sgemm_p, sgemm_r); Q is the literal 256. */
987 #define SGEMM_DEFAULT_P sgemm_p
988 #define SGEMM_DEFAULT_Q 256
989 #define SGEMM_DEFAULT_R sgemm_r
991 #define DGEMM_DEFAULT_P dgemm_p
992 #define DGEMM_DEFAULT_Q 256
993 #define DGEMM_DEFAULT_R dgemm_r
995 #define QGEMM_DEFAULT_P qgemm_p
996 #define QGEMM_DEFAULT_Q 256
997 #define QGEMM_DEFAULT_R qgemm_r
999 #define CGEMM_DEFAULT_P cgemm_p
1000 #define CGEMM_DEFAULT_Q 256
1001 #define CGEMM_DEFAULT_R cgemm_r
1003 #define ZGEMM_DEFAULT_P zgemm_p
1004 #define ZGEMM_DEFAULT_Q 256
1005 #define ZGEMM_DEFAULT_R zgemm_r
1007 #define XGEMM_DEFAULT_P xgemm_p
1008 #define XGEMM_DEFAULT_Q 256
1009 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM defaults used when CORE_NORTHWOOD is defined.
 * NOTE(review): the matching #endif is not visible here - confirm against
 * the full file.
 */
1014 #ifdef CORE_NORTHWOOD
1019 #define GEMM_DEFAULT_OFFSET_A 0
1020 #define GEMM_DEFAULT_OFFSET_B 32
1022 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* Unroll factors along M. */
1026 #define SGEMM_DEFAULT_UNROLL_M 8
1027 #define DGEMM_DEFAULT_UNROLL_M 4
1028 #define QGEMM_DEFAULT_UNROLL_M 2
1029 #define CGEMM_DEFAULT_UNROLL_M 4
1030 #define ZGEMM_DEFAULT_UNROLL_M 2
1031 #define XGEMM_DEFAULT_UNROLL_M 1
/* Unroll factors along N. */
1033 #define SGEMM_DEFAULT_UNROLL_N 2
1034 #define DGEMM_DEFAULT_UNROLL_N 2
1035 #define QGEMM_DEFAULT_UNROLL_N 2
1036 #define CGEMM_DEFAULT_UNROLL_N 1
1037 #define ZGEMM_DEFAULT_UNROLL_N 1
1038 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) rather than to literals. */
1040 #define SGEMM_DEFAULT_P sgemm_p
1041 #define SGEMM_DEFAULT_R sgemm_r
1043 #define DGEMM_DEFAULT_P dgemm_p
1044 #define DGEMM_DEFAULT_R dgemm_r
1046 #define QGEMM_DEFAULT_P qgemm_p
1047 #define QGEMM_DEFAULT_R qgemm_r
1049 #define CGEMM_DEFAULT_P cgemm_p
1050 #define CGEMM_DEFAULT_R cgemm_r
1052 #define ZGEMM_DEFAULT_P zgemm_p
1053 #define ZGEMM_DEFAULT_R zgemm_r
1055 #define XGEMM_DEFAULT_P xgemm_p
1056 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every prefix. */
1058 #define SGEMM_DEFAULT_Q 128
1059 #define DGEMM_DEFAULT_Q 128
1060 #define QGEMM_DEFAULT_Q 128
1061 #define CGEMM_DEFAULT_Q 128
1062 #define ZGEMM_DEFAULT_Q 128
1063 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM defaults used when CORE_PRESCOTT is defined.
 * NOTE(review): the matching #endif and the directives of the conditionals
 * inside this group are not visible here - confirm against the full file.
 */
1066 #ifdef CORE_PRESCOTT
/* NOTE(review): the A/B offsets are defined twice (128/192, then 0/256); presumably alternate branches - confirm. */
1072 #define GEMM_DEFAULT_OFFSET_A 128
1073 #define GEMM_DEFAULT_OFFSET_B 192
1075 #define GEMM_DEFAULT_OFFSET_A 0
1076 #define GEMM_DEFAULT_OFFSET_B 256
1079 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* NOTE(review): UNROLL_M is defined twice (4/2/2/2/1/1, then 8/4/2/4/2/1); presumably alternate branches - confirm. */
1084 #define SGEMM_DEFAULT_UNROLL_M 4
1085 #define DGEMM_DEFAULT_UNROLL_M 2
1086 #define QGEMM_DEFAULT_UNROLL_M 2
1087 #define CGEMM_DEFAULT_UNROLL_M 2
1088 #define ZGEMM_DEFAULT_UNROLL_M 1
1089 #define XGEMM_DEFAULT_UNROLL_M 1
1091 #define SGEMM_DEFAULT_UNROLL_M 8
1092 #define DGEMM_DEFAULT_UNROLL_M 4
1093 #define QGEMM_DEFAULT_UNROLL_M 2
1094 #define CGEMM_DEFAULT_UNROLL_M 4
1095 #define ZGEMM_DEFAULT_UNROLL_M 2
1096 #define XGEMM_DEFAULT_UNROLL_M 1
/* Unroll factors along N, defined once. */
1099 #define SGEMM_DEFAULT_UNROLL_N 4
1100 #define DGEMM_DEFAULT_UNROLL_N 4
1101 #define QGEMM_DEFAULT_UNROLL_N 2
1102 #define CGEMM_DEFAULT_UNROLL_N 2
1103 #define ZGEMM_DEFAULT_UNROLL_N 2
1104 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) rather than to literals. */
1106 #define SGEMM_DEFAULT_P sgemm_p
1107 #define SGEMM_DEFAULT_R sgemm_r
1109 #define DGEMM_DEFAULT_P dgemm_p
1110 #define DGEMM_DEFAULT_R dgemm_r
1112 #define QGEMM_DEFAULT_P qgemm_p
1113 #define QGEMM_DEFAULT_R qgemm_r
1115 #define CGEMM_DEFAULT_P cgemm_p
1116 #define CGEMM_DEFAULT_R cgemm_r
1118 #define ZGEMM_DEFAULT_P zgemm_p
1119 #define ZGEMM_DEFAULT_R zgemm_r
1121 #define XGEMM_DEFAULT_P xgemm_p
1122 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 128 for every prefix. */
1124 #define SGEMM_DEFAULT_Q 128
1125 #define DGEMM_DEFAULT_Q 128
1126 #define QGEMM_DEFAULT_Q 128
1127 #define CGEMM_DEFAULT_Q 128
1128 #define ZGEMM_DEFAULT_Q 128
1129 #define XGEMM_DEFAULT_Q 128
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group and the directives of the
 * conditional inside it are not visible here - confirm against the full
 * file.
 */
1137 #define GEMM_DEFAULT_OFFSET_A 448
1138 #define GEMM_DEFAULT_OFFSET_B 128
1139 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1143 #define SWITCH_RATIO 4
/*
 * NOTE(review): two unroll sets follow. UNROLL_M is 8/4/2/4/2/1 in both;
 * UNROLL_N is 2/2/2/1/1/1 in the first and 4/4/2/2/2/1 in the second.
 * Presumably alternate branches of a conditional - confirm.
 */
1146 #define SGEMM_DEFAULT_UNROLL_M 8
1147 #define DGEMM_DEFAULT_UNROLL_M 4
1148 #define QGEMM_DEFAULT_UNROLL_M 2
1149 #define CGEMM_DEFAULT_UNROLL_M 4
1150 #define ZGEMM_DEFAULT_UNROLL_M 2
1151 #define XGEMM_DEFAULT_UNROLL_M 1
1153 #define SGEMM_DEFAULT_UNROLL_N 2
1154 #define DGEMM_DEFAULT_UNROLL_N 2
1155 #define QGEMM_DEFAULT_UNROLL_N 2
1156 #define CGEMM_DEFAULT_UNROLL_N 1
1157 #define ZGEMM_DEFAULT_UNROLL_N 1
1158 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * MASK(a, b): with positive integer operands, rounds a up to the next
 * multiple of b. b is expanded three times, so arguments should be free of
 * side effects.
 */
1160 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
1163 #define SGEMM_DEFAULT_UNROLL_M 8
1164 #define DGEMM_DEFAULT_UNROLL_M 4
1165 #define QGEMM_DEFAULT_UNROLL_M 2
1166 #define CGEMM_DEFAULT_UNROLL_M 4
1167 #define ZGEMM_DEFAULT_UNROLL_M 2
1168 #define XGEMM_DEFAULT_UNROLL_M 1
1170 #define SGEMM_DEFAULT_UNROLL_N 4
1171 #define DGEMM_DEFAULT_UNROLL_N 4
1172 #define QGEMM_DEFAULT_UNROLL_N 2
1173 #define CGEMM_DEFAULT_UNROLL_N 2
1174 #define ZGEMM_DEFAULT_UNROLL_N 2
1175 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) rather than to literals. */
1178 #define SGEMM_DEFAULT_P sgemm_p
1179 #define SGEMM_DEFAULT_R sgemm_r
1181 #define DGEMM_DEFAULT_P dgemm_p
1182 #define DGEMM_DEFAULT_R dgemm_r
1184 #define QGEMM_DEFAULT_P qgemm_p
1185 #define QGEMM_DEFAULT_R qgemm_r
1187 #define CGEMM_DEFAULT_P cgemm_p
1188 #define CGEMM_DEFAULT_R cgemm_r
1190 #define ZGEMM_DEFAULT_P zgemm_p
1191 #define ZGEMM_DEFAULT_R zgemm_r
1193 #define XGEMM_DEFAULT_P xgemm_p
1194 #define XGEMM_DEFAULT_R xgemm_r
/* Q is 256 for every prefix. */
1196 #define SGEMM_DEFAULT_Q 256
1197 #define DGEMM_DEFAULT_Q 256
1198 #define QGEMM_DEFAULT_Q 256
1199 #define CGEMM_DEFAULT_Q 256
1200 #define ZGEMM_DEFAULT_Q 256
1201 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM defaults for one target configuration.
 * NOTE(review): the guard selecting this group and the directives of the
 * conditional inside it are not visible here - confirm against the full
 * file.
 */
1210 #define GEMM_DEFAULT_OFFSET_A 128
1211 #define GEMM_DEFAULT_OFFSET_B 0
1212 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1216 #define SWITCH_RATIO 4
/*
 * NOTE(review): two unroll sets follow. UNROLL_M is 4/2/2/2/1/1 in the
 * first and 8/4/2/4/2/1 in the second; UNROLL_N is 4/4/2/2/2/1 in both.
 * Presumably alternate branches of a conditional - confirm.
 */
1219 #define SGEMM_DEFAULT_UNROLL_M 4
1220 #define DGEMM_DEFAULT_UNROLL_M 2
1221 #define QGEMM_DEFAULT_UNROLL_M 2
1222 #define CGEMM_DEFAULT_UNROLL_M 2
1223 #define ZGEMM_DEFAULT_UNROLL_M 1
1224 #define XGEMM_DEFAULT_UNROLL_M 1
1226 #define SGEMM_DEFAULT_UNROLL_N 4
1227 #define DGEMM_DEFAULT_UNROLL_N 4
1228 #define QGEMM_DEFAULT_UNROLL_N 2
1229 #define CGEMM_DEFAULT_UNROLL_N 2
1230 #define ZGEMM_DEFAULT_UNROLL_N 2
1231 #define XGEMM_DEFAULT_UNROLL_N 1
1233 #define SGEMM_DEFAULT_UNROLL_M 8
1234 #define DGEMM_DEFAULT_UNROLL_M 4
1235 #define QGEMM_DEFAULT_UNROLL_M 2
1236 #define CGEMM_DEFAULT_UNROLL_M 4
1237 #define ZGEMM_DEFAULT_UNROLL_M 2
1238 #define XGEMM_DEFAULT_UNROLL_M 1
1240 #define SGEMM_DEFAULT_UNROLL_N 4
1241 #define DGEMM_DEFAULT_UNROLL_N 4
1242 #define QGEMM_DEFAULT_UNROLL_N 2
1243 #define CGEMM_DEFAULT_UNROLL_N 2
1244 #define ZGEMM_DEFAULT_UNROLL_N 2
1245 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (e.g. sgemm_p, sgemm_r) rather than to literals. */
1248 #define SGEMM_DEFAULT_P sgemm_p
1249 #define SGEMM_DEFAULT_R sgemm_r
1251 #define DGEMM_DEFAULT_P dgemm_p
1252 #define DGEMM_DEFAULT_R dgemm_r
1254 #define QGEMM_DEFAULT_P qgemm_p
1255 #define QGEMM_DEFAULT_R qgemm_r
1257 #define CGEMM_DEFAULT_P cgemm_p
1258 #define CGEMM_DEFAULT_R cgemm_r
1260 #define ZGEMM_DEFAULT_P zgemm_p
1261 #define ZGEMM_DEFAULT_R zgemm_r
1263 #define XGEMM_DEFAULT_P xgemm_p
1264 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal per prefix. */
1266 #define SGEMM_DEFAULT_Q 512
1267 #define DGEMM_DEFAULT_Q 256
1268 #define QGEMM_DEFAULT_Q 128
1269 #define CGEMM_DEFAULT_Q 512
1270 #define ZGEMM_DEFAULT_Q 256
1271 #define XGEMM_DEFAULT_Q 128
/* GETRF_FACTOR is a floating-point constant (type double, since it has no suffix). */
1273 #define GETRF_FACTOR 0.75
/*
 * GEMM default parameter set: A/B offsets and alignment value, SWITCH_RATIO,
 * per-type unroll factors, P/Q/R defaults, GETRF_FACTOR and GEMM_THREAD.
 */
1281 #define GEMM_DEFAULT_OFFSET_A 128
1282 #define GEMM_DEFAULT_OFFSET_B 0
1283 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1287 #define SWITCH_RATIO 4
/*
 * NOTE(review): two UNROLL_M/UNROLL_N groups follow; the directives that
 * choose between them are not visible in this listing. The UNROLL_N values
 * are the same in both groups.
 */
1290 #define SGEMM_DEFAULT_UNROLL_M 4
1291 #define DGEMM_DEFAULT_UNROLL_M 2
1292 #define QGEMM_DEFAULT_UNROLL_M 2
1293 #define CGEMM_DEFAULT_UNROLL_M 2
1294 #define ZGEMM_DEFAULT_UNROLL_M 1
1295 #define XGEMM_DEFAULT_UNROLL_M 1
1297 #define SGEMM_DEFAULT_UNROLL_N 4
1298 #define DGEMM_DEFAULT_UNROLL_N 4
1299 #define QGEMM_DEFAULT_UNROLL_N 2
1300 #define CGEMM_DEFAULT_UNROLL_N 2
1301 #define ZGEMM_DEFAULT_UNROLL_N 2
1302 #define XGEMM_DEFAULT_UNROLL_N 1
1304 #define SGEMM_DEFAULT_UNROLL_M 8
1305 #define DGEMM_DEFAULT_UNROLL_M 4
1306 #define QGEMM_DEFAULT_UNROLL_M 2
1307 #define CGEMM_DEFAULT_UNROLL_M 4
1308 #define ZGEMM_DEFAULT_UNROLL_M 2
1309 #define XGEMM_DEFAULT_UNROLL_M 1
1311 #define SGEMM_DEFAULT_UNROLL_N 4
1312 #define DGEMM_DEFAULT_UNROLL_N 4
1313 #define QGEMM_DEFAULT_UNROLL_N 2
1314 #define CGEMM_DEFAULT_UNROLL_N 2
1315 #define ZGEMM_DEFAULT_UNROLL_N 2
1316 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers not defined in this part of the file; Q is constant. */
1319 #define SGEMM_DEFAULT_P sgemm_p
1320 #define SGEMM_DEFAULT_R sgemm_r
1322 #define DGEMM_DEFAULT_P dgemm_p
1323 #define DGEMM_DEFAULT_R dgemm_r
1325 #define QGEMM_DEFAULT_P qgemm_p
1326 #define QGEMM_DEFAULT_R qgemm_r
1328 #define CGEMM_DEFAULT_P cgemm_p
1329 #define CGEMM_DEFAULT_R cgemm_r
1331 #define ZGEMM_DEFAULT_P zgemm_p
1332 #define ZGEMM_DEFAULT_R zgemm_r
1334 #define XGEMM_DEFAULT_P xgemm_p
1335 #define XGEMM_DEFAULT_R xgemm_r
1337 #define SGEMM_DEFAULT_Q 768
1338 #define DGEMM_DEFAULT_Q 384
1339 #define QGEMM_DEFAULT_Q 192
1340 #define CGEMM_DEFAULT_Q 768
1341 #define ZGEMM_DEFAULT_Q 384
1342 #define XGEMM_DEFAULT_Q 192
1344 #define GETRF_FACTOR 0.75
/* GEMM_THREAD expands to gemm_thread_mn, which is declared outside this part of the file. */
1345 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set: A/B offsets and alignment value, SWITCH_RATIO,
 * per-type unroll factors, P/Q/R defaults and GETRF_FACTOR.
 */
1353 #define GEMM_DEFAULT_OFFSET_A 32
1354 #define GEMM_DEFAULT_OFFSET_B 0
1355 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1359 #define SWITCH_RATIO 4
/*
 * NOTE(review): two UNROLL_M/UNROLL_N groups follow; the directives that
 * choose between them are not visible in this listing. Here the UNROLL_M
 * values match and only UNROLL_N differs (4/4/2/2/2/1 vs 8/8/2/4/4/1).
 */
1362 #define SGEMM_DEFAULT_UNROLL_M 4
1363 #define DGEMM_DEFAULT_UNROLL_M 2
1364 #define QGEMM_DEFAULT_UNROLL_M 2
1365 #define CGEMM_DEFAULT_UNROLL_M 2
1366 #define ZGEMM_DEFAULT_UNROLL_M 1
1367 #define XGEMM_DEFAULT_UNROLL_M 1
1369 #define SGEMM_DEFAULT_UNROLL_N 4
1370 #define DGEMM_DEFAULT_UNROLL_N 4
1371 #define QGEMM_DEFAULT_UNROLL_N 2
1372 #define CGEMM_DEFAULT_UNROLL_N 2
1373 #define ZGEMM_DEFAULT_UNROLL_N 2
1374 #define XGEMM_DEFAULT_UNROLL_N 1
1376 #define SGEMM_DEFAULT_UNROLL_M 4
1377 #define DGEMM_DEFAULT_UNROLL_M 2
1378 #define QGEMM_DEFAULT_UNROLL_M 2
1379 #define CGEMM_DEFAULT_UNROLL_M 2
1380 #define ZGEMM_DEFAULT_UNROLL_M 1
1381 #define XGEMM_DEFAULT_UNROLL_M 1
1383 #define SGEMM_DEFAULT_UNROLL_N 8
1384 #define DGEMM_DEFAULT_UNROLL_N 8
1385 #define QGEMM_DEFAULT_UNROLL_N 2
1386 #define CGEMM_DEFAULT_UNROLL_N 4
1387 #define ZGEMM_DEFAULT_UNROLL_N 4
1388 #define XGEMM_DEFAULT_UNROLL_N 1
/* P is a fixed constant here; R still expands to an identifier (sgemm_r, ...) defined elsewhere. */
1391 #define SGEMM_DEFAULT_P 504
1392 #define SGEMM_DEFAULT_R sgemm_r
1394 #define DGEMM_DEFAULT_P 504
1395 #define DGEMM_DEFAULT_R dgemm_r
1397 #define QGEMM_DEFAULT_P 504
1398 #define QGEMM_DEFAULT_R qgemm_r
1400 #define CGEMM_DEFAULT_P 252
1401 #define CGEMM_DEFAULT_R cgemm_r
1403 #define ZGEMM_DEFAULT_P 252
1404 #define ZGEMM_DEFAULT_R zgemm_r
1406 #define XGEMM_DEFAULT_P 252
1407 #define XGEMM_DEFAULT_R xgemm_r
1409 #define SGEMM_DEFAULT_Q 512
1410 #define DGEMM_DEFAULT_Q 256
1411 #define QGEMM_DEFAULT_Q 128
1412 #define CGEMM_DEFAULT_Q 512
1413 #define ZGEMM_DEFAULT_Q 256
1414 #define XGEMM_DEFAULT_Q 128
1416 #define GETRF_FACTOR 0.72
/*
 * GEMM default parameter set: A/B offsets and alignment value, SWITCH_RATIO,
 * per-type unroll factors, P/Q/R defaults, the separate CGEMM3M/ZGEMM3M/
 * XGEMM3M parameters, and GETRF_FACTOR.
 */
1426 #define GEMM_DEFAULT_OFFSET_A 0
1427 #define GEMM_DEFAULT_OFFSET_B 0
1428 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1432 #define SWITCH_RATIO 4
/*
 * NOTE(review): two UNROLL_M/UNROLL_N groups follow; the directives that
 * choose between them are not visible in this listing.
 */
1435 #define SGEMM_DEFAULT_UNROLL_M 4
1436 #define DGEMM_DEFAULT_UNROLL_M 2
1437 #define QGEMM_DEFAULT_UNROLL_M 2
1438 #define CGEMM_DEFAULT_UNROLL_M 2
1439 #define ZGEMM_DEFAULT_UNROLL_M 1
1440 #define XGEMM_DEFAULT_UNROLL_M 1
1442 #define SGEMM_DEFAULT_UNROLL_N 4
1443 #define DGEMM_DEFAULT_UNROLL_N 4
1444 #define QGEMM_DEFAULT_UNROLL_N 2
1445 #define CGEMM_DEFAULT_UNROLL_N 2
1446 #define ZGEMM_DEFAULT_UNROLL_N 2
1447 #define XGEMM_DEFAULT_UNROLL_N 1
1449 #define SGEMM_DEFAULT_UNROLL_M 16
1450 #define DGEMM_DEFAULT_UNROLL_M 8
1451 #define QGEMM_DEFAULT_UNROLL_M 2
1452 #define CGEMM_DEFAULT_UNROLL_M 8
1453 #define ZGEMM_DEFAULT_UNROLL_M 1
1454 #define XGEMM_DEFAULT_UNROLL_M 1
1456 #define SGEMM_DEFAULT_UNROLL_N 4
1457 #define DGEMM_DEFAULT_UNROLL_N 4
1458 #define QGEMM_DEFAULT_UNROLL_N 2
1459 #define CGEMM_DEFAULT_UNROLL_N 2
1460 #define ZGEMM_DEFAULT_UNROLL_N 4
1461 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is constant; R expands to an identifier (sgemm_r, ...) defined elsewhere.
 * The commented-out lines record a fixed R of 1024 as a disabled alternative.
 */
1464 #define SGEMM_DEFAULT_P 768
1465 #define SGEMM_DEFAULT_R sgemm_r
1466 /*#define SGEMM_DEFAULT_R 1024*/
1468 #define DGEMM_DEFAULT_P 512
1469 #define DGEMM_DEFAULT_R dgemm_r
1470 /*#define DGEMM_DEFAULT_R 1024*/
1472 #define QGEMM_DEFAULT_P 504
1473 #define QGEMM_DEFAULT_R qgemm_r
1475 #define CGEMM_DEFAULT_P 768
1476 #define CGEMM_DEFAULT_R cgemm_r
1477 /*#define CGEMM_DEFAULT_R 1024*/
1479 #define ZGEMM_DEFAULT_P 512
1480 #define ZGEMM_DEFAULT_R zgemm_r
1481 /*#define ZGEMM_DEFAULT_R 1024*/
1483 #define XGEMM_DEFAULT_P 252
1484 #define XGEMM_DEFAULT_R xgemm_r
1486 #define SGEMM_DEFAULT_Q 384
1487 #define DGEMM_DEFAULT_Q 256
1488 #define QGEMM_DEFAULT_Q 128
1489 #define CGEMM_DEFAULT_Q 512
1490 #define ZGEMM_DEFAULT_Q 192
1491 #define XGEMM_DEFAULT_Q 128
/* Parameters for the *GEMM3M variants: unroll factors, then constant P/Q/R. */
1493 #define CGEMM3M_DEFAULT_UNROLL_N 8
1494 #define CGEMM3M_DEFAULT_UNROLL_M 4
1495 #define ZGEMM3M_DEFAULT_UNROLL_N 8
1496 #define ZGEMM3M_DEFAULT_UNROLL_M 2
1498 #define CGEMM3M_DEFAULT_P 448
1499 #define ZGEMM3M_DEFAULT_P 224
1500 #define XGEMM3M_DEFAULT_P 112
1501 #define CGEMM3M_DEFAULT_Q 224
1502 #define ZGEMM3M_DEFAULT_Q 224
1503 #define XGEMM3M_DEFAULT_Q 224
1504 #define CGEMM3M_DEFAULT_R 12288
1505 #define ZGEMM3M_DEFAULT_R 12288
1506 #define XGEMM3M_DEFAULT_R 12288
1510 #define GETRF_FACTOR 0.72
/*
 * GEMM default parameter set: A/B offsets and alignment value, a
 * precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE pair, per-type unroll
 * factors, P/Q/R defaults and the *GEMM3M parameters.
 */
1519 #define GEMM_DEFAULT_OFFSET_A 0
1520 #define GEMM_DEFAULT_OFFSET_B 0
1521 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 4 when XDOUBLE or DOUBLE is defined.
 * NOTE(review): the second pair (8) is presumably the #else branch; the #else
 * and #endif lines are not visible in this listing.
 */
1525 #if defined(XDOUBLE) || defined(DOUBLE)
1526 #define SWITCH_RATIO 4
1527 #define GEMM_PREFERED_SIZE 4
1529 #define SWITCH_RATIO 8
1530 #define GEMM_PREFERED_SIZE 8
/*
 * NOTE(review): two UNROLL_M/UNROLL_N groups follow; the directives that
 * choose between them are not visible in this listing.
 */
1535 #define SGEMM_DEFAULT_UNROLL_M 4
1536 #define DGEMM_DEFAULT_UNROLL_M 2
1537 #define QGEMM_DEFAULT_UNROLL_M 2
1538 #define CGEMM_DEFAULT_UNROLL_M 2
1539 #define ZGEMM_DEFAULT_UNROLL_M 1
1540 #define XGEMM_DEFAULT_UNROLL_M 1
1542 #define SGEMM_DEFAULT_UNROLL_N 4
1543 #define DGEMM_DEFAULT_UNROLL_N 4
1544 #define QGEMM_DEFAULT_UNROLL_N 2
1545 #define CGEMM_DEFAULT_UNROLL_N 2
1546 #define ZGEMM_DEFAULT_UNROLL_N 2
1547 #define XGEMM_DEFAULT_UNROLL_N 1
1551 #define SGEMM_DEFAULT_UNROLL_M 8
1552 #define DGEMM_DEFAULT_UNROLL_M 4
1553 #define QGEMM_DEFAULT_UNROLL_M 2
1554 #define CGEMM_DEFAULT_UNROLL_M 8
1555 #define ZGEMM_DEFAULT_UNROLL_M 4
1556 #define XGEMM_DEFAULT_UNROLL_M 1
1558 #define SGEMM_DEFAULT_UNROLL_N 4
1559 #define DGEMM_DEFAULT_UNROLL_N 8
1560 #define QGEMM_DEFAULT_UNROLL_N 2
1561 #define CGEMM_DEFAULT_UNROLL_N 2
1562 #define ZGEMM_DEFAULT_UNROLL_N 2
1563 #define XGEMM_DEFAULT_UNROLL_N 1
1565 #define SGEMM_DEFAULT_UNROLL_MN 32
1566 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): P/Q/R are also listed twice (first group here, second group
 * starting at SGEMM_DEFAULT_P 320); the selecting directives are not visible.
 */
1572 #define SGEMM_DEFAULT_P 512
1573 #define SGEMM_DEFAULT_R sgemm_r
1574 #define DGEMM_DEFAULT_P 512
1575 #define DGEMM_DEFAULT_R dgemm_r
1576 #define QGEMM_DEFAULT_P 504
1577 #define QGEMM_DEFAULT_R qgemm_r
1578 #define CGEMM_DEFAULT_P 128
1579 #define CGEMM_DEFAULT_R 1024
1580 #define ZGEMM_DEFAULT_P 512
1581 #define ZGEMM_DEFAULT_R zgemm_r
1582 #define XGEMM_DEFAULT_P 252
1583 #define XGEMM_DEFAULT_R xgemm_r
1584 #define SGEMM_DEFAULT_Q 256
1585 #define DGEMM_DEFAULT_Q 256
1586 #define QGEMM_DEFAULT_Q 128
1587 #define CGEMM_DEFAULT_Q 256
1588 #define ZGEMM_DEFAULT_Q 192
1589 #define XGEMM_DEFAULT_Q 128
1593 #define SGEMM_DEFAULT_P 320
1594 #define DGEMM_DEFAULT_P 512
1595 #define CGEMM_DEFAULT_P 256
1596 #define ZGEMM_DEFAULT_P 192
/* SGEMM/DGEMM Q appear twice within this group as well (DGEMM 128 vs 256); guard not visible. */
1599 #define SGEMM_DEFAULT_Q 320
1600 #define DGEMM_DEFAULT_Q 128
1602 #define SGEMM_DEFAULT_Q 320
1603 #define DGEMM_DEFAULT_Q 256
1605 #define CGEMM_DEFAULT_Q 256
1606 #define ZGEMM_DEFAULT_Q 192
1608 #define SGEMM_DEFAULT_R sgemm_r
1609 #define DGEMM_DEFAULT_R 13824
1610 #define CGEMM_DEFAULT_R cgemm_r
1611 #define ZGEMM_DEFAULT_R zgemm_r
1613 #define QGEMM_DEFAULT_Q 128
1614 #define QGEMM_DEFAULT_P 504
1615 #define QGEMM_DEFAULT_R qgemm_r
1616 #define XGEMM_DEFAULT_P 252
1617 #define XGEMM_DEFAULT_R xgemm_r
1618 #define XGEMM_DEFAULT_Q 128
/* Parameters for the *GEMM3M variants: unroll factors, then constant P/Q/R. */
1620 #define CGEMM3M_DEFAULT_UNROLL_N 4
1621 #define CGEMM3M_DEFAULT_UNROLL_M 8
1622 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1623 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1625 #define CGEMM3M_DEFAULT_P 320
1626 #define ZGEMM3M_DEFAULT_P 256
1627 #define XGEMM3M_DEFAULT_P 112
1628 #define CGEMM3M_DEFAULT_Q 320
1629 #define ZGEMM3M_DEFAULT_Q 256
1630 #define XGEMM3M_DEFAULT_Q 224
1631 #define CGEMM3M_DEFAULT_R 12288
1632 #define ZGEMM3M_DEFAULT_R 12288
1633 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM default parameter set: A/B offsets and alignment value, a
 * precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE pair,
 * USE_SGEMM_KERNEL_DIRECT, per-type unroll factors, P/Q/R defaults and the
 * *GEMM3M parameters.
 */
1645 #define GEMM_DEFAULT_OFFSET_A 0
1646 #define GEMM_DEFAULT_OFFSET_B 0
1647 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is defined.
 * NOTE(review): the second pair (16) and USE_SGEMM_KERNEL_DIRECT are
 * presumably in the #else branch; #else/#endif are not visible in this listing.
 */
1651 #if defined(XDOUBLE) || defined(DOUBLE)
1652 #define SWITCH_RATIO 8
1653 #define GEMM_PREFERED_SIZE 8
1655 #define SWITCH_RATIO 16
1656 #define GEMM_PREFERED_SIZE 16
1658 #define USE_SGEMM_KERNEL_DIRECT 1
/*
 * NOTE(review): two UNROLL_M/UNROLL_N groups follow; the directives that
 * choose between them are not visible in this listing.
 */
1662 #define SGEMM_DEFAULT_UNROLL_M 4
1663 #define DGEMM_DEFAULT_UNROLL_M 2
1664 #define QGEMM_DEFAULT_UNROLL_M 2
1665 #define CGEMM_DEFAULT_UNROLL_M 2
1666 #define ZGEMM_DEFAULT_UNROLL_M 1
1667 #define XGEMM_DEFAULT_UNROLL_M 1
1669 #define SGEMM_DEFAULT_UNROLL_N 4
1670 #define DGEMM_DEFAULT_UNROLL_N 4
1671 #define QGEMM_DEFAULT_UNROLL_N 2
1672 #define CGEMM_DEFAULT_UNROLL_N 2
1673 #define ZGEMM_DEFAULT_UNROLL_N 2
1674 #define XGEMM_DEFAULT_UNROLL_N 1
1678 #define SGEMM_DEFAULT_UNROLL_M 16
1679 #define DGEMM_DEFAULT_UNROLL_M 16
1680 #define QGEMM_DEFAULT_UNROLL_M 2
1681 #define CGEMM_DEFAULT_UNROLL_M 8
1682 #define ZGEMM_DEFAULT_UNROLL_M 4
1683 #define XGEMM_DEFAULT_UNROLL_M 1
1685 #define SGEMM_DEFAULT_UNROLL_N 4
1686 #define DGEMM_DEFAULT_UNROLL_N 2
1687 #define QGEMM_DEFAULT_UNROLL_N 2
1688 #define CGEMM_DEFAULT_UNROLL_N 2
1689 #define ZGEMM_DEFAULT_UNROLL_N 2
1690 #define XGEMM_DEFAULT_UNROLL_N 1
1692 #define SGEMM_DEFAULT_UNROLL_MN 32
1693 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): P/Q/R are also listed twice (first group here, second group
 * starting at SGEMM_DEFAULT_P 448); the selecting directives are not visible.
 */
1698 #define SGEMM_DEFAULT_P 512
1699 #define SGEMM_DEFAULT_R sgemm_r
1700 #define DGEMM_DEFAULT_P 512
1701 #define DGEMM_DEFAULT_R dgemm_r
1702 #define QGEMM_DEFAULT_P 504
1703 #define QGEMM_DEFAULT_R qgemm_r
1704 #define CGEMM_DEFAULT_P 128
1705 #define CGEMM_DEFAULT_R 1024
1706 #define ZGEMM_DEFAULT_P 512
1707 #define ZGEMM_DEFAULT_R zgemm_r
1708 #define XGEMM_DEFAULT_P 252
1709 #define XGEMM_DEFAULT_R xgemm_r
1710 #define SGEMM_DEFAULT_Q 256
1711 #define DGEMM_DEFAULT_Q 256
1712 #define QGEMM_DEFAULT_Q 128
1713 #define CGEMM_DEFAULT_Q 256
1714 #define ZGEMM_DEFAULT_Q 192
1715 #define XGEMM_DEFAULT_Q 128
1719 #define SGEMM_DEFAULT_P 448
1720 #define DGEMM_DEFAULT_P 192
1721 #define CGEMM_DEFAULT_P 384
1722 #define ZGEMM_DEFAULT_P 256
1724 #define SGEMM_DEFAULT_Q 448
1725 #define DGEMM_DEFAULT_Q 384
1726 #define CGEMM_DEFAULT_Q 192
1727 #define ZGEMM_DEFAULT_Q 128
1729 #define SGEMM_DEFAULT_R sgemm_r
1730 #define DGEMM_DEFAULT_R 8640
1731 #define CGEMM_DEFAULT_R cgemm_r
1732 #define ZGEMM_DEFAULT_R zgemm_r
1734 #define QGEMM_DEFAULT_Q 128
1735 #define QGEMM_DEFAULT_P 504
1736 #define QGEMM_DEFAULT_R qgemm_r
1737 #define XGEMM_DEFAULT_P 252
1738 #define XGEMM_DEFAULT_R xgemm_r
1739 #define XGEMM_DEFAULT_Q 128
/* Parameters for the *GEMM3M variants: unroll factors, then constant P/Q/R. */
1741 #define CGEMM3M_DEFAULT_UNROLL_N 4
1742 #define CGEMM3M_DEFAULT_UNROLL_M 8
1743 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1744 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1746 #define CGEMM3M_DEFAULT_P 320
1747 #define ZGEMM3M_DEFAULT_P 256
1748 #define XGEMM3M_DEFAULT_P 112
1749 #define CGEMM3M_DEFAULT_Q 320
1750 #define ZGEMM3M_DEFAULT_Q 256
1751 #define XGEMM3M_DEFAULT_Q 224
1752 #define CGEMM3M_DEFAULT_R 12288
1753 #define ZGEMM3M_DEFAULT_R 12288
1754 #define XGEMM3M_DEFAULT_R 12288
/*
 * Default A/B offsets and alignment value for this GEMM parameter set.
 * GEMM_DEFAULT_ALIGN carries the same (BLASLONG) cast as the neighbouring
 * parameter sets in this file. Without the cast the constant has type
 * unsigned long, which is only 32 bits wide on LLP64 targets, so complementing
 * it to mask a 64-bit value would clear the upper half of that value.
 */
1766 #define GEMM_DEFAULT_OFFSET_A 0
1767 #define GEMM_DEFAULT_OFFSET_B 0
1768 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/*
 * Precision-dependent SWITCH_RATIO / GEMM_PREFERED_SIZE pair,
 * USE_SGEMM_KERNEL_DIRECT, per-type unroll factors, P/Q/R defaults and the
 * *GEMM3M parameters for this GEMM parameter set.
 * SWITCH_RATIO and GEMM_PREFERED_SIZE are 8 when XDOUBLE or DOUBLE is defined.
 * NOTE(review): the second pair (16) and USE_SGEMM_KERNEL_DIRECT are
 * presumably in the #else branch; #else/#endif are not visible in this listing.
 */
1772 #if defined(XDOUBLE) || defined(DOUBLE)
1773 #define SWITCH_RATIO 8
1774 #define GEMM_PREFERED_SIZE 8
1776 #define SWITCH_RATIO 16
1777 #define GEMM_PREFERED_SIZE 16
1779 #define USE_SGEMM_KERNEL_DIRECT 1
/*
 * NOTE(review): two UNROLL_M/UNROLL_N groups follow; the directives that
 * choose between them are not visible in this listing.
 */
1783 #define SGEMM_DEFAULT_UNROLL_M 4
1784 #define DGEMM_DEFAULT_UNROLL_M 2
1785 #define QGEMM_DEFAULT_UNROLL_M 2
1786 #define CGEMM_DEFAULT_UNROLL_M 2
1787 #define ZGEMM_DEFAULT_UNROLL_M 1
1788 #define XGEMM_DEFAULT_UNROLL_M 1
1790 #define SGEMM_DEFAULT_UNROLL_N 4
1791 #define DGEMM_DEFAULT_UNROLL_N 4
1792 #define QGEMM_DEFAULT_UNROLL_N 2
1793 #define CGEMM_DEFAULT_UNROLL_N 2
1794 #define ZGEMM_DEFAULT_UNROLL_N 2
1795 #define XGEMM_DEFAULT_UNROLL_N 1
1799 #define SGEMM_DEFAULT_UNROLL_M 16
1800 #define DGEMM_DEFAULT_UNROLL_M 16
1801 #define QGEMM_DEFAULT_UNROLL_M 2
1802 #define CGEMM_DEFAULT_UNROLL_M 8
1803 #define ZGEMM_DEFAULT_UNROLL_M 4
1804 #define XGEMM_DEFAULT_UNROLL_M 1
1806 #define SGEMM_DEFAULT_UNROLL_N 4
1807 #define DGEMM_DEFAULT_UNROLL_N 2
1808 #define QGEMM_DEFAULT_UNROLL_N 2
1809 #define CGEMM_DEFAULT_UNROLL_N 2
1810 #define ZGEMM_DEFAULT_UNROLL_N 2
1811 #define XGEMM_DEFAULT_UNROLL_N 1
1813 #define SGEMM_DEFAULT_UNROLL_MN 32
1814 #define DGEMM_DEFAULT_UNROLL_MN 32
/*
 * NOTE(review): P/Q/R are also listed twice (first group here, second group
 * starting at SGEMM_DEFAULT_P 640); the selecting directives are not visible.
 */
1819 #define SGEMM_DEFAULT_P 512
1820 #define SGEMM_DEFAULT_R sgemm_r
1821 #define DGEMM_DEFAULT_P 512
1822 #define DGEMM_DEFAULT_R dgemm_r
1823 #define QGEMM_DEFAULT_P 504
1824 #define QGEMM_DEFAULT_R qgemm_r
1825 #define CGEMM_DEFAULT_P 128
1826 #define CGEMM_DEFAULT_R 1024
1827 #define ZGEMM_DEFAULT_P 512
1828 #define ZGEMM_DEFAULT_R zgemm_r
1829 #define XGEMM_DEFAULT_P 252
1830 #define XGEMM_DEFAULT_R xgemm_r
1831 #define SGEMM_DEFAULT_Q 256
1832 #define DGEMM_DEFAULT_Q 256
1833 #define QGEMM_DEFAULT_Q 128
1834 #define CGEMM_DEFAULT_Q 256
1835 #define ZGEMM_DEFAULT_Q 192
1836 #define XGEMM_DEFAULT_Q 128
1840 #define SGEMM_DEFAULT_P 640
1841 #define DGEMM_DEFAULT_P 192
1842 #define CGEMM_DEFAULT_P 384
1843 #define ZGEMM_DEFAULT_P 256
1845 #define SGEMM_DEFAULT_Q 320
1846 #define DGEMM_DEFAULT_Q 384
1847 #define CGEMM_DEFAULT_Q 192
1848 #define ZGEMM_DEFAULT_Q 128
1850 #define SGEMM_DEFAULT_R sgemm_r
1851 #define DGEMM_DEFAULT_R 8640
1852 #define CGEMM_DEFAULT_R cgemm_r
1853 #define ZGEMM_DEFAULT_R zgemm_r
1855 #define QGEMM_DEFAULT_Q 128
1856 #define QGEMM_DEFAULT_P 504
1857 #define QGEMM_DEFAULT_R qgemm_r
1858 #define XGEMM_DEFAULT_P 252
1859 #define XGEMM_DEFAULT_R xgemm_r
1860 #define XGEMM_DEFAULT_Q 128
/* Parameters for the *GEMM3M variants: unroll factors, then constant P/Q/R. */
1862 #define CGEMM3M_DEFAULT_UNROLL_N 4
1863 #define CGEMM3M_DEFAULT_UNROLL_M 8
1864 #define ZGEMM3M_DEFAULT_UNROLL_N 4
1865 #define ZGEMM3M_DEFAULT_UNROLL_M 4
1867 #define CGEMM3M_DEFAULT_P 320
1868 #define ZGEMM3M_DEFAULT_P 256
1869 #define XGEMM3M_DEFAULT_P 112
1870 #define CGEMM3M_DEFAULT_Q 320
1871 #define ZGEMM3M_DEFAULT_Q 256
1872 #define XGEMM3M_DEFAULT_Q 224
1873 #define CGEMM3M_DEFAULT_R 12288
1874 #define ZGEMM3M_DEFAULT_R 12288
1875 #define XGEMM3M_DEFAULT_R 12288
/*
 * GEMM default parameter set: A/B offsets and alignment value, per-type
 * unroll factors and P/Q/R defaults.
 * 0x0ffff is 65535, i.e. 64 KiB - 1 (presumably used as an alignment mask).
 */
1886 #define GEMM_DEFAULT_OFFSET_A 64
1887 #define GEMM_DEFAULT_OFFSET_B 0
1888 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/*
 * NOTE(review): UNROLL_M is listed twice (4/2/2/2/1/1 and 8/4/2/4/2/1); the
 * directives that choose between the groups are not visible in this listing.
 * UNROLL_N is listed once and applies regardless.
 */
1893 #define SGEMM_DEFAULT_UNROLL_M 4
1894 #define DGEMM_DEFAULT_UNROLL_M 2
1895 #define QGEMM_DEFAULT_UNROLL_M 2
1896 #define CGEMM_DEFAULT_UNROLL_M 2
1897 #define ZGEMM_DEFAULT_UNROLL_M 1
1898 #define XGEMM_DEFAULT_UNROLL_M 1
1900 #define SGEMM_DEFAULT_UNROLL_M 8
1901 #define DGEMM_DEFAULT_UNROLL_M 4
1902 #define QGEMM_DEFAULT_UNROLL_M 2
1903 #define CGEMM_DEFAULT_UNROLL_M 4
1904 #define ZGEMM_DEFAULT_UNROLL_M 2
1905 #define XGEMM_DEFAULT_UNROLL_M 1
1908 #define SGEMM_DEFAULT_UNROLL_N 4
1909 #define DGEMM_DEFAULT_UNROLL_N 2
1910 #define QGEMM_DEFAULT_UNROLL_N 2
1911 #define CGEMM_DEFAULT_UNROLL_N 2
1912 #define ZGEMM_DEFAULT_UNROLL_N 1
1913 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers not defined in this part of the file; Q is 256 for every type. */
1915 #define SGEMM_DEFAULT_P sgemm_p
1916 #define SGEMM_DEFAULT_R sgemm_r
1918 #define DGEMM_DEFAULT_P dgemm_p
1919 #define DGEMM_DEFAULT_R dgemm_r
1921 #define QGEMM_DEFAULT_P qgemm_p
1922 #define QGEMM_DEFAULT_R qgemm_r
1924 #define CGEMM_DEFAULT_P cgemm_p
1925 #define CGEMM_DEFAULT_R cgemm_r
1927 #define ZGEMM_DEFAULT_P zgemm_p
1928 #define ZGEMM_DEFAULT_R zgemm_r
1930 #define XGEMM_DEFAULT_P xgemm_p
1931 #define XGEMM_DEFAULT_R xgemm_r
1933 #define SGEMM_DEFAULT_Q 256
1934 #define DGEMM_DEFAULT_Q 256
1935 #define QGEMM_DEFAULT_Q 256
1936 #define CGEMM_DEFAULT_Q 256
1937 #define ZGEMM_DEFAULT_Q 256
1938 #define XGEMM_DEFAULT_Q 256
/*
 * GEMM default parameter set: A/B offsets and alignment value, per-type
 * unroll factors (8x8 for S/D/Q, 4x4 for C/Z/X), P/Q/R defaults and
 * GETRF_FACTOR. Each macro is listed exactly once in this set.
 */
1948 #define GEMM_DEFAULT_OFFSET_A 0
1949 #define GEMM_DEFAULT_OFFSET_B 128
1950 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1952 #define SGEMM_DEFAULT_UNROLL_M 8
1953 #define SGEMM_DEFAULT_UNROLL_N 8
1954 #define DGEMM_DEFAULT_UNROLL_M 8
1955 #define DGEMM_DEFAULT_UNROLL_N 8
1956 #define QGEMM_DEFAULT_UNROLL_M 8
1957 #define QGEMM_DEFAULT_UNROLL_N 8
1958 #define CGEMM_DEFAULT_UNROLL_M 4
1959 #define CGEMM_DEFAULT_UNROLL_N 4
1960 #define ZGEMM_DEFAULT_UNROLL_M 4
1961 #define ZGEMM_DEFAULT_UNROLL_N 4
1962 #define XGEMM_DEFAULT_UNROLL_M 4
1963 #define XGEMM_DEFAULT_UNROLL_N 4
/* P and R expand to identifiers not defined in this part of the file; Q is 1024 for every type. */
1965 #define SGEMM_DEFAULT_P sgemm_p
1966 #define DGEMM_DEFAULT_P dgemm_p
1967 #define QGEMM_DEFAULT_P qgemm_p
1968 #define CGEMM_DEFAULT_P cgemm_p
1969 #define ZGEMM_DEFAULT_P zgemm_p
1970 #define XGEMM_DEFAULT_P xgemm_p
1972 #define SGEMM_DEFAULT_Q 1024
1973 #define DGEMM_DEFAULT_Q 1024
1974 #define QGEMM_DEFAULT_Q 1024
1975 #define CGEMM_DEFAULT_Q 1024
1976 #define ZGEMM_DEFAULT_Q 1024
1977 #define XGEMM_DEFAULT_Q 1024
1979 #define SGEMM_DEFAULT_R sgemm_r
1980 #define DGEMM_DEFAULT_R dgemm_r
1981 #define QGEMM_DEFAULT_R qgemm_r
1982 #define CGEMM_DEFAULT_R cgemm_r
1983 #define ZGEMM_DEFAULT_R zgemm_r
1984 #define XGEMM_DEFAULT_R xgemm_r
1988 #define GETRF_FACTOR 0.65
/*
 * GEMM default parameter set guarded by EV4, EV5 or EV6 being defined:
 * A/B offsets and alignment value, S/D/C/Z unroll factors, then P/Q(/R).
 */
1992 #if defined(EV4) || defined(EV5) || defined(EV6)
2002 #define GEMM_DEFAULT_OFFSET_A 512
2003 #define GEMM_DEFAULT_OFFSET_B 512
2004 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
2006 #define SGEMM_DEFAULT_UNROLL_M 4
2007 #define SGEMM_DEFAULT_UNROLL_N 4
2008 #define DGEMM_DEFAULT_UNROLL_M 4
2009 #define DGEMM_DEFAULT_UNROLL_N 4
2010 #define CGEMM_DEFAULT_UNROLL_M 2
2011 #define CGEMM_DEFAULT_UNROLL_N 2
2012 #define ZGEMM_DEFAULT_UNROLL_M 2
2013 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * NOTE(review): three P/Q groups follow (P 32, P 64, P 256/128) -- presumably
 * one per EV4/EV5/EV6, but the selecting directives are not visible in this
 * listing. Only the first group also sets R.
 */
2018 #define SGEMM_DEFAULT_P 32
2019 #define SGEMM_DEFAULT_Q 112
2020 #define SGEMM_DEFAULT_R 256
2022 #define DGEMM_DEFAULT_P 32
2023 #define DGEMM_DEFAULT_Q 56
2024 #define DGEMM_DEFAULT_R 256
2026 #define CGEMM_DEFAULT_P 32
2027 #define CGEMM_DEFAULT_Q 64
2028 #define CGEMM_DEFAULT_R 240
2030 #define ZGEMM_DEFAULT_P 32
2031 #define ZGEMM_DEFAULT_Q 32
2032 #define ZGEMM_DEFAULT_R 240
2036 #define SGEMM_DEFAULT_P 64
2037 #define SGEMM_DEFAULT_Q 256
2039 #define DGEMM_DEFAULT_P 64
2040 #define DGEMM_DEFAULT_Q 128
2042 #define CGEMM_DEFAULT_P 64
2043 #define CGEMM_DEFAULT_Q 128
2045 #define ZGEMM_DEFAULT_P 64
2046 #define ZGEMM_DEFAULT_Q 64
2050 #define SGEMM_DEFAULT_P 256
2051 #define SGEMM_DEFAULT_Q 512
2053 #define DGEMM_DEFAULT_P 256
2054 #define DGEMM_DEFAULT_Q 256
2056 #define CGEMM_DEFAULT_P 256
2057 #define CGEMM_DEFAULT_Q 256
2059 #define ZGEMM_DEFAULT_P 128
2060 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default parameter set: A/B offsets and alignment value, S/D/C/Z
 * unroll factors, and constant P and Q. No R values are set in the visible
 * lines of this set.
 */
2070 #define GEMM_DEFAULT_OFFSET_A 0
2071 #define GEMM_DEFAULT_OFFSET_B 8192
2072 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
2074 #define SGEMM_DEFAULT_UNROLL_M 16
2075 #define SGEMM_DEFAULT_UNROLL_N 4
2076 #define DGEMM_DEFAULT_UNROLL_M 4
2077 #define DGEMM_DEFAULT_UNROLL_N 4
2078 #define CGEMM_DEFAULT_UNROLL_M 8
2079 #define CGEMM_DEFAULT_UNROLL_N 2
2080 #define ZGEMM_DEFAULT_UNROLL_M 2
2081 #define ZGEMM_DEFAULT_UNROLL_N 2
2083 #define SGEMM_DEFAULT_P 128
2084 #define DGEMM_DEFAULT_P 128
2085 #define CGEMM_DEFAULT_P 128
2086 #define ZGEMM_DEFAULT_P 128
2088 #define SGEMM_DEFAULT_Q 512
2089 #define DGEMM_DEFAULT_Q 256
2090 #define CGEMM_DEFAULT_Q 256
2091 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM default parameter set: A/B offsets and alignment value, S/D/C/Z
 * unroll factors, and constant P and Q.
 * The alignment value is prefixed with LONGCAST, a macro that is not defined
 * in this part of the file (presumably it expands to an integer cast; verify).
 */
2097 #define GEMM_DEFAULT_OFFSET_A 0
2098 #define GEMM_DEFAULT_OFFSET_B 1024
2099 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
2101 #define SGEMM_DEFAULT_UNROLL_M 16
2102 #define SGEMM_DEFAULT_UNROLL_N 4
2103 #define DGEMM_DEFAULT_UNROLL_M 4
2104 #define DGEMM_DEFAULT_UNROLL_N 4
2105 #define CGEMM_DEFAULT_UNROLL_M 2
2106 #define CGEMM_DEFAULT_UNROLL_N 2
2107 #define ZGEMM_DEFAULT_UNROLL_M 2
2108 #define ZGEMM_DEFAULT_UNROLL_N 2
2110 #define SGEMM_DEFAULT_P 256
2111 #define DGEMM_DEFAULT_P 128
2112 #define CGEMM_DEFAULT_P 128
2113 #define ZGEMM_DEFAULT_P 64
2115 #define SGEMM_DEFAULT_Q 256
2116 #define DGEMM_DEFAULT_Q 256
2117 #define CGEMM_DEFAULT_Q 256
2118 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default parameter set: A/B offsets and alignment value, S/D/C/Z
 * unroll factors (SGEMM and CGEMM UNROLL_M depend on byte order), and
 * P values that depend on the OS and on L2_SIZE, followed by constant Q.
 */
2128 #define GEMM_DEFAULT_OFFSET_A 2688
2129 #define GEMM_DEFAULT_OFFSET_B 3072
2130 #define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
/*
 * SGEMM UNROLL_M is 4 when the compiler reports big-endian byte order.
 * NOTE(review): the value 16 that follows is presumably the #else branch;
 * #else/#endif are not visible in this listing.
 */
2132 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2133 #define SGEMM_DEFAULT_UNROLL_M 4
2135 #define SGEMM_DEFAULT_UNROLL_M 16
2137 #define SGEMM_DEFAULT_UNROLL_N 4
2138 #define DGEMM_DEFAULT_UNROLL_M 4
2139 #define DGEMM_DEFAULT_UNROLL_N 4
/* Same byte-order split for CGEMM UNROLL_M: 2 on big-endian, 8 presumably otherwise. */
2140 #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2141 #define CGEMM_DEFAULT_UNROLL_M 2
2143 #define CGEMM_DEFAULT_UNROLL_M 8
2145 #define CGEMM_DEFAULT_UNROLL_N 2
2146 #define ZGEMM_DEFAULT_UNROLL_M 2
2147 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * P selection on Linux/Darwin/FreeBSD: the larger P values apply only when
 * L2_SIZE equals 1024976.
 * NOTE(review): 1024976 is not a power of two; 1 MiB is 1048576. If L2_SIZE
 * holds a cache size in bytes this comparison may never be true -- confirm
 * the intended constant against where L2_SIZE is defined.
 */
2149 #if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
2150 #if L2_SIZE == 1024976
2151 #define SGEMM_DEFAULT_P 320
2152 #define DGEMM_DEFAULT_P 256
2153 #define CGEMM_DEFAULT_P 256
2154 #define ZGEMM_DEFAULT_P 256
2156 #define SGEMM_DEFAULT_P 176
2157 #define DGEMM_DEFAULT_P 176
2158 #define CGEMM_DEFAULT_P 176
2159 #define ZGEMM_DEFAULT_P 176
2163 #define SGEMM_DEFAULT_Q 512
2164 #define DGEMM_DEFAULT_Q 256
2165 #define CGEMM_DEFAULT_Q 256
2166 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM default parameter set: A/B offsets (both written as 32 * 0, i.e. 0)
 * and alignment value, S/D/C/Z unroll factors, constant P and Q, and R
 * defined as an alias of the corresponding P macro.
 */
2177 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2178 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
2179 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
2181 #define SGEMM_DEFAULT_UNROLL_M 4
2182 #define SGEMM_DEFAULT_UNROLL_N 4
2183 #define DGEMM_DEFAULT_UNROLL_M 4
2184 #define DGEMM_DEFAULT_UNROLL_N 4
2185 #define CGEMM_DEFAULT_UNROLL_M 2
2186 #define CGEMM_DEFAULT_UNROLL_N 2
2187 #define ZGEMM_DEFAULT_UNROLL_M 2
2188 #define ZGEMM_DEFAULT_UNROLL_N 2
2190 #define SGEMM_DEFAULT_P 512
2191 #define DGEMM_DEFAULT_P 512
2192 #define CGEMM_DEFAULT_P 512
2193 #define ZGEMM_DEFAULT_P 512
2195 #define SGEMM_DEFAULT_Q 1024
2196 #define DGEMM_DEFAULT_Q 512
2197 #define CGEMM_DEFAULT_Q 512
2198 #define ZGEMM_DEFAULT_Q 256
/* R tracks P: each *_R expands to the matching *_P macro (512 here). */
2200 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2201 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2202 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2203 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * GEMM default parameter set: A/B offsets (both written as 32 * 0, i.e. 0)
 * and alignment value, S/D/C/Z unroll factors, constant P, and Q.
 */
2213 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
2214 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
2215 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
2217 #define SGEMM_DEFAULT_UNROLL_M 8
2218 #define SGEMM_DEFAULT_UNROLL_N 4
2219 #define DGEMM_DEFAULT_UNROLL_M 8
2220 #define DGEMM_DEFAULT_UNROLL_N 4
2221 #define CGEMM_DEFAULT_UNROLL_M 4
2222 #define CGEMM_DEFAULT_UNROLL_N 2
2223 #define ZGEMM_DEFAULT_UNROLL_M 4
2224 #define ZGEMM_DEFAULT_UNROLL_N 2
2226 #define SGEMM_DEFAULT_P 128
2227 #define DGEMM_DEFAULT_P 128
2228 #define CGEMM_DEFAULT_P 128
2229 #define ZGEMM_DEFAULT_P 128
/*
 * NOTE(review): Q is listed twice (4096/3072/2048/1024 and 512/256/256/128);
 * the directives that choose between the groups are not visible in this
 * listing.
 */
2231 #define SGEMM_DEFAULT_Q 4096
2232 #define DGEMM_DEFAULT_Q 3072
2233 #define CGEMM_DEFAULT_Q 2048
2234 #define ZGEMM_DEFAULT_Q 1024
2236 #define SGEMM_DEFAULT_Q 512
2237 #define DGEMM_DEFAULT_Q 256
2238 #define CGEMM_DEFAULT_Q 256
2239 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM default parameter set guarded by POWER3, POWER4 or POWER5 being
 * defined: A/B offsets and alignment value, S/D/C/Z unroll factors, then
 * several alternative P/Q/R groups.
 */
2247 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
2248 #define GEMM_DEFAULT_OFFSET_A 0
2249 #define GEMM_DEFAULT_OFFSET_B 2048
2250 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
2252 #define SGEMM_DEFAULT_UNROLL_M 4
2253 #define SGEMM_DEFAULT_UNROLL_N 4
2254 #define DGEMM_DEFAULT_UNROLL_M 4
2255 #define DGEMM_DEFAULT_UNROLL_N 4
2256 #define CGEMM_DEFAULT_UNROLL_M 2
2257 #define CGEMM_DEFAULT_UNROLL_N 2
2258 #define ZGEMM_DEFAULT_UNROLL_M 2
2259 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * First P/Q/R group (guard not visible in this listing).
 * NOTE(review): this group sets SGEMM, DGEMM and ZGEMM but has no
 * CGEMM_DEFAULT_P/Q/R entries -- confirm whether that omission is intended.
 */
2266 #define SGEMM_DEFAULT_P 256
2267 #define SGEMM_DEFAULT_Q 432
2268 #define SGEMM_DEFAULT_R 1012
2270 #define DGEMM_DEFAULT_P 256
2271 #define DGEMM_DEFAULT_Q 216
2272 #define DGEMM_DEFAULT_R 1012
2274 #define ZGEMM_DEFAULT_P 256
2275 #define ZGEMM_DEFAULT_Q 104
2276 #define ZGEMM_DEFAULT_R 1012
/*
 * Two further P groups, each split on ALLOC_HUGETLB: the values right after
 * the #ifdef apply when ALLOC_HUGETLB is defined. NOTE(review): the values
 * after them are presumably the #else branch; #else/#endif and the outer
 * guards are not visible in this listing.
 */
2280 #ifdef ALLOC_HUGETLB
2281 #define SGEMM_DEFAULT_P 184
2282 #define DGEMM_DEFAULT_P 184
2283 #define CGEMM_DEFAULT_P 184
2284 #define ZGEMM_DEFAULT_P 184
2286 #define SGEMM_DEFAULT_P 144
2287 #define DGEMM_DEFAULT_P 144
2288 #define CGEMM_DEFAULT_P 144
2289 #define ZGEMM_DEFAULT_P 144
2294 #ifdef ALLOC_HUGETLB
2295 #define SGEMM_DEFAULT_P 512
2296 #define DGEMM_DEFAULT_P 256
2297 #define CGEMM_DEFAULT_P 256
2298 #define ZGEMM_DEFAULT_P 128
2300 #define SGEMM_DEFAULT_P 320
2301 #define DGEMM_DEFAULT_P 160
2302 #define CGEMM_DEFAULT_P 160
2303 #define ZGEMM_DEFAULT_P 80
2306 #define SGEMM_DEFAULT_Q 256
2307 #define CGEMM_DEFAULT_Q 256
2308 #define DGEMM_DEFAULT_Q 256
2309 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default parameter set: A/B offsets and alignment value, S/D/C/Z
 * unroll factors, and constant P and Q. No R values are set in the visible
 * lines of this set.
 */
2321 #define GEMM_DEFAULT_OFFSET_A 384
2322 #define GEMM_DEFAULT_OFFSET_B 1024
2323 #define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
2325 #define SGEMM_DEFAULT_UNROLL_M 4
2326 #define SGEMM_DEFAULT_UNROLL_N 4
2327 #define DGEMM_DEFAULT_UNROLL_M 4
2328 #define DGEMM_DEFAULT_UNROLL_N 4
2329 #define CGEMM_DEFAULT_UNROLL_M 2
2330 #define CGEMM_DEFAULT_UNROLL_N 4
2331 #define ZGEMM_DEFAULT_UNROLL_M 2
2332 #define ZGEMM_DEFAULT_UNROLL_N 4
2334 #define SGEMM_DEFAULT_P 992
2335 #define DGEMM_DEFAULT_P 480
2336 #define CGEMM_DEFAULT_P 488
2337 #define ZGEMM_DEFAULT_P 248
2339 #define SGEMM_DEFAULT_Q 504
2340 #define DGEMM_DEFAULT_Q 504
2341 #define CGEMM_DEFAULT_Q 400
2342 #define ZGEMM_DEFAULT_Q 400
/*
 * GEMM default parameter set: A/B offsets and alignment value, S/D/C/Z
 * unroll factors that depend on __32BIT__, constant P and Q (written with a
 * UL suffix), and R.
 */
2353 #define GEMM_DEFAULT_OFFSET_A 0
2354 #define GEMM_DEFAULT_OFFSET_B 65536
2356 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
/*
 * When __32BIT__ is defined a compile-time warning is emitted and the smaller
 * unroll factors are used. NOTE(review): the larger factors that follow are
 * presumably the #else branch; #else/#endif are not visible in this listing.
 */
2357 #if defined(__32BIT__)
2358 #warning using BINARY32==POWER6
2359 #define SGEMM_DEFAULT_UNROLL_M 4
2360 #define SGEMM_DEFAULT_UNROLL_N 4
2361 #define DGEMM_DEFAULT_UNROLL_M 4
2362 #define DGEMM_DEFAULT_UNROLL_N 4
2363 #define CGEMM_DEFAULT_UNROLL_M 2
2364 #define CGEMM_DEFAULT_UNROLL_N 4
2365 #define ZGEMM_DEFAULT_UNROLL_M 2
2366 #define ZGEMM_DEFAULT_UNROLL_N 4
2368 #define SGEMM_DEFAULT_UNROLL_M 16
2369 #define SGEMM_DEFAULT_UNROLL_N 8
2370 #define DGEMM_DEFAULT_UNROLL_M 16
2371 #define DGEMM_DEFAULT_UNROLL_N 4
2372 #define CGEMM_DEFAULT_UNROLL_M 8
2373 #define CGEMM_DEFAULT_UNROLL_N 4
2374 #define ZGEMM_DEFAULT_UNROLL_M 8
2375 #define ZGEMM_DEFAULT_UNROLL_N 2
2377 #define SGEMM_DEFAULT_P 1280UL
2378 #define DGEMM_DEFAULT_P 640UL
2379 #define CGEMM_DEFAULT_P 640UL
2380 #define ZGEMM_DEFAULT_P 320UL
2382 #define SGEMM_DEFAULT_Q 640UL
2383 #define DGEMM_DEFAULT_Q 720UL
2384 #define CGEMM_DEFAULT_Q 640UL
2385 #define ZGEMM_DEFAULT_Q 640UL
/*
 * NOTE(review): R is listed twice -- once as an alias of the matching P macro
 * and once as the constant 4096; the directives that choose between the two
 * groups are not visible in this listing.
 */
2388 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2389 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2390 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2391 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2393 #define SGEMM_DEFAULT_R 4096
2394 #define DGEMM_DEFAULT_R 4096
2395 #define CGEMM_DEFAULT_R 4096
2396 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter set: A/B offsets and alignment value, SWITCH_RATIO
 * and GEMM_PREFERED_SIZE (both 16), S/D/C/Z unroll factors, and constant
 * P/Q/R. Each macro is listed exactly once in this set.
 */
2407 #define GEMM_DEFAULT_OFFSET_A 0
2408 #define GEMM_DEFAULT_OFFSET_B 65536
2409 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
2411 #define SWITCH_RATIO 16
2412 #define GEMM_PREFERED_SIZE 16
2414 #define SGEMM_DEFAULT_UNROLL_M 16
2415 #define SGEMM_DEFAULT_UNROLL_N 8
2416 #define DGEMM_DEFAULT_UNROLL_M 16
2417 #define DGEMM_DEFAULT_UNROLL_N 4
2418 #define CGEMM_DEFAULT_UNROLL_M 8
2419 #define CGEMM_DEFAULT_UNROLL_N 4
2420 #define ZGEMM_DEFAULT_UNROLL_M 8
2421 #define ZGEMM_DEFAULT_UNROLL_N 2
2423 #define SGEMM_DEFAULT_P 832
2424 #define DGEMM_DEFAULT_P 128
2425 #define CGEMM_DEFAULT_P 512
2426 #define ZGEMM_DEFAULT_P 256
2428 #define SGEMM_DEFAULT_Q 1026
2429 #define DGEMM_DEFAULT_Q 384
2430 #define CGEMM_DEFAULT_Q 1026
2431 #define ZGEMM_DEFAULT_Q 1026
2433 #define SGEMM_DEFAULT_R 4096
2434 #define DGEMM_DEFAULT_R 4096
2435 #define CGEMM_DEFAULT_R 4096
2436 #define ZGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter set guarded by POWER10 being defined: A/B offsets
 * and alignment value, SWITCH_RATIO / GEMM_PREFERED_SIZE, S/D/C/Z unroll
 * factors, constant P/Q/R, and replacement SBGEMM defaults.
 */
2442 #if defined(POWER10)
2446 #define GEMM_DEFAULT_OFFSET_A 0
2447 #define GEMM_DEFAULT_OFFSET_B 65536
2448 #define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
2450 #define SWITCH_RATIO 16
2451 #define GEMM_PREFERED_SIZE 16
2453 #define SGEMM_DEFAULT_UNROLL_M 16
2454 #define SGEMM_DEFAULT_UNROLL_N 8
/*
 * DGEMM unroll is 16x4 when the compiler reports big-endian byte order.
 * NOTE(review): the 8x8 pair that follows is presumably the #else branch;
 * #else/#endif are not visible in this listing.
 */
2455 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2456 #define DGEMM_DEFAULT_UNROLL_M 16
2457 #define DGEMM_DEFAULT_UNROLL_N 4
2459 #define DGEMM_DEFAULT_UNROLL_M 8
2460 #define DGEMM_DEFAULT_UNROLL_N 8
2462 #define CGEMM_DEFAULT_UNROLL_M 8
2463 #define CGEMM_DEFAULT_UNROLL_N 4
2464 #define ZGEMM_DEFAULT_UNROLL_M 8
2465 #define ZGEMM_DEFAULT_UNROLL_N 2
2467 #define SGEMM_DEFAULT_P 512
2468 #define DGEMM_DEFAULT_P 384
2469 #define CGEMM_DEFAULT_P 512
2470 #define ZGEMM_DEFAULT_P 256
2472 #define SGEMM_DEFAULT_Q 512
2473 #define DGEMM_DEFAULT_Q 512
2474 #define CGEMM_DEFAULT_Q 384
2475 #define ZGEMM_DEFAULT_Q 384
2477 #define SGEMM_DEFAULT_R 4096
2478 #define DGEMM_DEFAULT_R 4096
2479 #define CGEMM_DEFAULT_R 4096
2480 #define ZGEMM_DEFAULT_R 4096
/*
 * Drop any earlier SBGEMM defaults and redefine them, so these values win
 * over definitions made before this point.
 */
2484 #undef SBGEMM_DEFAULT_UNROLL_N
2485 #undef SBGEMM_DEFAULT_UNROLL_M
2486 #undef SBGEMM_DEFAULT_P
2487 #undef SBGEMM_DEFAULT_R
2488 #undef SBGEMM_DEFAULT_Q
2489 #define SBGEMM_DEFAULT_UNROLL_M 16
2490 #define SBGEMM_DEFAULT_UNROLL_N 8
2491 #define SBGEMM_DEFAULT_P 832
2492 #define SBGEMM_DEFAULT_Q 1026
2493 #define SBGEMM_DEFAULT_R 4096
/*
 * GEMM default parameter set guarded by SPARC and V7 both being defined:
 * A/B offsets and alignment value, S/D/C/Z unroll factors, constant P and Q,
 * and GEMM_THREAD. No R values are set in the visible lines of this set.
 */
2496 #if defined(SPARC) && defined(V7)
2501 #define GEMM_DEFAULT_OFFSET_A 0
2502 #define GEMM_DEFAULT_OFFSET_B 2048
2503 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2505 #define SGEMM_DEFAULT_UNROLL_M 2
2506 #define SGEMM_DEFAULT_UNROLL_N 8
2507 #define DGEMM_DEFAULT_UNROLL_M 2
2508 #define DGEMM_DEFAULT_UNROLL_N 8
2509 #define CGEMM_DEFAULT_UNROLL_M 1
2510 #define CGEMM_DEFAULT_UNROLL_N 4
2511 #define ZGEMM_DEFAULT_UNROLL_M 1
2512 #define ZGEMM_DEFAULT_UNROLL_N 4
2514 #define SGEMM_DEFAULT_P 256
2515 #define DGEMM_DEFAULT_P 256
2516 #define CGEMM_DEFAULT_P 256
2517 #define ZGEMM_DEFAULT_P 256
2519 #define SGEMM_DEFAULT_Q 512
2520 #define DGEMM_DEFAULT_Q 256
2521 #define CGEMM_DEFAULT_Q 256
2522 #define ZGEMM_DEFAULT_Q 128
/* GEMM_THREAD expands to gemm_thread_mn, which is declared outside this part of the file. */
2525 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default parameter set used when SPARC and V9 are both defined, or when
 * the compiler defines __sparc_v9__: A/B offsets and alignment value, S/D/C/Z
 * unroll factors, and constant P and Q. No R values are set in the visible
 * lines of this set.
 */
2528 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2533 #define GEMM_DEFAULT_OFFSET_A 0
2534 #define GEMM_DEFAULT_OFFSET_B 2048
2535 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2537 #define SGEMM_DEFAULT_UNROLL_M 4
2538 #define SGEMM_DEFAULT_UNROLL_N 4
2539 #define DGEMM_DEFAULT_UNROLL_M 4
2540 #define DGEMM_DEFAULT_UNROLL_N 4
2541 #define CGEMM_DEFAULT_UNROLL_M 2
2542 #define CGEMM_DEFAULT_UNROLL_N 2
2543 #define ZGEMM_DEFAULT_UNROLL_M 2
2544 #define ZGEMM_DEFAULT_UNROLL_N 2
2546 #define SGEMM_DEFAULT_P 512
2547 #define DGEMM_DEFAULT_P 512
2548 #define CGEMM_DEFAULT_P 512
2549 #define ZGEMM_DEFAULT_P 512
2551 #define SGEMM_DEFAULT_Q 1024
2552 #define DGEMM_DEFAULT_Q 512
2553 #define CGEMM_DEFAULT_Q 512
2554 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default parameter set: A/B offsets and alignment value, S/D/C/Z
 * unroll factors, and constant P/Q/R (R is 2000 for every type). Each macro
 * is listed exactly once in this set.
 */
2564 #define GEMM_DEFAULT_OFFSET_A 0
2565 #define GEMM_DEFAULT_OFFSET_B 0
2566 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2568 #define SGEMM_DEFAULT_UNROLL_M 2
2569 #define SGEMM_DEFAULT_UNROLL_N 8
2570 #define DGEMM_DEFAULT_UNROLL_M 2
2571 #define DGEMM_DEFAULT_UNROLL_N 8
2572 #define CGEMM_DEFAULT_UNROLL_M 1
2573 #define CGEMM_DEFAULT_UNROLL_N 4
2574 #define ZGEMM_DEFAULT_UNROLL_M 1
2575 #define ZGEMM_DEFAULT_UNROLL_N 4
2577 #define SGEMM_DEFAULT_P 108
2578 #define DGEMM_DEFAULT_P 112
2579 #define CGEMM_DEFAULT_P 108
2580 #define ZGEMM_DEFAULT_P 112
2582 #define SGEMM_DEFAULT_Q 288
2583 #define DGEMM_DEFAULT_Q 144
2584 #define CGEMM_DEFAULT_Q 144
2585 #define ZGEMM_DEFAULT_Q 72
2587 #define SGEMM_DEFAULT_R 2000
2588 #define DGEMM_DEFAULT_R 2000
2589 #define CGEMM_DEFAULT_R 2000
2590 #define ZGEMM_DEFAULT_R 2000
/* GEMM default parameters used when LOONGSON3R4 is defined.
 * NOTE(review): the UNROLL_M/UNROLL_N macros are listed twice with different
 * values (8x8/8x4/8x4/4x4, then 8x4/4x4/4x2/2x2). Presumably the two sets are
 * alternatives chosen by a nested conditional whose directives are not
 * visible here -- confirm before changing either set.
 * DGEMM_DEFAULT_R expands to the identifier dgemm_r rather than a literal,
 * so its value is supplied elsewhere. */
2595 #if defined(LOONGSON3R4)
2599 #define GEMM_DEFAULT_OFFSET_A 0
2600 #define GEMM_DEFAULT_OFFSET_B 0
2601 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* First UNROLL set */
2604 #define SGEMM_DEFAULT_UNROLL_M 8
2605 #define SGEMM_DEFAULT_UNROLL_N 8
2607 #define DGEMM_DEFAULT_UNROLL_M 8
2608 #define DGEMM_DEFAULT_UNROLL_N 4
2610 #define CGEMM_DEFAULT_UNROLL_M 8
2611 #define CGEMM_DEFAULT_UNROLL_N 4
2613 #define ZGEMM_DEFAULT_UNROLL_M 4
2614 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Second UNROLL set */
2616 #define SGEMM_DEFAULT_UNROLL_M 8
2617 #define SGEMM_DEFAULT_UNROLL_N 4
2619 #define DGEMM_DEFAULT_UNROLL_M 4
2620 #define DGEMM_DEFAULT_UNROLL_N 4
2622 #define CGEMM_DEFAULT_UNROLL_M 4
2623 #define CGEMM_DEFAULT_UNROLL_N 2
2625 #define ZGEMM_DEFAULT_UNROLL_M 2
2626 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2629 #define SGEMM_DEFAULT_P 64
2630 #define DGEMM_DEFAULT_P 44
2631 #define CGEMM_DEFAULT_P 64
2632 #define ZGEMM_DEFAULT_P 32
/* Q defaults */
2634 #define SGEMM_DEFAULT_Q 192
2635 #define DGEMM_DEFAULT_Q 92
2636 #define CGEMM_DEFAULT_Q 128
2637 #define ZGEMM_DEFAULT_Q 80
/* R defaults; only the double-precision one is not a literal */
2639 #define SGEMM_DEFAULT_R 640
2640 #define DGEMM_DEFAULT_R dgemm_r
2641 #define CGEMM_DEFAULT_R 640
2642 #define ZGEMM_DEFAULT_R 640
/* Secondary offsets; their use is not shown here. */
2644 #define GEMM_OFFSET_A1 0x10000
2645 #define GEMM_OFFSET_B1 0x100000
/* GEMM default parameters used when LOONGSON3R3 is defined.
 * The UNROLL, P, Q, R and GEMM_OFFSET_*1 values equal the second UNROLL set
 * and the P/Q/R/offset values of the LOONGSON3R4 section above.
 * DGEMM_DEFAULT_R expands to the identifier dgemm_r rather than a literal,
 * so its value is supplied elsewhere. */
2650 #if defined(LOONGSON3R3)
2651 // Copied from SICORTEX
2655 #define GEMM_DEFAULT_OFFSET_A 0
2656 #define GEMM_DEFAULT_OFFSET_B 0
2657 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
2659 #define SGEMM_DEFAULT_UNROLL_M 8
2660 #define SGEMM_DEFAULT_UNROLL_N 4
2662 #define DGEMM_DEFAULT_UNROLL_M 4
2663 #define DGEMM_DEFAULT_UNROLL_N 4
2665 #define CGEMM_DEFAULT_UNROLL_M 4
2666 #define CGEMM_DEFAULT_UNROLL_N 2
2668 #define ZGEMM_DEFAULT_UNROLL_M 2
2669 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2671 #define SGEMM_DEFAULT_P 64
2672 #define DGEMM_DEFAULT_P 44
2673 #define CGEMM_DEFAULT_P 64
2674 #define ZGEMM_DEFAULT_P 32
/* Q defaults */
2676 #define SGEMM_DEFAULT_Q 192
2677 #define DGEMM_DEFAULT_Q 92
2678 #define CGEMM_DEFAULT_Q 128
2679 #define ZGEMM_DEFAULT_Q 80
/* R defaults; only the double-precision one is not a literal */
2681 #define SGEMM_DEFAULT_R 640
2682 #define DGEMM_DEFAULT_R dgemm_r
2683 #define CGEMM_DEFAULT_R 640
2684 #define ZGEMM_DEFAULT_R 640
/* Secondary offsets; their use is not shown here. */
2686 #define GEMM_OFFSET_A1 0x10000
2687 #define GEMM_OFFSET_B1 0x100000
/* GEMM default parameters shared by the P5600, MIPS1004K, MIPS24K, I6400,
 * P6600 and I6500 targets (any one of these macros enables the section).
 * NOTE(review): the UNROLL_M/UNROLL_N macros are listed twice with different
 * values (8x8/8x4/8x4/4x4, then 2x2 for every variant). Presumably the two
 * sets are alternatives chosen by a nested conditional whose directives are
 * not visible here -- confirm before changing either set. */
2692 #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
2696 #define GEMM_DEFAULT_OFFSET_A 0
2697 #define GEMM_DEFAULT_OFFSET_B 0
2698 #define GEMM_DEFAULT_ALIGN (BLASLONG) 0x03fffUL
/* First UNROLL set */
2701 #define SGEMM_DEFAULT_UNROLL_M 8
2702 #define SGEMM_DEFAULT_UNROLL_N 8
2704 #define DGEMM_DEFAULT_UNROLL_M 8
2705 #define DGEMM_DEFAULT_UNROLL_N 4
2707 #define CGEMM_DEFAULT_UNROLL_M 8
2708 #define CGEMM_DEFAULT_UNROLL_N 4
2710 #define ZGEMM_DEFAULT_UNROLL_M 4
2711 #define ZGEMM_DEFAULT_UNROLL_N 4
/* Second UNROLL set */
2713 #define SGEMM_DEFAULT_UNROLL_M 2
2714 #define SGEMM_DEFAULT_UNROLL_N 2
2716 #define DGEMM_DEFAULT_UNROLL_M 2
2717 #define DGEMM_DEFAULT_UNROLL_N 2
2719 #define CGEMM_DEFAULT_UNROLL_M 2
2720 #define CGEMM_DEFAULT_UNROLL_N 2
2722 #define ZGEMM_DEFAULT_UNROLL_M 2
2723 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2726 #define SGEMM_DEFAULT_P 128
2727 #define DGEMM_DEFAULT_P 128
2728 #define CGEMM_DEFAULT_P 96
2729 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
2731 #define SGEMM_DEFAULT_Q 240
2732 #define DGEMM_DEFAULT_Q 120
2733 #define CGEMM_DEFAULT_Q 120
2734 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
2736 #define SGEMM_DEFAULT_R 12288
2737 #define DGEMM_DEFAULT_R 8192
2738 #define CGEMM_DEFAULT_R 4096
2739 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameters used when RISCV64_GENERIC is defined:
 * 2x2 unroll factors for every variant, with P/Q/R values equal to the
 * P5600-family section above. */
2744 #ifdef RISCV64_GENERIC
2745 #define GEMM_DEFAULT_OFFSET_A 0
2746 #define GEMM_DEFAULT_OFFSET_B 0
2747 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
2749 #define SGEMM_DEFAULT_UNROLL_M 2
2750 #define SGEMM_DEFAULT_UNROLL_N 2
2752 #define DGEMM_DEFAULT_UNROLL_M 2
2753 #define DGEMM_DEFAULT_UNROLL_N 2
2755 #define CGEMM_DEFAULT_UNROLL_M 2
2756 #define CGEMM_DEFAULT_UNROLL_N 2
2758 #define ZGEMM_DEFAULT_UNROLL_M 2
2759 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2761 #define SGEMM_DEFAULT_P 128
2762 #define DGEMM_DEFAULT_P 128
2763 #define CGEMM_DEFAULT_P 96
2764 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
2766 #define SGEMM_DEFAULT_Q 240
2767 #define DGEMM_DEFAULT_Q 120
2768 #define CGEMM_DEFAULT_Q 120
2769 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
2771 #define SGEMM_DEFAULT_R 12288
2772 #define DGEMM_DEFAULT_R 8192
2773 #define CGEMM_DEFAULT_R 4096
2774 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 16x4 (single) and 8x4 (double) real unroll
 * factors and 2x2 complex unroll factors.
 * NOTE(review): GEMM_DEFAULT_OFFSET_A/B appear twice in a row with the same
 * value 0. Presumably the first pair closes the preceding target section and
 * the second opens a new one, but the #if/#endif lines separating them are
 * not visible here -- confirm.
 * NOTE(review): GEMM_DEFAULT_ALIGN has no (BLASLONG) cast here, unlike most
 * sections in this file; verify that this is intentional. */
2778 #define GEMM_DEFAULT_OFFSET_A 0
2779 #define GEMM_DEFAULT_OFFSET_B 0
2784 #define GEMM_DEFAULT_OFFSET_A 0
2785 #define GEMM_DEFAULT_OFFSET_B 0
2786 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
2788 #define SGEMM_DEFAULT_UNROLL_M 16
2789 #define SGEMM_DEFAULT_UNROLL_N 4
2791 #define DGEMM_DEFAULT_UNROLL_M 8
2792 #define DGEMM_DEFAULT_UNROLL_N 4
2794 #define CGEMM_DEFAULT_UNROLL_M 2
2795 #define CGEMM_DEFAULT_UNROLL_N 2
2797 #define ZGEMM_DEFAULT_UNROLL_M 2
2798 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2800 #define SGEMM_DEFAULT_P 160
2801 #define DGEMM_DEFAULT_P 160
2802 #define CGEMM_DEFAULT_P 96
2803 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
2805 #define SGEMM_DEFAULT_Q 240
2806 #define DGEMM_DEFAULT_Q 128
2807 #define CGEMM_DEFAULT_Q 120
2808 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
2810 #define SGEMM_DEFAULT_R 12288
2811 #define DGEMM_DEFAULT_R 8192
2812 #define CGEMM_DEFAULT_R 4096
2813 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 4x4 real and 2x2 complex unroll factors.
 * NOTE(review): GEMM_DEFAULT_OFFSET_A/B appear twice in a row with the same
 * value 0. Presumably the first pair closes the preceding target section and
 * the second opens a new one, but the #if/#endif lines separating them are
 * not visible here -- confirm, and confirm which target this set serves. */
2817 #define GEMM_DEFAULT_OFFSET_A 0
2818 #define GEMM_DEFAULT_OFFSET_B 0
2826 #define GEMM_DEFAULT_OFFSET_A 0
2827 #define GEMM_DEFAULT_OFFSET_B 0
2828 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
2830 #define SGEMM_DEFAULT_UNROLL_M 4
2831 #define SGEMM_DEFAULT_UNROLL_N 4
2833 #define DGEMM_DEFAULT_UNROLL_M 4
2834 #define DGEMM_DEFAULT_UNROLL_N 4
2836 #define CGEMM_DEFAULT_UNROLL_M 2
2837 #define CGEMM_DEFAULT_UNROLL_N 2
2839 #define ZGEMM_DEFAULT_UNROLL_M 2
2840 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2842 #define SGEMM_DEFAULT_P 128
2843 #define DGEMM_DEFAULT_P 128
2844 #define CGEMM_DEFAULT_P 96
2845 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
2847 #define SGEMM_DEFAULT_Q 240
2848 #define DGEMM_DEFAULT_Q 120
2849 #define CGEMM_DEFAULT_Q 120
2850 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
2852 #define SGEMM_DEFAULT_R 12288
2853 #define DGEMM_DEFAULT_R 8192
2854 #define CGEMM_DEFAULT_R 4096
2855 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 4x2 real and 2x2 complex unroll factors.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing. */
2867 #define GEMM_DEFAULT_OFFSET_A 0
2868 #define GEMM_DEFAULT_OFFSET_B 0
2869 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
2871 #define SGEMM_DEFAULT_UNROLL_M 4
2872 #define SGEMM_DEFAULT_UNROLL_N 2
2874 #define DGEMM_DEFAULT_UNROLL_M 4
2875 #define DGEMM_DEFAULT_UNROLL_N 2
2877 #define CGEMM_DEFAULT_UNROLL_M 2
2878 #define CGEMM_DEFAULT_UNROLL_N 2
2880 #define ZGEMM_DEFAULT_UNROLL_M 2
2881 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
2883 #define SGEMM_DEFAULT_P 128
2884 #define DGEMM_DEFAULT_P 128
2885 #define CGEMM_DEFAULT_P 96
2886 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
2888 #define SGEMM_DEFAULT_Q 240
2889 #define DGEMM_DEFAULT_Q 120
2890 #define CGEMM_DEFAULT_Q 120
2891 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
2893 #define SGEMM_DEFAULT_R 12288
2894 #define DGEMM_DEFAULT_R 8192
2895 #define CGEMM_DEFAULT_R 4096
2896 #define ZGEMM_DEFAULT_R 4096
2902 /* Common ARMv8 parameters: offsets and alignment shared by every ARMv8
 * core; the per-core unroll/P/Q/R values follow in the conditional chain.
 * NOTE(review): GEMM_DEFAULT_ALIGN has no (BLASLONG) cast here, unlike most
 * sections in this file; verify that this is intentional. */
2908 #define GEMM_DEFAULT_OFFSET_A 0
2909 #define GEMM_DEFAULT_OFFSET_B 0
2910 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Per-core ARMv8 GEMM defaults, selected by an #if / #elif / #else chain:
 *   - CORTEXA57, CORTEXA72, CORTEXA73, FALKOR, TSV110, EMAG8180
 *   - CORTEXA53
 *   - THUNDERX
 *   - THUNDERX2T99
 *   - THUNDERX3T110
 *   - NEOVERSEN1
 *   - fallback for other/undetected cores
 * Each branch defines UNROLL_M/N, P, Q and R for the S/D/C/Z variants.
 * NOTE(review): the closing #endif of this chain is not visible here. */
2914 #if defined(CORTEXA57) || \
2915 defined(CORTEXA72) || defined(CORTEXA73) || \
2916 defined(FALKOR) || defined(TSV110) || defined(EMAG8180)
2918 #define SGEMM_DEFAULT_UNROLL_M 16
2919 #define SGEMM_DEFAULT_UNROLL_N 4
2921 #define DGEMM_DEFAULT_UNROLL_M 8
2922 #define DGEMM_DEFAULT_UNROLL_N 4
2924 #define CGEMM_DEFAULT_UNROLL_M 8
2925 #define CGEMM_DEFAULT_UNROLL_N 4
2927 #define ZGEMM_DEFAULT_UNROLL_M 4
2928 #define ZGEMM_DEFAULT_UNROLL_N 4
2930 /*FIXME: this should be using the cache size, but there is currently no easy way to
2931 query that on ARM. So if getarch counted more than 8 cores we simply assume the host
2932 is a big desktop or server with abundant cache rather than a phone or embedded device */
/* Larger P/Q set: used when NUM_CORES > 8 or for TSV110 / EMAG8180. */
2933 #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180)
2934 #define SGEMM_DEFAULT_P 512
2935 #define DGEMM_DEFAULT_P 256
2936 #define CGEMM_DEFAULT_P 256
2937 #define ZGEMM_DEFAULT_P 128
2939 #define SGEMM_DEFAULT_Q 1024
2940 #define DGEMM_DEFAULT_Q 512
2941 #define CGEMM_DEFAULT_Q 512
2942 #define ZGEMM_DEFAULT_Q 512
/* Smaller P/Q set.
 * NOTE(review): presumably the #else branch of the NUM_CORES test above; the
 * #else/#endif directives are not visible here -- confirm. */
2944 #define SGEMM_DEFAULT_P 128
2945 #define DGEMM_DEFAULT_P 160
2946 #define CGEMM_DEFAULT_P 128
2947 #define ZGEMM_DEFAULT_P 128
2949 #define SGEMM_DEFAULT_Q 352
2950 #define DGEMM_DEFAULT_Q 128
2951 #define CGEMM_DEFAULT_Q 224
2952 #define ZGEMM_DEFAULT_Q 112
/* R defaults for this branch */
2955 #define SGEMM_DEFAULT_R 4096
2956 #define DGEMM_DEFAULT_R 4096
2957 #define CGEMM_DEFAULT_R 4096
2958 #define ZGEMM_DEFAULT_R 2048
/* Cortex-A53: 8x8 single-precision unroll, SGEMM P and Q both 256. */
2960 #elif defined(CORTEXA53)
2962 #define SGEMM_DEFAULT_UNROLL_M 8
2963 #define SGEMM_DEFAULT_UNROLL_N 8
2965 #define DGEMM_DEFAULT_UNROLL_M 8
2966 #define DGEMM_DEFAULT_UNROLL_N 4
2968 #define CGEMM_DEFAULT_UNROLL_M 8
2969 #define CGEMM_DEFAULT_UNROLL_N 4
2971 #define ZGEMM_DEFAULT_UNROLL_M 4
2972 #define ZGEMM_DEFAULT_UNROLL_N 4
2974 #define SGEMM_DEFAULT_P 256
2975 #define DGEMM_DEFAULT_P 160
2976 #define CGEMM_DEFAULT_P 128
2977 #define ZGEMM_DEFAULT_P 128
2979 #define SGEMM_DEFAULT_Q 256
2980 #define DGEMM_DEFAULT_Q 128
2981 #define CGEMM_DEFAULT_Q 224
2982 #define ZGEMM_DEFAULT_Q 112
2984 #define SGEMM_DEFAULT_R 4096
2985 #define DGEMM_DEFAULT_R 4096
2986 #define CGEMM_DEFAULT_R 4096
2987 #define ZGEMM_DEFAULT_R 2048
/* ThunderX: the only branch with small (4x4 / 2x2) unroll factors. */
2989 #elif defined(THUNDERX)
2991 #define SGEMM_DEFAULT_UNROLL_M 4
2992 #define SGEMM_DEFAULT_UNROLL_N 4
2994 #define DGEMM_DEFAULT_UNROLL_M 2
2995 #define DGEMM_DEFAULT_UNROLL_N 2
2997 #define CGEMM_DEFAULT_UNROLL_M 2
2998 #define CGEMM_DEFAULT_UNROLL_N 2
3000 #define ZGEMM_DEFAULT_UNROLL_M 2
3001 #define ZGEMM_DEFAULT_UNROLL_N 2
3003 #define SGEMM_DEFAULT_P 128
3004 #define DGEMM_DEFAULT_P 128
3005 #define CGEMM_DEFAULT_P 96
3006 #define ZGEMM_DEFAULT_P 64
3008 #define SGEMM_DEFAULT_Q 240
3009 #define DGEMM_DEFAULT_Q 120
3010 #define CGEMM_DEFAULT_Q 120
3011 #define ZGEMM_DEFAULT_Q 120
3013 #define SGEMM_DEFAULT_R 12288
3014 #define DGEMM_DEFAULT_R 8192
3015 #define CGEMM_DEFAULT_R 4096
3016 #define ZGEMM_DEFAULT_R 4096
/* ThunderX2 T99 */
3018 #elif defined(THUNDERX2T99)
3020 #define SGEMM_DEFAULT_UNROLL_M 16
3021 #define SGEMM_DEFAULT_UNROLL_N 4
3023 #define DGEMM_DEFAULT_UNROLL_M 8
3024 #define DGEMM_DEFAULT_UNROLL_N 4
3026 #define CGEMM_DEFAULT_UNROLL_M 8
3027 #define CGEMM_DEFAULT_UNROLL_N 4
3029 #define ZGEMM_DEFAULT_UNROLL_M 4
3030 #define ZGEMM_DEFAULT_UNROLL_N 4
3032 #define SGEMM_DEFAULT_P 128
3033 #define DGEMM_DEFAULT_P 160
3034 #define CGEMM_DEFAULT_P 128
3035 #define ZGEMM_DEFAULT_P 128
3037 #define SGEMM_DEFAULT_Q 352
3038 #define DGEMM_DEFAULT_Q 128
3039 #define CGEMM_DEFAULT_Q 224
3040 #define ZGEMM_DEFAULT_Q 112
3042 #define SGEMM_DEFAULT_R 4096
3043 #define DGEMM_DEFAULT_R 4096
3044 #define CGEMM_DEFAULT_R 4096
3045 #define ZGEMM_DEFAULT_R 4096
/* ThunderX3 T110: differs from the THUNDERX2T99 branch only in
 * DGEMM_DEFAULT_P (320 instead of 160). */
3047 #elif defined(THUNDERX3T110)
3049 #define SGEMM_DEFAULT_UNROLL_M 16
3050 #define SGEMM_DEFAULT_UNROLL_N 4
3052 #define DGEMM_DEFAULT_UNROLL_M 8
3053 #define DGEMM_DEFAULT_UNROLL_N 4
3055 #define CGEMM_DEFAULT_UNROLL_M 8
3056 #define CGEMM_DEFAULT_UNROLL_N 4
3058 #define ZGEMM_DEFAULT_UNROLL_M 4
3059 #define ZGEMM_DEFAULT_UNROLL_N 4
3061 #define SGEMM_DEFAULT_P 128
3062 #define DGEMM_DEFAULT_P 320
3063 #define CGEMM_DEFAULT_P 128
3064 #define ZGEMM_DEFAULT_P 128
3066 #define SGEMM_DEFAULT_Q 352
3067 #define DGEMM_DEFAULT_Q 128
3068 #define CGEMM_DEFAULT_Q 224
3069 #define ZGEMM_DEFAULT_Q 112
3071 #define SGEMM_DEFAULT_R 4096
3072 #define DGEMM_DEFAULT_R 4096
3073 #define CGEMM_DEFAULT_R 4096
3074 #define ZGEMM_DEFAULT_R 4096
/* Neoverse N1: same values as the THUNDERX2T99 branch. */
3076 #elif defined(NEOVERSEN1)
3078 #define SGEMM_DEFAULT_UNROLL_M 16
3079 #define SGEMM_DEFAULT_UNROLL_N 4
3081 #define DGEMM_DEFAULT_UNROLL_M 8
3082 #define DGEMM_DEFAULT_UNROLL_N 4
3084 #define CGEMM_DEFAULT_UNROLL_M 8
3085 #define CGEMM_DEFAULT_UNROLL_N 4
3087 #define ZGEMM_DEFAULT_UNROLL_M 4
3088 #define ZGEMM_DEFAULT_UNROLL_N 4
3090 #define SGEMM_DEFAULT_P 128
3091 #define DGEMM_DEFAULT_P 160
3092 #define CGEMM_DEFAULT_P 128
3093 #define ZGEMM_DEFAULT_P 128
3095 #define SGEMM_DEFAULT_Q 352
3096 #define DGEMM_DEFAULT_Q 128
3097 #define CGEMM_DEFAULT_Q 224
3098 #define ZGEMM_DEFAULT_Q 112
3100 #define SGEMM_DEFAULT_R 4096
3101 #define DGEMM_DEFAULT_R 4096
3102 #define CGEMM_DEFAULT_R 4096
3103 #define ZGEMM_DEFAULT_R 4096
/* Fallback: same values as the THUNDERX2T99 and NEOVERSEN1 branches. */
3105 #else /* Other/undetected ARMv8 cores */
3107 #define SGEMM_DEFAULT_UNROLL_M 16
3108 #define SGEMM_DEFAULT_UNROLL_N 4
3110 #define DGEMM_DEFAULT_UNROLL_M 8
3111 #define DGEMM_DEFAULT_UNROLL_N 4
3113 #define CGEMM_DEFAULT_UNROLL_M 8
3114 #define CGEMM_DEFAULT_UNROLL_N 4
3116 #define ZGEMM_DEFAULT_UNROLL_M 4
3117 #define ZGEMM_DEFAULT_UNROLL_N 4
3119 #define SGEMM_DEFAULT_P 128
3120 #define DGEMM_DEFAULT_P 160
3121 #define CGEMM_DEFAULT_P 128
3122 #define ZGEMM_DEFAULT_P 128
3124 #define SGEMM_DEFAULT_Q 352
3125 #define DGEMM_DEFAULT_Q 128
3126 #define CGEMM_DEFAULT_Q 224
3127 #define ZGEMM_DEFAULT_Q 112
3129 #define SGEMM_DEFAULT_R 4096
3130 #define DGEMM_DEFAULT_R 4096
3131 #define CGEMM_DEFAULT_R 4096
3132 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 2x2 unroll factors for every variant.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing. */
3142 #define GEMM_DEFAULT_OFFSET_A 0
3143 #define GEMM_DEFAULT_OFFSET_B 0
3144 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
3146 #define SGEMM_DEFAULT_UNROLL_M 2
3147 #define SGEMM_DEFAULT_UNROLL_N 2
3149 #define DGEMM_DEFAULT_UNROLL_M 2
3150 #define DGEMM_DEFAULT_UNROLL_N 2
3152 #define CGEMM_DEFAULT_UNROLL_M 2
3153 #define CGEMM_DEFAULT_UNROLL_N 2
3155 #define ZGEMM_DEFAULT_UNROLL_M 2
3156 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
3158 #define SGEMM_DEFAULT_P 128
3159 #define DGEMM_DEFAULT_P 128
3160 #define CGEMM_DEFAULT_P 96
3161 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
3163 #define SGEMM_DEFAULT_Q 240
3164 #define DGEMM_DEFAULT_Q 120
3165 #define CGEMM_DEFAULT_Q 120
3166 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
3168 #define SGEMM_DEFAULT_R 12288
3169 #define DGEMM_DEFAULT_R 8192
3170 #define CGEMM_DEFAULT_R 4096
3171 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 4x4 real and 2x2 complex unroll factors.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing. */
3183 #define GEMM_DEFAULT_OFFSET_A 0
3184 #define GEMM_DEFAULT_OFFSET_B 0
3185 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
3187 #define SGEMM_DEFAULT_UNROLL_M 4
3188 #define SGEMM_DEFAULT_UNROLL_N 4
3190 #define DGEMM_DEFAULT_UNROLL_M 4
3191 #define DGEMM_DEFAULT_UNROLL_N 4
3193 #define CGEMM_DEFAULT_UNROLL_M 2
3194 #define CGEMM_DEFAULT_UNROLL_N 2
3196 #define ZGEMM_DEFAULT_UNROLL_M 2
3197 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
3199 #define SGEMM_DEFAULT_P 128
3200 #define DGEMM_DEFAULT_P 128
3201 #define CGEMM_DEFAULT_P 96
3202 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
3204 #define SGEMM_DEFAULT_Q 240
3205 #define DGEMM_DEFAULT_Q 120
3206 #define CGEMM_DEFAULT_Q 120
3207 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
3209 #define SGEMM_DEFAULT_R 12288
3210 #define DGEMM_DEFAULT_R 8192
3211 #define CGEMM_DEFAULT_R 4096
3212 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 4x4 real and 2x2 complex unroll factors;
 * every value here equals the set defined immediately before it.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing. */
3224 #define GEMM_DEFAULT_OFFSET_A 0
3225 #define GEMM_DEFAULT_OFFSET_B 0
3226 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
3228 #define SGEMM_DEFAULT_UNROLL_M 4
3229 #define SGEMM_DEFAULT_UNROLL_N 4
3231 #define DGEMM_DEFAULT_UNROLL_M 4
3232 #define DGEMM_DEFAULT_UNROLL_N 4
3234 #define CGEMM_DEFAULT_UNROLL_M 2
3235 #define CGEMM_DEFAULT_UNROLL_N 2
3237 #define ZGEMM_DEFAULT_UNROLL_M 2
3238 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
3240 #define SGEMM_DEFAULT_P 128
3241 #define DGEMM_DEFAULT_P 128
3242 #define CGEMM_DEFAULT_P 96
3243 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
3245 #define SGEMM_DEFAULT_Q 240
3246 #define DGEMM_DEFAULT_Q 120
3247 #define CGEMM_DEFAULT_Q 120
3248 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
3250 #define SGEMM_DEFAULT_R 12288
3251 #define DGEMM_DEFAULT_R 8192
3252 #define CGEMM_DEFAULT_R 4096
3253 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameters used when ZARCH_GENERIC is defined:
 * 2x2 unroll factors for every variant. */
3261 #if defined(ZARCH_GENERIC)
3265 #define GEMM_DEFAULT_OFFSET_A 0
3266 #define GEMM_DEFAULT_OFFSET_B 0
3267 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
3269 #define SGEMM_DEFAULT_UNROLL_M 2
3270 #define SGEMM_DEFAULT_UNROLL_N 2
3272 #define DGEMM_DEFAULT_UNROLL_M 2
3273 #define DGEMM_DEFAULT_UNROLL_N 2
3275 #define CGEMM_DEFAULT_UNROLL_M 2
3276 #define CGEMM_DEFAULT_UNROLL_N 2
3278 #define ZGEMM_DEFAULT_UNROLL_M 2
3279 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P defaults */
3281 #define SGEMM_DEFAULT_P 128
3282 #define DGEMM_DEFAULT_P 128
3283 #define CGEMM_DEFAULT_P 96
3284 #define ZGEMM_DEFAULT_P 64
/* Q defaults */
3286 #define SGEMM_DEFAULT_Q 240
3287 #define DGEMM_DEFAULT_Q 120
3288 #define CGEMM_DEFAULT_Q 120
3289 #define ZGEMM_DEFAULT_Q 120
/* R defaults */
3291 #define SGEMM_DEFAULT_R 12288
3292 #define DGEMM_DEFAULT_R 8192
3293 #define CGEMM_DEFAULT_R 4096
3294 #define ZGEMM_DEFAULT_R 4096
/* GEMM default parameter set with 8x4 real and 4x4 complex unroll factors
 * and comparatively large P/Q values.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing. */
3304 #define GEMM_DEFAULT_OFFSET_A 0
3305 #define GEMM_DEFAULT_OFFSET_B 0
3306 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
3308 #define SGEMM_DEFAULT_UNROLL_M 8
3309 #define SGEMM_DEFAULT_UNROLL_N 4
3311 #define DGEMM_DEFAULT_UNROLL_M 8
3312 #define DGEMM_DEFAULT_UNROLL_N 4
3314 #define CGEMM_DEFAULT_UNROLL_M 4
3315 #define CGEMM_DEFAULT_UNROLL_N 4
3317 #define ZGEMM_DEFAULT_UNROLL_M 4
3318 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P defaults */
3320 #define SGEMM_DEFAULT_P 456
3321 #define DGEMM_DEFAULT_P 320
3322 #define CGEMM_DEFAULT_P 480
3323 #define ZGEMM_DEFAULT_P 224
/* Q defaults */
3325 #define SGEMM_DEFAULT_Q 488
3326 #define DGEMM_DEFAULT_Q 384
3327 #define CGEMM_DEFAULT_Q 128
3328 #define ZGEMM_DEFAULT_Q 352
/* R defaults */
3330 #define SGEMM_DEFAULT_R 8192
3331 #define DGEMM_DEFAULT_R 4096
3332 #define CGEMM_DEFAULT_R 4096
3333 #define ZGEMM_DEFAULT_R 2048
/* GEMM default parameter set with 16x4 (single), 8x4 (double) and 4x4
 * (complex) unroll factors. Apart from the single-precision unroll, P and Q
 * values, it matches the set defined immediately before it.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing.
 * NOTE(review): GEMM_DEFAULT_ALIGN has no (BLASLONG) cast here, unlike most
 * sections in this file; verify that this is intentional. */
3344 #define GEMM_DEFAULT_OFFSET_A 0
3345 #define GEMM_DEFAULT_OFFSET_B 0
3346 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* UNROLL_M / UNROLL_N per variant */
3348 #define SGEMM_DEFAULT_UNROLL_M 16
3349 #define SGEMM_DEFAULT_UNROLL_N 4
3351 #define DGEMM_DEFAULT_UNROLL_M 8
3352 #define DGEMM_DEFAULT_UNROLL_N 4
3354 #define CGEMM_DEFAULT_UNROLL_M 4
3355 #define CGEMM_DEFAULT_UNROLL_N 4
3357 #define ZGEMM_DEFAULT_UNROLL_M 4
3358 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P defaults */
3360 #define SGEMM_DEFAULT_P 480
3361 #define DGEMM_DEFAULT_P 320
3362 #define CGEMM_DEFAULT_P 480
3363 #define ZGEMM_DEFAULT_P 224
/* Q defaults */
3365 #define SGEMM_DEFAULT_Q 512
3366 #define DGEMM_DEFAULT_Q 384
3367 #define CGEMM_DEFAULT_Q 128
3368 #define ZGEMM_DEFAULT_Q 352
/* R defaults */
3370 #define SGEMM_DEFAULT_R 8192
3371 #define DGEMM_DEFAULT_R 4096
3372 #define CGEMM_DEFAULT_R 4096
3373 #define ZGEMM_DEFAULT_R 2048
/* GEMM default parameter set that also covers the QGEMM and XGEMM variants.
 * Unlike the sets above, GEMM_DEFAULT_ALIGN is 0x0ffff, and the P and R
 * defaults expand to lower-case identifiers (sgemm_p, sgemm_r, ...) rather
 * than literals, so their values are supplied elsewhere.
 * NOTE(review): the #if/#ifdef that selects this set is not visible next to
 * these lines -- confirm which target it belongs to before editing. */
3386 #define GEMM_DEFAULT_OFFSET_A 0
3387 #define GEMM_DEFAULT_OFFSET_B 0
3388 #define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
/* UNROLL_N per variant */
3390 #define SGEMM_DEFAULT_UNROLL_N 2
3391 #define DGEMM_DEFAULT_UNROLL_N 2
3392 #define QGEMM_DEFAULT_UNROLL_N 2
3393 #define CGEMM_DEFAULT_UNROLL_N 2
3394 #define ZGEMM_DEFAULT_UNROLL_N 2
3395 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL_M per variant.
 * NOTE(review): this group is listed twice with identical values; presumably
 * the copies sit in two branches of a conditional whose directives are not
 * visible here -- confirm. */
3398 #define SGEMM_DEFAULT_UNROLL_M 2
3399 #define DGEMM_DEFAULT_UNROLL_M 2
3400 #define QGEMM_DEFAULT_UNROLL_M 2
3401 #define CGEMM_DEFAULT_UNROLL_M 2
3402 #define ZGEMM_DEFAULT_UNROLL_M 2
3403 #define XGEMM_DEFAULT_UNROLL_M 1
3405 #define SGEMM_DEFAULT_UNROLL_M 2
3406 #define DGEMM_DEFAULT_UNROLL_M 2
3407 #define QGEMM_DEFAULT_UNROLL_M 2
3408 #define CGEMM_DEFAULT_UNROLL_M 2
3409 #define ZGEMM_DEFAULT_UNROLL_M 2
3410 #define XGEMM_DEFAULT_UNROLL_M 1
/* P defaults: identifiers, not literals */
3413 #define SGEMM_DEFAULT_P sgemm_p
3414 #define DGEMM_DEFAULT_P dgemm_p
3415 #define QGEMM_DEFAULT_P qgemm_p
3416 #define CGEMM_DEFAULT_P cgemm_p
3417 #define ZGEMM_DEFAULT_P zgemm_p
3418 #define XGEMM_DEFAULT_P xgemm_p
/* R defaults: identifiers, not literals */
3420 #define SGEMM_DEFAULT_R sgemm_r
3421 #define DGEMM_DEFAULT_R dgemm_r
3422 #define QGEMM_DEFAULT_R qgemm_r
3423 #define CGEMM_DEFAULT_R cgemm_r
3424 #define ZGEMM_DEFAULT_R zgemm_r
3425 #define XGEMM_DEFAULT_R xgemm_r
/* Q defaults: a fixed 128 for every variant */
3427 #define SGEMM_DEFAULT_Q 128
3428 #define DGEMM_DEFAULT_Q 128
3429 #define QGEMM_DEFAULT_Q 128
3430 #define CGEMM_DEFAULT_Q 128
3431 #define ZGEMM_DEFAULT_Q 128
3432 #define XGEMM_DEFAULT_Q 128
/* Fallbacks: give the QGEMM and XGEMM unroll factors a value of 2 whenever
 * no earlier section has defined them (most sections above define only the
 * S/D/C/Z variants).
 * NOTE(review): the #endif matching each #ifndef is not visible here. */
3438 #ifndef QGEMM_DEFAULT_UNROLL_M
3439 #define QGEMM_DEFAULT_UNROLL_M 2
3442 #ifndef QGEMM_DEFAULT_UNROLL_N
3443 #define QGEMM_DEFAULT_UNROLL_N 2
3446 #ifndef XGEMM_DEFAULT_UNROLL_M
3447 #define XGEMM_DEFAULT_UNROLL_M 2
3450 #ifndef XGEMM_DEFAULT_UNROLL_N
3451 #define XGEMM_DEFAULT_UNROLL_N 2
/* Assembler helper macros SHUFPD_0 .. SHUFPD_3.
 * Each expands to an instruction mnemonic plus its immediate and ends with a
 * comma, presumably so the register operands follow at the point of use.
 * Two flavours are listed: one built on shufps (immediates 0x44, 0x4e, 0xe4,
 * 0xee) and one built on shufpd (immediates 0..3).
 * NOTE(review): the conditional that chooses between the two flavours is not
 * visible here; presumably the shufps immediates are meant to reproduce the
 * corresponding shufpd selections -- confirm against the instruction set
 * reference before changing them. */
/* shufps-based flavour */
3455 #define SHUFPD_0 shufps $0x44,
3456 #define SHUFPD_1 shufps $0x4e,
3457 #define SHUFPD_2 shufps $0xe4,
3458 #define SHUFPD_3 shufps $0xee,
/* shufpd-based flavour */
3462 #define SHUFPD_0 shufpd $0,
3466 #define SHUFPD_1 shufpd $1,
3470 #define SHUFPD_2 shufpd $2,
3474 #define SHUFPD_3 shufpd $3,
3478 #define SHUFPS_39 shufps $0x39,