1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the ISCAS nor the names of its contributors may
17 be used to endorse or promote products derived from this software
18 without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 **********************************************************************************/
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin. */
35 /* All rights reserved. */
37 /* Redistribution and use in source and binary forms, with or */
38 /* without modification, are permitted provided that the following */
39 /* conditions are met: */
41 /* 1. Redistributions of source code must retain the above */
42 /* copyright notice, this list of conditions and the following disclaimer. */
45 /* 2. Redistributions in binary form must reproduce the above */
46 /* copyright notice, this list of conditions and the following */
47 /* disclaimer in the documentation and/or other materials */
48 /* provided with the distribution. */
50 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
51 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
52 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
53 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
54 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
55 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
56 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
57 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
58 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
59 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
60 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
61 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
62 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
63 /* POSSIBILITY OF SUCH DAMAGE. */
65 /* The views and conclusions contained in the software and */
66 /* documentation are those of the authors and should not be */
67 /* interpreted as representing official policies, either expressed */
68 /* or implied, of The University of Texas at Austin. */
69 /*********************************************************************/
/*
 * Group of GEMM default macros: GEMM_DEFAULT_OFFSET_A/B and
 * GEMM_DEFAULT_ALIGN, then UNROLL_N, UNROLL_M, P, R and Q defaults for the
 * S/D/Q/C/Z/X GEMM prefixes, and finally HAVE_EXCLUSIVE_CACHE.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
79 #define GEMM_DEFAULT_OFFSET_A 64
80 #define GEMM_DEFAULT_OFFSET_B 256
81 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
83 #define SGEMM_DEFAULT_UNROLL_N 4
84 #define DGEMM_DEFAULT_UNROLL_N 4
85 #define QGEMM_DEFAULT_UNROLL_N 2
86 #define CGEMM_DEFAULT_UNROLL_N 2
87 #define ZGEMM_DEFAULT_UNROLL_N 2
88 #define XGEMM_DEFAULT_UNROLL_N 1
91 #define SGEMM_DEFAULT_UNROLL_M 4
92 #define DGEMM_DEFAULT_UNROLL_M 2
93 #define QGEMM_DEFAULT_UNROLL_M 2
94 #define CGEMM_DEFAULT_UNROLL_M 2
95 #define ZGEMM_DEFAULT_UNROLL_M 1
96 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): the six *_UNROLL_M names are defined again here (S, D, C and Z
 * with larger values). Presumably this is the other branch of a conditional
 * that is not shown in this excerpt -- confirm. */
98 #define SGEMM_DEFAULT_UNROLL_M 8
99 #define DGEMM_DEFAULT_UNROLL_M 4
100 #define QGEMM_DEFAULT_UNROLL_M 2
101 #define CGEMM_DEFAULT_UNROLL_M 4
102 #define ZGEMM_DEFAULT_UNROLL_M 2
103 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than literals;
 * those identifiers are not declared anywhere in this excerpt. */
106 #define SGEMM_DEFAULT_P sgemm_p
107 #define DGEMM_DEFAULT_P dgemm_p
108 #define QGEMM_DEFAULT_P qgemm_p
109 #define CGEMM_DEFAULT_P cgemm_p
110 #define ZGEMM_DEFAULT_P zgemm_p
111 #define XGEMM_DEFAULT_P xgemm_p
113 #define SGEMM_DEFAULT_R sgemm_r
114 #define DGEMM_DEFAULT_R dgemm_r
115 #define QGEMM_DEFAULT_R qgemm_r
116 #define CGEMM_DEFAULT_R cgemm_r
117 #define ZGEMM_DEFAULT_R zgemm_r
118 #define XGEMM_DEFAULT_R xgemm_r
/* Q is a literal shared by all six prefixes: 248 in the first set, 240 in the
 * second. NOTE(review): the second set repeats the names of the first;
 * presumably they belong to different conditional branches -- confirm. */
122 #define SGEMM_DEFAULT_Q 248
123 #define DGEMM_DEFAULT_Q 248
124 #define QGEMM_DEFAULT_Q 248
125 #define CGEMM_DEFAULT_Q 248
126 #define ZGEMM_DEFAULT_Q 248
127 #define XGEMM_DEFAULT_Q 248
131 #define SGEMM_DEFAULT_Q 240
132 #define DGEMM_DEFAULT_Q 240
133 #define QGEMM_DEFAULT_Q 240
134 #define CGEMM_DEFAULT_Q 240
135 #define ZGEMM_DEFAULT_Q 240
136 #define XGEMM_DEFAULT_Q 240
142 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM default macros selected when BARCELONA, SHANGHAI or BOBCAT is defined:
 * offsets/alignment, UNROLL_N, UNROLL_M, P, Q and R defaults for the
 * S/D/Q/C/Z/X prefixes, plus HAVE_EXCLUSIVE_CACHE and GEMM_THREAD.
 * NOTE(review): the matching #endif is not visible in this excerpt.
 */
146 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
151 #define GEMM_DEFAULT_OFFSET_A 64
152 #define GEMM_DEFAULT_OFFSET_B 832
153 #define GEMM_DEFAULT_ALIGN 0x0fffUL
155 #define SGEMM_DEFAULT_UNROLL_N 4
156 #define DGEMM_DEFAULT_UNROLL_N 4
157 #define QGEMM_DEFAULT_UNROLL_N 2
158 #define CGEMM_DEFAULT_UNROLL_N 2
159 #define ZGEMM_DEFAULT_UNROLL_N 2
160 #define XGEMM_DEFAULT_UNROLL_N 1
163 #define SGEMM_DEFAULT_UNROLL_M 4
164 #define DGEMM_DEFAULT_UNROLL_M 2
165 #define QGEMM_DEFAULT_UNROLL_M 2
166 #define CGEMM_DEFAULT_UNROLL_M 2
167 #define ZGEMM_DEFAULT_UNROLL_M 1
168 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): *_UNROLL_M is defined a second time (S, D, C and Z with larger
 * values); presumably the other branch of a conditional not shown here --
 * confirm against the complete header. */
170 #define SGEMM_DEFAULT_UNROLL_M 8
171 #define DGEMM_DEFAULT_UNROLL_M 4
172 #define QGEMM_DEFAULT_UNROLL_M 2
173 #define CGEMM_DEFAULT_UNROLL_M 4
174 #define ZGEMM_DEFAULT_UNROLL_M 2
175 #define XGEMM_DEFAULT_UNROLL_M 1
/* First literal P/Q set: Q is 248 for every prefix. */
179 #define SGEMM_DEFAULT_P 496
180 #define DGEMM_DEFAULT_P 248
181 #define QGEMM_DEFAULT_P 124
182 #define CGEMM_DEFAULT_P 248
183 #define ZGEMM_DEFAULT_P 124
184 #define XGEMM_DEFAULT_P 62
186 #define SGEMM_DEFAULT_Q 248
187 #define DGEMM_DEFAULT_Q 248
188 #define QGEMM_DEFAULT_Q 248
189 #define CGEMM_DEFAULT_Q 248
190 #define ZGEMM_DEFAULT_Q 248
191 #define XGEMM_DEFAULT_Q 248
/* Second literal P/Q set (Q is 224 for every prefix), repeating the names
 * above. NOTE(review): presumably an alternative conditional branch --
 * confirm. */
195 #define SGEMM_DEFAULT_P 448
196 #define DGEMM_DEFAULT_P 224
197 #define QGEMM_DEFAULT_P 112
198 #define CGEMM_DEFAULT_P 224
199 #define ZGEMM_DEFAULT_P 112
200 #define XGEMM_DEFAULT_P 56
202 #define SGEMM_DEFAULT_Q 224
203 #define DGEMM_DEFAULT_Q 224
204 #define QGEMM_DEFAULT_Q 224
205 #define CGEMM_DEFAULT_Q 224
206 #define ZGEMM_DEFAULT_Q 224
207 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are not declared in this
 * excerpt. */
211 #define SGEMM_DEFAULT_R sgemm_r
212 #define QGEMM_DEFAULT_R qgemm_r
213 #define DGEMM_DEFAULT_R dgemm_r
214 #define CGEMM_DEFAULT_R cgemm_r
215 #define ZGEMM_DEFAULT_R zgemm_r
216 #define XGEMM_DEFAULT_R xgemm_r
219 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD expands to the identifier gemm_thread_mn (not declared here). */
221 #define GEMM_THREAD gemm_thread_mn
/*
 * Group of GEMM default macros: offsets/alignment, UNROLL_N/UNROLL_M for the
 * S/D/Q/C/Z/X prefixes, CGEMM3M/ZGEMM3M unrolls, GEMV_UNROLL, P/Q/R defaults,
 * HAVE_EXCLUSIVE_CACHE and GEMM_THREAD.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
231 #define GEMM_DEFAULT_OFFSET_A 64
232 #define GEMM_DEFAULT_OFFSET_B 832
233 #define GEMM_DEFAULT_ALIGN 0x0fffUL
237 #define QGEMM_DEFAULT_UNROLL_N 2
238 #define CGEMM_DEFAULT_UNROLL_N 2
239 #define ZGEMM_DEFAULT_UNROLL_N 2
240 #define XGEMM_DEFAULT_UNROLL_N 1
243 #define SGEMM_DEFAULT_UNROLL_N 4
244 #define DGEMM_DEFAULT_UNROLL_N 4
245 #define SGEMM_DEFAULT_UNROLL_M 4
246 #define DGEMM_DEFAULT_UNROLL_M 2
247 #define QGEMM_DEFAULT_UNROLL_M 2
248 #define CGEMM_DEFAULT_UNROLL_M 2
249 #define ZGEMM_DEFAULT_UNROLL_M 1
250 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): SGEMM/DGEMM UNROLL_N and all six UNROLL_M names are defined a
 * second time below with different S/D/C/Z values. Only this second set also
 * defines the 3M unrolls and GEMV_UNROLL. Presumably it is the other branch of
 * a conditional not shown in this excerpt -- confirm. */
252 #define SGEMM_DEFAULT_UNROLL_N 2
253 #define DGEMM_DEFAULT_UNROLL_N 2
254 #define SGEMM_DEFAULT_UNROLL_M 16
255 #define DGEMM_DEFAULT_UNROLL_M 8
256 #define QGEMM_DEFAULT_UNROLL_M 2
257 #define CGEMM_DEFAULT_UNROLL_M 4
258 #define ZGEMM_DEFAULT_UNROLL_M 2
259 #define XGEMM_DEFAULT_UNROLL_M 1
260 #define CGEMM3M_DEFAULT_UNROLL_N 4
261 #define CGEMM3M_DEFAULT_UNROLL_M 8
262 #define ZGEMM3M_DEFAULT_UNROLL_N 4
263 #define ZGEMM3M_DEFAULT_UNROLL_M 4
264 #define GEMV_UNROLL 8
/* SGEMM/DGEMM P: 768/384 when ARCH_X86_64 is defined. NOTE(review): the
 * 448/224 pair that follows is presumably the #else branch; the #else and
 * #endif lines are not visible here. */
268 #if defined(ARCH_X86_64)
269 #define SGEMM_DEFAULT_P 768
270 #define DGEMM_DEFAULT_P 384
272 #define SGEMM_DEFAULT_P 448
273 #define DGEMM_DEFAULT_P 224
275 #define QGEMM_DEFAULT_P 112
276 #define CGEMM_DEFAULT_P 224
277 #define ZGEMM_DEFAULT_P 112
278 #define XGEMM_DEFAULT_P 56
/* SGEMM/DGEMM Q: 168 when ARCH_X86_64 is defined. NOTE(review): the 224 pair
 * is presumably the #else branch (not visible). */
280 #if defined(ARCH_X86_64)
281 #define SGEMM_DEFAULT_Q 168
282 #define DGEMM_DEFAULT_Q 168
284 #define SGEMM_DEFAULT_Q 224
285 #define DGEMM_DEFAULT_Q 224
287 #define QGEMM_DEFAULT_Q 224
288 #define CGEMM_DEFAULT_Q 224
289 #define ZGEMM_DEFAULT_Q 224
290 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are not declared in this
 * excerpt. */
292 #define SGEMM_DEFAULT_R sgemm_r
293 #define QGEMM_DEFAULT_R qgemm_r
294 #define DGEMM_DEFAULT_R dgemm_r
295 #define CGEMM_DEFAULT_R cgemm_r
296 #define ZGEMM_DEFAULT_R zgemm_r
297 #define XGEMM_DEFAULT_R xgemm_r
300 #define HAVE_EXCLUSIVE_CACHE
302 #define GEMM_THREAD gemm_thread_mn
/*
 * Group of GEMM default macros: offsets/alignment, UNROLL_N/UNROLL_M for the
 * S/D/Q/C/Z/X prefixes, CGEMM3M/ZGEMM3M unrolls, GEMV_UNROLL, P/Q/R defaults,
 * HAVE_EXCLUSIVE_CACHE and GEMM_THREAD.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
311 #define GEMM_DEFAULT_OFFSET_A 64
312 #define GEMM_DEFAULT_OFFSET_B 832
313 #define GEMM_DEFAULT_ALIGN 0x0fffUL
317 #define QGEMM_DEFAULT_UNROLL_N 2
318 #define CGEMM_DEFAULT_UNROLL_N 2
319 #define ZGEMM_DEFAULT_UNROLL_N 2
320 #define XGEMM_DEFAULT_UNROLL_N 1
323 #define SGEMM_DEFAULT_UNROLL_N 4
324 #define DGEMM_DEFAULT_UNROLL_N 4
325 #define SGEMM_DEFAULT_UNROLL_M 4
326 #define DGEMM_DEFAULT_UNROLL_M 2
327 #define QGEMM_DEFAULT_UNROLL_M 2
328 #define CGEMM_DEFAULT_UNROLL_M 2
329 #define ZGEMM_DEFAULT_UNROLL_M 1
330 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): SGEMM/DGEMM UNROLL_N and all six UNROLL_M names are defined a
 * second time below with different S/D/C/Z values. Only this second set also
 * defines the 3M unrolls and GEMV_UNROLL. Presumably it is the other branch of
 * a conditional not shown in this excerpt -- confirm. */
332 #define SGEMM_DEFAULT_UNROLL_N 2
333 #define DGEMM_DEFAULT_UNROLL_N 2
334 #define SGEMM_DEFAULT_UNROLL_M 16
335 #define DGEMM_DEFAULT_UNROLL_M 8
336 #define QGEMM_DEFAULT_UNROLL_M 2
337 #define CGEMM_DEFAULT_UNROLL_M 4
338 #define ZGEMM_DEFAULT_UNROLL_M 2
339 #define XGEMM_DEFAULT_UNROLL_M 1
340 #define CGEMM3M_DEFAULT_UNROLL_N 4
341 #define CGEMM3M_DEFAULT_UNROLL_M 8
342 #define ZGEMM3M_DEFAULT_UNROLL_N 4
343 #define ZGEMM3M_DEFAULT_UNROLL_M 4
344 #define GEMV_UNROLL 8
/* SGEMM/DGEMM P: 768/384 when ARCH_X86_64 is defined. NOTE(review): the
 * 448/224 pair that follows is presumably the #else branch; the #else and
 * #endif lines are not visible here. */
348 #if defined(ARCH_X86_64)
349 #define SGEMM_DEFAULT_P 768
350 #define DGEMM_DEFAULT_P 384
352 #define SGEMM_DEFAULT_P 448
353 #define DGEMM_DEFAULT_P 224
355 #define QGEMM_DEFAULT_P 112
356 #define CGEMM_DEFAULT_P 224
357 #define ZGEMM_DEFAULT_P 112
358 #define XGEMM_DEFAULT_P 56
/* SGEMM/DGEMM Q: 168 when ARCH_X86_64 is defined. NOTE(review): the 224 pair
 * is presumably the #else branch (not visible). */
360 #if defined(ARCH_X86_64)
361 #define SGEMM_DEFAULT_Q 168
362 #define DGEMM_DEFAULT_Q 168
364 #define SGEMM_DEFAULT_Q 224
365 #define DGEMM_DEFAULT_Q 224
367 #define QGEMM_DEFAULT_Q 224
368 #define CGEMM_DEFAULT_Q 224
369 #define ZGEMM_DEFAULT_Q 224
370 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers (sgemm_r, ...) that are not declared in this
 * excerpt. */
372 #define SGEMM_DEFAULT_R sgemm_r
373 #define QGEMM_DEFAULT_R qgemm_r
374 #define DGEMM_DEFAULT_R dgemm_r
375 #define CGEMM_DEFAULT_R cgemm_r
376 #define ZGEMM_DEFAULT_R zgemm_r
377 #define XGEMM_DEFAULT_R xgemm_r
380 #define HAVE_EXCLUSIVE_CACHE
382 #define GEMM_THREAD gemm_thread_mn
/*
 * Group of GEMM default macros: offsets/alignment, UNROLL_N, UNROLL_M, R, P
 * and Q defaults for the S/D/Q/C/Z/X prefixes, and HAVE_EXCLUSIVE_CACHE.
 * No name is defined twice within this group.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
391 #define GEMM_DEFAULT_OFFSET_A 0
392 #define GEMM_DEFAULT_OFFSET_B 384
393 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
395 #define SGEMM_DEFAULT_UNROLL_N 4
396 #define DGEMM_DEFAULT_UNROLL_N 4
397 #define QGEMM_DEFAULT_UNROLL_N 2
398 #define CGEMM_DEFAULT_UNROLL_N 2
399 #define ZGEMM_DEFAULT_UNROLL_N 2
400 #define XGEMM_DEFAULT_UNROLL_N 1
402 #define SGEMM_DEFAULT_UNROLL_M 2
403 #define DGEMM_DEFAULT_UNROLL_M 1
404 #define QGEMM_DEFAULT_UNROLL_M 2
405 #define CGEMM_DEFAULT_UNROLL_M 1
406 #define ZGEMM_DEFAULT_UNROLL_M 1
407 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are not declared in this
 * excerpt; P and Q below are literals. */
409 #define SGEMM_DEFAULT_R sgemm_r
410 #define DGEMM_DEFAULT_R dgemm_r
411 #define QGEMM_DEFAULT_R qgemm_r
412 #define CGEMM_DEFAULT_R cgemm_r
413 #define ZGEMM_DEFAULT_R zgemm_r
414 #define XGEMM_DEFAULT_R xgemm_r
416 #define SGEMM_DEFAULT_P 208
417 #define DGEMM_DEFAULT_P 104
418 #define QGEMM_DEFAULT_P 56
419 #define CGEMM_DEFAULT_P 104
420 #define ZGEMM_DEFAULT_P 56
421 #define XGEMM_DEFAULT_P 28
423 #define SGEMM_DEFAULT_Q 208
424 #define DGEMM_DEFAULT_Q 208
425 #define QGEMM_DEFAULT_Q 208
426 #define CGEMM_DEFAULT_Q 208
427 #define ZGEMM_DEFAULT_Q 208
428 #define XGEMM_DEFAULT_Q 208
431 #define HAVE_EXCLUSIVE_CACHE
/*
 * Group of GEMM default macros: offsets/alignment, UNROLL_N, UNROLL_M, R, P
 * and Q defaults for the S/D/Q/C/Z/X prefixes. P is 128 for every prefix;
 * Q varies per prefix. No name is defined twice within this group.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
439 #define GEMM_DEFAULT_OFFSET_A 0
440 #define GEMM_DEFAULT_OFFSET_B 256
441 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
443 #define SGEMM_DEFAULT_UNROLL_N 4
444 #define DGEMM_DEFAULT_UNROLL_N 4
445 #define QGEMM_DEFAULT_UNROLL_N 2
446 #define CGEMM_DEFAULT_UNROLL_N 2
447 #define ZGEMM_DEFAULT_UNROLL_N 2
448 #define XGEMM_DEFAULT_UNROLL_N 1
450 #define SGEMM_DEFAULT_UNROLL_M 2
451 #define DGEMM_DEFAULT_UNROLL_M 1
452 #define QGEMM_DEFAULT_UNROLL_M 2
453 #define CGEMM_DEFAULT_UNROLL_M 1
454 #define ZGEMM_DEFAULT_UNROLL_M 1
455 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers (sgemm_r, ...) that are not declared in this
 * excerpt. */
457 #define SGEMM_DEFAULT_R sgemm_r
458 #define DGEMM_DEFAULT_R dgemm_r
459 #define QGEMM_DEFAULT_R qgemm_r
460 #define CGEMM_DEFAULT_R cgemm_r
461 #define ZGEMM_DEFAULT_R zgemm_r
462 #define XGEMM_DEFAULT_R xgemm_r
464 #define SGEMM_DEFAULT_P 128
465 #define DGEMM_DEFAULT_P 128
466 #define QGEMM_DEFAULT_P 128
467 #define CGEMM_DEFAULT_P 128
468 #define ZGEMM_DEFAULT_P 128
469 #define XGEMM_DEFAULT_P 128
471 #define SGEMM_DEFAULT_Q 512
472 #define DGEMM_DEFAULT_Q 256
473 #define QGEMM_DEFAULT_Q 256
474 #define CGEMM_DEFAULT_Q 256
475 #define ZGEMM_DEFAULT_Q 128
476 #define XGEMM_DEFAULT_Q 128
/*
 * Group of GEMM default macros: offsets/alignment, two UNROLL_N/UNROLL_M sets,
 * then P, R and Q defaults for the S/D/Q/C/Z/X prefixes, and
 * HAVE_EXCLUSIVE_CACHE.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
486 #define GEMM_DEFAULT_OFFSET_A 64
487 #define GEMM_DEFAULT_OFFSET_B 256
488 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
491 #define SGEMM_DEFAULT_UNROLL_N 4
492 #define DGEMM_DEFAULT_UNROLL_N 4
493 #define QGEMM_DEFAULT_UNROLL_N 2
494 #define CGEMM_DEFAULT_UNROLL_N 2
495 #define ZGEMM_DEFAULT_UNROLL_N 2
496 #define XGEMM_DEFAULT_UNROLL_N 1
498 #define SGEMM_DEFAULT_UNROLL_M 4
499 #define DGEMM_DEFAULT_UNROLL_M 2
500 #define QGEMM_DEFAULT_UNROLL_M 2
501 #define CGEMM_DEFAULT_UNROLL_M 2
502 #define ZGEMM_DEFAULT_UNROLL_M 1
503 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): both the UNROLL_N and UNROLL_M names are defined a second time
 * below; presumably the other branch of a conditional not shown in this
 * excerpt -- confirm. */
505 #define SGEMM_DEFAULT_UNROLL_N 8
506 #define DGEMM_DEFAULT_UNROLL_N 4
507 #define QGEMM_DEFAULT_UNROLL_N 2
508 #define CGEMM_DEFAULT_UNROLL_N 4
509 #define ZGEMM_DEFAULT_UNROLL_N 2
510 #define XGEMM_DEFAULT_UNROLL_N 1
512 #define SGEMM_DEFAULT_UNROLL_M 4
513 #define DGEMM_DEFAULT_UNROLL_M 4
514 #define QGEMM_DEFAULT_UNROLL_M 2
515 #define CGEMM_DEFAULT_UNROLL_M 2
516 #define ZGEMM_DEFAULT_UNROLL_M 2
517 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is the literal 288 for every prefix; R expands to identifiers
 * (sgemm_r, ...) that are not declared in this excerpt. */
520 #define SGEMM_DEFAULT_P 288
521 #define DGEMM_DEFAULT_P 288
522 #define QGEMM_DEFAULT_P 288
523 #define CGEMM_DEFAULT_P 288
524 #define ZGEMM_DEFAULT_P 288
525 #define XGEMM_DEFAULT_P 288
527 #define SGEMM_DEFAULT_R sgemm_r
528 #define DGEMM_DEFAULT_R dgemm_r
529 #define QGEMM_DEFAULT_R qgemm_r
530 #define CGEMM_DEFAULT_R cgemm_r
531 #define ZGEMM_DEFAULT_R zgemm_r
532 #define XGEMM_DEFAULT_R xgemm_r
534 #define SGEMM_DEFAULT_Q 256
535 #define DGEMM_DEFAULT_Q 128
536 #define QGEMM_DEFAULT_Q 64
537 #define CGEMM_DEFAULT_Q 128
538 #define ZGEMM_DEFAULT_Q 64
539 #define XGEMM_DEFAULT_Q 32
542 #define HAVE_EXCLUSIVE_CACHE
/*
 * GEMM default macros selected when PENTIUM, PENTIUM2 or PENTIUM3 is defined:
 * offsets/alignment, UNROLL_M/UNROLL_N, and P/Q/R per prefix.
 * NOTE(review): the matching #endif is not visible in this excerpt.
 */
546 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
555 #define GEMM_DEFAULT_OFFSET_A 0
556 #define GEMM_DEFAULT_OFFSET_B 0
557 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* NOTE(review): SGEMM/CGEMM UNROLL_M are defined twice (8/4, then 4/2);
 * presumably two branches of a conditional not shown in this excerpt --
 * confirm against the complete header. */
560 #define SGEMM_DEFAULT_UNROLL_M 8
561 #define CGEMM_DEFAULT_UNROLL_M 4
563 #define SGEMM_DEFAULT_UNROLL_M 4
564 #define CGEMM_DEFAULT_UNROLL_M 2
566 #define DGEMM_DEFAULT_UNROLL_M 2
567 #define SGEMM_DEFAULT_UNROLL_N 2
568 #define DGEMM_DEFAULT_UNROLL_N 2
569 #define QGEMM_DEFAULT_UNROLL_M 2
570 #define QGEMM_DEFAULT_UNROLL_N 2
571 #define CGEMM_DEFAULT_UNROLL_N 1
572 #define ZGEMM_DEFAULT_UNROLL_M 1
573 #define ZGEMM_DEFAULT_UNROLL_N 1
574 #define XGEMM_DEFAULT_UNROLL_M 1
575 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per prefix: P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * not declared in this excerpt; Q is the literal 256 throughout. */
577 #define SGEMM_DEFAULT_P sgemm_p
578 #define SGEMM_DEFAULT_Q 256
579 #define SGEMM_DEFAULT_R sgemm_r
581 #define DGEMM_DEFAULT_P dgemm_p
582 #define DGEMM_DEFAULT_Q 256
583 #define DGEMM_DEFAULT_R dgemm_r
585 #define QGEMM_DEFAULT_P qgemm_p
586 #define QGEMM_DEFAULT_Q 256
587 #define QGEMM_DEFAULT_R qgemm_r
589 #define CGEMM_DEFAULT_P cgemm_p
590 #define CGEMM_DEFAULT_Q 256
591 #define CGEMM_DEFAULT_R cgemm_r
593 #define ZGEMM_DEFAULT_P zgemm_p
594 #define ZGEMM_DEFAULT_Q 256
595 #define ZGEMM_DEFAULT_R zgemm_r
597 #define XGEMM_DEFAULT_P xgemm_p
598 #define XGEMM_DEFAULT_Q 256
599 #define XGEMM_DEFAULT_R xgemm_r
/*
 * Group of GEMM default macros: offsets/alignment, two interleaved
 * UNROLL_M/UNROLL_N sets, and P/Q/R per prefix (S/D/Q/C/Z/X).
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
610 #define GEMM_DEFAULT_OFFSET_A 0
611 #define GEMM_DEFAULT_OFFSET_B 0
612 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
615 #define SGEMM_DEFAULT_UNROLL_M 4
616 #define SGEMM_DEFAULT_UNROLL_N 4
617 #define DGEMM_DEFAULT_UNROLL_M 2
618 #define DGEMM_DEFAULT_UNROLL_N 4
619 #define QGEMM_DEFAULT_UNROLL_M 2
620 #define QGEMM_DEFAULT_UNROLL_N 2
621 #define CGEMM_DEFAULT_UNROLL_M 2
622 #define CGEMM_DEFAULT_UNROLL_N 2
623 #define ZGEMM_DEFAULT_UNROLL_M 1
624 #define ZGEMM_DEFAULT_UNROLL_N 2
625 #define XGEMM_DEFAULT_UNROLL_M 1
626 #define XGEMM_DEFAULT_UNROLL_N 1
/* NOTE(review): every UNROLL_M/UNROLL_N name above is defined again below;
 * presumably the other branch of a conditional not shown in this excerpt --
 * confirm. */
628 #define SGEMM_DEFAULT_UNROLL_M 8
629 #define SGEMM_DEFAULT_UNROLL_N 2
630 #define DGEMM_DEFAULT_UNROLL_M 2
631 #define DGEMM_DEFAULT_UNROLL_N 2
632 #define QGEMM_DEFAULT_UNROLL_M 2
633 #define QGEMM_DEFAULT_UNROLL_N 2
634 #define CGEMM_DEFAULT_UNROLL_M 4
635 #define CGEMM_DEFAULT_UNROLL_N 1
636 #define ZGEMM_DEFAULT_UNROLL_M 1
637 #define ZGEMM_DEFAULT_UNROLL_N 1
638 #define XGEMM_DEFAULT_UNROLL_M 1
639 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per prefix: P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * not declared in this excerpt; Q is the literal 256 throughout. */
643 #define SGEMM_DEFAULT_P sgemm_p
644 #define SGEMM_DEFAULT_Q 256
645 #define SGEMM_DEFAULT_R sgemm_r
647 #define DGEMM_DEFAULT_P dgemm_p
648 #define DGEMM_DEFAULT_Q 256
649 #define DGEMM_DEFAULT_R dgemm_r
651 #define QGEMM_DEFAULT_P qgemm_p
652 #define QGEMM_DEFAULT_Q 256
653 #define QGEMM_DEFAULT_R qgemm_r
655 #define CGEMM_DEFAULT_P cgemm_p
656 #define CGEMM_DEFAULT_Q 256
657 #define CGEMM_DEFAULT_R cgemm_r
659 #define ZGEMM_DEFAULT_P zgemm_p
660 #define ZGEMM_DEFAULT_Q 256
661 #define ZGEMM_DEFAULT_R zgemm_r
663 #define XGEMM_DEFAULT_P xgemm_p
664 #define XGEMM_DEFAULT_Q 256
665 #define XGEMM_DEFAULT_R xgemm_r
/*
 * GEMM default macros selected when CORE_NORTHWOOD is defined:
 * offsets/alignment, UNROLL_M, UNROLL_N, P/R and Q defaults for the
 * S/D/Q/C/Z/X prefixes. No name is defined twice within this group.
 * NOTE(review): the matching #endif is not visible in this excerpt.
 */
670 #ifdef CORE_NORTHWOOD
675 #define GEMM_DEFAULT_OFFSET_A 0
676 #define GEMM_DEFAULT_OFFSET_B 32
678 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
682 #define SGEMM_DEFAULT_UNROLL_M 8
683 #define DGEMM_DEFAULT_UNROLL_M 4
684 #define QGEMM_DEFAULT_UNROLL_M 2
685 #define CGEMM_DEFAULT_UNROLL_M 4
686 #define ZGEMM_DEFAULT_UNROLL_M 2
687 #define XGEMM_DEFAULT_UNROLL_M 1
689 #define SGEMM_DEFAULT_UNROLL_N 2
690 #define DGEMM_DEFAULT_UNROLL_N 2
691 #define QGEMM_DEFAULT_UNROLL_N 2
692 #define CGEMM_DEFAULT_UNROLL_N 1
693 #define ZGEMM_DEFAULT_UNROLL_N 1
694 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt. */
696 #define SGEMM_DEFAULT_P sgemm_p
697 #define SGEMM_DEFAULT_R sgemm_r
699 #define DGEMM_DEFAULT_P dgemm_p
700 #define DGEMM_DEFAULT_R dgemm_r
702 #define QGEMM_DEFAULT_P qgemm_p
703 #define QGEMM_DEFAULT_R qgemm_r
705 #define CGEMM_DEFAULT_P cgemm_p
706 #define CGEMM_DEFAULT_R cgemm_r
708 #define ZGEMM_DEFAULT_P zgemm_p
709 #define ZGEMM_DEFAULT_R zgemm_r
711 #define XGEMM_DEFAULT_P xgemm_p
712 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for every prefix. */
714 #define SGEMM_DEFAULT_Q 128
715 #define DGEMM_DEFAULT_Q 128
716 #define QGEMM_DEFAULT_Q 128
717 #define CGEMM_DEFAULT_Q 128
718 #define ZGEMM_DEFAULT_Q 128
719 #define XGEMM_DEFAULT_Q 128
/*
 * Group of GEMM default macros: two OFFSET_A/OFFSET_B pairs, alignment, two
 * UNROLL_M sets, UNROLL_N, P/R and Q defaults for the S/D/Q/C/Z/X prefixes.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
/* NOTE(review): OFFSET_A/OFFSET_B are defined twice (128/192, then 0/256);
 * presumably two branches of a conditional not shown here -- confirm. */
728 #define GEMM_DEFAULT_OFFSET_A 128
729 #define GEMM_DEFAULT_OFFSET_B 192
731 #define GEMM_DEFAULT_OFFSET_A 0
732 #define GEMM_DEFAULT_OFFSET_B 256
735 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
740 #define SGEMM_DEFAULT_UNROLL_M 4
741 #define DGEMM_DEFAULT_UNROLL_M 2
742 #define QGEMM_DEFAULT_UNROLL_M 2
743 #define CGEMM_DEFAULT_UNROLL_M 2
744 #define ZGEMM_DEFAULT_UNROLL_M 1
745 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): *_UNROLL_M is defined a second time (S, D, C and Z with larger
 * values); presumably the other branch of a conditional not shown here --
 * confirm. */
747 #define SGEMM_DEFAULT_UNROLL_M 8
748 #define DGEMM_DEFAULT_UNROLL_M 4
749 #define QGEMM_DEFAULT_UNROLL_M 2
750 #define CGEMM_DEFAULT_UNROLL_M 4
751 #define ZGEMM_DEFAULT_UNROLL_M 2
752 #define XGEMM_DEFAULT_UNROLL_M 1
755 #define SGEMM_DEFAULT_UNROLL_N 4
756 #define DGEMM_DEFAULT_UNROLL_N 4
757 #define QGEMM_DEFAULT_UNROLL_N 2
758 #define CGEMM_DEFAULT_UNROLL_N 2
759 #define ZGEMM_DEFAULT_UNROLL_N 2
760 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt. */
762 #define SGEMM_DEFAULT_P sgemm_p
763 #define SGEMM_DEFAULT_R sgemm_r
765 #define DGEMM_DEFAULT_P dgemm_p
766 #define DGEMM_DEFAULT_R dgemm_r
768 #define QGEMM_DEFAULT_P qgemm_p
769 #define QGEMM_DEFAULT_R qgemm_r
771 #define CGEMM_DEFAULT_P cgemm_p
772 #define CGEMM_DEFAULT_R cgemm_r
774 #define ZGEMM_DEFAULT_P zgemm_p
775 #define ZGEMM_DEFAULT_R zgemm_r
777 #define XGEMM_DEFAULT_P xgemm_p
778 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for every prefix. */
780 #define SGEMM_DEFAULT_Q 128
781 #define DGEMM_DEFAULT_Q 128
782 #define QGEMM_DEFAULT_Q 128
783 #define CGEMM_DEFAULT_Q 128
784 #define ZGEMM_DEFAULT_Q 128
785 #define XGEMM_DEFAULT_Q 128
/*
 * Group of GEMM default macros: offsets/alignment, SWITCH_RATIO, two
 * UNROLL_M/UNROLL_N sets separated by the helper macro MASK, then P/R and Q
 * defaults for the S/D/Q/C/Z/X prefixes.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
793 #define GEMM_DEFAULT_OFFSET_A 448
794 #define GEMM_DEFAULT_OFFSET_B 128
795 #define GEMM_DEFAULT_ALIGN 0x03fffUL
799 #define SWITCH_RATIO 4
802 #define SGEMM_DEFAULT_UNROLL_M 8
803 #define DGEMM_DEFAULT_UNROLL_M 4
804 #define QGEMM_DEFAULT_UNROLL_M 2
805 #define CGEMM_DEFAULT_UNROLL_M 4
806 #define ZGEMM_DEFAULT_UNROLL_M 2
807 #define XGEMM_DEFAULT_UNROLL_M 1
809 #define SGEMM_DEFAULT_UNROLL_N 2
810 #define DGEMM_DEFAULT_UNROLL_N 2
811 #define QGEMM_DEFAULT_UNROLL_N 2
812 #define CGEMM_DEFAULT_UNROLL_N 1
813 #define ZGEMM_DEFAULT_UNROLL_N 1
814 #define XGEMM_DEFAULT_UNROLL_N 1
/* MASK(a, b) computes ((a + b - 1) / b) * b: with integer division and
 * positive operands this is a rounded up to a multiple of b. Both arguments
 * are parenthesized; b is expanded three times, so it should have no side
 * effects. MASK is not used anywhere in this excerpt. */
816 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/* NOTE(review): the UNROLL_M/UNROLL_N names are defined a second time below
 * (UNROLL_M unchanged, UNROLL_N larger for S, D, C and Z); presumably the
 * other branch of a conditional not shown in this excerpt -- confirm. */
819 #define SGEMM_DEFAULT_UNROLL_M 8
820 #define DGEMM_DEFAULT_UNROLL_M 4
821 #define QGEMM_DEFAULT_UNROLL_M 2
822 #define CGEMM_DEFAULT_UNROLL_M 4
823 #define ZGEMM_DEFAULT_UNROLL_M 2
824 #define XGEMM_DEFAULT_UNROLL_M 1
826 #define SGEMM_DEFAULT_UNROLL_N 4
827 #define DGEMM_DEFAULT_UNROLL_N 4
828 #define QGEMM_DEFAULT_UNROLL_N 2
829 #define CGEMM_DEFAULT_UNROLL_N 2
830 #define ZGEMM_DEFAULT_UNROLL_N 2
831 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt. */
834 #define SGEMM_DEFAULT_P sgemm_p
835 #define SGEMM_DEFAULT_R sgemm_r
837 #define DGEMM_DEFAULT_P dgemm_p
838 #define DGEMM_DEFAULT_R dgemm_r
840 #define QGEMM_DEFAULT_P qgemm_p
841 #define QGEMM_DEFAULT_R qgemm_r
843 #define CGEMM_DEFAULT_P cgemm_p
844 #define CGEMM_DEFAULT_R cgemm_r
846 #define ZGEMM_DEFAULT_P zgemm_p
847 #define ZGEMM_DEFAULT_R zgemm_r
849 #define XGEMM_DEFAULT_P xgemm_p
850 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 256 for every prefix. */
852 #define SGEMM_DEFAULT_Q 256
853 #define DGEMM_DEFAULT_Q 256
854 #define QGEMM_DEFAULT_Q 256
855 #define CGEMM_DEFAULT_Q 256
856 #define ZGEMM_DEFAULT_Q 256
857 #define XGEMM_DEFAULT_Q 256
/*
 * Group of GEMM default macros: offsets/alignment, SWITCH_RATIO, two
 * UNROLL_M/UNROLL_N sets, P/R and Q defaults for the S/D/Q/C/Z/X prefixes,
 * and GETRF_FACTOR.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
866 #define GEMM_DEFAULT_OFFSET_A 128
867 #define GEMM_DEFAULT_OFFSET_B 0
868 #define GEMM_DEFAULT_ALIGN 0x03fffUL
872 #define SWITCH_RATIO 4
875 #define SGEMM_DEFAULT_UNROLL_M 4
876 #define DGEMM_DEFAULT_UNROLL_M 2
877 #define QGEMM_DEFAULT_UNROLL_M 2
878 #define CGEMM_DEFAULT_UNROLL_M 2
879 #define ZGEMM_DEFAULT_UNROLL_M 1
880 #define XGEMM_DEFAULT_UNROLL_M 1
882 #define SGEMM_DEFAULT_UNROLL_N 4
883 #define DGEMM_DEFAULT_UNROLL_N 4
884 #define QGEMM_DEFAULT_UNROLL_N 2
885 #define CGEMM_DEFAULT_UNROLL_N 2
886 #define ZGEMM_DEFAULT_UNROLL_N 2
887 #define XGEMM_DEFAULT_UNROLL_N 1
/* NOTE(review): the UNROLL_M/UNROLL_N names are defined a second time below
 * (UNROLL_M larger for S, D, C and Z; UNROLL_N unchanged); presumably the
 * other branch of a conditional not shown in this excerpt -- confirm. */
889 #define SGEMM_DEFAULT_UNROLL_M 8
890 #define DGEMM_DEFAULT_UNROLL_M 4
891 #define QGEMM_DEFAULT_UNROLL_M 2
892 #define CGEMM_DEFAULT_UNROLL_M 4
893 #define ZGEMM_DEFAULT_UNROLL_M 2
894 #define XGEMM_DEFAULT_UNROLL_M 1
896 #define SGEMM_DEFAULT_UNROLL_N 4
897 #define DGEMM_DEFAULT_UNROLL_N 4
898 #define QGEMM_DEFAULT_UNROLL_N 2
899 #define CGEMM_DEFAULT_UNROLL_N 2
900 #define ZGEMM_DEFAULT_UNROLL_N 2
901 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt. */
904 #define SGEMM_DEFAULT_P sgemm_p
905 #define SGEMM_DEFAULT_R sgemm_r
907 #define DGEMM_DEFAULT_P dgemm_p
908 #define DGEMM_DEFAULT_R dgemm_r
910 #define QGEMM_DEFAULT_P qgemm_p
911 #define QGEMM_DEFAULT_R qgemm_r
913 #define CGEMM_DEFAULT_P cgemm_p
914 #define CGEMM_DEFAULT_R cgemm_r
916 #define ZGEMM_DEFAULT_P zgemm_p
917 #define ZGEMM_DEFAULT_R zgemm_r
919 #define XGEMM_DEFAULT_P xgemm_p
920 #define XGEMM_DEFAULT_R xgemm_r
922 #define SGEMM_DEFAULT_Q 512
923 #define DGEMM_DEFAULT_Q 256
924 #define QGEMM_DEFAULT_Q 128
925 #define CGEMM_DEFAULT_Q 512
926 #define ZGEMM_DEFAULT_Q 256
927 #define XGEMM_DEFAULT_Q 128
/* GETRF_FACTOR is a floating-point literal; its consumer is not visible in
 * this excerpt. */
929 #define GETRF_FACTOR 0.75
/*
 * Group of GEMM default macros: offsets/alignment, SWITCH_RATIO, two
 * UNROLL_M/UNROLL_N sets, P/R and Q defaults for the S/D/Q/C/Z/X prefixes,
 * GETRF_FACTOR and GEMM_THREAD.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
937 #define GEMM_DEFAULT_OFFSET_A 128
938 #define GEMM_DEFAULT_OFFSET_B 0
939 #define GEMM_DEFAULT_ALIGN 0x03fffUL
943 #define SWITCH_RATIO 4
946 #define SGEMM_DEFAULT_UNROLL_M 4
947 #define DGEMM_DEFAULT_UNROLL_M 2
948 #define QGEMM_DEFAULT_UNROLL_M 2
949 #define CGEMM_DEFAULT_UNROLL_M 2
950 #define ZGEMM_DEFAULT_UNROLL_M 1
951 #define XGEMM_DEFAULT_UNROLL_M 1
953 #define SGEMM_DEFAULT_UNROLL_N 4
954 #define DGEMM_DEFAULT_UNROLL_N 4
955 #define QGEMM_DEFAULT_UNROLL_N 2
956 #define CGEMM_DEFAULT_UNROLL_N 2
957 #define ZGEMM_DEFAULT_UNROLL_N 2
958 #define XGEMM_DEFAULT_UNROLL_N 1
/* NOTE(review): the UNROLL_M/UNROLL_N names are defined a second time below
 * (UNROLL_M larger for S, D, C and Z; UNROLL_N unchanged); presumably the
 * other branch of a conditional not shown in this excerpt -- confirm. */
960 #define SGEMM_DEFAULT_UNROLL_M 8
961 #define DGEMM_DEFAULT_UNROLL_M 4
962 #define QGEMM_DEFAULT_UNROLL_M 2
963 #define CGEMM_DEFAULT_UNROLL_M 4
964 #define ZGEMM_DEFAULT_UNROLL_M 2
965 #define XGEMM_DEFAULT_UNROLL_M 1
967 #define SGEMM_DEFAULT_UNROLL_N 4
968 #define DGEMM_DEFAULT_UNROLL_N 4
969 #define QGEMM_DEFAULT_UNROLL_N 2
970 #define CGEMM_DEFAULT_UNROLL_N 2
971 #define ZGEMM_DEFAULT_UNROLL_N 2
972 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt. */
975 #define SGEMM_DEFAULT_P sgemm_p
976 #define SGEMM_DEFAULT_R sgemm_r
978 #define DGEMM_DEFAULT_P dgemm_p
979 #define DGEMM_DEFAULT_R dgemm_r
981 #define QGEMM_DEFAULT_P qgemm_p
982 #define QGEMM_DEFAULT_R qgemm_r
984 #define CGEMM_DEFAULT_P cgemm_p
985 #define CGEMM_DEFAULT_R cgemm_r
987 #define ZGEMM_DEFAULT_P zgemm_p
988 #define ZGEMM_DEFAULT_R zgemm_r
990 #define XGEMM_DEFAULT_P xgemm_p
991 #define XGEMM_DEFAULT_R xgemm_r
993 #define SGEMM_DEFAULT_Q 768
994 #define DGEMM_DEFAULT_Q 384
995 #define QGEMM_DEFAULT_Q 192
996 #define CGEMM_DEFAULT_Q 768
997 #define ZGEMM_DEFAULT_Q 384
998 #define XGEMM_DEFAULT_Q 192
/* GETRF_FACTOR is a floating-point literal and GEMM_THREAD expands to the
 * identifier gemm_thread_mn; neither consumer is visible in this excerpt. */
1000 #define GETRF_FACTOR 0.75
1001 #define GEMM_THREAD gemm_thread_mn
/*
 * Group of GEMM default macros: offsets/alignment, SWITCH_RATIO, two
 * UNROLL_M/UNROLL_N sets, literal P values with identifier R values, Q
 * defaults for the S/D/Q/C/Z/X prefixes, and GETRF_FACTOR.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
1009 #define GEMM_DEFAULT_OFFSET_A 32
1010 #define GEMM_DEFAULT_OFFSET_B 0
1011 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1015 #define SWITCH_RATIO 4
1018 #define SGEMM_DEFAULT_UNROLL_M 4
1019 #define DGEMM_DEFAULT_UNROLL_M 2
1020 #define QGEMM_DEFAULT_UNROLL_M 2
1021 #define CGEMM_DEFAULT_UNROLL_M 2
1022 #define ZGEMM_DEFAULT_UNROLL_M 1
1023 #define XGEMM_DEFAULT_UNROLL_M 1
1025 #define SGEMM_DEFAULT_UNROLL_N 4
1026 #define DGEMM_DEFAULT_UNROLL_N 4
1027 #define QGEMM_DEFAULT_UNROLL_N 2
1028 #define CGEMM_DEFAULT_UNROLL_N 2
1029 #define ZGEMM_DEFAULT_UNROLL_N 2
1030 #define XGEMM_DEFAULT_UNROLL_N 1
/* NOTE(review): the UNROLL_M/UNROLL_N names are defined a second time below.
 * The UNROLL_M values are the same as above; UNROLL_N is larger for S, D, C
 * and Z. Presumably this is the other branch of a conditional not shown in
 * this excerpt -- confirm. */
1032 #define SGEMM_DEFAULT_UNROLL_M 4
1033 #define DGEMM_DEFAULT_UNROLL_M 2
1034 #define QGEMM_DEFAULT_UNROLL_M 2
1035 #define CGEMM_DEFAULT_UNROLL_M 2
1036 #define ZGEMM_DEFAULT_UNROLL_M 1
1037 #define XGEMM_DEFAULT_UNROLL_M 1
1039 #define SGEMM_DEFAULT_UNROLL_N 8
1040 #define DGEMM_DEFAULT_UNROLL_N 8
1041 #define QGEMM_DEFAULT_UNROLL_N 2
1042 #define CGEMM_DEFAULT_UNROLL_N 4
1043 #define ZGEMM_DEFAULT_UNROLL_N 4
1044 #define XGEMM_DEFAULT_UNROLL_N 1
/* P is a literal here (504 for S/D/Q, 252 for C/Z/X); R still expands to
 * identifiers (sgemm_r, ...) that are not declared in this excerpt. */
1047 #define SGEMM_DEFAULT_P 504
1048 #define SGEMM_DEFAULT_R sgemm_r
1050 #define DGEMM_DEFAULT_P 504
1051 #define DGEMM_DEFAULT_R dgemm_r
1053 #define QGEMM_DEFAULT_P 504
1054 #define QGEMM_DEFAULT_R qgemm_r
1056 #define CGEMM_DEFAULT_P 252
1057 #define CGEMM_DEFAULT_R cgemm_r
1059 #define ZGEMM_DEFAULT_P 252
1060 #define ZGEMM_DEFAULT_R zgemm_r
1062 #define XGEMM_DEFAULT_P 252
1063 #define XGEMM_DEFAULT_R xgemm_r
1065 #define SGEMM_DEFAULT_Q 512
1066 #define DGEMM_DEFAULT_Q 256
1067 #define QGEMM_DEFAULT_Q 128
1068 #define CGEMM_DEFAULT_Q 512
1069 #define ZGEMM_DEFAULT_Q 256
1070 #define XGEMM_DEFAULT_Q 128
/* GETRF_FACTOR is a floating-point literal; its consumer is not visible in
 * this excerpt. */
1072 #define GETRF_FACTOR 0.72
/*
 * Group of GEMM default macros: offsets/alignment, SWITCH_RATIO, two
 * UNROLL_M/UNROLL_N sets, literal P values, R values (with commented-out
 * alternatives), Q defaults for the S/D/Q/C/Z/X prefixes, and GETRF_FACTOR.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
1082 #define GEMM_DEFAULT_OFFSET_A 0
1083 #define GEMM_DEFAULT_OFFSET_B 0
1084 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1088 #define SWITCH_RATIO 4
1091 #define SGEMM_DEFAULT_UNROLL_M 4
1092 #define DGEMM_DEFAULT_UNROLL_M 2
1093 #define QGEMM_DEFAULT_UNROLL_M 2
1094 #define CGEMM_DEFAULT_UNROLL_M 2
1095 #define ZGEMM_DEFAULT_UNROLL_M 1
1096 #define XGEMM_DEFAULT_UNROLL_M 1
1098 #define SGEMM_DEFAULT_UNROLL_N 4
1099 #define DGEMM_DEFAULT_UNROLL_N 4
1100 #define QGEMM_DEFAULT_UNROLL_N 2
1101 #define CGEMM_DEFAULT_UNROLL_N 2
1102 #define ZGEMM_DEFAULT_UNROLL_N 2
1103 #define XGEMM_DEFAULT_UNROLL_N 1
/* NOTE(review): the UNROLL_M/UNROLL_N names are defined a second time below
 * with larger S/D/C/Z values for UNROLL_M and larger S/C/Z values for
 * UNROLL_N; presumably the other branch of a conditional not shown in this
 * excerpt -- confirm. */
1105 #define SGEMM_DEFAULT_UNROLL_M 8
1106 #define DGEMM_DEFAULT_UNROLL_M 8
1107 #define QGEMM_DEFAULT_UNROLL_M 2
1108 #define CGEMM_DEFAULT_UNROLL_M 8
1109 #define ZGEMM_DEFAULT_UNROLL_M 4
1110 #define XGEMM_DEFAULT_UNROLL_M 1
1112 #define SGEMM_DEFAULT_UNROLL_N 8
1113 #define DGEMM_DEFAULT_UNROLL_N 4
1114 #define QGEMM_DEFAULT_UNROLL_N 2
1115 #define CGEMM_DEFAULT_UNROLL_N 4
1116 #define ZGEMM_DEFAULT_UNROLL_N 4
1117 #define XGEMM_DEFAULT_UNROLL_N 1
/* P is a literal for every prefix. For R, S/D/Q/Z/X expand to identifiers
 * (sgemm_r, ...) that are not declared in this excerpt, while CGEMM_DEFAULT_R
 * alone is the literal 1024. The commented-out lines keep the opposite choice
 * for S, D, C and Z. */
1120 #define SGEMM_DEFAULT_P 512
1121 #define SGEMM_DEFAULT_R sgemm_r
1122 //#define SGEMM_DEFAULT_R 1024
1124 #define DGEMM_DEFAULT_P 512
1125 #define DGEMM_DEFAULT_R dgemm_r
1126 //#define DGEMM_DEFAULT_R 1024
1128 #define QGEMM_DEFAULT_P 504
1129 #define QGEMM_DEFAULT_R qgemm_r
1131 #define CGEMM_DEFAULT_P 128
1132 //#define CGEMM_DEFAULT_R cgemm_r
1133 #define CGEMM_DEFAULT_R 1024
1135 #define ZGEMM_DEFAULT_P 512
1136 #define ZGEMM_DEFAULT_R zgemm_r
1137 //#define ZGEMM_DEFAULT_R 1024
1139 #define XGEMM_DEFAULT_P 252
1140 #define XGEMM_DEFAULT_R xgemm_r
1142 #define SGEMM_DEFAULT_Q 256
1143 #define DGEMM_DEFAULT_Q 256
1144 #define QGEMM_DEFAULT_Q 128
1145 #define CGEMM_DEFAULT_Q 256
1146 #define ZGEMM_DEFAULT_Q 192
1147 #define XGEMM_DEFAULT_Q 128
/* GETRF_FACTOR is a floating-point literal; its consumer is not visible in
 * this excerpt. */
1149 #define GETRF_FACTOR 0.72
/*
 * Group of GEMM default macros: offsets/alignment, two UNROLL_M sets,
 * UNROLL_N, P/R and Q defaults for the S/D/Q/C/Z/X prefixes.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
1160 #define GEMM_DEFAULT_OFFSET_A 64
1161 #define GEMM_DEFAULT_OFFSET_B 0
1162 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1167 #define SGEMM_DEFAULT_UNROLL_M 4
1168 #define DGEMM_DEFAULT_UNROLL_M 2
1169 #define QGEMM_DEFAULT_UNROLL_M 2
1170 #define CGEMM_DEFAULT_UNROLL_M 2
1171 #define ZGEMM_DEFAULT_UNROLL_M 1
1172 #define XGEMM_DEFAULT_UNROLL_M 1
/* NOTE(review): *_UNROLL_M is defined a second time (S, D, C and Z with larger
 * values); presumably the other branch of a conditional not shown here --
 * confirm. */
1174 #define SGEMM_DEFAULT_UNROLL_M 8
1175 #define DGEMM_DEFAULT_UNROLL_M 4
1176 #define QGEMM_DEFAULT_UNROLL_M 2
1177 #define CGEMM_DEFAULT_UNROLL_M 4
1178 #define ZGEMM_DEFAULT_UNROLL_M 2
1179 #define XGEMM_DEFAULT_UNROLL_M 1
1182 #define SGEMM_DEFAULT_UNROLL_N 4
1183 #define DGEMM_DEFAULT_UNROLL_N 2
1184 #define QGEMM_DEFAULT_UNROLL_N 2
1185 #define CGEMM_DEFAULT_UNROLL_N 2
1186 #define ZGEMM_DEFAULT_UNROLL_N 1
1187 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt. */
1189 #define SGEMM_DEFAULT_P sgemm_p
1190 #define SGEMM_DEFAULT_R sgemm_r
1192 #define DGEMM_DEFAULT_P dgemm_p
1193 #define DGEMM_DEFAULT_R dgemm_r
1195 #define QGEMM_DEFAULT_P qgemm_p
1196 #define QGEMM_DEFAULT_R qgemm_r
1198 #define CGEMM_DEFAULT_P cgemm_p
1199 #define CGEMM_DEFAULT_R cgemm_r
1201 #define ZGEMM_DEFAULT_P zgemm_p
1202 #define ZGEMM_DEFAULT_R zgemm_r
1204 #define XGEMM_DEFAULT_P xgemm_p
1205 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 256 for every prefix. */
1207 #define SGEMM_DEFAULT_Q 256
1208 #define DGEMM_DEFAULT_Q 256
1209 #define QGEMM_DEFAULT_Q 256
1210 #define CGEMM_DEFAULT_Q 256
1211 #define ZGEMM_DEFAULT_Q 256
1212 #define XGEMM_DEFAULT_Q 256
/*
 * Group of GEMM default macros: offsets/alignment, UNROLL_M/UNROLL_N (8 for
 * S/D/Q, 4 for C/Z/X), P, Q and R defaults, and GETRF_FACTOR.
 * No name is defined twice within this group.
 * NOTE(review): the conditional guarding this group is not visible in this
 * excerpt -- confirm against the complete header.
 */
1222 #define GEMM_DEFAULT_OFFSET_A 0
1223 #define GEMM_DEFAULT_OFFSET_B 128
1224 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1226 #define SGEMM_DEFAULT_UNROLL_M 8
1227 #define SGEMM_DEFAULT_UNROLL_N 8
1228 #define DGEMM_DEFAULT_UNROLL_M 8
1229 #define DGEMM_DEFAULT_UNROLL_N 8
1230 #define QGEMM_DEFAULT_UNROLL_M 8
1231 #define QGEMM_DEFAULT_UNROLL_N 8
1232 #define CGEMM_DEFAULT_UNROLL_M 4
1233 #define CGEMM_DEFAULT_UNROLL_N 4
1234 #define ZGEMM_DEFAULT_UNROLL_M 4
1235 #define ZGEMM_DEFAULT_UNROLL_N 4
1236 #define XGEMM_DEFAULT_UNROLL_M 4
1237 #define XGEMM_DEFAULT_UNROLL_N 4
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not declared
 * in this excerpt; Q is the literal 1024 for every prefix. */
1239 #define SGEMM_DEFAULT_P sgemm_p
1240 #define DGEMM_DEFAULT_P dgemm_p
1241 #define QGEMM_DEFAULT_P qgemm_p
1242 #define CGEMM_DEFAULT_P cgemm_p
1243 #define ZGEMM_DEFAULT_P zgemm_p
1244 #define XGEMM_DEFAULT_P xgemm_p
1246 #define SGEMM_DEFAULT_Q 1024
1247 #define DGEMM_DEFAULT_Q 1024
1248 #define QGEMM_DEFAULT_Q 1024
1249 #define CGEMM_DEFAULT_Q 1024
1250 #define ZGEMM_DEFAULT_Q 1024
1251 #define XGEMM_DEFAULT_Q 1024
1253 #define SGEMM_DEFAULT_R sgemm_r
1254 #define DGEMM_DEFAULT_R dgemm_r
1255 #define QGEMM_DEFAULT_R qgemm_r
1256 #define CGEMM_DEFAULT_R cgemm_r
1257 #define ZGEMM_DEFAULT_R zgemm_r
1258 #define XGEMM_DEFAULT_R xgemm_r
/* GETRF_FACTOR is a floating-point literal; its consumer is not visible in
 * this excerpt. */
1262 #define GETRF_FACTOR 0.65
/*
 * GEMM default macros selected when EV4, EV5 or EV6 is defined:
 * offsets/alignment, UNROLL_M/UNROLL_N, and three P/Q sets. Only the S, D, C
 * and Z prefixes are defined here (no Q or X).
 * NOTE(review): the matching #endif is not visible in this excerpt.
 */
1266 #if defined(EV4) || defined(EV5) || defined(EV6)
1276 #define GEMM_DEFAULT_OFFSET_A 512
1277 #define GEMM_DEFAULT_OFFSET_B 512
1278 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1280 #define SGEMM_DEFAULT_UNROLL_M 4
1281 #define SGEMM_DEFAULT_UNROLL_N 4
1282 #define DGEMM_DEFAULT_UNROLL_M 4
1283 #define DGEMM_DEFAULT_UNROLL_N 4
1284 #define CGEMM_DEFAULT_UNROLL_M 2
1285 #define CGEMM_DEFAULT_UNROLL_N 2
1286 #define ZGEMM_DEFAULT_UNROLL_M 2
1287 #define ZGEMM_DEFAULT_UNROLL_N 2
/* First set: literal P, Q and R for each prefix. This is the only set in the
 * group that defines R. */
1292 #define SGEMM_DEFAULT_P 32
1293 #define SGEMM_DEFAULT_Q 112
1294 #define SGEMM_DEFAULT_R 256
1296 #define DGEMM_DEFAULT_P 32
1297 #define DGEMM_DEFAULT_Q 56
1298 #define DGEMM_DEFAULT_R 256
1300 #define CGEMM_DEFAULT_P 32
1301 #define CGEMM_DEFAULT_Q 64
1302 #define CGEMM_DEFAULT_R 240
1304 #define ZGEMM_DEFAULT_P 32
1305 #define ZGEMM_DEFAULT_Q 32
1306 #define ZGEMM_DEFAULT_R 240
/* NOTE(review): the next two sets define P and Q again (no R). The three sets
 * are presumably selected by nested conditionals not shown in this excerpt,
 * possibly one per EV4/EV5/EV6 -- confirm against the complete header. */
1310 #define SGEMM_DEFAULT_P 64
1311 #define SGEMM_DEFAULT_Q 256
1313 #define DGEMM_DEFAULT_P 64
1314 #define DGEMM_DEFAULT_Q 128
1316 #define CGEMM_DEFAULT_P 64
1317 #define CGEMM_DEFAULT_Q 128
1319 #define ZGEMM_DEFAULT_P 64
1320 #define ZGEMM_DEFAULT_Q 64
1324 #define SGEMM_DEFAULT_P 256
1325 #define SGEMM_DEFAULT_Q 512
1327 #define DGEMM_DEFAULT_P 256
1328 #define DGEMM_DEFAULT_Q 256
1330 #define CGEMM_DEFAULT_P 256
1331 #define CGEMM_DEFAULT_Q 256
1333 #define ZGEMM_DEFAULT_P 128
1334 #define ZGEMM_DEFAULT_Q 256
/*
 * Parameter group: A offset 0, B offset 8192, align value 0x0ffff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1344 #define GEMM_DEFAULT_OFFSET_A 0
1345 #define GEMM_DEFAULT_OFFSET_B 8192
1346 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors are asymmetric: S is 16x4, C is 8x2, D is 4x4, Z is 2x2. */
1348 #define SGEMM_DEFAULT_UNROLL_M 16
1349 #define SGEMM_DEFAULT_UNROLL_N 4
1350 #define DGEMM_DEFAULT_UNROLL_M 4
1351 #define DGEMM_DEFAULT_UNROLL_N 4
1352 #define CGEMM_DEFAULT_UNROLL_M 8
1353 #define CGEMM_DEFAULT_UNROLL_N 2
1354 #define ZGEMM_DEFAULT_UNROLL_M 2
1355 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for all four prefixes. */
1357 #define SGEMM_DEFAULT_P 128
1358 #define DGEMM_DEFAULT_P 128
1359 #define CGEMM_DEFAULT_P 128
1360 #define ZGEMM_DEFAULT_P 128
/* Q varies per prefix; no R default is defined within these lines. */
1362 #define SGEMM_DEFAULT_Q 512
1363 #define DGEMM_DEFAULT_Q 256
1364 #define CGEMM_DEFAULT_Q 256
1365 #define ZGEMM_DEFAULT_Q 128
/*
 * Parameter group: A offset 0, B offset 1024, align value 0x0ffff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1371 #define GEMM_DEFAULT_OFFSET_A 0
1372 #define GEMM_DEFAULT_OFFSET_B 1024
1373 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors are asymmetric: S is 16x4, C is 8x2, D is 4x4, Z is 2x2. */
1375 #define SGEMM_DEFAULT_UNROLL_M 16
1376 #define SGEMM_DEFAULT_UNROLL_N 4
1377 #define DGEMM_DEFAULT_UNROLL_M 4
1378 #define DGEMM_DEFAULT_UNROLL_N 4
1379 #define CGEMM_DEFAULT_UNROLL_M 8
1380 #define CGEMM_DEFAULT_UNROLL_N 2
1381 #define ZGEMM_DEFAULT_UNROLL_M 2
1382 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P varies per prefix (256 / 128 / 128 / 64). */
1384 #define SGEMM_DEFAULT_P 256
1385 #define DGEMM_DEFAULT_P 128
1386 #define CGEMM_DEFAULT_P 128
1387 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for all four prefixes; no R default is defined within these lines. */
1389 #define SGEMM_DEFAULT_Q 256
1390 #define DGEMM_DEFAULT_Q 256
1391 #define CGEMM_DEFAULT_Q 256
1392 #define ZGEMM_DEFAULT_Q 256
/*
 * Parameter group: A offset 2688, B offset 3072, align value 0x03fff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1402 #define GEMM_DEFAULT_OFFSET_A 2688
1403 #define GEMM_DEFAULT_OFFSET_B 3072
1404 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors are asymmetric: S is 16x4, C is 8x2, D is 4x4, Z is 2x2. */
1406 #define SGEMM_DEFAULT_UNROLL_M 16
1407 #define SGEMM_DEFAULT_UNROLL_N 4
1408 #define DGEMM_DEFAULT_UNROLL_M 4
1409 #define DGEMM_DEFAULT_UNROLL_N 4
1410 #define CGEMM_DEFAULT_UNROLL_M 8
1411 #define CGEMM_DEFAULT_UNROLL_N 2
1412 #define ZGEMM_DEFAULT_UNROLL_M 2
1413 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * P is chosen by comparing L2_SIZE against 1024976.
 * NOTE(review): 1024976 is not a power of two (1 MiB is 1048576), so an
 * L2_SIZE of exactly 1 MiB would not match; confirm the constant is intended.
 * The #else / #endif around the two tables below are not visible here.
 */
1416 #if L2_SIZE == 1024976
/* Table used when the comparison above holds. */
1417 #define SGEMM_DEFAULT_P 320
1418 #define DGEMM_DEFAULT_P 256
1419 #define CGEMM_DEFAULT_P 256
1420 #define ZGEMM_DEFAULT_P 256
/* Second table: P is 176 for all prefixes (presumably the #else branch). */
1422 #define SGEMM_DEFAULT_P 176
1423 #define DGEMM_DEFAULT_P 176
1424 #define CGEMM_DEFAULT_P 176
1425 #define ZGEMM_DEFAULT_P 176
/* Q varies per prefix; no R default is defined within these lines. */
1429 #define SGEMM_DEFAULT_Q 512
1430 #define DGEMM_DEFAULT_Q 256
1431 #define CGEMM_DEFAULT_Q 256
1432 #define ZGEMM_DEFAULT_Q 128
/*
 * Parameter group: both offsets are written as (32 * 0), i.e. 0; align
 * value 0x0ffff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1443 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1444 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1445 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 4x4 for S/D, 2x2 for C/Z. */
1447 #define SGEMM_DEFAULT_UNROLL_M 4
1448 #define SGEMM_DEFAULT_UNROLL_N 4
1449 #define DGEMM_DEFAULT_UNROLL_M 4
1450 #define DGEMM_DEFAULT_UNROLL_N 4
1451 #define CGEMM_DEFAULT_UNROLL_M 2
1452 #define CGEMM_DEFAULT_UNROLL_N 2
1453 #define ZGEMM_DEFAULT_UNROLL_M 2
1454 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for all four prefixes. */
1456 #define SGEMM_DEFAULT_P 512
1457 #define DGEMM_DEFAULT_P 512
1458 #define CGEMM_DEFAULT_P 512
1459 #define ZGEMM_DEFAULT_P 512
/* Q varies per prefix. */
1461 #define SGEMM_DEFAULT_Q 1024
1462 #define DGEMM_DEFAULT_Q 512
1463 #define CGEMM_DEFAULT_Q 512
1464 #define ZGEMM_DEFAULT_Q 256
/* R is defined as the matching P macro, so it tracks whatever P expands to. */
1466 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
1467 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
1468 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
1469 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * Parameter group: both offsets are written as (32 * 0), i.e. 0; align
 * value 0x0ffff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1479 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1480 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1481 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 8x4 for S/D, 4x2 for C/Z. */
1483 #define SGEMM_DEFAULT_UNROLL_M 8
1484 #define SGEMM_DEFAULT_UNROLL_N 4
1485 #define DGEMM_DEFAULT_UNROLL_M 8
1486 #define DGEMM_DEFAULT_UNROLL_N 4
1487 #define CGEMM_DEFAULT_UNROLL_M 4
1488 #define CGEMM_DEFAULT_UNROLL_N 2
1489 #define ZGEMM_DEFAULT_UNROLL_M 4
1490 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for all four prefixes. */
1492 #define SGEMM_DEFAULT_P 128
1493 #define DGEMM_DEFAULT_P 128
1494 #define CGEMM_DEFAULT_P 128
1495 #define ZGEMM_DEFAULT_P 128
/*
 * Two Q tables follow and redefine the same names.
 * NOTE(review): the directive that chooses between them is not visible
 * here; confirm the condition before relying on either table.
 *
 * Table 1: larger values.
 */
1497 #define SGEMM_DEFAULT_Q 4096
1498 #define DGEMM_DEFAULT_Q 3072
1499 #define CGEMM_DEFAULT_Q 2048
1500 #define ZGEMM_DEFAULT_Q 1024
/* Table 2: smaller values. */
1502 #define SGEMM_DEFAULT_Q 512
1503 #define DGEMM_DEFAULT_Q 256
1504 #define CGEMM_DEFAULT_Q 256
1505 #define ZGEMM_DEFAULT_Q 128
/*
 * Settings that apply when any of POWER3, POWER4 or POWER5 is defined. The
 * matching #endif and any nested per-CPU directives are not visible here.
 */
1513 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
1514 #define GEMM_DEFAULT_OFFSET_A 0
1515 #define GEMM_DEFAULT_OFFSET_B 2048
1516 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll factors: 4x4 for S/D, 2x2 for C/Z. */
1518 #define SGEMM_DEFAULT_UNROLL_M 4
1519 #define SGEMM_DEFAULT_UNROLL_N 4
1520 #define DGEMM_DEFAULT_UNROLL_M 4
1521 #define DGEMM_DEFAULT_UNROLL_N 4
1522 #define CGEMM_DEFAULT_UNROLL_M 2
1523 #define CGEMM_DEFAULT_UNROLL_N 2
1524 #define ZGEMM_DEFAULT_UNROLL_M 2
1525 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * Literal P/Q/R table for S, D and Z.
 * NOTE(review): this table has no CGEMM entries, unlike the P tables
 * below; confirm that is intentional. The directive selecting this table
 * is not visible here.
 */
1532 #define SGEMM_DEFAULT_P 256
1533 #define SGEMM_DEFAULT_Q 432
1534 #define SGEMM_DEFAULT_R 1012
1536 #define DGEMM_DEFAULT_P 256
1537 #define DGEMM_DEFAULT_Q 216
1538 #define DGEMM_DEFAULT_R 1012
1540 #define ZGEMM_DEFAULT_P 256
1541 #define ZGEMM_DEFAULT_Q 104
1542 #define ZGEMM_DEFAULT_R 1012
/*
 * First ALLOC_HUGETLB-dependent P table: 184 when ALLOC_HUGETLB is defined.
 * The second set (144) presumably sits under an #else that is not visible.
 */
1546 #ifdef ALLOC_HUGETLB
1547 #define SGEMM_DEFAULT_P 184
1548 #define DGEMM_DEFAULT_P 184
1549 #define CGEMM_DEFAULT_P 184
1550 #define ZGEMM_DEFAULT_P 184
1552 #define SGEMM_DEFAULT_P 144
1553 #define DGEMM_DEFAULT_P 144
1554 #define CGEMM_DEFAULT_P 144
1555 #define ZGEMM_DEFAULT_P 144
/*
 * Second ALLOC_HUGETLB-dependent P table, with per-prefix values
 * (512/256/256/128 when defined; 320/160/160/80 presumably otherwise).
 * NOTE(review): what distinguishes this table from the one above is not
 * visible in this span.
 */
1560 #ifdef ALLOC_HUGETLB
1561 #define SGEMM_DEFAULT_P 512
1562 #define DGEMM_DEFAULT_P 256
1563 #define CGEMM_DEFAULT_P 256
1564 #define ZGEMM_DEFAULT_P 128
1566 #define SGEMM_DEFAULT_P 320
1567 #define DGEMM_DEFAULT_P 160
1568 #define CGEMM_DEFAULT_P 160
1569 #define ZGEMM_DEFAULT_P 80
/* Q is 256 for S, C, D and Z (note the S/C/D/Z ordering of these lines). */
1572 #define SGEMM_DEFAULT_Q 256
1573 #define CGEMM_DEFAULT_Q 256
1574 #define DGEMM_DEFAULT_Q 256
1575 #define ZGEMM_DEFAULT_Q 256
/*
 * Parameter group: A offset 384, B offset 1024, align value 0x03fff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1587 #define GEMM_DEFAULT_OFFSET_A 384
1588 #define GEMM_DEFAULT_OFFSET_B 1024
1589 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S/D; C/Z use M=2 with N=4. */
1591 #define SGEMM_DEFAULT_UNROLL_M 4
1592 #define SGEMM_DEFAULT_UNROLL_N 4
1593 #define DGEMM_DEFAULT_UNROLL_M 4
1594 #define DGEMM_DEFAULT_UNROLL_N 4
1595 #define CGEMM_DEFAULT_UNROLL_M 2
1596 #define CGEMM_DEFAULT_UNROLL_N 4
1597 #define ZGEMM_DEFAULT_UNROLL_M 2
1598 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P varies per prefix (992 / 480 / 488 / 248). */
1600 #define SGEMM_DEFAULT_P 992
1601 #define DGEMM_DEFAULT_P 480
1602 #define CGEMM_DEFAULT_P 488
1603 #define ZGEMM_DEFAULT_P 248
/* Q is 504 for S/D and 400 for C/Z; no R default is defined within these lines. */
1605 #define SGEMM_DEFAULT_Q 504
1606 #define DGEMM_DEFAULT_Q 504
1607 #define CGEMM_DEFAULT_Q 400
1608 #define ZGEMM_DEFAULT_Q 400
/*
 * Settings that apply when both SPARC and V7 are defined. The matching
 * #endif is not visible in this span.
 */
1614 #if defined(SPARC) && defined(V7)
1619 #define GEMM_DEFAULT_OFFSET_A 0
1620 #define GEMM_DEFAULT_OFFSET_B 2048
1621 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2x8 for S/D, 1x4 for C/Z. */
1623 #define SGEMM_DEFAULT_UNROLL_M 2
1624 #define SGEMM_DEFAULT_UNROLL_N 8
1625 #define DGEMM_DEFAULT_UNROLL_M 2
1626 #define DGEMM_DEFAULT_UNROLL_N 8
1627 #define CGEMM_DEFAULT_UNROLL_M 1
1628 #define CGEMM_DEFAULT_UNROLL_N 4
1629 #define ZGEMM_DEFAULT_UNROLL_M 1
1630 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 256 for all four prefixes. */
1632 #define SGEMM_DEFAULT_P 256
1633 #define DGEMM_DEFAULT_P 256
1634 #define CGEMM_DEFAULT_P 256
1635 #define ZGEMM_DEFAULT_P 256
/* Q varies per prefix; no R default is defined within these lines. */
1637 #define SGEMM_DEFAULT_Q 512
1638 #define DGEMM_DEFAULT_Q 256
1639 #define CGEMM_DEFAULT_Q 256
1640 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM_THREAD expands to the identifier gemm_thread_mn, which is not
 * defined in this span.
 * NOTE(review): presumably still inside the SPARC/V7 conditional; confirm.
 */
1643 #define GEMM_THREAD gemm_thread_mn
/*
 * Settings that apply when SPARC and V9 are both defined, or when
 * __sparc_v9__ is defined. The matching #endif is not visible in this span.
 */
1646 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
1651 #define GEMM_DEFAULT_OFFSET_A 0
1652 #define GEMM_DEFAULT_OFFSET_B 2048
1653 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 4x4 for S/D, 2x2 for C/Z. */
1655 #define SGEMM_DEFAULT_UNROLL_M 4
1656 #define SGEMM_DEFAULT_UNROLL_N 4
1657 #define DGEMM_DEFAULT_UNROLL_M 4
1658 #define DGEMM_DEFAULT_UNROLL_N 4
1659 #define CGEMM_DEFAULT_UNROLL_M 2
1660 #define CGEMM_DEFAULT_UNROLL_N 2
1661 #define ZGEMM_DEFAULT_UNROLL_M 2
1662 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for all four prefixes. */
1664 #define SGEMM_DEFAULT_P 512
1665 #define DGEMM_DEFAULT_P 512
1666 #define CGEMM_DEFAULT_P 512
1667 #define ZGEMM_DEFAULT_P 512
/* Q varies per prefix; no R default is defined within these lines. */
1669 #define SGEMM_DEFAULT_Q 1024
1670 #define DGEMM_DEFAULT_Q 512
1671 #define CGEMM_DEFAULT_Q 512
1672 #define ZGEMM_DEFAULT_Q 256
/*
 * Parameter group: both offsets 0, align value 0x03fff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1682 #define GEMM_DEFAULT_OFFSET_A 0
1683 #define GEMM_DEFAULT_OFFSET_B 0
1684 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2x8 for S/D, 1x4 for C/Z. */
1686 #define SGEMM_DEFAULT_UNROLL_M 2
1687 #define SGEMM_DEFAULT_UNROLL_N 8
1688 #define DGEMM_DEFAULT_UNROLL_M 2
1689 #define DGEMM_DEFAULT_UNROLL_N 8
1690 #define CGEMM_DEFAULT_UNROLL_M 1
1691 #define CGEMM_DEFAULT_UNROLL_N 4
1692 #define ZGEMM_DEFAULT_UNROLL_M 1
1693 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 108 for S/C and 112 for D/Z. */
1695 #define SGEMM_DEFAULT_P 108
1696 #define DGEMM_DEFAULT_P 112
1697 #define CGEMM_DEFAULT_P 108
1698 #define ZGEMM_DEFAULT_P 112
/* Q varies per prefix. */
1700 #define SGEMM_DEFAULT_Q 288
1701 #define DGEMM_DEFAULT_Q 144
1702 #define CGEMM_DEFAULT_Q 144
1703 #define ZGEMM_DEFAULT_Q 72
/* R is the literal 2000 for all four prefixes. */
1705 #define SGEMM_DEFAULT_R 2000
1706 #define DGEMM_DEFAULT_R 2000
1707 #define CGEMM_DEFAULT_R 2000
1708 #define ZGEMM_DEFAULT_R 2000
/*
 * Parameter group that, per the original note on the next line, started as
 * a copy of the SICORTEX settings.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1714 ////Copy from SICORTEX
1718 #define GEMM_DEFAULT_OFFSET_A 0
1719 #define GEMM_DEFAULT_OFFSET_B 0
1720 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: S is 8x4, D is 4x4, C is 4x2, Z is 2x2. */
1722 #define SGEMM_DEFAULT_UNROLL_M 8
1723 #define SGEMM_DEFAULT_UNROLL_N 4
1725 #define DGEMM_DEFAULT_UNROLL_M 4
1726 #define DGEMM_DEFAULT_UNROLL_N 4
1728 #define CGEMM_DEFAULT_UNROLL_M 4
1729 #define CGEMM_DEFAULT_UNROLL_N 2
1731 #define ZGEMM_DEFAULT_UNROLL_M 2
1732 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P varies per prefix (64 / 44 / 64 / 32). */
1734 #define SGEMM_DEFAULT_P 64
1735 #define DGEMM_DEFAULT_P 44
1736 #define CGEMM_DEFAULT_P 64
1737 #define ZGEMM_DEFAULT_P 32
/* Q varies per prefix (192 / 92 / 128 / 80). */
1739 #define SGEMM_DEFAULT_Q 192
1740 #define DGEMM_DEFAULT_Q 92
1741 #define CGEMM_DEFAULT_Q 128
1742 #define ZGEMM_DEFAULT_Q 80
/*
 * R is the literal 640 for S/C/Z, but DGEMM_DEFAULT_R expands to the
 * identifier dgemm_r, which is not defined in this span.
 * NOTE(review): confirm the mixed literal/identifier form is intentional.
 */
1744 #define SGEMM_DEFAULT_R 640
1745 #define DGEMM_DEFAULT_R dgemm_r
1746 #define CGEMM_DEFAULT_R 640
1747 #define ZGEMM_DEFAULT_R 640
/*
 * Additional offsets: 0x10000 (64 KiB) and 0x100000 (1 MiB).
 * NOTE(review): how these differ in use from GEMM_DEFAULT_OFFSET_A/B is
 * not shown in this span.
 */
1749 #define GEMM_OFFSET_A1 0x10000
1750 #define GEMM_OFFSET_B1 0x100000
/*
 * Parameter group: both offsets 0, align value 0x03fff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1759 #define GEMM_DEFAULT_OFFSET_A 0
1760 #define GEMM_DEFAULT_OFFSET_B 0
1761 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors are 2x2 for all four prefixes. */
1763 #define SGEMM_DEFAULT_UNROLL_M 2
1764 #define SGEMM_DEFAULT_UNROLL_N 2
1766 #define DGEMM_DEFAULT_UNROLL_M 2
1767 #define DGEMM_DEFAULT_UNROLL_N 2
1769 #define CGEMM_DEFAULT_UNROLL_M 2
1770 #define CGEMM_DEFAULT_UNROLL_N 2
1772 #define ZGEMM_DEFAULT_UNROLL_M 2
1773 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P varies per prefix (64 / 24 / 24 / 20). */
1775 #define SGEMM_DEFAULT_P 64
1776 #define DGEMM_DEFAULT_P 24
1777 #define CGEMM_DEFAULT_P 24
1778 #define ZGEMM_DEFAULT_P 20
/* Q varies per prefix (192 / 128 / 128 / 64). */
1780 #define SGEMM_DEFAULT_Q 192
1781 #define DGEMM_DEFAULT_Q 128
1782 #define CGEMM_DEFAULT_Q 128
1783 #define ZGEMM_DEFAULT_Q 64
/* R is the literal 512 for all four prefixes. */
1785 #define SGEMM_DEFAULT_R 512
1786 #define DGEMM_DEFAULT_R 512
1787 #define CGEMM_DEFAULT_R 512
1788 #define ZGEMM_DEFAULT_R 512
/*
 * Additional offsets: 0x10000 (64 KiB) and 0x100000 (1 MiB).
 * NOTE(review): how these differ in use from GEMM_DEFAULT_OFFSET_A/B is
 * not shown in this span.
 */
1790 #define GEMM_OFFSET_A1 0x10000
1791 #define GEMM_OFFSET_B1 0x100000
/*
 * Parameter group covering all six prefixes (S/D/Q/C/Z/X): both offsets 0,
 * align value 0x0ffff.
 * NOTE(review): the directive that selects this group is not visible in
 * this span; confirm which target it belongs to.
 */
1801 #define GEMM_DEFAULT_OFFSET_A 0
1802 #define GEMM_DEFAULT_OFFSET_B 0
1803 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* UNROLL_N is listed first here, ahead of the UNROLL_M tables. */
1805 #define SGEMM_DEFAULT_UNROLL_N 4
1806 #define DGEMM_DEFAULT_UNROLL_N 4
1807 #define QGEMM_DEFAULT_UNROLL_N 2
1808 #define CGEMM_DEFAULT_UNROLL_N 2
1809 #define ZGEMM_DEFAULT_UNROLL_N 2
1810 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * Two UNROLL_M tables follow and redefine the same names.
 * NOTE(review): the directive that chooses between them is not visible
 * here; confirm the condition before relying on either table.
 *
 * Table 1: smaller S/D/C/Z values (4 / 2 / 2 / 1).
 */
1813 #define SGEMM_DEFAULT_UNROLL_M 4
1814 #define DGEMM_DEFAULT_UNROLL_M 2
1815 #define QGEMM_DEFAULT_UNROLL_M 2
1816 #define CGEMM_DEFAULT_UNROLL_M 2
1817 #define ZGEMM_DEFAULT_UNROLL_M 1
1818 #define XGEMM_DEFAULT_UNROLL_M 1
/* Table 2: S/D/C/Z doubled (8 / 4 / 4 / 2); Q and X are the same as table 1. */
1820 #define SGEMM_DEFAULT_UNROLL_M 8
1821 #define DGEMM_DEFAULT_UNROLL_M 4
1822 #define QGEMM_DEFAULT_UNROLL_M 2
1823 #define CGEMM_DEFAULT_UNROLL_M 4
1824 #define ZGEMM_DEFAULT_UNROLL_M 2
1825 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * P and R expand to lower-case identifiers (sgemm_p, sgemm_r, ...) rather
 * than literals; those identifiers are not defined in this span.
 */
1828 #define SGEMM_DEFAULT_P sgemm_p
1829 #define DGEMM_DEFAULT_P dgemm_p
1830 #define QGEMM_DEFAULT_P qgemm_p
1831 #define CGEMM_DEFAULT_P cgemm_p
1832 #define ZGEMM_DEFAULT_P zgemm_p
1833 #define XGEMM_DEFAULT_P xgemm_p
1835 #define SGEMM_DEFAULT_R sgemm_r
1836 #define DGEMM_DEFAULT_R dgemm_r
1837 #define QGEMM_DEFAULT_R qgemm_r
1838 #define CGEMM_DEFAULT_R cgemm_r
1839 #define ZGEMM_DEFAULT_R zgemm_r
1840 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the literal 128 for every prefix. */
1842 #define SGEMM_DEFAULT_Q 128
1843 #define DGEMM_DEFAULT_Q 128
1844 #define QGEMM_DEFAULT_Q 128
1845 #define CGEMM_DEFAULT_Q 128
1846 #define ZGEMM_DEFAULT_Q 128
1847 #define XGEMM_DEFAULT_Q 128
/*
 * Fallbacks: give the Q and X unroll factors a value of 2 when nothing
 * earlier has defined them. Each #ifndef guards the single #define that
 * follows it; the closing #endif lines are not visible in this span.
 */
1853 #ifndef QGEMM_DEFAULT_UNROLL_M
1854 #define QGEMM_DEFAULT_UNROLL_M 2
1857 #ifndef QGEMM_DEFAULT_UNROLL_N
1858 #define QGEMM_DEFAULT_UNROLL_N 2
1861 #ifndef XGEMM_DEFAULT_UNROLL_M
1862 #define XGEMM_DEFAULT_UNROLL_M 2
1865 #ifndef XGEMM_DEFAULT_UNROLL_N
1866 #define XGEMM_DEFAULT_UNROLL_N 2
/*
 * Instruction-prefix macros. Each expansion is a mnemonic plus an immediate
 * and ends with a trailing comma, so the remaining operands are presumably
 * written after the macro at the point of use.
 *
 * Two SHUFPD_n tables follow and redefine the same names.
 * NOTE(review): the directives that choose between them are not visible
 * here; confirm the condition before relying on either table.
 *
 * Table 1: SHUFPD_n spelled with shufps and immediates 0x44/0x4e/0xe4/0xee.
 */
1870 #define SHUFPD_0 shufps $0x44,
1871 #define SHUFPD_1 shufps $0x4e,
1872 #define SHUFPD_2 shufps $0xe4,
1873 #define SHUFPD_3 shufps $0xee,
/* Table 2: SHUFPD_n spelled with shufpd and immediates 0 through 3. */
1877 #define SHUFPD_0 shufpd $0,
1881 #define SHUFPD_1 shufpd $1,
1885 #define SHUFPD_2 shufpd $2,
1889 #define SHUFPD_3 shufpd $3,
/* shufps with immediate 0x39. */
1893 #define SHUFPS_39 shufps $0x39,