1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
16 3. Neither the name of the ISCAS nor the names of its contributors may
17 be used to endorse or promote products derived from this software
18 without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 **********************************************************************************/
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin. */
35 /* All rights reserved. */
37 /* Redistribution and use in source and binary forms, with or */
38 /* without modification, are permitted provided that the following */
39 /* conditions are met: */
41 /* 1. Redistributions of source code must retain the above */
42 /* copyright notice, this list of conditions and the following */
/* disclaimer. */
45 /* 2. Redistributions in binary form must reproduce the above */
46 /* copyright notice, this list of conditions and the following */
47 /* disclaimer in the documentation and/or other materials */
48 /* provided with the distribution. */
50 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
51 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
52 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
53 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
54 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
55 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
56 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
57 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
58 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
59 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
60 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
61 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
62 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
63 /* POSSIBILITY OF SUCH DAMAGE. */
65 /* The views and conclusions contained in the software and */
66 /* documentation are those of the authors and should not be */
67 /* interpreted as representing official policies, either expressed */
68 /* or implied, of The University of Texas at Austin. */
69 /*********************************************************************/
/*
 * Parameter set: buffer offsets/alignment, then per-precision prefix
 * (S/D/Q/C/Z/X) UNROLL_N, UNROLL_M, P, R and Q defaults.
 * NOTE(review): the #if that selects this set is not visible in this view.
 */
79 #define GEMM_DEFAULT_OFFSET_A 64
80 #define GEMM_DEFAULT_OFFSET_B 256
81 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
83 #define SGEMM_DEFAULT_UNROLL_N 4
84 #define DGEMM_DEFAULT_UNROLL_N 4
85 #define QGEMM_DEFAULT_UNROLL_N 2
86 #define CGEMM_DEFAULT_UNROLL_N 2
87 #define ZGEMM_DEFAULT_UNROLL_N 2
88 #define XGEMM_DEFAULT_UNROLL_N 1
/* First UNROLL_M group. */
91 #define SGEMM_DEFAULT_UNROLL_M 4
92 #define DGEMM_DEFAULT_UNROLL_M 2
93 #define QGEMM_DEFAULT_UNROLL_M 2
94 #define CGEMM_DEFAULT_UNROLL_M 2
95 #define ZGEMM_DEFAULT_UNROLL_M 1
96 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * Second UNROLL_M group: same six names with different values, so an
 * #if/#else presumably separates the two groups (not visible here) --
 * TODO confirm which condition selects which.
 */
98 #define SGEMM_DEFAULT_UNROLL_M 8
99 #define DGEMM_DEFAULT_UNROLL_M 4
100 #define QGEMM_DEFAULT_UNROLL_M 2
101 #define CGEMM_DEFAULT_UNROLL_M 4
102 #define ZGEMM_DEFAULT_UNROLL_M 2
103 #define XGEMM_DEFAULT_UNROLL_M 1
/* P and R expand to identifiers (sgemm_p, sgemm_r, ...), not literals;
 * those names are defined elsewhere. */
106 #define SGEMM_DEFAULT_P sgemm_p
107 #define DGEMM_DEFAULT_P dgemm_p
108 #define QGEMM_DEFAULT_P qgemm_p
109 #define CGEMM_DEFAULT_P cgemm_p
110 #define ZGEMM_DEFAULT_P zgemm_p
111 #define XGEMM_DEFAULT_P xgemm_p
113 #define SGEMM_DEFAULT_R sgemm_r
114 #define DGEMM_DEFAULT_R dgemm_r
115 #define QGEMM_DEFAULT_R qgemm_r
116 #define CGEMM_DEFAULT_R cgemm_r
117 #define ZGEMM_DEFAULT_R zgemm_r
118 #define XGEMM_DEFAULT_R xgemm_r
/* Q is given twice (248, then 240); presumably two branches of an elided
 * conditional -- TODO confirm. */
122 #define SGEMM_DEFAULT_Q 248
123 #define DGEMM_DEFAULT_Q 248
124 #define QGEMM_DEFAULT_Q 248
125 #define CGEMM_DEFAULT_Q 248
126 #define ZGEMM_DEFAULT_Q 248
127 #define XGEMM_DEFAULT_Q 248
131 #define SGEMM_DEFAULT_Q 240
132 #define DGEMM_DEFAULT_Q 240
133 #define QGEMM_DEFAULT_Q 240
134 #define CGEMM_DEFAULT_Q 240
135 #define ZGEMM_DEFAULT_Q 240
136 #define XGEMM_DEFAULT_Q 240
/* Feature flag with no value; consumers are outside this view. */
142 #define HAVE_EXCLUSIVE_CACHE
/*
 * Parameter set selected when BARCELONA, SHANGHAI or BOBCAT is defined.
 * NOTE(review): the matching #endif and any inner #if/#else lines are not
 * visible in this view.
 */
146 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
151 #define GEMM_DEFAULT_OFFSET_A 64
152 #define GEMM_DEFAULT_OFFSET_B 832
153 #define GEMM_DEFAULT_ALIGN 0x0fffUL
155 #define SGEMM_DEFAULT_UNROLL_N 4
156 #define DGEMM_DEFAULT_UNROLL_N 4
157 #define QGEMM_DEFAULT_UNROLL_N 2
158 #define CGEMM_DEFAULT_UNROLL_N 2
159 #define ZGEMM_DEFAULT_UNROLL_N 2
160 #define XGEMM_DEFAULT_UNROLL_N 1
/* UNROLL_M appears twice with different values; an #if/#else is presumably
 * elided between the groups -- TODO confirm. */
163 #define SGEMM_DEFAULT_UNROLL_M 4
164 #define DGEMM_DEFAULT_UNROLL_M 2
165 #define QGEMM_DEFAULT_UNROLL_M 2
166 #define CGEMM_DEFAULT_UNROLL_M 2
167 #define ZGEMM_DEFAULT_UNROLL_M 1
168 #define XGEMM_DEFAULT_UNROLL_M 1
170 #define SGEMM_DEFAULT_UNROLL_M 8
171 #define DGEMM_DEFAULT_UNROLL_M 4
172 #define QGEMM_DEFAULT_UNROLL_M 2
173 #define CGEMM_DEFAULT_UNROLL_M 4
174 #define ZGEMM_DEFAULT_UNROLL_M 2
175 #define XGEMM_DEFAULT_UNROLL_M 1
/* First P/Q pairing: literal P values with Q fixed at 248. */
179 #define SGEMM_DEFAULT_P 496
180 #define DGEMM_DEFAULT_P 248
181 #define QGEMM_DEFAULT_P 124
182 #define CGEMM_DEFAULT_P 248
183 #define ZGEMM_DEFAULT_P 124
184 #define XGEMM_DEFAULT_P 62
186 #define SGEMM_DEFAULT_Q 248
187 #define DGEMM_DEFAULT_Q 248
188 #define QGEMM_DEFAULT_Q 248
189 #define CGEMM_DEFAULT_Q 248
190 #define ZGEMM_DEFAULT_Q 248
191 #define XGEMM_DEFAULT_Q 248
/* Second P/Q pairing (Q fixed at 224); presumably the other branch of an
 * elided conditional -- TODO confirm. */
195 #define SGEMM_DEFAULT_P 448
196 #define DGEMM_DEFAULT_P 224
197 #define QGEMM_DEFAULT_P 112
198 #define CGEMM_DEFAULT_P 224
199 #define ZGEMM_DEFAULT_P 112
200 #define XGEMM_DEFAULT_P 56
202 #define SGEMM_DEFAULT_Q 224
203 #define DGEMM_DEFAULT_Q 224
204 #define QGEMM_DEFAULT_Q 224
205 #define CGEMM_DEFAULT_Q 224
206 #define ZGEMM_DEFAULT_Q 224
207 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers defined elsewhere, not literals. */
211 #define SGEMM_DEFAULT_R sgemm_r
212 #define QGEMM_DEFAULT_R qgemm_r
213 #define DGEMM_DEFAULT_R dgemm_r
214 #define CGEMM_DEFAULT_R cgemm_r
215 #define ZGEMM_DEFAULT_R zgemm_r
216 #define XGEMM_DEFAULT_R xgemm_r
219 #define HAVE_EXCLUSIVE_CACHE
/* GEMM_THREAD maps to gemm_thread_mn, which is defined outside this view. */
221 #define GEMM_THREAD gemm_thread_mn
/*
 * Parameter set that also defines CGEMM3M/ZGEMM3M unroll factors and
 * GEMV_UNROLL. NOTE(review): the #if that selects this set is not visible
 * in this view.
 */
231 #define GEMM_DEFAULT_OFFSET_A 64
232 #define GEMM_DEFAULT_OFFSET_B 832
233 #define GEMM_DEFAULT_ALIGN 0x0fffUL
237 #define QGEMM_DEFAULT_UNROLL_N 2
238 #define CGEMM_DEFAULT_UNROLL_N 2
239 #define ZGEMM_DEFAULT_UNROLL_N 2
240 #define XGEMM_DEFAULT_UNROLL_N 1
/* First S/D UNROLL_N plus full UNROLL_M group. */
243 #define SGEMM_DEFAULT_UNROLL_N 4
244 #define DGEMM_DEFAULT_UNROLL_N 4
245 #define SGEMM_DEFAULT_UNROLL_M 4
246 #define DGEMM_DEFAULT_UNROLL_M 2
247 #define QGEMM_DEFAULT_UNROLL_M 2
248 #define CGEMM_DEFAULT_UNROLL_M 2
249 #define ZGEMM_DEFAULT_UNROLL_M 1
250 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second group redefines the same names (and adds the 3M and GEMV
 * entries); presumably the other branch of an elided #if/#else -- TODO
 * confirm. */
252 #define SGEMM_DEFAULT_UNROLL_N 2
253 #define DGEMM_DEFAULT_UNROLL_N 2
254 #define SGEMM_DEFAULT_UNROLL_M 16
255 #define DGEMM_DEFAULT_UNROLL_M 8
256 #define QGEMM_DEFAULT_UNROLL_M 2
257 #define CGEMM_DEFAULT_UNROLL_M 4
258 #define ZGEMM_DEFAULT_UNROLL_M 2
259 #define XGEMM_DEFAULT_UNROLL_M 1
260 #define CGEMM3M_DEFAULT_UNROLL_N 4
261 #define CGEMM3M_DEFAULT_UNROLL_M 8
262 #define ZGEMM3M_DEFAULT_UNROLL_N 4
263 #define ZGEMM3M_DEFAULT_UNROLL_M 4
264 #define GEMV_UNROLL 8
/* S/D P values when ARCH_X86_64 is defined; the 448/224 pair that follows
 * is presumably the #else branch (the #else/#endif lines are not visible). */
268 #if defined(ARCH_X86_64)
269 #define SGEMM_DEFAULT_P 768
270 #define DGEMM_DEFAULT_P 384
272 #define SGEMM_DEFAULT_P 448
273 #define DGEMM_DEFAULT_P 224
275 #define QGEMM_DEFAULT_P 112
276 #define CGEMM_DEFAULT_P 224
277 #define ZGEMM_DEFAULT_P 112
278 #define XGEMM_DEFAULT_P 56
/* Same pattern for S/D Q: 168 under ARCH_X86_64, 224 presumably otherwise. */
280 #if defined(ARCH_X86_64)
281 #define SGEMM_DEFAULT_Q 168
282 #define DGEMM_DEFAULT_Q 168
284 #define SGEMM_DEFAULT_Q 224
285 #define DGEMM_DEFAULT_Q 224
287 #define QGEMM_DEFAULT_Q 224
288 #define CGEMM_DEFAULT_Q 224
289 #define ZGEMM_DEFAULT_Q 224
290 #define XGEMM_DEFAULT_Q 224
/* R expands to identifiers defined elsewhere, not literals. */
292 #define SGEMM_DEFAULT_R sgemm_r
293 #define QGEMM_DEFAULT_R qgemm_r
294 #define DGEMM_DEFAULT_R dgemm_r
295 #define CGEMM_DEFAULT_R cgemm_r
296 #define ZGEMM_DEFAULT_R zgemm_r
297 #define XGEMM_DEFAULT_R xgemm_r
300 #define HAVE_EXCLUSIVE_CACHE
302 #define GEMM_THREAD gemm_thread_mn
/*
 * Parameter set with a single value per macro (no duplicated groups).
 * NOTE(review): the #if that selects this set is not visible in this view.
 */
311 #define GEMM_DEFAULT_OFFSET_A 0
312 #define GEMM_DEFAULT_OFFSET_B 384
313 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
315 #define SGEMM_DEFAULT_UNROLL_N 4
316 #define DGEMM_DEFAULT_UNROLL_N 4
317 #define QGEMM_DEFAULT_UNROLL_N 2
318 #define CGEMM_DEFAULT_UNROLL_N 2
319 #define ZGEMM_DEFAULT_UNROLL_N 2
320 #define XGEMM_DEFAULT_UNROLL_N 1
322 #define SGEMM_DEFAULT_UNROLL_M 2
323 #define DGEMM_DEFAULT_UNROLL_M 1
324 #define QGEMM_DEFAULT_UNROLL_M 2
325 #define CGEMM_DEFAULT_UNROLL_M 1
326 #define ZGEMM_DEFAULT_UNROLL_M 1
327 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers defined elsewhere; P and Q below are literals. */
329 #define SGEMM_DEFAULT_R sgemm_r
330 #define DGEMM_DEFAULT_R dgemm_r
331 #define QGEMM_DEFAULT_R qgemm_r
332 #define CGEMM_DEFAULT_R cgemm_r
333 #define ZGEMM_DEFAULT_R zgemm_r
334 #define XGEMM_DEFAULT_R xgemm_r
336 #define SGEMM_DEFAULT_P 208
337 #define DGEMM_DEFAULT_P 104
338 #define QGEMM_DEFAULT_P 56
339 #define CGEMM_DEFAULT_P 104
340 #define ZGEMM_DEFAULT_P 56
341 #define XGEMM_DEFAULT_P 28
343 #define SGEMM_DEFAULT_Q 208
344 #define DGEMM_DEFAULT_Q 208
345 #define QGEMM_DEFAULT_Q 208
346 #define CGEMM_DEFAULT_Q 208
347 #define ZGEMM_DEFAULT_Q 208
348 #define XGEMM_DEFAULT_Q 208
351 #define HAVE_EXCLUSIVE_CACHE
/*
 * Parameter set with a single value per macro; P is 128 for every
 * precision prefix while Q varies.
 * NOTE(review): the #if that selects this set is not visible in this view.
 */
359 #define GEMM_DEFAULT_OFFSET_A 0
360 #define GEMM_DEFAULT_OFFSET_B 256
361 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
363 #define SGEMM_DEFAULT_UNROLL_N 4
364 #define DGEMM_DEFAULT_UNROLL_N 4
365 #define QGEMM_DEFAULT_UNROLL_N 2
366 #define CGEMM_DEFAULT_UNROLL_N 2
367 #define ZGEMM_DEFAULT_UNROLL_N 2
368 #define XGEMM_DEFAULT_UNROLL_N 1
370 #define SGEMM_DEFAULT_UNROLL_M 2
371 #define DGEMM_DEFAULT_UNROLL_M 1
372 #define QGEMM_DEFAULT_UNROLL_M 2
373 #define CGEMM_DEFAULT_UNROLL_M 1
374 #define ZGEMM_DEFAULT_UNROLL_M 1
375 #define XGEMM_DEFAULT_UNROLL_M 1
/* R expands to identifiers defined elsewhere. */
377 #define SGEMM_DEFAULT_R sgemm_r
378 #define DGEMM_DEFAULT_R dgemm_r
379 #define QGEMM_DEFAULT_R qgemm_r
380 #define CGEMM_DEFAULT_R cgemm_r
381 #define ZGEMM_DEFAULT_R zgemm_r
382 #define XGEMM_DEFAULT_R xgemm_r
384 #define SGEMM_DEFAULT_P 128
385 #define DGEMM_DEFAULT_P 128
386 #define QGEMM_DEFAULT_P 128
387 #define CGEMM_DEFAULT_P 128
388 #define ZGEMM_DEFAULT_P 128
389 #define XGEMM_DEFAULT_P 128
391 #define SGEMM_DEFAULT_Q 512
392 #define DGEMM_DEFAULT_Q 256
393 #define QGEMM_DEFAULT_Q 256
394 #define CGEMM_DEFAULT_Q 256
395 #define ZGEMM_DEFAULT_Q 128
396 #define XGEMM_DEFAULT_Q 128
/*
 * Parameter set with two complete UNROLL_N/UNROLL_M groups.
 * NOTE(review): neither the #if that selects this set nor the conditional
 * separating the two unroll groups is visible in this view.
 */
406 #define GEMM_DEFAULT_OFFSET_A 64
407 #define GEMM_DEFAULT_OFFSET_B 256
408 #define GEMM_DEFAULT_ALIGN 0x01ffffUL
/* First unroll group. */
411 #define SGEMM_DEFAULT_UNROLL_N 4
412 #define DGEMM_DEFAULT_UNROLL_N 4
413 #define QGEMM_DEFAULT_UNROLL_N 2
414 #define CGEMM_DEFAULT_UNROLL_N 2
415 #define ZGEMM_DEFAULT_UNROLL_N 2
416 #define XGEMM_DEFAULT_UNROLL_N 1
418 #define SGEMM_DEFAULT_UNROLL_M 4
419 #define DGEMM_DEFAULT_UNROLL_M 2
420 #define QGEMM_DEFAULT_UNROLL_M 2
421 #define CGEMM_DEFAULT_UNROLL_M 2
422 #define ZGEMM_DEFAULT_UNROLL_M 1
423 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second unroll group: same names, different values; presumably the other
 * branch of an elided #if/#else -- TODO confirm. */
425 #define SGEMM_DEFAULT_UNROLL_N 8
426 #define DGEMM_DEFAULT_UNROLL_N 4
427 #define QGEMM_DEFAULT_UNROLL_N 2
428 #define CGEMM_DEFAULT_UNROLL_N 4
429 #define ZGEMM_DEFAULT_UNROLL_N 2
430 #define XGEMM_DEFAULT_UNROLL_N 1
432 #define SGEMM_DEFAULT_UNROLL_M 4
433 #define DGEMM_DEFAULT_UNROLL_M 4
434 #define QGEMM_DEFAULT_UNROLL_M 2
435 #define CGEMM_DEFAULT_UNROLL_M 2
436 #define ZGEMM_DEFAULT_UNROLL_M 2
437 #define XGEMM_DEFAULT_UNROLL_M 1
/* P is the literal 288 for every precision prefix. */
440 #define SGEMM_DEFAULT_P 288
441 #define DGEMM_DEFAULT_P 288
442 #define QGEMM_DEFAULT_P 288
443 #define CGEMM_DEFAULT_P 288
444 #define ZGEMM_DEFAULT_P 288
445 #define XGEMM_DEFAULT_P 288
/* R expands to identifiers defined elsewhere. */
447 #define SGEMM_DEFAULT_R sgemm_r
448 #define DGEMM_DEFAULT_R dgemm_r
449 #define QGEMM_DEFAULT_R qgemm_r
450 #define CGEMM_DEFAULT_R cgemm_r
451 #define ZGEMM_DEFAULT_R zgemm_r
452 #define XGEMM_DEFAULT_R xgemm_r
454 #define SGEMM_DEFAULT_Q 256
455 #define DGEMM_DEFAULT_Q 128
456 #define QGEMM_DEFAULT_Q 64
457 #define CGEMM_DEFAULT_Q 128
458 #define ZGEMM_DEFAULT_Q 64
459 #define XGEMM_DEFAULT_Q 32
462 #define HAVE_EXCLUSIVE_CACHE
/*
 * Parameter set selected when PENTIUM, PENTIUM2 or PENTIUM3 is defined.
 * NOTE(review): the matching #endif and any inner conditionals are not
 * visible in this view.
 */
466 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
475 #define GEMM_DEFAULT_OFFSET_A 0
476 #define GEMM_DEFAULT_OFFSET_B 0
477 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* SGEMM/CGEMM UNROLL_M are given twice (8/4, then 4/2); an #if/#else is
 * presumably elided between the pairs -- TODO confirm. */
480 #define SGEMM_DEFAULT_UNROLL_M 8
481 #define CGEMM_DEFAULT_UNROLL_M 4
483 #define SGEMM_DEFAULT_UNROLL_M 4
484 #define CGEMM_DEFAULT_UNROLL_M 2
486 #define DGEMM_DEFAULT_UNROLL_M 2
487 #define SGEMM_DEFAULT_UNROLL_N 2
488 #define DGEMM_DEFAULT_UNROLL_N 2
489 #define QGEMM_DEFAULT_UNROLL_M 2
490 #define QGEMM_DEFAULT_UNROLL_N 2
491 #define CGEMM_DEFAULT_UNROLL_N 1
492 #define ZGEMM_DEFAULT_UNROLL_M 1
493 #define ZGEMM_DEFAULT_UNROLL_N 1
494 #define XGEMM_DEFAULT_UNROLL_M 1
495 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per precision: P and R expand to identifiers defined elsewhere; Q is the
 * literal 256 throughout. */
497 #define SGEMM_DEFAULT_P sgemm_p
498 #define SGEMM_DEFAULT_Q 256
499 #define SGEMM_DEFAULT_R sgemm_r
501 #define DGEMM_DEFAULT_P dgemm_p
502 #define DGEMM_DEFAULT_Q 256
503 #define DGEMM_DEFAULT_R dgemm_r
505 #define QGEMM_DEFAULT_P qgemm_p
506 #define QGEMM_DEFAULT_Q 256
507 #define QGEMM_DEFAULT_R qgemm_r
509 #define CGEMM_DEFAULT_P cgemm_p
510 #define CGEMM_DEFAULT_Q 256
511 #define CGEMM_DEFAULT_R cgemm_r
513 #define ZGEMM_DEFAULT_P zgemm_p
514 #define ZGEMM_DEFAULT_Q 256
515 #define ZGEMM_DEFAULT_R zgemm_r
517 #define XGEMM_DEFAULT_P xgemm_p
518 #define XGEMM_DEFAULT_Q 256
519 #define XGEMM_DEFAULT_R xgemm_r
/*
 * Parameter set with two interleaved UNROLL_M/UNROLL_N groups.
 * NOTE(review): neither the #if that selects this set nor the conditional
 * separating the two unroll groups is visible in this view.
 */
530 #define GEMM_DEFAULT_OFFSET_A 0
531 #define GEMM_DEFAULT_OFFSET_B 0
532 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First unroll group. */
535 #define SGEMM_DEFAULT_UNROLL_M 4
536 #define SGEMM_DEFAULT_UNROLL_N 4
537 #define DGEMM_DEFAULT_UNROLL_M 2
538 #define DGEMM_DEFAULT_UNROLL_N 4
539 #define QGEMM_DEFAULT_UNROLL_M 2
540 #define QGEMM_DEFAULT_UNROLL_N 2
541 #define CGEMM_DEFAULT_UNROLL_M 2
542 #define CGEMM_DEFAULT_UNROLL_N 2
543 #define ZGEMM_DEFAULT_UNROLL_M 1
544 #define ZGEMM_DEFAULT_UNROLL_N 2
545 #define XGEMM_DEFAULT_UNROLL_M 1
546 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll group: same names, different values; presumably the other
 * branch of an elided #if/#else -- TODO confirm. */
548 #define SGEMM_DEFAULT_UNROLL_M 8
549 #define SGEMM_DEFAULT_UNROLL_N 2
550 #define DGEMM_DEFAULT_UNROLL_M 2
551 #define DGEMM_DEFAULT_UNROLL_N 2
552 #define QGEMM_DEFAULT_UNROLL_M 2
553 #define QGEMM_DEFAULT_UNROLL_N 2
554 #define CGEMM_DEFAULT_UNROLL_M 4
555 #define CGEMM_DEFAULT_UNROLL_N 1
556 #define ZGEMM_DEFAULT_UNROLL_M 1
557 #define ZGEMM_DEFAULT_UNROLL_N 1
558 #define XGEMM_DEFAULT_UNROLL_M 1
559 #define XGEMM_DEFAULT_UNROLL_N 1
/* Per precision: P and R expand to identifiers defined elsewhere; Q is the
 * literal 256 throughout. */
563 #define SGEMM_DEFAULT_P sgemm_p
564 #define SGEMM_DEFAULT_Q 256
565 #define SGEMM_DEFAULT_R sgemm_r
567 #define DGEMM_DEFAULT_P dgemm_p
568 #define DGEMM_DEFAULT_Q 256
569 #define DGEMM_DEFAULT_R dgemm_r
571 #define QGEMM_DEFAULT_P qgemm_p
572 #define QGEMM_DEFAULT_Q 256
573 #define QGEMM_DEFAULT_R qgemm_r
575 #define CGEMM_DEFAULT_P cgemm_p
576 #define CGEMM_DEFAULT_Q 256
577 #define CGEMM_DEFAULT_R cgemm_r
579 #define ZGEMM_DEFAULT_P zgemm_p
580 #define ZGEMM_DEFAULT_Q 256
581 #define ZGEMM_DEFAULT_R zgemm_r
583 #define XGEMM_DEFAULT_P xgemm_p
584 #define XGEMM_DEFAULT_Q 256
585 #define XGEMM_DEFAULT_R xgemm_r
/*
 * Parameter set selected when CORE_NORTHWOOD is defined; each macro is
 * given once. NOTE(review): the matching #endif is not visible in this view.
 */
590 #ifdef CORE_NORTHWOOD
595 #define GEMM_DEFAULT_OFFSET_A 0
596 #define GEMM_DEFAULT_OFFSET_B 32
598 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
602 #define SGEMM_DEFAULT_UNROLL_M 8
603 #define DGEMM_DEFAULT_UNROLL_M 4
604 #define QGEMM_DEFAULT_UNROLL_M 2
605 #define CGEMM_DEFAULT_UNROLL_M 4
606 #define ZGEMM_DEFAULT_UNROLL_M 2
607 #define XGEMM_DEFAULT_UNROLL_M 1
609 #define SGEMM_DEFAULT_UNROLL_N 2
610 #define DGEMM_DEFAULT_UNROLL_N 2
611 #define QGEMM_DEFAULT_UNROLL_N 2
612 #define CGEMM_DEFAULT_UNROLL_N 1
613 #define ZGEMM_DEFAULT_UNROLL_N 1
614 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers defined elsewhere; Q is the literal 128. */
616 #define SGEMM_DEFAULT_P sgemm_p
617 #define SGEMM_DEFAULT_R sgemm_r
619 #define DGEMM_DEFAULT_P dgemm_p
620 #define DGEMM_DEFAULT_R dgemm_r
622 #define QGEMM_DEFAULT_P qgemm_p
623 #define QGEMM_DEFAULT_R qgemm_r
625 #define CGEMM_DEFAULT_P cgemm_p
626 #define CGEMM_DEFAULT_R cgemm_r
628 #define ZGEMM_DEFAULT_P zgemm_p
629 #define ZGEMM_DEFAULT_R zgemm_r
631 #define XGEMM_DEFAULT_P xgemm_p
632 #define XGEMM_DEFAULT_R xgemm_r
634 #define SGEMM_DEFAULT_Q 128
635 #define DGEMM_DEFAULT_Q 128
636 #define QGEMM_DEFAULT_Q 128
637 #define CGEMM_DEFAULT_Q 128
638 #define ZGEMM_DEFAULT_Q 128
639 #define XGEMM_DEFAULT_Q 128
/*
 * Parameter set in which the buffer offsets and UNROLL_M are each given
 * twice. NOTE(review): neither the #if that selects this set nor the
 * conditionals separating the duplicated groups are visible in this view.
 */
/* Two OFFSET_A/OFFSET_B pairs (128/192, then 0/256); presumably branches
 * of an elided #if/#else -- TODO confirm. */
648 #define GEMM_DEFAULT_OFFSET_A 128
649 #define GEMM_DEFAULT_OFFSET_B 192
651 #define GEMM_DEFAULT_OFFSET_A 0
652 #define GEMM_DEFAULT_OFFSET_B 256
655 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Two UNROLL_M groups with different values; same caveat as above. */
660 #define SGEMM_DEFAULT_UNROLL_M 4
661 #define DGEMM_DEFAULT_UNROLL_M 2
662 #define QGEMM_DEFAULT_UNROLL_M 2
663 #define CGEMM_DEFAULT_UNROLL_M 2
664 #define ZGEMM_DEFAULT_UNROLL_M 1
665 #define XGEMM_DEFAULT_UNROLL_M 1
667 #define SGEMM_DEFAULT_UNROLL_M 8
668 #define DGEMM_DEFAULT_UNROLL_M 4
669 #define QGEMM_DEFAULT_UNROLL_M 2
670 #define CGEMM_DEFAULT_UNROLL_M 4
671 #define ZGEMM_DEFAULT_UNROLL_M 2
672 #define XGEMM_DEFAULT_UNROLL_M 1
675 #define SGEMM_DEFAULT_UNROLL_N 4
676 #define DGEMM_DEFAULT_UNROLL_N 4
677 #define QGEMM_DEFAULT_UNROLL_N 2
678 #define CGEMM_DEFAULT_UNROLL_N 2
679 #define ZGEMM_DEFAULT_UNROLL_N 2
680 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers defined elsewhere; Q is the literal 128. */
682 #define SGEMM_DEFAULT_P sgemm_p
683 #define SGEMM_DEFAULT_R sgemm_r
685 #define DGEMM_DEFAULT_P dgemm_p
686 #define DGEMM_DEFAULT_R dgemm_r
688 #define QGEMM_DEFAULT_P qgemm_p
689 #define QGEMM_DEFAULT_R qgemm_r
691 #define CGEMM_DEFAULT_P cgemm_p
692 #define CGEMM_DEFAULT_R cgemm_r
694 #define ZGEMM_DEFAULT_P zgemm_p
695 #define ZGEMM_DEFAULT_R zgemm_r
697 #define XGEMM_DEFAULT_P xgemm_p
698 #define XGEMM_DEFAULT_R xgemm_r
700 #define SGEMM_DEFAULT_Q 128
701 #define DGEMM_DEFAULT_Q 128
702 #define QGEMM_DEFAULT_Q 128
703 #define CGEMM_DEFAULT_Q 128
704 #define ZGEMM_DEFAULT_Q 128
705 #define XGEMM_DEFAULT_Q 128
/*
 * Parameter set that also defines SWITCH_RATIO and the MASK helper macro.
 * NOTE(review): neither the #if that selects this set nor the conditional
 * separating the two unroll groups is visible in this view.
 */
713 #define GEMM_DEFAULT_OFFSET_A 448
714 #define GEMM_DEFAULT_OFFSET_B 128
715 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* SWITCH_RATIO is consumed outside this view. */
719 #define SWITCH_RATIO 4
/* First unroll group. */
722 #define SGEMM_DEFAULT_UNROLL_M 8
723 #define DGEMM_DEFAULT_UNROLL_M 4
724 #define QGEMM_DEFAULT_UNROLL_M 2
725 #define CGEMM_DEFAULT_UNROLL_M 4
726 #define ZGEMM_DEFAULT_UNROLL_M 2
727 #define XGEMM_DEFAULT_UNROLL_M 1
729 #define SGEMM_DEFAULT_UNROLL_N 2
730 #define DGEMM_DEFAULT_UNROLL_N 2
731 #define QGEMM_DEFAULT_UNROLL_N 2
732 #define CGEMM_DEFAULT_UNROLL_N 1
733 #define ZGEMM_DEFAULT_UNROLL_N 1
734 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * MASK(a, b): computes ((a + b - 1) / b) * b, i.e. with integer division
 * and non-negative operands it rounds a up to a multiple of b. The
 * argument b is expanded three times, so avoid passing expressions with
 * side effects.
 */
736 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
/* Second unroll group: same names, UNROLL_N values differ; presumably the
 * other branch of an elided #if/#else -- TODO confirm. */
739 #define SGEMM_DEFAULT_UNROLL_M 8
740 #define DGEMM_DEFAULT_UNROLL_M 4
741 #define QGEMM_DEFAULT_UNROLL_M 2
742 #define CGEMM_DEFAULT_UNROLL_M 4
743 #define ZGEMM_DEFAULT_UNROLL_M 2
744 #define XGEMM_DEFAULT_UNROLL_M 1
746 #define SGEMM_DEFAULT_UNROLL_N 4
747 #define DGEMM_DEFAULT_UNROLL_N 4
748 #define QGEMM_DEFAULT_UNROLL_N 2
749 #define CGEMM_DEFAULT_UNROLL_N 2
750 #define ZGEMM_DEFAULT_UNROLL_N 2
751 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers defined elsewhere; Q is the literal 256. */
754 #define SGEMM_DEFAULT_P sgemm_p
755 #define SGEMM_DEFAULT_R sgemm_r
757 #define DGEMM_DEFAULT_P dgemm_p
758 #define DGEMM_DEFAULT_R dgemm_r
760 #define QGEMM_DEFAULT_P qgemm_p
761 #define QGEMM_DEFAULT_R qgemm_r
763 #define CGEMM_DEFAULT_P cgemm_p
764 #define CGEMM_DEFAULT_R cgemm_r
766 #define ZGEMM_DEFAULT_P zgemm_p
767 #define ZGEMM_DEFAULT_R zgemm_r
769 #define XGEMM_DEFAULT_P xgemm_p
770 #define XGEMM_DEFAULT_R xgemm_r
772 #define SGEMM_DEFAULT_Q 256
773 #define DGEMM_DEFAULT_Q 256
774 #define QGEMM_DEFAULT_Q 256
775 #define CGEMM_DEFAULT_Q 256
776 #define ZGEMM_DEFAULT_Q 256
777 #define XGEMM_DEFAULT_Q 256
/*
 * Parameter set that also defines SWITCH_RATIO and GETRF_FACTOR.
 * NOTE(review): neither the #if that selects this set nor the conditional
 * separating the two unroll groups is visible in this view.
 */
786 #define GEMM_DEFAULT_OFFSET_A 128
787 #define GEMM_DEFAULT_OFFSET_B 0
788 #define GEMM_DEFAULT_ALIGN 0x03fffUL
792 #define SWITCH_RATIO 4
/* First unroll group. */
795 #define SGEMM_DEFAULT_UNROLL_M 4
796 #define DGEMM_DEFAULT_UNROLL_M 2
797 #define QGEMM_DEFAULT_UNROLL_M 2
798 #define CGEMM_DEFAULT_UNROLL_M 2
799 #define ZGEMM_DEFAULT_UNROLL_M 1
800 #define XGEMM_DEFAULT_UNROLL_M 1
802 #define SGEMM_DEFAULT_UNROLL_N 4
803 #define DGEMM_DEFAULT_UNROLL_N 4
804 #define QGEMM_DEFAULT_UNROLL_N 2
805 #define CGEMM_DEFAULT_UNROLL_N 2
806 #define ZGEMM_DEFAULT_UNROLL_N 2
807 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll group: UNROLL_M values differ, UNROLL_N values repeat;
 * presumably the other branch of an elided #if/#else -- TODO confirm. */
809 #define SGEMM_DEFAULT_UNROLL_M 8
810 #define DGEMM_DEFAULT_UNROLL_M 4
811 #define QGEMM_DEFAULT_UNROLL_M 2
812 #define CGEMM_DEFAULT_UNROLL_M 4
813 #define ZGEMM_DEFAULT_UNROLL_M 2
814 #define XGEMM_DEFAULT_UNROLL_M 1
816 #define SGEMM_DEFAULT_UNROLL_N 4
817 #define DGEMM_DEFAULT_UNROLL_N 4
818 #define QGEMM_DEFAULT_UNROLL_N 2
819 #define CGEMM_DEFAULT_UNROLL_N 2
820 #define ZGEMM_DEFAULT_UNROLL_N 2
821 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers defined elsewhere; Q values are literals. */
824 #define SGEMM_DEFAULT_P sgemm_p
825 #define SGEMM_DEFAULT_R sgemm_r
827 #define DGEMM_DEFAULT_P dgemm_p
828 #define DGEMM_DEFAULT_R dgemm_r
830 #define QGEMM_DEFAULT_P qgemm_p
831 #define QGEMM_DEFAULT_R qgemm_r
833 #define CGEMM_DEFAULT_P cgemm_p
834 #define CGEMM_DEFAULT_R cgemm_r
836 #define ZGEMM_DEFAULT_P zgemm_p
837 #define ZGEMM_DEFAULT_R zgemm_r
839 #define XGEMM_DEFAULT_P xgemm_p
840 #define XGEMM_DEFAULT_R xgemm_r
842 #define SGEMM_DEFAULT_Q 512
843 #define DGEMM_DEFAULT_Q 256
844 #define QGEMM_DEFAULT_Q 128
845 #define CGEMM_DEFAULT_Q 512
846 #define ZGEMM_DEFAULT_Q 256
847 #define XGEMM_DEFAULT_Q 128
/* Floating-point constant; its consumer is outside this view. */
849 #define GETRF_FACTOR 0.75
/*
 * Parameter set that also defines SWITCH_RATIO, GETRF_FACTOR and
 * GEMM_THREAD. NOTE(review): neither the #if that selects this set nor the
 * conditional separating the two unroll groups is visible in this view.
 */
857 #define GEMM_DEFAULT_OFFSET_A 128
858 #define GEMM_DEFAULT_OFFSET_B 0
859 #define GEMM_DEFAULT_ALIGN 0x03fffUL
863 #define SWITCH_RATIO 4
/* First unroll group. */
866 #define SGEMM_DEFAULT_UNROLL_M 4
867 #define DGEMM_DEFAULT_UNROLL_M 2
868 #define QGEMM_DEFAULT_UNROLL_M 2
869 #define CGEMM_DEFAULT_UNROLL_M 2
870 #define ZGEMM_DEFAULT_UNROLL_M 1
871 #define XGEMM_DEFAULT_UNROLL_M 1
873 #define SGEMM_DEFAULT_UNROLL_N 4
874 #define DGEMM_DEFAULT_UNROLL_N 4
875 #define QGEMM_DEFAULT_UNROLL_N 2
876 #define CGEMM_DEFAULT_UNROLL_N 2
877 #define ZGEMM_DEFAULT_UNROLL_N 2
878 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll group: UNROLL_M values differ, UNROLL_N values repeat;
 * presumably the other branch of an elided #if/#else -- TODO confirm. */
880 #define SGEMM_DEFAULT_UNROLL_M 8
881 #define DGEMM_DEFAULT_UNROLL_M 4
882 #define QGEMM_DEFAULT_UNROLL_M 2
883 #define CGEMM_DEFAULT_UNROLL_M 4
884 #define ZGEMM_DEFAULT_UNROLL_M 2
885 #define XGEMM_DEFAULT_UNROLL_M 1
887 #define SGEMM_DEFAULT_UNROLL_N 4
888 #define DGEMM_DEFAULT_UNROLL_N 4
889 #define QGEMM_DEFAULT_UNROLL_N 2
890 #define CGEMM_DEFAULT_UNROLL_N 2
891 #define ZGEMM_DEFAULT_UNROLL_N 2
892 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers defined elsewhere; Q values are literals. */
895 #define SGEMM_DEFAULT_P sgemm_p
896 #define SGEMM_DEFAULT_R sgemm_r
898 #define DGEMM_DEFAULT_P dgemm_p
899 #define DGEMM_DEFAULT_R dgemm_r
901 #define QGEMM_DEFAULT_P qgemm_p
902 #define QGEMM_DEFAULT_R qgemm_r
904 #define CGEMM_DEFAULT_P cgemm_p
905 #define CGEMM_DEFAULT_R cgemm_r
907 #define ZGEMM_DEFAULT_P zgemm_p
908 #define ZGEMM_DEFAULT_R zgemm_r
910 #define XGEMM_DEFAULT_P xgemm_p
911 #define XGEMM_DEFAULT_R xgemm_r
913 #define SGEMM_DEFAULT_Q 768
914 #define DGEMM_DEFAULT_Q 384
915 #define QGEMM_DEFAULT_Q 192
916 #define CGEMM_DEFAULT_Q 768
917 #define ZGEMM_DEFAULT_Q 384
918 #define XGEMM_DEFAULT_Q 192
920 #define GETRF_FACTOR 0.75
/* GEMM_THREAD maps to gemm_thread_mn, which is defined outside this view. */
921 #define GEMM_THREAD gemm_thread_mn
/*
 * Parameter set with literal P values (504 for S/D/Q, 252 for C/Z/X) and
 * GETRF_FACTOR 0.72. NOTE(review): neither the #if that selects this set
 * nor the conditional separating the two unroll groups is visible in this
 * view.
 */
929 #define GEMM_DEFAULT_OFFSET_A 32
930 #define GEMM_DEFAULT_OFFSET_B 0
931 #define GEMM_DEFAULT_ALIGN 0x03fffUL
935 #define SWITCH_RATIO 4
/* First unroll group. */
938 #define SGEMM_DEFAULT_UNROLL_M 4
939 #define DGEMM_DEFAULT_UNROLL_M 2
940 #define QGEMM_DEFAULT_UNROLL_M 2
941 #define CGEMM_DEFAULT_UNROLL_M 2
942 #define ZGEMM_DEFAULT_UNROLL_M 1
943 #define XGEMM_DEFAULT_UNROLL_M 1
945 #define SGEMM_DEFAULT_UNROLL_N 4
946 #define DGEMM_DEFAULT_UNROLL_N 4
947 #define QGEMM_DEFAULT_UNROLL_N 2
948 #define CGEMM_DEFAULT_UNROLL_N 2
949 #define ZGEMM_DEFAULT_UNROLL_N 2
950 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll group: UNROLL_M values repeat, UNROLL_N values differ;
 * presumably the other branch of an elided #if/#else -- TODO confirm. */
952 #define SGEMM_DEFAULT_UNROLL_M 4
953 #define DGEMM_DEFAULT_UNROLL_M 2
954 #define QGEMM_DEFAULT_UNROLL_M 2
955 #define CGEMM_DEFAULT_UNROLL_M 2
956 #define ZGEMM_DEFAULT_UNROLL_M 1
957 #define XGEMM_DEFAULT_UNROLL_M 1
959 #define SGEMM_DEFAULT_UNROLL_N 8
960 #define DGEMM_DEFAULT_UNROLL_N 8
961 #define QGEMM_DEFAULT_UNROLL_N 2
962 #define CGEMM_DEFAULT_UNROLL_N 4
963 #define ZGEMM_DEFAULT_UNROLL_N 4
964 #define XGEMM_DEFAULT_UNROLL_N 1
/* P is a literal here; R still expands to identifiers defined elsewhere. */
967 #define SGEMM_DEFAULT_P 504
968 #define SGEMM_DEFAULT_R sgemm_r
970 #define DGEMM_DEFAULT_P 504
971 #define DGEMM_DEFAULT_R dgemm_r
973 #define QGEMM_DEFAULT_P 504
974 #define QGEMM_DEFAULT_R qgemm_r
976 #define CGEMM_DEFAULT_P 252
977 #define CGEMM_DEFAULT_R cgemm_r
979 #define ZGEMM_DEFAULT_P 252
980 #define ZGEMM_DEFAULT_R zgemm_r
982 #define XGEMM_DEFAULT_P 252
983 #define XGEMM_DEFAULT_R xgemm_r
985 #define SGEMM_DEFAULT_Q 512
986 #define DGEMM_DEFAULT_Q 256
987 #define QGEMM_DEFAULT_Q 128
988 #define CGEMM_DEFAULT_Q 512
989 #define ZGEMM_DEFAULT_Q 256
990 #define XGEMM_DEFAULT_Q 128
992 #define GETRF_FACTOR 0.72
/*
 * Parameter set with literal P values and GETRF_FACTOR 0.72.
 * NOTE(review): neither the #if that selects this set nor the conditional
 * separating the two unroll groups is visible in this view.
 */
1002 #define GEMM_DEFAULT_OFFSET_A 0
1003 #define GEMM_DEFAULT_OFFSET_B 0
1004 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1008 #define SWITCH_RATIO 4
/* First unroll group. */
1011 #define SGEMM_DEFAULT_UNROLL_M 4
1012 #define DGEMM_DEFAULT_UNROLL_M 2
1013 #define QGEMM_DEFAULT_UNROLL_M 2
1014 #define CGEMM_DEFAULT_UNROLL_M 2
1015 #define ZGEMM_DEFAULT_UNROLL_M 1
1016 #define XGEMM_DEFAULT_UNROLL_M 1
1018 #define SGEMM_DEFAULT_UNROLL_N 4
1019 #define DGEMM_DEFAULT_UNROLL_N 4
1020 #define QGEMM_DEFAULT_UNROLL_N 2
1021 #define CGEMM_DEFAULT_UNROLL_N 2
1022 #define ZGEMM_DEFAULT_UNROLL_N 2
1023 #define XGEMM_DEFAULT_UNROLL_N 1
/* Second unroll group: same names, different values; presumably the other
 * branch of an elided #if/#else -- TODO confirm. */
1025 #define SGEMM_DEFAULT_UNROLL_M 8
1026 #define DGEMM_DEFAULT_UNROLL_M 8
1027 #define QGEMM_DEFAULT_UNROLL_M 2
1028 #define CGEMM_DEFAULT_UNROLL_M 8
1029 #define ZGEMM_DEFAULT_UNROLL_M 4
1030 #define XGEMM_DEFAULT_UNROLL_M 1
1032 #define SGEMM_DEFAULT_UNROLL_N 8
1033 #define DGEMM_DEFAULT_UNROLL_N 4
1034 #define QGEMM_DEFAULT_UNROLL_N 2
1035 #define CGEMM_DEFAULT_UNROLL_N 4
1036 #define ZGEMM_DEFAULT_UNROLL_N 4
1037 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * P is a literal per precision. R expands to an identifier for every
 * precision except CGEMM, which is pinned to the literal 1024; the
 * commented-out lines are the unused alternatives left in place.
 * NOTE(review): confirm the CGEMM asymmetry is intentional.
 */
1040 #define SGEMM_DEFAULT_P 512
1041 #define SGEMM_DEFAULT_R sgemm_r
1042 //#define SGEMM_DEFAULT_R 1024
1044 #define DGEMM_DEFAULT_P 512
1045 #define DGEMM_DEFAULT_R dgemm_r
1046 //#define DGEMM_DEFAULT_R 1024
1048 #define QGEMM_DEFAULT_P 504
1049 #define QGEMM_DEFAULT_R qgemm_r
1051 #define CGEMM_DEFAULT_P 128
1052 //#define CGEMM_DEFAULT_R cgemm_r
1053 #define CGEMM_DEFAULT_R 1024
1055 #define ZGEMM_DEFAULT_P 512
1056 #define ZGEMM_DEFAULT_R zgemm_r
1057 //#define ZGEMM_DEFAULT_R 1024
1059 #define XGEMM_DEFAULT_P 252
1060 #define XGEMM_DEFAULT_R xgemm_r
1062 #define SGEMM_DEFAULT_Q 256
1063 #define DGEMM_DEFAULT_Q 256
1064 #define QGEMM_DEFAULT_Q 128
1065 #define CGEMM_DEFAULT_Q 256
1066 #define ZGEMM_DEFAULT_Q 192
1067 #define XGEMM_DEFAULT_Q 128
1069 #define GETRF_FACTOR 0.72
/*
 * Parameter set with two UNROLL_M groups and one UNROLL_N group.
 * NOTE(review): neither the #if that selects this set nor the conditional
 * separating the two UNROLL_M groups is visible in this view.
 */
1080 #define GEMM_DEFAULT_OFFSET_A 64
1081 #define GEMM_DEFAULT_OFFSET_B 0
1082 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* First UNROLL_M group. */
1087 #define SGEMM_DEFAULT_UNROLL_M 4
1088 #define DGEMM_DEFAULT_UNROLL_M 2
1089 #define QGEMM_DEFAULT_UNROLL_M 2
1090 #define CGEMM_DEFAULT_UNROLL_M 2
1091 #define ZGEMM_DEFAULT_UNROLL_M 1
1092 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M group; presumably the other branch of an elided
 * #if/#else -- TODO confirm. */
1094 #define SGEMM_DEFAULT_UNROLL_M 8
1095 #define DGEMM_DEFAULT_UNROLL_M 4
1096 #define QGEMM_DEFAULT_UNROLL_M 2
1097 #define CGEMM_DEFAULT_UNROLL_M 4
1098 #define ZGEMM_DEFAULT_UNROLL_M 2
1099 #define XGEMM_DEFAULT_UNROLL_M 1
1102 #define SGEMM_DEFAULT_UNROLL_N 4
1103 #define DGEMM_DEFAULT_UNROLL_N 2
1104 #define QGEMM_DEFAULT_UNROLL_N 2
1105 #define CGEMM_DEFAULT_UNROLL_N 2
1106 #define ZGEMM_DEFAULT_UNROLL_N 1
1107 #define XGEMM_DEFAULT_UNROLL_N 1
/* P and R expand to identifiers defined elsewhere; Q is the literal 256. */
1109 #define SGEMM_DEFAULT_P sgemm_p
1110 #define SGEMM_DEFAULT_R sgemm_r
1112 #define DGEMM_DEFAULT_P dgemm_p
1113 #define DGEMM_DEFAULT_R dgemm_r
1115 #define QGEMM_DEFAULT_P qgemm_p
1116 #define QGEMM_DEFAULT_R qgemm_r
1118 #define CGEMM_DEFAULT_P cgemm_p
1119 #define CGEMM_DEFAULT_R cgemm_r
1121 #define ZGEMM_DEFAULT_P zgemm_p
1122 #define ZGEMM_DEFAULT_R zgemm_r
1124 #define XGEMM_DEFAULT_P xgemm_p
1125 #define XGEMM_DEFAULT_R xgemm_r
1127 #define SGEMM_DEFAULT_Q 256
1128 #define DGEMM_DEFAULT_Q 256
1129 #define QGEMM_DEFAULT_Q 256
1130 #define CGEMM_DEFAULT_Q 256
1131 #define ZGEMM_DEFAULT_Q 256
1132 #define XGEMM_DEFAULT_Q 256
/*
 * Parameter set with a single value per macro: 8x8 unroll for S/D/Q,
 * 4x4 for C/Z/X, Q fixed at 1024, and GETRF_FACTOR 0.65.
 * NOTE(review): the #if that selects this set is not visible in this view.
 */
1142 #define GEMM_DEFAULT_OFFSET_A 0
1143 #define GEMM_DEFAULT_OFFSET_B 128
1144 #define GEMM_DEFAULT_ALIGN 0x03fffUL
1146 #define SGEMM_DEFAULT_UNROLL_M 8
1147 #define SGEMM_DEFAULT_UNROLL_N 8
1148 #define DGEMM_DEFAULT_UNROLL_M 8
1149 #define DGEMM_DEFAULT_UNROLL_N 8
1150 #define QGEMM_DEFAULT_UNROLL_M 8
1151 #define QGEMM_DEFAULT_UNROLL_N 8
1152 #define CGEMM_DEFAULT_UNROLL_M 4
1153 #define CGEMM_DEFAULT_UNROLL_N 4
1154 #define ZGEMM_DEFAULT_UNROLL_M 4
1155 #define ZGEMM_DEFAULT_UNROLL_N 4
1156 #define XGEMM_DEFAULT_UNROLL_M 4
1157 #define XGEMM_DEFAULT_UNROLL_N 4
/* P and R expand to identifiers defined elsewhere. */
1159 #define SGEMM_DEFAULT_P sgemm_p
1160 #define DGEMM_DEFAULT_P dgemm_p
1161 #define QGEMM_DEFAULT_P qgemm_p
1162 #define CGEMM_DEFAULT_P cgemm_p
1163 #define ZGEMM_DEFAULT_P zgemm_p
1164 #define XGEMM_DEFAULT_P xgemm_p
1166 #define SGEMM_DEFAULT_Q 1024
1167 #define DGEMM_DEFAULT_Q 1024
1168 #define QGEMM_DEFAULT_Q 1024
1169 #define CGEMM_DEFAULT_Q 1024
1170 #define ZGEMM_DEFAULT_Q 1024
1171 #define XGEMM_DEFAULT_Q 1024
1173 #define SGEMM_DEFAULT_R sgemm_r
1174 #define DGEMM_DEFAULT_R dgemm_r
1175 #define QGEMM_DEFAULT_R qgemm_r
1176 #define CGEMM_DEFAULT_R cgemm_r
1177 #define ZGEMM_DEFAULT_R zgemm_r
1178 #define XGEMM_DEFAULT_R xgemm_r
1182 #define GETRF_FACTOR 0.65
/*
 * Parameter set selected when EV4, EV5 or EV6 is defined. Only the S/D/C/Z
 * prefixes are configured here (no Q/X entries).
 * NOTE(review): the matching #endif and the conditionals separating the
 * three P/Q groups below are not visible in this view.
 */
1186 #if defined(EV4) || defined(EV5) || defined(EV6)
1196 #define GEMM_DEFAULT_OFFSET_A 512
1197 #define GEMM_DEFAULT_OFFSET_B 512
1198 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1200 #define SGEMM_DEFAULT_UNROLL_M 4
1201 #define SGEMM_DEFAULT_UNROLL_N 4
1202 #define DGEMM_DEFAULT_UNROLL_M 4
1203 #define DGEMM_DEFAULT_UNROLL_N 4
1204 #define CGEMM_DEFAULT_UNROLL_M 2
1205 #define CGEMM_DEFAULT_UNROLL_N 2
1206 #define ZGEMM_DEFAULT_UNROLL_M 2
1207 #define ZGEMM_DEFAULT_UNROLL_N 2
/* First P/Q group; the only one that also gives literal R values. */
1212 #define SGEMM_DEFAULT_P 32
1213 #define SGEMM_DEFAULT_Q 112
1214 #define SGEMM_DEFAULT_R 256
1216 #define DGEMM_DEFAULT_P 32
1217 #define DGEMM_DEFAULT_Q 56
1218 #define DGEMM_DEFAULT_R 256
1220 #define CGEMM_DEFAULT_P 32
1221 #define CGEMM_DEFAULT_Q 64
1222 #define CGEMM_DEFAULT_R 240
1224 #define ZGEMM_DEFAULT_P 32
1225 #define ZGEMM_DEFAULT_Q 32
1226 #define ZGEMM_DEFAULT_R 240
/* Second P/Q group (no R entries visible); presumably selected by an
 * elided conditional, possibly one per EV variant -- TODO confirm. */
1230 #define SGEMM_DEFAULT_P 64
1231 #define SGEMM_DEFAULT_Q 256
1233 #define DGEMM_DEFAULT_P 64
1234 #define DGEMM_DEFAULT_Q 128
1236 #define CGEMM_DEFAULT_P 64
1237 #define CGEMM_DEFAULT_Q 128
1239 #define ZGEMM_DEFAULT_P 64
1240 #define ZGEMM_DEFAULT_Q 64
/* Third P/Q group (no R entries visible); same caveat as above. */
1244 #define SGEMM_DEFAULT_P 256
1245 #define SGEMM_DEFAULT_Q 512
1247 #define DGEMM_DEFAULT_P 256
1248 #define DGEMM_DEFAULT_Q 256
1250 #define CGEMM_DEFAULT_P 256
1251 #define CGEMM_DEFAULT_Q 256
1253 #define ZGEMM_DEFAULT_P 128
1254 #define ZGEMM_DEFAULT_Q 256
/*
 * Parameter set covering only the S/D/C/Z prefixes, with literal P and Q
 * values and no R entries in view.
 * NOTE(review): the #if that selects this set is not visible in this view.
 */
1264 #define GEMM_DEFAULT_OFFSET_A 0
1265 #define GEMM_DEFAULT_OFFSET_B 8192
1266 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
1268 #define SGEMM_DEFAULT_UNROLL_M 16
1269 #define SGEMM_DEFAULT_UNROLL_N 4
1270 #define DGEMM_DEFAULT_UNROLL_M 4
1271 #define DGEMM_DEFAULT_UNROLL_N 4
1272 #define CGEMM_DEFAULT_UNROLL_M 8
1273 #define CGEMM_DEFAULT_UNROLL_N 2
1274 #define ZGEMM_DEFAULT_UNROLL_M 2
1275 #define ZGEMM_DEFAULT_UNROLL_N 2
1277 #define SGEMM_DEFAULT_P 128
1278 #define DGEMM_DEFAULT_P 128
1279 #define CGEMM_DEFAULT_P 128
1280 #define ZGEMM_DEFAULT_P 128
1282 #define SGEMM_DEFAULT_Q 512
1283 #define DGEMM_DEFAULT_Q 256
1284 #define CGEMM_DEFAULT_Q 256
1285 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 * No *_DEFAULT_R macro is defined in the visible lines of this section.
 */
1291 #define GEMM_DEFAULT_OFFSET_A 0
1292 #define GEMM_DEFAULT_OFFSET_B 1024
1293 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll values: SGEMM 16x4, DGEMM 4x4, CGEMM 8x2, ZGEMM 2x2. */
1295 #define SGEMM_DEFAULT_UNROLL_M 16
1296 #define SGEMM_DEFAULT_UNROLL_N 4
1297 #define DGEMM_DEFAULT_UNROLL_M 4
1298 #define DGEMM_DEFAULT_UNROLL_N 4
1299 #define CGEMM_DEFAULT_UNROLL_M 8
1300 #define CGEMM_DEFAULT_UNROLL_N 2
1301 #define ZGEMM_DEFAULT_UNROLL_M 2
1302 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix: 256 / 128 / 128 / 64. */
1304 #define SGEMM_DEFAULT_P 256
1305 #define DGEMM_DEFAULT_P 128
1306 #define CGEMM_DEFAULT_P 128
1307 #define ZGEMM_DEFAULT_P 64
/* Q is 256 for every prefix. */
1309 #define SGEMM_DEFAULT_Q 256
1310 #define DGEMM_DEFAULT_Q 256
1311 #define CGEMM_DEFAULT_Q 256
1312 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 * This section uses a smaller GEMM_DEFAULT_ALIGN value (0x03fff) and
 * chooses P based on L2_SIZE.
 */
1322 #define GEMM_DEFAULT_OFFSET_A 2688
1323 #define GEMM_DEFAULT_OFFSET_B 3072
1324 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll values: SGEMM 16x4, DGEMM 4x4, CGEMM 8x2, ZGEMM 2x2. */
1326 #define SGEMM_DEFAULT_UNROLL_M 16
1327 #define SGEMM_DEFAULT_UNROLL_N 4
1328 #define DGEMM_DEFAULT_UNROLL_M 4
1329 #define DGEMM_DEFAULT_UNROLL_N 4
1330 #define CGEMM_DEFAULT_UNROLL_M 8
1331 #define CGEMM_DEFAULT_UNROLL_N 2
1332 #define ZGEMM_DEFAULT_UNROLL_M 2
1333 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * P values used when L2_SIZE equals exactly 1024976.
 * NOTE(review): 1024976 is not 2^20 (1048576); confirm it matches the
 * L2_SIZE value the build configuration actually supplies before
 * "correcting" it.
 */
1336 #if L2_SIZE == 1024976
1337 #define SGEMM_DEFAULT_P 320
1338 #define DGEMM_DEFAULT_P 256
1339 #define CGEMM_DEFAULT_P 256
1340 #define ZGEMM_DEFAULT_P 256
/*
 * Alternative P values (176 for every prefix); presumably the #else branch
 * of the L2_SIZE test -- the directive itself is not visible here.
 */
1342 #define SGEMM_DEFAULT_P 176
1343 #define DGEMM_DEFAULT_P 176
1344 #define CGEMM_DEFAULT_P 176
1345 #define ZGEMM_DEFAULT_P 176
/* Q differs per prefix: 512 / 256 / 256 / 128. */
1349 #define SGEMM_DEFAULT_Q 512
1350 #define DGEMM_DEFAULT_Q 256
1351 #define CGEMM_DEFAULT_Q 256
1352 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
1363 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1364 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1365 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll values: 4x4 for S/D, 2x2 for C/Z. */
1367 #define SGEMM_DEFAULT_UNROLL_M 4
1368 #define SGEMM_DEFAULT_UNROLL_N 4
1369 #define DGEMM_DEFAULT_UNROLL_M 4
1370 #define DGEMM_DEFAULT_UNROLL_N 4
1371 #define CGEMM_DEFAULT_UNROLL_M 2
1372 #define CGEMM_DEFAULT_UNROLL_N 2
1373 #define ZGEMM_DEFAULT_UNROLL_M 2
1374 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
1376 #define SGEMM_DEFAULT_P 512
1377 #define DGEMM_DEFAULT_P 512
1378 #define CGEMM_DEFAULT_P 512
1379 #define ZGEMM_DEFAULT_P 512
/* Q differs per prefix: 1024 / 512 / 512 / 256. */
1381 #define SGEMM_DEFAULT_Q 1024
1382 #define DGEMM_DEFAULT_Q 512
1383 #define CGEMM_DEFAULT_Q 512
1384 #define ZGEMM_DEFAULT_Q 256
/* R is aliased to the matching P macro, so it tracks any change to P. */
1386 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
1387 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
1388 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
1389 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 * No *_DEFAULT_R macro is defined in the visible lines of this section.
 */
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
1399 #define GEMM_DEFAULT_OFFSET_A (32 * 0)
1400 #define GEMM_DEFAULT_OFFSET_B (32 * 0)
1401 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll values: 8x4 for S/D, 4x2 for C/Z. */
1403 #define SGEMM_DEFAULT_UNROLL_M 8
1404 #define SGEMM_DEFAULT_UNROLL_N 4
1405 #define DGEMM_DEFAULT_UNROLL_M 8
1406 #define DGEMM_DEFAULT_UNROLL_N 4
1407 #define CGEMM_DEFAULT_UNROLL_M 4
1408 #define CGEMM_DEFAULT_UNROLL_N 2
1409 #define ZGEMM_DEFAULT_UNROLL_M 4
1410 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 128 for every prefix. */
1412 #define SGEMM_DEFAULT_P 128
1413 #define DGEMM_DEFAULT_P 128
1414 #define CGEMM_DEFAULT_P 128
1415 #define ZGEMM_DEFAULT_P 128
/*
 * NOTE(review): Q is defined twice below with different values; the
 * conditional directives choosing between the two sets are not visible in
 * this excerpt -- confirm which set is active.
 */
/* First Q set: 4096 / 3072 / 2048 / 1024. */
1417 #define SGEMM_DEFAULT_Q 4096
1418 #define DGEMM_DEFAULT_Q 3072
1419 #define CGEMM_DEFAULT_Q 2048
1420 #define ZGEMM_DEFAULT_Q 1024
/* Second Q set: 512 / 256 / 256 / 128. */
1422 #define SGEMM_DEFAULT_Q 512
1423 #define DGEMM_DEFAULT_Q 256
1424 #define CGEMM_DEFAULT_Q 256
1425 #define ZGEMM_DEFAULT_Q 128
/*
 * GEMM default tuning macros used when POWER3, POWER4 or POWER5 is defined.
 * Offsets, alignment and unroll values are defined once; the P/Q/R values
 * come in several alternative sets.
 * NOTE(review): the directives that pick one P/Q/R set (and the #else /
 * #endif lines of the ALLOC_HUGETLB tests) are not visible in this excerpt.
 */
1433 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
1434 #define GEMM_DEFAULT_OFFSET_A 0
1435 #define GEMM_DEFAULT_OFFSET_B 2048
1436 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* Unroll values: 4x4 for S/D, 2x2 for C/Z. */
1438 #define SGEMM_DEFAULT_UNROLL_M 4
1439 #define SGEMM_DEFAULT_UNROLL_N 4
1440 #define DGEMM_DEFAULT_UNROLL_M 4
1441 #define DGEMM_DEFAULT_UNROLL_N 4
1442 #define CGEMM_DEFAULT_UNROLL_M 2
1443 #define CGEMM_DEFAULT_UNROLL_N 2
1444 #define ZGEMM_DEFAULT_UNROLL_M 2
1445 #define ZGEMM_DEFAULT_UNROLL_N 2
/*
 * First set: explicit P/Q/R for S, D and Z.
 * NOTE(review): no CGEMM_DEFAULT_P/Q/R appears in the visible lines of this
 * set -- confirm whether that omission is intentional.
 */
1452 #define SGEMM_DEFAULT_P 256
1453 #define SGEMM_DEFAULT_Q 432
1454 #define SGEMM_DEFAULT_R 1012
1456 #define DGEMM_DEFAULT_P 256
1457 #define DGEMM_DEFAULT_Q 216
1458 #define DGEMM_DEFAULT_R 1012
1460 #define ZGEMM_DEFAULT_P 256
1461 #define ZGEMM_DEFAULT_Q 104
1462 #define ZGEMM_DEFAULT_R 1012
/* Second set: P is 184 when ALLOC_HUGETLB is defined ... */
1466 #ifdef ALLOC_HUGETLB
1467 #define SGEMM_DEFAULT_P 184
1468 #define DGEMM_DEFAULT_P 184
1469 #define CGEMM_DEFAULT_P 184
1470 #define ZGEMM_DEFAULT_P 184
/* ... and 144 in what is presumably the #else branch (directive not visible). */
1472 #define SGEMM_DEFAULT_P 144
1473 #define DGEMM_DEFAULT_P 144
1474 #define CGEMM_DEFAULT_P 144
1475 #define ZGEMM_DEFAULT_P 144
/* Third set: per-prefix P when ALLOC_HUGETLB is defined ... */
1480 #ifdef ALLOC_HUGETLB
1481 #define SGEMM_DEFAULT_P 512
1482 #define DGEMM_DEFAULT_P 256
1483 #define CGEMM_DEFAULT_P 256
1484 #define ZGEMM_DEFAULT_P 128
/* ... and smaller values in what is presumably the #else branch. */
1486 #define SGEMM_DEFAULT_P 320
1487 #define DGEMM_DEFAULT_P 160
1488 #define CGEMM_DEFAULT_P 160
1489 #define ZGEMM_DEFAULT_P 80
/* Q is 256 for every prefix (note the S, C, D, Z ordering). */
1492 #define SGEMM_DEFAULT_Q 256
1493 #define CGEMM_DEFAULT_Q 256
1494 #define DGEMM_DEFAULT_Q 256
1495 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 * No *_DEFAULT_R macro is defined in the visible lines of this section.
 */
1507 #define GEMM_DEFAULT_OFFSET_A 384
1508 #define GEMM_DEFAULT_OFFSET_B 1024
1509 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll values: 4x4 for S/D, 2x4 for C/Z (UNROLL_N is 4 for C/Z here). */
1511 #define SGEMM_DEFAULT_UNROLL_M 4
1512 #define SGEMM_DEFAULT_UNROLL_N 4
1513 #define DGEMM_DEFAULT_UNROLL_M 4
1514 #define DGEMM_DEFAULT_UNROLL_N 4
1515 #define CGEMM_DEFAULT_UNROLL_M 2
1516 #define CGEMM_DEFAULT_UNROLL_N 4
1517 #define ZGEMM_DEFAULT_UNROLL_M 2
1518 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P differs per prefix: 992 / 480 / 488 / 248. */
1520 #define SGEMM_DEFAULT_P 992
1521 #define DGEMM_DEFAULT_P 480
1522 #define CGEMM_DEFAULT_P 488
1523 #define ZGEMM_DEFAULT_P 248
/* Q is 504 for S/D and 400 for C/Z. */
1525 #define SGEMM_DEFAULT_Q 504
1526 #define DGEMM_DEFAULT_Q 504
1527 #define CGEMM_DEFAULT_Q 400
1528 #define ZGEMM_DEFAULT_Q 400
/*
 * GEMM default tuning macros used when both SPARC and V7 are defined.
 * No *_DEFAULT_R macro is defined in the visible lines of this section.
 */
1534 #if defined(SPARC) && defined(V7)
1539 #define GEMM_DEFAULT_OFFSET_A 0
1540 #define GEMM_DEFAULT_OFFSET_B 2048
1541 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll values: 2x8 for S/D, 1x4 for C/Z. */
1543 #define SGEMM_DEFAULT_UNROLL_M 2
1544 #define SGEMM_DEFAULT_UNROLL_N 8
1545 #define DGEMM_DEFAULT_UNROLL_M 2
1546 #define DGEMM_DEFAULT_UNROLL_N 8
1547 #define CGEMM_DEFAULT_UNROLL_M 1
1548 #define CGEMM_DEFAULT_UNROLL_N 4
1549 #define ZGEMM_DEFAULT_UNROLL_M 1
1550 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P is 256 for every prefix. */
1552 #define SGEMM_DEFAULT_P 256
1553 #define DGEMM_DEFAULT_P 256
1554 #define CGEMM_DEFAULT_P 256
1555 #define ZGEMM_DEFAULT_P 256
/* Q differs per prefix: 512 / 256 / 256 / 128. */
1557 #define SGEMM_DEFAULT_Q 512
1558 #define DGEMM_DEFAULT_Q 256
1559 #define CGEMM_DEFAULT_Q 256
1560 #define ZGEMM_DEFAULT_Q 128
/*
 * Maps GEMM_THREAD to gemm_thread_mn, which is declared elsewhere.
 * NOTE(review): whether this define sits inside the SPARC V7 guard cannot
 * be told from this excerpt -- confirm.
 */
1563 #define GEMM_THREAD gemm_thread_mn
/*
 * GEMM default tuning macros used when SPARC and V9 are both defined, or
 * when the compiler predefines __sparc_v9__.
 * No *_DEFAULT_R macro is defined in the visible lines of this section.
 */
1566 #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
1571 #define GEMM_DEFAULT_OFFSET_A 0
1572 #define GEMM_DEFAULT_OFFSET_B 2048
1573 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll values: 4x4 for S/D, 2x2 for C/Z. */
1575 #define SGEMM_DEFAULT_UNROLL_M 4
1576 #define SGEMM_DEFAULT_UNROLL_N 4
1577 #define DGEMM_DEFAULT_UNROLL_M 4
1578 #define DGEMM_DEFAULT_UNROLL_N 4
1579 #define CGEMM_DEFAULT_UNROLL_M 2
1580 #define CGEMM_DEFAULT_UNROLL_N 2
1581 #define ZGEMM_DEFAULT_UNROLL_M 2
1582 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P is 512 for every prefix. */
1584 #define SGEMM_DEFAULT_P 512
1585 #define DGEMM_DEFAULT_P 512
1586 #define CGEMM_DEFAULT_P 512
1587 #define ZGEMM_DEFAULT_P 512
/* Q differs per prefix: 1024 / 512 / 512 / 256. */
1589 #define SGEMM_DEFAULT_Q 1024
1590 #define DGEMM_DEFAULT_Q 512
1591 #define CGEMM_DEFAULT_Q 512
1592 #define ZGEMM_DEFAULT_Q 256
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 */
1602 #define GEMM_DEFAULT_OFFSET_A 0
1603 #define GEMM_DEFAULT_OFFSET_B 0
1604 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll values: 2x8 for S/D, 1x4 for C/Z. */
1606 #define SGEMM_DEFAULT_UNROLL_M 2
1607 #define SGEMM_DEFAULT_UNROLL_N 8
1608 #define DGEMM_DEFAULT_UNROLL_M 2
1609 #define DGEMM_DEFAULT_UNROLL_N 8
1610 #define CGEMM_DEFAULT_UNROLL_M 1
1611 #define CGEMM_DEFAULT_UNROLL_N 4
1612 #define ZGEMM_DEFAULT_UNROLL_M 1
1613 #define ZGEMM_DEFAULT_UNROLL_N 4
/* P alternates 108 (S/C) and 112 (D/Z). */
1615 #define SGEMM_DEFAULT_P 108
1616 #define DGEMM_DEFAULT_P 112
1617 #define CGEMM_DEFAULT_P 108
1618 #define ZGEMM_DEFAULT_P 112
/* Q differs per prefix: 288 / 144 / 144 / 72. */
1620 #define SGEMM_DEFAULT_Q 288
1621 #define DGEMM_DEFAULT_Q 144
1622 #define CGEMM_DEFAULT_Q 144
1623 #define ZGEMM_DEFAULT_Q 72
/* R is a fixed 2000 for every prefix. */
1625 #define SGEMM_DEFAULT_R 2000
1626 #define DGEMM_DEFAULT_R 2000
1627 #define CGEMM_DEFAULT_R 2000
1628 #define ZGEMM_DEFAULT_R 2000
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt; the existing comment below only records where the
 * section was originally copied from.
 */
1634 ////Copy from SICORTEX
1638 #define GEMM_DEFAULT_OFFSET_A 0
1639 #define GEMM_DEFAULT_OFFSET_B 0
1640 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll values: SGEMM 8x4, DGEMM 4x4, CGEMM 4x2, ZGEMM 2x2. */
1642 #define SGEMM_DEFAULT_UNROLL_M 8
1643 #define SGEMM_DEFAULT_UNROLL_N 4
1645 #define DGEMM_DEFAULT_UNROLL_M 4
1646 #define DGEMM_DEFAULT_UNROLL_N 4
1648 #define CGEMM_DEFAULT_UNROLL_M 4
1649 #define CGEMM_DEFAULT_UNROLL_N 2
1651 #define ZGEMM_DEFAULT_UNROLL_M 2
1652 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix: 64 / 44 / 64 / 32. */
1654 #define SGEMM_DEFAULT_P 64
1655 #define DGEMM_DEFAULT_P 44
1656 #define CGEMM_DEFAULT_P 64
1657 #define ZGEMM_DEFAULT_P 32
/* Q differs per prefix: 192 / 92 / 128 / 80. */
1659 #define SGEMM_DEFAULT_Q 192
1660 #define DGEMM_DEFAULT_Q 92
1661 #define CGEMM_DEFAULT_Q 128
1662 #define ZGEMM_DEFAULT_Q 80
/*
 * R is the constant 640 for S/C/Z, but DGEMM_DEFAULT_R expands to the
 * identifier dgemm_r rather than a constant.
 * NOTE(review): dgemm_r must be declared elsewhere and presumably holds a
 * run-time value -- confirm this asymmetry is intentional.
 */
1664 #define SGEMM_DEFAULT_R 640
1665 #define DGEMM_DEFAULT_R dgemm_r
1666 #define CGEMM_DEFAULT_R 640
1667 #define ZGEMM_DEFAULT_R 640
/* Additional offsets; these lack the _DEFAULT_ infix used above. */
1669 #define GEMM_OFFSET_A1 0x10000
1670 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM default tuning macros for another target section.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 */
1679 #define GEMM_DEFAULT_OFFSET_A 0
1680 #define GEMM_DEFAULT_OFFSET_B 0
1681 #define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Every prefix uses a 2x2 unroll. */
1683 #define SGEMM_DEFAULT_UNROLL_M 2
1684 #define SGEMM_DEFAULT_UNROLL_N 2
1686 #define DGEMM_DEFAULT_UNROLL_M 2
1687 #define DGEMM_DEFAULT_UNROLL_N 2
1689 #define CGEMM_DEFAULT_UNROLL_M 2
1690 #define CGEMM_DEFAULT_UNROLL_N 2
1692 #define ZGEMM_DEFAULT_UNROLL_M 2
1693 #define ZGEMM_DEFAULT_UNROLL_N 2
/* P differs per prefix: 64 / 24 / 24 / 20. */
1695 #define SGEMM_DEFAULT_P 64
1696 #define DGEMM_DEFAULT_P 24
1697 #define CGEMM_DEFAULT_P 24
1698 #define ZGEMM_DEFAULT_P 20
/* Q differs per prefix: 192 / 128 / 128 / 64. */
1700 #define SGEMM_DEFAULT_Q 192
1701 #define DGEMM_DEFAULT_Q 128
1702 #define CGEMM_DEFAULT_Q 128
1703 #define ZGEMM_DEFAULT_Q 64
/* R is a fixed 512 for every prefix. */
1705 #define SGEMM_DEFAULT_R 512
1706 #define DGEMM_DEFAULT_R 512
1707 #define CGEMM_DEFAULT_R 512
1708 #define ZGEMM_DEFAULT_R 512
/* Additional offsets; these lack the _DEFAULT_ infix used above. */
1710 #define GEMM_OFFSET_A1 0x10000
1711 #define GEMM_OFFSET_B1 0x100000
/*
 * GEMM default tuning macros for another target section; unlike the other
 * sections in this excerpt it also defines the QGEMM_* and XGEMM_* variants.
 * NOTE(review): the guard directive selecting this section is not visible
 * in this excerpt -- confirm which target it belongs to.
 */
1721 #define GEMM_DEFAULT_OFFSET_A 0
1722 #define GEMM_DEFAULT_OFFSET_B 0
1723 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
/* UNROLL_N per prefix: 4 / 4 / 2 / 2 / 2 / 1 (S, D, Q, C, Z, X). */
1725 #define SGEMM_DEFAULT_UNROLL_N 4
1726 #define DGEMM_DEFAULT_UNROLL_N 4
1727 #define QGEMM_DEFAULT_UNROLL_N 2
1728 #define CGEMM_DEFAULT_UNROLL_N 2
1729 #define ZGEMM_DEFAULT_UNROLL_N 2
1730 #define XGEMM_DEFAULT_UNROLL_N 1
/*
 * NOTE(review): UNROLL_M is defined twice below with different values; the
 * conditional directives choosing between the two sets are not visible in
 * this excerpt -- confirm which condition selects each.
 */
/* First UNROLL_M set: 4 / 2 / 2 / 2 / 1 / 1. */
1733 #define SGEMM_DEFAULT_UNROLL_M 4
1734 #define DGEMM_DEFAULT_UNROLL_M 2
1735 #define QGEMM_DEFAULT_UNROLL_M 2
1736 #define CGEMM_DEFAULT_UNROLL_M 2
1737 #define ZGEMM_DEFAULT_UNROLL_M 1
1738 #define XGEMM_DEFAULT_UNROLL_M 1
/* Second UNROLL_M set: 8 / 4 / 2 / 4 / 2 / 1. */
1740 #define SGEMM_DEFAULT_UNROLL_M 8
1741 #define DGEMM_DEFAULT_UNROLL_M 4
1742 #define QGEMM_DEFAULT_UNROLL_M 2
1743 #define CGEMM_DEFAULT_UNROLL_M 4
1744 #define ZGEMM_DEFAULT_UNROLL_M 2
1745 #define XGEMM_DEFAULT_UNROLL_M 1
/*
 * P and R expand to lower-case identifiers (sgemm_p, sgemm_r, ...) instead
 * of integer constants, so they are not usable in #if expressions.
 * NOTE(review): those identifiers must be declared elsewhere and presumably
 * hold run-time values -- confirm.
 */
1748 #define SGEMM_DEFAULT_P sgemm_p
1749 #define DGEMM_DEFAULT_P dgemm_p
1750 #define QGEMM_DEFAULT_P qgemm_p
1751 #define CGEMM_DEFAULT_P cgemm_p
1752 #define ZGEMM_DEFAULT_P zgemm_p
1753 #define XGEMM_DEFAULT_P xgemm_p
1755 #define SGEMM_DEFAULT_R sgemm_r
1756 #define DGEMM_DEFAULT_R dgemm_r
1757 #define QGEMM_DEFAULT_R qgemm_r
1758 #define CGEMM_DEFAULT_R cgemm_r
1759 #define ZGEMM_DEFAULT_R zgemm_r
1760 #define XGEMM_DEFAULT_R xgemm_r
/* Q is the constant 128 for every prefix. */
1762 #define SGEMM_DEFAULT_Q 128
1763 #define DGEMM_DEFAULT_Q 128
1764 #define QGEMM_DEFAULT_Q 128
1765 #define CGEMM_DEFAULT_Q 128
1766 #define ZGEMM_DEFAULT_Q 128
1767 #define XGEMM_DEFAULT_Q 128
/*
 * Fallbacks: give the QGEMM/XGEMM unroll macros a value of 2 when no
 * earlier section has defined them. Sections that already defined them
 * keep their own values.
 * NOTE(review): the closing #endif of each guard is not visible in this
 * excerpt.
 */
1773 #ifndef QGEMM_DEFAULT_UNROLL_M
1774 #define QGEMM_DEFAULT_UNROLL_M 2
1777 #ifndef QGEMM_DEFAULT_UNROLL_N
1778 #define QGEMM_DEFAULT_UNROLL_N 2
1781 #ifndef XGEMM_DEFAULT_UNROLL_M
1782 #define XGEMM_DEFAULT_UNROLL_M 2
1785 #ifndef XGEMM_DEFAULT_UNROLL_N
1786 #define XGEMM_DEFAULT_UNROLL_N 2
/*
 * Shuffle-instruction helper macros. Each expands to an instruction
 * mnemonic plus its immediate operand and ends with a comma, so the
 * remaining operands are written at the point of use.
 * NOTE(review): SHUFPD_0..SHUFPD_3 are each defined twice below; the
 * conditional directives choosing between the two forms are not visible in
 * this excerpt.
 */
/*
 * Form 1: SHUFPD_n implemented with shufps. Per the x86 SHUFPS/SHUFPD
 * immediate encodings, 0x44/0x4e/0xe4/0xee should select the same 64-bit
 * halves as shufpd $0/$1/$2/$3 -- verify against the ISA reference.
 */
1790 #define SHUFPD_0 shufps $0x44,
1791 #define SHUFPD_1 shufps $0x4e,
1792 #define SHUFPD_2 shufps $0xe4,
1793 #define SHUFPD_3 shufps $0xee,
/* Form 2: SHUFPD_n implemented directly with shufpd and immediate n. */
1797 #define SHUFPD_0 shufpd $0,
1801 #define SHUFPD_1 shufpd $1,
1805 #define SHUFPD_2 shufpd $2,
1809 #define SHUFPD_3 shufpd $3,
/* shufps with immediate 0x39; the macro name encodes the immediate. */
1813 #define SHUFPS_39 shufps $0x39,