1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/* VSX doubleword helpers expressed directly through xxpermdi.
 * The same macro names are defined a second time in this file using the
 * xxspltd/xxmrghd/xxmrgld/xxswapd/xvmovdp mnemonics; the preprocessor
 * condition that chooses between the two sets is not visible in this view.
 *
 *   XXSPLTD(T,A,z)  xxpermdi T,A,A with selector 0b<z><z> (z is pasted twice)
 *   XXMRGHD(T,A,B)  xxpermdi T,A,B with selector 0b00
 *   XXMRGLD(T,A,B)  xxpermdi T,A,B with selector 0b11
 *   XXSWAPD(T,A)    xxpermdi T,A,A with selector 0b10
 *   XVMOVDP(T,A)    xvcpsgndp with A as both sources, i.e. a copy of A into T
 */
45 #define XXSPLTD(T,A,z) xxpermdi T, A, A, 0b##z##z
46 #define XXMRGHD(T,A,B) xxpermdi T, A, B, 0b00
47 #define XXMRGLD(T,A,B) xxpermdi T, A, B, 0b11
48 #define XXSWAPD(T,A) xxpermdi T, A, A, 0b10
49 #define XVMOVDP(T,A) xvcpsgndp T, A, A
/* String-literal forms of the macros above, built by adjacent-literal
 * concatenation. Each expands to one instruction followed by " \n\t".
 * They rely on a str() stringizing helper that is defined outside this view.
 * There is no _S counterpart for XVMOVDP. */
51 #define XXSPLTD_S(T,A,z) "xxpermdi " str(T) ", " str(A) ", " str(A) ", 0b" str(z ## z) " \n\t"
52 #define XXMRGHD_S(T,A,B) "xxpermdi " str(T) ", " str(A) ", " str(B) ", 0b00 \n\t"
53 #define XXMRGLD_S(T,A,B) "xxpermdi " str(T) ", " str(A) ", " str(B) ", 0b11 \n\t"
54 #define XXSWAPD_S(T,A) "xxpermdi " str(T) ", " str(A) ", " str(A) ", 0b10 \n\t"
/* VSX doubleword helpers that forward straight to the assembler's extended
 * mnemonics (xxspltd, xxmrghd, xxmrgld, xxswapd, xvmovdp). The same macro
 * names are also defined in this file in terms of raw xxpermdi/xvcpsgndp;
 * the preprocessor condition selecting between the two sets is not visible
 * in this view. */
57 #define XXSPLTD(T,A,z) xxspltd T, A, z
58 #define XXMRGHD(T,A,B) xxmrghd T, A, B
59 #define XXMRGLD(T,A,B) xxmrgld T, A, B
60 #define XXSWAPD(T,A) xxswapd T, A
61 #define XVMOVDP(T,A) xvmovdp T, A
/* String-literal forms of the macros above. Each expands to one instruction
 * followed by " \n\t". They rely on a str() stringizing helper defined
 * outside this view. There is no _S counterpart for XVMOVDP. */
63 #define XXSPLTD_S(T,A,z) "xxspltd " str(T) ", " str(A) ", " str(z)" \n\t"
64 #define XXMRGHD_S(T,A,B) "xxmrghd " str(T) ", " str(A) ", " str(B)" \n\t"
65 #define XXMRGLD_S(T,A,B) "xxmrgld " str(T) ", " str(A) ", " str(B)" \n\t"
66 #define XXSWAPD_S(T,A) "xxswapd " str(T) ", " str(A) " \n\t"
/* Memory-barrier macros MB / WMB / RMB.
 * POWER8, POWER9 and POWER10 builds issue "eieio"; the second set of
 * definitions issues "sync" (the preprocessor line separating the two sets
 * is not visible in this view).
 * Every variant carries a "memory" clobber so that the compiler itself does
 * not keep values cached in registers or move memory accesses across the
 * barrier instruction; __volatile__ alone does not guarantee that. */
71 #if defined(POWER8) || defined(POWER9) || defined(POWER10)
72 #define MB __asm__ __volatile__ ("eieio":::"memory")
73 #define WMB __asm__ __volatile__ ("eieio":::"memory")
74 #define RMB __asm__ __volatile__ ("eieio":::"memory")
76 #define MB __asm__ __volatile__ ("sync":::"memory")
77 #define WMB __asm__ __volatile__ ("sync":::"memory")
78 #define RMB __asm__ __volatile__ ("sync":::"memory")
/* Prototype only; the definition is outside this view.
 * NOTE(review): the signature suggests an allocator taking a flag word and a
 * size in bytes and returning the buffer - confirm against the definition. */
92 void *qalloc(int flags, size_t bytes);
94 static void INLINE blas_lock(volatile unsigned long *address){
96 long int ret, val = 1;
99 while (*address) {YIELDING;};
101 #if defined(OS_LINUX) || defined(OS_DARWIN)
102 __asm__ __volatile__ (
103 "0: lwarx %0, 0, %1\n"
111 : "r"(address), "r" (val)
114 __asm__ __volatile__ (
122 : "r"(address), "r" (val)
127 #define BLAS_LOCK_DEFINED
129 static inline unsigned long rpcc(void){
133 __asm__ __volatile__(".machine \"any\" ;");
135 __asm__ __volatile__ ("mftb %0" : "=r" (ret) : );
137 #if defined(POWER5) || defined(PPC970)
150 static inline unsigned long getstackaddr(void){
153 __asm__ __volatile__ ("mr %0, 1"
154 : "=r"(addr) : : "memory");
/* GET_IMAGE(res): copy floating-point register 2 into the C variable res
 * using fmr. Linux and AIX name the register as bare "2"; the alternative
 * definition spells it "f2" (its guard is not visible in this view).
 * NOTE(review): presumably used to pick up the imaginary part of a complex
 * return value - confirm at the call sites.
 * GET_IMAGE_CANCEL expands to nothing. */
159 #if defined(OS_LINUX) || defined(OS_AIX)
160 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, 2" : "=f"(res) : : "memory")
162 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, f2" : "=f"(res) : : "memory")
164 #define GET_IMAGE_CANCEL
169 static inline int blas_quickdivide(blasint x, blasint y){
/* Precision-neutral instruction aliases, first set: each macro maps to the
 * mnemonic of the same spelling (lfpdux, stfdux, fnmadd, ...).
 * The same macro names are redefined later in this file to different
 * mnemonics (lfpsux, stfsux, fnmadds, ...); the guard that picks one set is
 * not visible in this view - presumably a double/single precision switch,
 * TODO confirm. */
187 #define LFPDUX lfpdux
188 #define LFSDUX lfsdux
189 #define LFXDUX lfxdux
192 #define STFPDX stfpdx
193 #define STFSDX stfsdx
194 #define STFXDX stfxdx
196 #define STFDUX stfdux
197 #define STFPDUX stfpdux
198 #define STFSDUX stfsdux
199 #define STFXDUX stfxdux
202 #define FNMADD fnmadd
203 #define FNMSUB fnmsub
/* Precision-neutral instruction aliases, second set: the macro names keep
 * their "D" spelling but expand to the "s" mnemonics (lfpsux, stfsux,
 * fnmadds, ...). An earlier set in this file maps the same names to the
 * identically-spelled mnemonics; the guard that picks one set is not visible
 * in this view - presumably a double/single precision switch, TODO confirm. */
215 #define LFPDUX lfpsux
216 #define LFSDUX lfssux
217 #define LFXDUX lfxsux
220 #define STFPDX stfpsx
221 #define STFSDX stfssx
222 #define STFXDX stfxsx
224 #define STFDUX stfsux
225 #define STFPDUX stfpsux
226 #define STFSDUX stfssux
227 #define STFXDUX stfxsux
230 #define FNMADD fnmadds
231 #define FNMSUB fnmsubs
256 #if defined(__64BIT__) && defined(USE64BITINT)
258 #elif defined(__64BIT__) && !defined(USE64BITINT)
/* Three-argument DCBT / DCBTST: emit the instruction word directly with
 * .long instead of using a mnemonic. The word is a fixed base value
 * (0x7c00022c / 0x7c0001ec) OR-ed with REGA shifted to bit 16, REGB shifted
 * to bit 11 and NUM shifted to bit 21.
 * NOTE(review): REGA and REGB are not parenthesized (NUM is), so pass plain
 * register numbers rather than expressions. Two-argument macros with the
 * same names exist elsewhere in this file under other conditions. */
265 #define DCBT(REGA, REGB, NUM) .long (0x7c00022c | (REGA << 16) | (REGB << 11) | ((NUM) << 21))
266 #define DCBTST(REGA, REGB, NUM) .long (0x7c0001ec | (REGA << 16) | (REGB << 11) | ((NUM) << 21))
/* DSTATTR_H packs SIZE into bits 8 and up and COUNT into the low bits; its
 * STRIDE argument is ignored. DSTATTR_L yields STRIDE alone and ignores
 * SIZE and COUNT. SIZE is not parenthesized in DSTATTR_H. */
269 #define DSTATTR_H(SIZE, COUNT, STRIDE) ((SIZE << 8) | (COUNT))
270 #define DSTATTR_L(SIZE, COUNT, STRIDE) (STRIDE)
272 #if defined(PPC970) || defined(POWER3) || defined(POWER4) || defined(POWER5) || defined(PPCG4)
273 #define HAVE_PREFETCH
276 #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(POWER10) || defined(PPC970)
284 #define L1_PREFETCHSIZE (64 + 128 * 13)
287 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
289 #define L1_PREFETCHSIZE (96 + 128 * 12)
294 #define L1_PREFETCHSIZE (16 + 128 * 100)
295 #define L1_PREFETCH dcbtst
298 #if defined(POWER8) || defined(POWER9) || defined(POWER10)
300 #define L1_PREFETCHSIZE (16 + 128 * 100)
301 #define L1_PREFETCH dcbtst
306 #define L1_PREFETCH dcbt
310 #define L1_PREFETCHW dcbtst
/* Two-argument DCBT / DCBTST built on L1_PREFETCH / L1_PREFETCHW.
 * Note the operand swap: the macro takes (REGA, REGB) but emits REGB first.
 * The first pair emits two operands; the second pair additionally passes
 * DCBT_ARG as the leading operand. The guard choosing between the pairs is
 * not visible in this view. */
314 #define DCBT(REGA, REGB) L1_PREFETCH REGB, REGA
315 #define DCBTST(REGA, REGB) L1_PREFETCHW REGB, REGA
317 #define DCBT(REGA, REGB) L1_PREFETCH DCBT_ARG, REGB, REGA
318 #define DCBTST(REGA, REGB) L1_PREFETCHW DCBT_ARG, REGB, REGA
/* Fallback: if no earlier section defined L1_PREFETCHSIZE, use
 * 96 + 128 * 12 = 1632. */
322 #ifndef L1_PREFETCHSIZE
323 #define L1_PREFETCHSIZE (96 + 128 * 12)
326 #if !defined(OS_DARWIN) || defined(NEEDPARAM)
/* Symbolic names for small integer codes in the range 0..31.
 * NOTE(review): the names (dCTR = decrement CTR, IF / IF_NOT, ALWAYS)
 * suggest these are values for the BO operand of the conditional branch
 * instructions - confirm against the Power ISA branch encoding table.
 * The values 4, 12-15 and 20 have no definition in this view. */
426 #define BO_dCTR_NZERO_AND_NOT 0
427 #define BO_dCTR_NZERO_AND_NOT_1 1
428 #define BO_dCTR_ZERO_AND_NOT 2
429 #define BO_dCTR_ZERO_AND_NOT_1 3
431 #define BO_IF_NOT_1 5
432 #define BO_IF_NOT_2 6
433 #define BO_IF_NOT_3 7
434 #define BO_dCTR_NZERO_AND 8
435 #define BO_dCTR_NZERO_AND_1 9
436 #define BO_dCTR_ZERO_AND 10
437 #define BO_dCTR_ZERO_AND_1 11
442 #define BO_dCTR_NZERO 16
443 #define BO_dCTR_NZERO_1 17
444 #define BO_dCTR_ZERO 18
445 #define BO_dCTR_ZERO_1 19
447 #define BO_ALWAYS_1 21
448 #define BO_ALWAYS_2 22
449 #define BO_ALWAYS_3 23
450 #define BO_dCTR_NZERO_8 24
451 #define BO_dCTR_NZERO_9 25
452 #define BO_dCTR_ZERO_8 26
453 #define BO_dCTR_ZERO_9 27
454 #define BO_ALWAYS_8 28
455 #define BO_ALWAYS_9 29
456 #define BO_ALWAYS_10 30
457 #define BO_ALWAYS_11 31
/* REALNAME is the symbol name used by the prologue/epilogue macros in this
 * file (.type REALNAME, .size REALNAME, ...). It is set to either ASMNAME or
 * ASMFNAME; both are defined outside this view, as is the condition that
 * selects between them. */
527 #define REALNAME ASMNAME
529 #define REALNAME ASMFNAME
532 #if defined(ASSEMBLER) && !defined(NEEDPARAM)
534 #if defined(OS_LINUX) || defined(OS_FREEBSD)
540 .type REALNAME, @function;\
542 #define EPILOGUE .size REALNAME, .-REALNAME
549 .type REALNAME, @function;\
551 #define EPILOGUE .size REALNAME, .-REALNAME
557 .section ".opd","aw";\
560 .quad .REALNAME, .TOC.@tocbase, 0;\
563 .type .REALNAME, @function;\
568 .byte 0,0,0,1,128,0,0,0 ; \
569 .size .REALNAME, .-.REALNAME; \
570 .section .note.GNU-stack,"",@progbits
636 .csect REALNAME[DS],3;\
638 .long .REALNAME, TOC[tc0], 0;\
645 .long _section_.text;
654 .csect REALNAME[DS],3;\
656 .llong .REALNAME, TOC[tc0], 0;\
657 .csect .text[PR], 5;\
663 .llong _section_.text;
673 .section __TEXT,__text,regular,pure_instructions
674 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
683 .section __TEXT,__text,regular,pure_instructions
684 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
695 #define EPILOGUE .subsections_via_symbols
733 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
736 .indirect_symbol mcount
738 bcl 20,31,L00000000001$spb
741 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
743 lwzu r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
748 .indirect_symbol mcount
749 .long dyld_stub_binding_helper
750 .subsections_via_symbols
789 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
792 .indirect_symbol mcount
794 bcl 20,31,L00000000001$spb
797 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
799 ld r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
804 .indirect_symbol mcount
805 .quad dyld_stub_binding_helper
806 .subsections_via_symbols
/* HALT expands to a read of special-purpose register 1023 into r0.
 * NOTE(review): presumably chosen to stop or trap execution for debugging -
 * confirm what SPR 1023 does on the target CPUs. */
817 #define HALT mfspr r0, 1023
/* Linux/FreeBSD settings.
 * - PPC440 and PPC440FP2 builds override MAX_CPU_NUMBER to 1.
 * - Builds that are not 64-bit, not PROFILE and not PPC440/PPC440FP2 get
 *   START_ADDRESS = 0x0b000000.
 * The last definition (0xf0000000) belongs to a different conditional whose
 * guard is not visible in this view. START_ADDRESS feeds BASE_ADDRESS,
 * which is defined elsewhere in this file. */
819 #if defined(OS_LINUX) || defined(OS_FREEBSD)
820 #if defined(PPC440) || defined(PPC440FP2)
821 #undef MAX_CPU_NUMBER
822 #define MAX_CPU_NUMBER 1
824 #if !defined(__64BIT__) && !defined(PROFILE) && !defined(PPC440) && !defined(PPC440FP2)
825 #define START_ADDRESS (0x0b000000UL)
833 #define START_ADDRESS (0xf0000000UL)
/* Buffer and page sizes, as plain int constant expressions.
 *   BUFFER_SIZE: 2 << 20 (2 MiB), 16 << 20 (16 MiB) or 64 << 22 (256 MiB)
 *   depending on the CPU; PPC440FP2 and POWER6/8/9/10 are the guards visible
 *   here, the remaining guards are not visible in this view.
 *   PAGESIZE:      4 << 10  (4 KiB)
 *   HUGE_PAGESIZE: 16 << 20 (16 MiB)
 */
844 #define BUFFER_SIZE ( 2 << 20)
845 #elif defined(PPC440FP2)
846 #define BUFFER_SIZE ( 16 << 20)
847 #elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
848 #define BUFFER_SIZE ( 64 << 22)
850 #define BUFFER_SIZE ( 16 << 20)
854 #define BUFFER_SIZE (64 << 22)
858 #define PAGESIZE ( 4 << 10)
860 #define HUGE_PAGESIZE (16 << 20)
/* BASE_ADDRESS lies BUFFER_SIZE * MAX_CPU_NUMBER below START_ADDRESS.
 * NOTE(review): BUFFER_SIZE is an int expression, so if MAX_CPU_NUMBER is
 * also an int the product is evaluated in int before the unsigned long
 * subtraction. With the 256 MiB size, 8 or more CPUs would exceed INT_MAX -
 * confirm that combination cannot occur where START_ADDRESS is defined. */
862 #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/* Compatibility alias: when the system headers do not define MAP_ANONYMOUS,
 * fall back to MAP_ANON. */
864 #ifndef MAP_ANONYMOUS
865 #define MAP_ANONYMOUS MAP_ANON
/* FRAMESLOT(X): convert slot index X to an offset, computed as X times a
 * slot width (4 or 8) plus a fixed base.
 *   Linux/FreeBSD: 4*X + 8, 8*X + 96 or 8*X + 112
 *   AIX/Darwin:    4*X + 56 or 8*X + 112
 * The inner conditionals choosing among the variants are not visible in this
 * view. NOTE(review): presumably the 4-byte forms are for 32-bit builds and
 * the 8-byte forms for 64-bit ABIs, with the result being a byte offset into
 * the stack frame - confirm. */
868 #if defined(OS_LINUX) || defined(OS_FREEBSD)
870 #define FRAMESLOT(X) (((X) * 4) + 8)
873 #define FRAMESLOT(X) (((X) * 8) + 96)
875 #define FRAMESLOT(X) (((X) * 8) + 112)
880 #if defined(OS_AIX) || defined(OS_DARWIN)
882 #define FRAMESLOT(X) (((X) * 4) + 56)
884 #define FRAMESLOT(X) (((X) * 8) + 112)