1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/* Barrier macros for POWER8/POWER9 builds.  MB and WMB both expand to an
 * inline-asm `eieio` statement carrying a "memory" clobber, which stops the
 * compiler from moving memory accesses across the statement.
 * NOTE(review): MB and WMB are identical here; confirm that `eieio` gives
 * the ordering callers expect from MB as well as from WMB. */
42 #if defined(POWER8) || defined(POWER9)
43 #define MB __asm__ __volatile__ ("eieio":::"memory")
44 #define WMB __asm__ __volatile__ ("eieio":::"memory")
/* Barrier macros built on the `sync` instruction.  MB and WMB expand to the
 * same statement.  The "memory" clobber is required: without it the asm only
 * emits the instruction, and the compiler remains free to reorder or cache
 * ordinary loads and stores across it, which defeats the barrier. */
#define MB __asm__ __volatile__ ("sync" : : : "memory")
#define WMB __asm__ __volatile__ ("sync" : : : "memory")
61 void *qalloc(int flags, size_t bytes);
63 static void INLINE blas_lock(volatile unsigned long *address){
65 long int ret, val = 1;
68 while (*address) {YIELDING;};
70 #if defined(OS_LINUX) || defined(OS_DARWIN)
71 __asm__ __volatile__ (
72 "0: lwarx %0, 0, %1\n"
79 : "r"(address), "r" (val)
82 __asm__ __volatile__ (
90 : "r"(address), "r" (val)
95 #define BLAS_LOCK_DEFINED
97 static inline unsigned long rpcc(void){
101 __asm__ __volatile__(".machine \"any\" ;");
103 __asm__ __volatile__ ("mftb %0" : "=r" (ret) : );
105 #if defined(POWER5) || defined(PPC970)
118 static inline unsigned long getstackaddr(void){
121 __asm__ __volatile__ ("mr %0, 1"
122 : "=r"(addr) : : "memory");
127 #if defined(OS_LINUX) || defined(OS_AIX)
128 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, 2" : "=f"(res) : : "memory")
130 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, f2" : "=f"(res) : : "memory")
132 #define GET_IMAGE_CANCEL
137 static inline int blas_quickdivide(blasint x, blasint y){
/* Instruction-name aliases: each upper-case macro expands to the lower-case
 * mnemonic with the same spelling (LFPDUX -> lfpdux, FNMADD -> fnmadd, ...).
 * The same macro names are defined a second time further down with different
 * mnemonics; the conditional that selects between the two sets is not
 * visible in this span (presumably double vs. single precision -- TODO
 * confirm). */
155 #define LFPDUX lfpdux
156 #define LFSDUX lfsdux
157 #define LFXDUX lfxdux
160 #define STFPDX stfpdx
161 #define STFSDX stfsdx
162 #define STFXDX stfxdx
164 #define STFDUX stfdux
165 #define STFPDUX stfpdux
166 #define STFSDUX stfsdux
167 #define STFXDUX stfxdux
170 #define FNMADD fnmadd
171 #define FNMSUB fnmsub
/* Alternate alias set: the macro names keep their "D" spelling but expand to
 * mnemonics with an 's' in place of the 'd' (LFPDUX -> lfpsux,
 * STFDUX -> stfsux), and FNMADD/FNMSUB expand to fnmadds/fnmsubs.
 * The conditional selecting this set is not visible in this span
 * (presumably the single-precision build -- TODO confirm). */
183 #define LFPDUX lfpsux
184 #define LFSDUX lfssux
185 #define LFXDUX lfxsux
188 #define STFPDX stfpsx
189 #define STFSDX stfssx
190 #define STFXDX stfxsx
192 #define STFDUX stfsux
193 #define STFPDUX stfpsux
194 #define STFSDUX stfssux
195 #define STFXDUX stfxsux
198 #define FNMADD fnmadds
199 #define FNMSUB fnmsubs
224 #if defined(__64BIT__) && defined(USE64BITINT)
226 #elif defined(__64BIT__) && !defined(USE64BITINT)
/* Three-argument DCBT/DCBTST: instead of naming the instruction, these emit
 * a hand-assembled word through `.long`.  The word is a fixed constant
 * (0x7c00022c for DCBT, 0x7c0001ec for DCBTST) OR-ed with REGA << 16,
 * REGB << 11 and NUM << 21.
 * REGA and REGB are shifted without surrounding parentheses, so they must
 * expand to plain integer register numbers, not expressions.
 * NOTE(review): presumably this encodes dcbt/dcbtst with an extra hint field
 * for assemblers that lack the three-operand form -- confirm. */
233 #define DCBT(REGA, REGB, NUM) .long (0x7c00022c | (REGA << 16) | (REGB << 11) | ((NUM) << 21))
234 #define DCBTST(REGA, REGB, NUM) .long (0x7c0001ec | (REGA << 16) | (REGB << 11) | ((NUM) << 21))
/* DSTATTR_H packs SIZE (shifted left by 8) together with COUNT and ignores
 * STRIDE; DSTATTR_L yields STRIDE alone and ignores SIZE and COUNT.
 * SIZE is shifted without parentheses, so pass a plain value.
 * NOTE(review): presumably the high and low halves of a data-stream control
 * word -- confirm against the call sites. */
237 #define DSTATTR_H(SIZE, COUNT, STRIDE) ((SIZE << 8) | (COUNT))
238 #define DSTATTR_L(SIZE, COUNT, STRIDE) (STRIDE)
240 #if defined(PPC970) || defined(POWER3) || defined(POWER4) || defined(POWER5) || defined(PPCG4)
241 #define HAVE_PREFETCH
244 #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || ( defined(PPC970) && ( defined(OS_DARWIN) || defined(OS_FREEBSD) ) )
252 #define L1_PREFETCHSIZE (64 + 128 * 13)
255 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
257 #define L1_PREFETCHSIZE (96 + 128 * 12)
262 #define L1_PREFETCHSIZE (16 + 128 * 100)
263 #define L1_PREFETCH dcbtst
266 #if defined(POWER8) || defined(POWER9)
268 #define L1_PREFETCHSIZE (16 + 128 * 100)
269 #define L1_PREFETCH dcbtst
274 #define L1_PREFETCH dcbt
278 #define L1_PREFETCHW dcbtst
282 #define DCBT(REGA, REGB) L1_PREFETCH REGB, REGA
283 #define DCBTST(REGA, REGB) L1_PREFETCHW REGB, REGA
285 #define DCBT(REGA, REGB) L1_PREFETCH DCBT_ARG, REGB, REGA
286 #define DCBTST(REGA, REGB) L1_PREFETCHW DCBT_ARG, REGB, REGA
290 #ifndef L1_PREFETCHSIZE
291 #define L1_PREFETCHSIZE (96 + 128 * 12)
294 #if !defined(OS_DARWIN) || defined(NEEDPARAM)
/* Symbolic names for integer codes in the range 0..31.
 * NOTE(review): presumably values for the 5-bit BO operand of the PowerPC
 * conditional-branch instructions (the "dCTR" names suggest the
 * decrement-CTR variants) -- confirm.
 * The codes 4, 12..15 and 20 have no definition in this visible span. */
394 #define BO_dCTR_NZERO_AND_NOT 0
395 #define BO_dCTR_NZERO_AND_NOT_1 1
396 #define BO_dCTR_ZERO_AND_NOT 2
397 #define BO_dCTR_ZERO_AND_NOT_1 3
399 #define BO_IF_NOT_1 5
400 #define BO_IF_NOT_2 6
401 #define BO_IF_NOT_3 7
402 #define BO_dCTR_NZERO_AND 8
403 #define BO_dCTR_NZERO_AND_1 9
404 #define BO_dCTR_ZERO_AND 10
405 #define BO_dCTR_ZERO_AND_1 11
410 #define BO_dCTR_NZERO 16
411 #define BO_dCTR_NZERO_1 17
412 #define BO_dCTR_ZERO 18
413 #define BO_dCTR_ZERO_1 19
415 #define BO_ALWAYS_1 21
416 #define BO_ALWAYS_2 22
417 #define BO_ALWAYS_3 23
418 #define BO_dCTR_NZERO_8 24
419 #define BO_dCTR_NZERO_9 25
420 #define BO_dCTR_ZERO_8 26
421 #define BO_dCTR_ZERO_9 27
422 #define BO_ALWAYS_8 28
423 #define BO_ALWAYS_9 29
424 #define BO_ALWAYS_10 30
425 #define BO_ALWAYS_11 31
495 #define REALNAME ASMNAME
497 #define REALNAME ASMFNAME
500 #if defined(ASSEMBLER) && !defined(NEEDPARAM)
502 #if defined(OS_LINUX) || defined(OS_FREEBSD)
508 .type REALNAME, @function;\
510 #define EPILOGUE .size REALNAME, .-REALNAME
517 .type REALNAME, @function;\
519 #define EPILOGUE .size REALNAME, .-REALNAME
525 .section ".opd","aw";\
528 .quad .REALNAME, .TOC.@tocbase, 0;\
531 .type .REALNAME, @function;\
536 .byte 0,0,0,1,128,0,0,0 ; \
537 .size .REALNAME, .-.REALNAME; \
538 .section .note.GNU-stack,"",@progbits
604 .csect REALNAME[DS],3;\
606 .long .REALNAME, TOC[tc0], 0;\
613 .long _section_.text;
622 .csect REALNAME[DS],3;\
624 .llong .REALNAME, TOC[tc0], 0;\
625 .csect .text[PR], 5;\
631 .llong _section_.text;
641 .section __TEXT,__text,regular,pure_instructions
642 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
651 .section __TEXT,__text,regular,pure_instructions
652 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
663 #define EPILOGUE .subsections_via_symbols
701 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
704 .indirect_symbol mcount
706 bcl 20,31,L00000000001$spb
709 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
711 lwzu r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
716 .indirect_symbol mcount
717 .long dyld_stub_binding_helper
718 .subsections_via_symbols
757 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
760 .indirect_symbol mcount
762 bcl 20,31,L00000000001$spb
765 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
767 ld r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
772 .indirect_symbol mcount
773 .quad dyld_stub_binding_helper
774 .subsections_via_symbols
785 #define HALT mfspr r0, 1023
787 #if defined(OS_LINUX) || defined(OS_FREEBSD)
788 #if defined(PPC440) || defined(PPC440FP2)
789 #undef MAX_CPU_NUMBER
790 #define MAX_CPU_NUMBER 1
792 #if !defined(__64BIT__) && !defined(PROFILE) && !defined(PPC440) && !defined(PPC440FP2)
793 #define START_ADDRESS (0x0b000000UL)
801 #define START_ADDRESS (0xf0000000UL)
812 #define BUFFER_SIZE ( 2 << 20)
813 #elif defined(PPC440FP2)
814 #define BUFFER_SIZE ( 16 << 20)
815 #elif defined(POWER8) || defined(POWER9)
816 #define BUFFER_SIZE ( 64 << 20)
818 #define BUFFER_SIZE ( 16 << 20)
/* PAGESIZE is 4 << 10 = 4096 and HUGE_PAGESIZE is 16 << 20 = 16777216
 * (presumably byte counts: 4 KiB and 16 MiB). */
822 #define PAGESIZE ( 4 << 10)
824 #define HUGE_PAGESIZE (16 << 20)
/* BASE_ADDRESS is START_ADDRESS minus BUFFER_SIZE * MAX_CPU_NUMBER.
 * The leading 1UL forces the product to be computed in unsigned long.
 * BUFFER_SIZE and MAX_CPU_NUMBER are plain int constants, and e.g.
 * (64 << 20) * 32 does not fit in a 32-bit int, so multiplying them directly
 * overflows (undefined behaviour) before the subtraction happens.
 * A multiplication by 1UL is used instead of a cast so the macro stays
 * usable inside preprocessor #if expressions. */
#define BASE_ADDRESS (START_ADDRESS - 1UL * (BUFFER_SIZE) * (MAX_CPU_NUMBER))
828 #ifndef MAP_ANONYMOUS
829 #define MAP_ANONYMOUS MAP_ANON
832 #if defined(OS_LINUX) || defined(OS_FREEBSD)
834 #define FRAMESLOT(X) (((X) * 4) + 8)
837 #define FRAMESLOT(X) (((X) * 8) + 96)
839 #define FRAMESLOT(X) (((X) * 8) + 112)
844 #if defined(OS_AIX) || defined(OS_DARWIN)
846 #define FRAMESLOT(X) (((X) * 4) + 56)
848 #define FRAMESLOT(X) (((X) * 8) + 112)