1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
/* disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/* Memory barriers: MB and WMB both expand to an inline-asm `sync` instruction. */
42 #define MB __asm__ __volatile__ ("sync")
43 #define WMB __asm__ __volatile__ ("sync")
/* Prototype only: qalloc takes a flags word and a byte count and returns a
 * void pointer. Its definition is not in this part of the file. */
56 void *qalloc(int flags, size_t bytes);
/*
 * blas_lock: lock helper operating on the word at `address`.
 *
 * The visible code waits (invoking YIELDING) while *address is non-zero and
 * then runs an lwarx-based inline-asm sequence that takes `address` and
 * val (= 1) as inputs.
 * NOTE(review): the remainder of the asm sequence and any enclosing retry
 * loop are not visible here -- presumably a store-conditional that writes
 * val; confirm against the full file.
 */
58 static void INLINE blas_lock(volatile unsigned long *address){
60 long int ret, val = 1;
/* Plain read-only spin before the atomic sequence is attempted. */
63 while (*address) {YIELDING;};
/* Two asm variants exist; this condition selects the Linux/Darwin one. */
65 #if defined(OS_LINUX) || defined(OS_DARWIN)
66 __asm__ __volatile__ (
67 "0: lwarx %0, 0, %1\n"
74 : "r"(address), "r" (val)
77 __asm__ __volatile__ (
85 : "r"(address), "r" (val)
/* Marker macro; presumably tells shared code that an arch-specific
 * blas_lock has been provided -- confirm against its users. */
90 #define BLAS_LOCK_DEFINED
/*
 * rpcc: the visible code reads the PowerPC time base with `mftb` into ret.
 * NOTE(review): the return statement and whatever the POWER5/PPC970
 * conditional adjusts are not visible here.
 */
92 static inline unsigned long rpcc(void){
/* Ask the assembler to accept instructions from any PowerPC variant. */
96 __asm__ __volatile__(".machine \"any\" ;");
98 __asm__ __volatile__ ("mftb %0" : "=r" (ret) : );
100 #if defined(POWER5) || defined(PPC970)
/*
 * getstackaddr: copies general-purpose register 1 (the stack pointer under
 * the PowerPC ABIs) into addr with an `mr` instruction.
 * NOTE(review): the declaration of addr and the return statement are not
 * visible here.
 */
113 static inline unsigned long getstackaddr(void){
116 __asm__ __volatile__ ("mr %0, 1"
117 : "=r"(addr) : : "memory");
/*
 * GET_IMAGE(res): moves floating-point register 2 into `res` with `fmr`.
 * The two definitions differ only in how the register is spelled ("2" for
 * Linux/AIX, "f2" for the other case).
 * NOTE(review): presumably used to fetch the imaginary part of a complex
 * result returned in FP registers -- confirm against callers.
 * GET_IMAGE_CANCEL expands to nothing.
 */
122 #if defined(OS_LINUX) || defined(OS_AIX)
123 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, 2" : "=f"(res) : : "memory")
125 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, f2" : "=f"(res) : : "memory")
127 #define GET_IMAGE_CANCEL
132 static inline int blas_quickdivide(blasint x, blasint y){
/*
 * Upper-case mnemonic aliases, first variant: each macro expands to the
 * identically spelled lower-case mnemonic (lfpdux, stfpdx, fnmadd, ...).
 * NOTE(review): the same names are defined again further down with
 * single-precision mnemonics, so this set is presumably the
 * double-precision branch of a conditional that is not visible here.
 */
150 #define LFPDUX lfpdux
151 #define LFSDUX lfsdux
152 #define LFXDUX lfxdux
155 #define STFPDX stfpdx
156 #define STFSDX stfsdx
157 #define STFXDX stfxdx
159 #define STFDUX stfdux
160 #define STFPDUX stfpdux
161 #define STFSDUX stfsdux
162 #define STFXDUX stfxdux
165 #define FNMADD fnmadd
166 #define FNMSUB fnmsub
/*
 * Upper-case mnemonic aliases, second variant: the same macro names as the
 * earlier set, but mapped to the 's' forms (lfpsux, stfpsx, fnmadds, ...).
 * Note that the macro names keep the 'D' spelling (e.g. STFDUX -> stfsux).
 * NOTE(review): presumably the single-precision branch of a conditional
 * that is not visible here.
 */
178 #define LFPDUX lfpsux
179 #define LFSDUX lfssux
180 #define LFXDUX lfxsux
183 #define STFPDX stfpsx
184 #define STFSDX stfssx
185 #define STFXDX stfxsx
187 #define STFDUX stfsux
188 #define STFPDUX stfpsux
189 #define STFSDUX stfssux
190 #define STFXDUX stfxsux
193 #define FNMADD fnmadds
194 #define FNMSUB fnmsubs
219 #if defined(__64BIT__) && defined(USE64BITINT)
221 #elif defined(__64BIT__) && !defined(USE64BITINT)
/*
 * Three-argument DCBT/DCBTST: emit the instruction as a raw `.long` word
 * instead of a mnemonic. The base words 0x7c00022c / 0x7c0001ec are the
 * dcbt / dcbtst encodings; REGA is placed at bit 16, REGB at bit 11 and
 * NUM at bit 21.
 * NOTE(review): REGA and REGB are not parenthesized in the expansion, so
 * callers should pass simple register numbers only.
 */
228 #define DCBT(REGA, REGB, NUM) .long (0x7c00022c | (REGA << 16) | (REGB << 11) | ((NUM) << 21))
229 #define DCBTST(REGA, REGB, NUM) .long (0x7c0001ec | (REGA << 16) | (REGB << 11) | ((NUM) << 21))
/*
 * DSTATTR_H(SIZE, COUNT, STRIDE): SIZE shifted left by 8, OR'd with COUNT.
 * DSTATTR_L(SIZE, COUNT, STRIDE): STRIDE alone.
 * NOTE(review): presumably the high and low 16-bit halves of an AltiVec
 * data-stream (dst) control word -- confirm against the users.
 *
 * SIZE is parenthesized so that an expression argument (e.g. `a | b`) is
 * shifted as a whole rather than being split by operator precedence.
 */
#define DSTATTR_H(SIZE, COUNT, STRIDE) (((SIZE) << 8) | (COUNT))
#define DSTATTR_L(SIZE, COUNT, STRIDE) (STRIDE)
/*
 * Prefetch configuration.
 * - HAVE_PREFETCH is defined for PPC970, POWER3, POWER4, POWER5 and PPCG4.
 * - L1_PREFETCHSIZE is an offset expression chosen per CPU family; several
 *   alternatives appear below and a default of (96 + 128 * 12) is applied
 *   at the end if none was selected.
 * - L1_PREFETCH / L1_PREFETCHW name the instruction used (dcbt or dcbtst).
 * - The two-argument DCBT/DCBTST wrappers emit that instruction with the
 *   register operands swapped (REGB first), optionally preceded by DCBT_ARG.
 * NOTE(review): the #else/#endif lines pairing each value with its CPU
 * condition are not visible here; do not infer the pairing from adjacency.
 */
235 #if defined(PPC970) || defined(POWER3) || defined(POWER4) || defined(POWER5) || defined(PPCG4)
236 #define HAVE_PREFETCH
239 #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL)
247 #define L1_PREFETCHSIZE (64 + 128 * 13)
250 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
252 #define L1_PREFETCHSIZE (96 + 128 * 12)
257 #define L1_PREFETCHSIZE (16 + 128 * 100)
258 #define L1_PREFETCH dcbtst
262 #define L1_PREFETCH dcbt
266 #define L1_PREFETCHW dcbtst
/* Variant without an extra leading operand. */
270 #define DCBT(REGA, REGB) L1_PREFETCH REGB, REGA
271 #define DCBTST(REGA, REGB) L1_PREFETCHW REGB, REGA
/* Variant that passes DCBT_ARG as the first operand. */
273 #define DCBT(REGA, REGB) L1_PREFETCH DCBT_ARG, REGB, REGA
274 #define DCBTST(REGA, REGB) L1_PREFETCHW DCBT_ARG, REGB, REGA
/* Fallback when no CPU-specific prefetch distance was chosen above. */
278 #ifndef L1_PREFETCHSIZE
279 #define L1_PREFETCHSIZE (96 + 128 * 12)
282 #if !defined(OS_DARWIN) || defined(NEEDPARAM)
/*
 * Symbolic names for the values 0..31. The names and values line up with
 * the 5-bit BO (branch options) operand of the PowerPC conditional branch
 * instructions:
 *   dCTR_NZERO / dCTR_ZERO - decrement CTR, branch if CTR != 0 / == 0
 *   ..._AND_NOT / IF_NOT   - additionally require the condition bit clear
 *   ..._AND                - additionally require the condition bit set
 *   ALWAYS                 - branch unconditionally
 * Numeric suffixes (_1, _2, ...) name the neighbouring encodings in the
 * same group, which differ only in the low-order bits.
 * NOTE(review): some values (e.g. 4, 12-15, 20) are not visible in this
 * part of the file.
 */
382 #define BO_dCTR_NZERO_AND_NOT 0
383 #define BO_dCTR_NZERO_AND_NOT_1 1
384 #define BO_dCTR_ZERO_AND_NOT 2
385 #define BO_dCTR_ZERO_AND_NOT_1 3
387 #define BO_IF_NOT_1 5
388 #define BO_IF_NOT_2 6
389 #define BO_IF_NOT_3 7
390 #define BO_dCTR_NZERO_AND 8
391 #define BO_dCTR_NZERO_AND_1 9
392 #define BO_dCTR_ZERO_AND 10
393 #define BO_dCTR_ZERO_AND_1 11
398 #define BO_dCTR_NZERO 16
399 #define BO_dCTR_NZERO_1 17
400 #define BO_dCTR_ZERO 18
401 #define BO_dCTR_ZERO_1 19
403 #define BO_ALWAYS_1 21
404 #define BO_ALWAYS_2 22
405 #define BO_ALWAYS_3 23
406 #define BO_dCTR_NZERO_8 24
407 #define BO_dCTR_NZERO_9 25
408 #define BO_dCTR_ZERO_8 26
409 #define BO_dCTR_ZERO_9 27
410 #define BO_ALWAYS_8 28
411 #define BO_ALWAYS_9 29
412 #define BO_ALWAYS_10 30
413 #define BO_ALWAYS_11 31
/*
 * REALNAME is the symbol name used by the PROLOGUE/EPILOGUE macros below.
 * It is an alias for either ASMNAME or ASMFNAME; the condition choosing
 * between the two definitions is not visible here.
 */
483 #define REALNAME ASMNAME
485 #define REALNAME ASMFNAME
488 #if defined(ASSEMBLER) && !defined(NEEDPARAM)
496 .type REALNAME, @function;\
498 #define EPILOGUE .size REALNAME, .-REALNAME
505 .type REALNAME, @function;\
507 #define EPILOGUE .size REALNAME, .-REALNAME
513 .section ".opd","aw";\
516 .quad .REALNAME, .TOC.@tocbase, 0;\
519 .type .REALNAME, @function;\
524 .byte 0,0,0,1,128,0,0,0 ; \
525 .size .REALNAME, .-.REALNAME; \
526 .section .note.GNU-stack,"",@progbits
596 .long _section_.text;
603 .csect .text[PR], 5;\
609 .llong _section_.text;
619 .section __TEXT,__text,regular,pure_instructions
620 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
629 .section __TEXT,__text,regular,pure_instructions
630 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
641 #define EPILOGUE .subsections_via_symbols
679 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
682 .indirect_symbol mcount
684 bcl 20,31,L00000000001$spb
687 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
689 lwzu r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
694 .indirect_symbol mcount
695 .long dyld_stub_binding_helper
696 .subsections_via_symbols
735 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
738 .indirect_symbol mcount
740 bcl 20,31,L00000000001$spb
743 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
745 ld r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
750 .indirect_symbol mcount
751 .quad dyld_stub_binding_helper
752 .subsections_via_symbols
/* HALT expands to `mfspr r0, 1023`, i.e. a read of special-purpose
 * register 1023 into r0.
 * NOTE(review): presumably chosen to trap or stop execution -- confirm. */
763 #define HALT mfspr r0, 1023
/* On PPC440 and PPC440FP2, override any earlier MAX_CPU_NUMBER and force
 * it to 1. */
766 #if defined(PPC440) || defined(PPC440FP2)
767 #undef MAX_CPU_NUMBER
768 #define MAX_CPU_NUMBER 1
/*
 * Address-space and buffer sizing constants.
 *   START_ADDRESS - 0x0b000000 or 0xf0000000; the first is under the
 *                   32-bit, non-PROFILE, non-PPC440 condition below
 *   BUFFER_SIZE   - 2 MiB (2 << 20) or 16 MiB (16 << 20)
 *   PAGESIZE      - 4 KiB (4 << 10)
 *   HUGE_PAGESIZE - 16 MiB (16 << 20)
 *   BASE_ADDRESS  - START_ADDRESS minus BUFFER_SIZE * MAX_CPU_NUMBER
 * NOTE(review): the #else/#endif lines that pair the remaining values with
 * their conditions are not visible here.
 */
770 #if !defined(__64BIT__) && !defined(PROFILE) && !defined(PPC440) && !defined(PPC440FP2)
771 #define START_ADDRESS (0x0b000000UL)
779 #define START_ADDRESS (0xf0000000UL)
790 #define BUFFER_SIZE ( 2 << 20)
791 #elif defined(PPC440FP2)
792 #define BUFFER_SIZE ( 16 << 20)
794 #define BUFFER_SIZE ( 16 << 20)
798 #define PAGESIZE ( 4 << 10)
800 #define HUGE_PAGESIZE (16 << 20)
802 #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/* Compatibility shim: where MAP_ANONYMOUS is not defined, alias it to
 * MAP_ANON. */
804 #ifndef MAP_ANONYMOUS
805 #define MAP_ANONYMOUS MAP_ANON
/*
 * FRAMESLOT(X): byte offset of slot number X, computed as X times the slot
 * size (4 or 8 bytes) plus a fixed base offset (8, 56, 96 or 112).
 * NOTE(review): presumably offsets into the stack frame for each ABI; most
 * of the conditionals that select a variant are not visible here, apart
 * from the AIX/Darwin test below.
 */
810 #define FRAMESLOT(X) (((X) * 4) + 8)
813 #define FRAMESLOT(X) (((X) * 8) + 96)
815 #define FRAMESLOT(X) (((X) * 8) + 112)
820 #if defined(OS_AIX) || defined(OS_DARWIN)
822 #define FRAMESLOT(X) (((X) * 4) + 56)
824 #define FRAMESLOT(X) (((X) * 8) + 112)