1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/*
 * Memory barriers.  MB and WMB expand to the same inline-asm `sync`
 * instruction.  The "memory" clobber additionally makes each one a
 * compiler barrier: without it the compiler is free to keep values in
 * registers or move ordinary loads/stores across the asm statement, so
 * only the hardware ordering would be enforced.
 */
#define MB __asm__ __volatile__ ("sync" : : : "memory")
#define WMB __asm__ __volatile__ ("sync" : : : "memory")
/* Declaration only: qalloc takes an int `flags` word and a byte count and */
/* returns a void pointer.  NOTE(review): the meaning of `flags` and the   */
/* failure behaviour are not visible in this view -- confirm at the        */
/* definition.                                                             */
56 void *qalloc(int flags, size_t bytes);
58 static void INLINE blas_lock(volatile unsigned long *address){
60 long int ret, val = 1;
63 while (*address) {YIELDING;};
65 #if defined(OS_LINUX) || defined(OS_DARWIN)
66 __asm__ __volatile__ (
67 "0: lwarx %0, 0, %1\n"
74 : "r"(address), "r" (val)
77 __asm__ __volatile__ (
85 : "r"(address), "r" (val)
/* Marker macro with an empty expansion, defined after blas_lock above.   */
/* NOTE(review): presumably tested elsewhere to avoid supplying a second  */
/* blas_lock implementation -- confirm at the point of use.               */
90 #define BLAS_LOCK_DEFINED
92 static inline unsigned long rpcc(void){
96 __asm__ __volatile__(".machine \"any\" ;");
98 __asm__ __volatile__ ("mftb %0" : "=r" (ret) : );
100 #if defined(POWER5) || defined(PPC970)
113 static inline unsigned long getstackaddr(void){
116 __asm__ __volatile__ ("mr %0, 1"
117 : "=r"(addr) : : "memory");
/* GET_IMAGE(res) copies floating-point register 2 into `res` with an     */
/* `fmr` instruction ("=f" output, "memory" clobber).  Two spellings of   */
/* the source register exist: bare `2` when OS_LINUX or OS_AIX is         */
/* defined, and `f2` in the alternative definition (the directive that    */
/* separates the two is not visible in this view).                        */
/* NOTE(review): presumably used to pick up the second half of a complex  */
/* return value -- confirm against the callers.                           */
122 #if defined(OS_LINUX) || defined(OS_AIX)
123 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, 2" : "=f"(res) : : "memory")
125 #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, f2" : "=f"(res) : : "memory")
/* GET_IMAGE_CANCEL expands to nothing here. */
127 #define GET_IMAGE_CANCEL
132 static inline int blas_quickdivide(blasint x, blasint y){
/* Mnemonic aliases, first variant: each upper-case macro expands to the  */
/* instruction mnemonic with the same spelling in lower case.             */
/* NOTE(review): presumably the double-precision set, chosen by a         */
/* conditional that is not visible in this view -- confirm.               */
150 #define LFPDUX lfpdux
151 #define LFSDUX lfsdux
152 #define LFXDUX lfxdux
155 #define STFPDX stfpdx
156 #define STFSDX stfsdx
157 #define STFXDX stfxdx
159 #define STFDUX stfdux
160 #define STFPDUX stfpdux
161 #define STFSDUX stfsdux
162 #define STFXDUX stfxdux
165 #define FNMADD fnmadd
166 #define FNMSUB fnmsub
/* Mnemonic aliases, second variant: the same macro names as the first    */
/* variant, but each expands to a different mnemonic -- the load/store    */
/* forms swap a `d` for an `s` (e.g. lfpsux, stfsux) and the fused        */
/* multiply forms gain a trailing `s` (fnmadds, fnmsubs).                 */
/* NOTE(review): presumably the single-precision set, chosen by a         */
/* conditional that is not visible in this view -- confirm.               */
178 #define LFPDUX lfpsux
179 #define LFSDUX lfssux
180 #define LFXDUX lfxsux
183 #define STFPDX stfpsx
184 #define STFSDX stfssx
185 #define STFXDX stfxsx
187 #define STFDUX stfsux
188 #define STFPDUX stfpsux
189 #define STFSDUX stfssux
190 #define STFXDUX stfxsux
193 #define FNMADD fnmadds
194 #define FNMSUB fnmsubs
219 #if defined(__64BIT__) && defined(USE64BITINT)
221 #elif defined(__64BIT__) && !defined(USE64BITINT)
/*
 * Three-operand DCBT / DCBTST emitted as a raw 32-bit word through the
 * assembler's `.long` directive: a fixed base word (0x7c00022c for DCBT,
 * 0x7c0001ec for DCBTST) OR-ed with REGA shifted to bit 16, REGB shifted
 * to bit 11 and NUM shifted to bit 21.
 *
 * Every argument is parenthesized before shifting so that an argument
 * which is itself an expression (e.g. `a | b`) is shifted as a whole
 * instead of being split by operator precedence.
 */
#define DCBT(REGA, REGB, NUM) .long (0x7c00022c | ((REGA) << 16) | ((REGB) << 11) | ((NUM) << 21))
#define DCBTST(REGA, REGB, NUM) .long (0x7c0001ec | ((REGA) << 16) | ((REGB) << 11) | ((NUM) << 21))
/*
 * Split a (SIZE, COUNT, STRIDE) triple into two halves:
 *   DSTATTR_H -> SIZE shifted left by 8, OR-ed with COUNT (STRIDE unused)
 *   DSTATTR_L -> STRIDE (SIZE and COUNT unused)
 * SIZE is parenthesized before the shift so that an expression argument
 * is shifted as a whole.
 * NOTE(review): presumably the high/low halves of a data-stream-touch
 * control word -- confirm against the users of these macros.
 */
#define DSTATTR_H(SIZE, COUNT, STRIDE) (((SIZE) << 8) | (COUNT))
#define DSTATTR_L(SIZE, COUNT, STRIDE) (STRIDE)
/* Prefetch configuration.  HAVE_PREFETCH is defined when any of the CPU  */
/* targets named in the condition below is selected.                      */
235 #if defined(PPC970) || defined(POWER3) || defined(POWER4) || defined(POWER5) || defined(PPCG4)
236 #define HAVE_PREFETCH
/* NOTE(review): the lines below are alternative definitions of           */
/* L1_PREFETCHSIZE / L1_PREFETCH / L1_PREFETCHW.  Only some of the        */
/* conditionals that select between them are visible in this view, so     */
/* which target each value belongs to must be confirmed in the full file. */
/* The sizes evaluate to 1728, 1632 and 12816 respectively; the mnemonic  */
/* chosen for L1_PREFETCH is either dcbtst or dcbt, and L1_PREFETCHW is   */
/* dcbtst.                                                                */
239 #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
247 #define L1_PREFETCHSIZE (64 + 128 * 13)
250 #if defined(POWER3) || defined(POWER4) || defined(POWER5)
252 #define L1_PREFETCHSIZE (96 + 128 * 12)
257 #define L1_PREFETCHSIZE (16 + 128 * 100)
258 #define L1_PREFETCH dcbtst
263 #define L1_PREFETCHSIZE (16 + 128 * 100)
264 #define L1_PREFETCH dcbtst
269 #define L1_PREFETCH dcbt
273 #define L1_PREFETCHW dcbtst
/* Two-operand DCBT / DCBTST: expand to the L1_PREFETCH / L1_PREFETCHW    */
/* mnemonic followed by the operands in swapped order (REGB, then REGA).  */
/* The second pair additionally passes DCBT_ARG as a leading operand.     */
/* The directive that chooses between the two pairs is not visible in     */
/* this view.                                                             */
277 #define DCBT(REGA, REGB) L1_PREFETCH REGB, REGA
278 #define DCBTST(REGA, REGB) L1_PREFETCHW REGB, REGA
280 #define DCBT(REGA, REGB) L1_PREFETCH DCBT_ARG, REGB, REGA
281 #define DCBTST(REGA, REGB) L1_PREFETCHW DCBT_ARG, REGB, REGA
/* Fallback: if nothing above defined L1_PREFETCHSIZE, use                */
/* 96 + 128 * 12 (= 1632).                                                */
285 #ifndef L1_PREFETCHSIZE
286 #define L1_PREFETCHSIZE (96 + 128 * 12)
289 #if !defined(OS_DARWIN) || defined(NEEDPARAM)
/* Symbolic names for small integer codes in the range 0..31.             */
/* NOTE(review): the names and the 5-bit range suggest these are values   */
/* for the BO operand of the PowerPC conditional-branch instructions      */
/* (dCTR = decrement the count register, NZERO/ZERO = test it, ALWAYS =   */
/* unconditional) -- confirm against the ISA's BO encoding table.         */
/* The values 4, 12-15 and 20 are absent from this view; their            */
/* definitions are presumably on lines not shown here.                    */
389 #define BO_dCTR_NZERO_AND_NOT 0
390 #define BO_dCTR_NZERO_AND_NOT_1 1
391 #define BO_dCTR_ZERO_AND_NOT 2
392 #define BO_dCTR_ZERO_AND_NOT_1 3
394 #define BO_IF_NOT_1 5
395 #define BO_IF_NOT_2 6
396 #define BO_IF_NOT_3 7
397 #define BO_dCTR_NZERO_AND 8
398 #define BO_dCTR_NZERO_AND_1 9
399 #define BO_dCTR_ZERO_AND 10
400 #define BO_dCTR_ZERO_AND_1 11
405 #define BO_dCTR_NZERO 16
406 #define BO_dCTR_NZERO_1 17
407 #define BO_dCTR_ZERO 18
408 #define BO_dCTR_ZERO_1 19
410 #define BO_ALWAYS_1 21
411 #define BO_ALWAYS_2 22
412 #define BO_ALWAYS_3 23
413 #define BO_dCTR_NZERO_8 24
414 #define BO_dCTR_NZERO_9 25
415 #define BO_dCTR_ZERO_8 26
416 #define BO_dCTR_ZERO_9 27
417 #define BO_ALWAYS_8 28
418 #define BO_ALWAYS_9 29
419 #define BO_ALWAYS_10 30
420 #define BO_ALWAYS_11 31
/* REALNAME is the symbol name used by the .type / .size directives of    */
/* the PROLOGUE / EPILOGUE macros further down.  It is set to ASMNAME in  */
/* one configuration and ASMFNAME in the other; the conditional choosing  */
/* between them is not visible in this view.                              */
490 #define REALNAME ASMNAME
492 #define REALNAME ASMFNAME
495 #if defined(ASSEMBLER) && !defined(NEEDPARAM)
503 .type REALNAME, @function;\
505 #define EPILOGUE .size REALNAME, .-REALNAME
512 .type REALNAME, @function;\
514 #define EPILOGUE .size REALNAME, .-REALNAME
520 .section ".opd","aw";\
523 .quad .REALNAME, .TOC.@tocbase, 0;\
526 .type .REALNAME, @function;\
531 .byte 0,0,0,1,128,0,0,0 ; \
532 .size .REALNAME, .-.REALNAME; \
533 .section .note.GNU-stack,"",@progbits
603 .long _section_.text;
610 .csect .text[PR], 5;\
616 .llong _section_.text;
626 .section __TEXT,__text,regular,pure_instructions
627 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
636 .section __TEXT,__text,regular,pure_instructions
637 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
648 #define EPILOGUE .subsections_via_symbols
686 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
689 .indirect_symbol mcount
691 bcl 20,31,L00000000001$spb
694 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
696 lwzu r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
701 .indirect_symbol mcount
702 .long dyld_stub_binding_helper
703 .subsections_via_symbols
742 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
745 .indirect_symbol mcount
747 bcl 20,31,L00000000001$spb
750 addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb)
752 ld r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11)
757 .indirect_symbol mcount
758 .quad dyld_stub_binding_helper
759 .subsections_via_symbols
/* HALT expands to `mfspr r0, 1023`, i.e. a read of special-purpose       */
/* register 1023 into r0.  NOTE(review): the intended effect (trap or     */
/* simulator stop?) is not evident from this header -- confirm.           */
770 #define HALT mfspr r0, 1023
/* On PPC440 / PPC440FP2 builds, discard any earlier MAX_CPU_NUMBER and   */
/* force it to 1.                                                         */
773 #if defined(PPC440) || defined(PPC440FP2)
774 #undef MAX_CPU_NUMBER
775 #define MAX_CPU_NUMBER 1
/* START_ADDRESS: the address from which BASE_ADDRESS is derived below.   */
/* 0x0b000000 is used for builds that are not 64-bit, not PROFILE and     */
/* not PPC440 / PPC440FP2.  A second definition, 0xf0000000, follows; the */
/* condition guarding it is not visible in this view.                     */
777 #if !defined(__64BIT__) && !defined(PROFILE) && !defined(PPC440) && !defined(PPC440FP2)
778 #define START_ADDRESS (0x0b000000UL)
786 #define START_ADDRESS (0xf0000000UL)
/* BUFFER_SIZE alternatives: 2 << 20 (2 MiB) under a condition not        */
/* visible in this view, 16 << 20 (16 MiB) for PPC440FP2, 32 << 20        */
/* (32 MiB) for POWER8, and a final 16 << 20 definition whose guard       */
/* (presumably the #else branch) is also not visible here.                */
797 #define BUFFER_SIZE ( 2 << 20)
798 #elif defined(PPC440FP2)
799 #define BUFFER_SIZE ( 16 << 20)
800 #elif defined(POWER8)
801 #define BUFFER_SIZE ( 32 << 20)
803 #define BUFFER_SIZE ( 16 << 20)
/* Page sizes: PAGESIZE is 4 << 10 (4096), HUGE_PAGESIZE is 16 << 20      */
/* (16777216).                                                            */
807 #define PAGESIZE ( 4 << 10)
809 #define HUGE_PAGESIZE (16 << 20)
/*
 * BASE_ADDRESS is START_ADDRESS minus BUFFER_SIZE times MAX_CPU_NUMBER.
 * BUFFER_SIZE is an int expression, so the product is widened to
 * unsigned long before multiplying: in plain int arithmetic a value such
 * as (32 << 20) * 64 overflows, which is undefined behaviour, before the
 * subtraction ever converts it.  Results are unchanged whenever the
 * product fits in an int.
 */
#define BASE_ADDRESS (START_ADDRESS - (unsigned long)(BUFFER_SIZE) * (MAX_CPU_NUMBER))
/* Fallback for environments that do not define MAP_ANONYMOUS: alias it   */
/* to MAP_ANON.                                                           */
813 #ifndef MAP_ANONYMOUS
814 #define MAP_ANONYMOUS MAP_ANON
/* FRAMESLOT(X) maps a slot index X to an offset: X times a slot width    */
/* (4 or 8) plus a fixed base (8, 96, 112 or 56).  Five alternative       */
/* definitions appear below; apart from the OS_AIX / OS_DARWIN test, the  */
/* conditionals that select between them are not visible in this view.    */
/* NOTE(review): presumably byte offsets into the stack frame for the     */
/* different ABIs (4-byte slots for 32-bit, 8-byte for 64-bit) --         */
/* confirm against the full file.                                         */
819 #define FRAMESLOT(X) (((X) * 4) + 8)
822 #define FRAMESLOT(X) (((X) * 8) + 96)
824 #define FRAMESLOT(X) (((X) * 8) + 112)
829 #if defined(OS_AIX) || defined(OS_DARWIN)
831 #define FRAMESLOT(X) (((X) * 4) + 56)
833 #define FRAMESLOT(X) (((X) * 8) + 112)