1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/* Barrier macros. In this file both MB and WMB expand to a single */
/* `nop` instruction issued through volatile inline assembly. */
42 #define MB __asm__ __volatile__ ("nop")
43 #define WMB __asm__ __volatile__ ("nop")
/* blas_lock: operates on the lock word pointed to by `address`. */
/* The visible part of the body spins while *address is nonzero, */
/* invoking YIELDING on every pass. */
/* NOTE(review): the remainder of the body is not visible in this */
/* excerpt; confirm how the lock word is actually acquired. */
47 static void __inline blas_lock(volatile unsigned long *address){
52 while (*address) {YIELDING;};
/* Marker macro announcing that this file supplies blas_lock. */
61 #define BLAS_LOCK_DEFINED
/* rpcc: reads the %tick register into `clocks` with a volatile */
/* `rd` instruction. */
/* NOTE(review): the declaration of `clocks` and the return */
/* statement are not visible here; presumably `clocks` is returned. */
63 static __inline unsigned long rpcc(void){
66 __asm__ __volatile__ ("rd %%tick, %0" : "=r" (clocks));
/* Define __BIG_ENDIAN__ here when nothing has defined it yet. */
76 #ifndef __BIG_ENDIAN__
77 #define __BIG_ENDIAN__
/* GET_IMAGE(res): copies a floating-point register into `res` */
/* (an "=f" output operand) with a "memory" clobber. Two */
/* alternative definitions follow: the first uses `fmovd` from %f2, */
/* the second uses `fmovs` from %f1. */
/* NOTE(review): the conditional that selects between them is not */
/* visible in this excerpt. */
81 #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
83 #define GET_IMAGE(res) __asm__ __volatile__("fmovs %%f1, %0" : "=f"(res) : : "memory")
/* GET_IMAGE_CANCEL is defined with an empty expansion. */
86 #define GET_IMAGE_CANCEL
/* blas_quickdivide: takes two blasint values and returns an int. */
/* NOTE(review): the body is not visible in this excerpt; */
/* presumably it yields the quotient x / y -- confirm. */
89 static __inline int blas_quickdivide(blasint x, blasint y){
/* Stack settings for assembly code. Two alternative pairs are */
/* given: STACK_START 128 with a `save` that adjusts %sp by -64, */
/* and STACK_START 2423 with a `save` that adjusts %sp by -256. */
/* NOTE(review): the conditional choosing between the pairs is not */
/* visible in this excerpt. */
99 #define STACK_START 128
100 #define SAVESP save %sp, -64, %sp
102 #define STACK_START 2423
103 #define SAVESP save %sp, -256, %sp
/* NOP: ORs %g1 with itself back into %g1, so the register keeps */
/* its value. */
106 #define NOP or %g1, %g1, %g1
/* HALT: expands to a `prefetch` of address [%g0] with function */
/* code 5. */
/* NOTE(review): the intended effect of this encoding is not */
/* stated in the file; confirm against the target CPU manual. */
136 #define HALT prefetch [%g0], 5
/* FMADDS / FMADDD: emit a raw instruction word with `.word`. */
/* Field layout: 2 at bit 30, rd at bit 25, 0x37 at bit 19, rs1 at */
/* bit 14, rs3 at bit 9, a selector at bit 5, and rs2 in the low */
/* bits. The selector is 1 for FMADDS and 2 for FMADDD. */
/* Arguments must be register numbers, not register names. */
138 #define FMADDS(rs1, rs2, rs3, rd) \
139 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 1 << 5) | (rs2))
141 #define FMADDD(rs1, rs2, rs3, rd) \
142 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 2 << 5) | (rs2))
/* FMSUBS / FMSUBD: emit a raw instruction word with `.word`. */
/* Field layout: 2 at bit 30, rd at bit 25, 0x37 at bit 19, rs1 at */
/* bit 14, rs3 at bit 9, a selector at bit 5, and rs2 in the low */
/* bits. The selector is 5 for FMSUBS and 6 for FMSUBD. */
144 #define FMSUBS(rs1, rs2, rs3, rd) \
145 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 5 << 5) | (rs2))
147 #define FMSUBD(rs1, rs2, rs3, rd) \
148 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 6 << 5) | (rs2))
/* FNMSUBS / FNMSUBD: emit a raw instruction word with `.word`. */
/* Field layout: 2 at bit 30, rd at bit 25, 0x37 at bit 19, rs1 at */
/* bit 14, rs3 at bit 9, a selector at bit 5, and rs2 in the low */
/* bits. The selector is 9 for FNMSUBS and 10 for FNMSUBD. */
150 #define FNMSUBS(rs1, rs2, rs3, rd) \
151 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 9 << 5) | (rs2))
153 #define FNMSUBD(rs1, rs2, rs3, rd) \
154 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | (10 << 5) | (rs2))
/* FNMADDS / FNMADDD: emit a raw instruction word with `.word`. */
/* Field layout: 2 at bit 30, rd at bit 25, 0x37 at bit 19, rs1 at */
/* bit 14, rs3 at bit 9, a selector at bit 5, and rs2 in the low */
/* bits. The selector is 13 for FNMADDS and 14 for FNMADDD. */
156 #define FNMADDS(rs1, rs2, rs3, rd) \
157 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | (13 << 5) | (rs2))
159 #define FNMADDD(rs1, rs2, rs3, rd) \
160 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | (14 << 5) | (rs2))
163 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x61 << 5))
166 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x60 << 5))
169 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x7f << 5))
172 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x7e << 5))
/* Precision-neutral names mapped onto the S-suffixed macros */
/* (FCLRS, FONES, FMADDS, FMSUBS, FNMADDS, FNMSUBS). */
/* NOTE(review): the conditional guarding this group is not */
/* visible; presumably it is the single-precision build -- confirm. */
175 #define FCLR(a) FCLRS(a)
176 #define FONE(a) FONES(a)
177 #define FMADD(a, b, c, d) FMADDS(a, b, c, d)
178 #define FMSUB(a, b, c, d) FMSUBS(a, b, c, d)
179 #define FNMADD(a, b, c, d) FNMADDS(a, b, c, d)
180 #define FNMSUB(a, b, c, d) FNMSUBS(a, b, c, d)
/* Precision-neutral names mapped onto the D-suffixed macros */
/* (FCLRD, FONED, FMADDD, FMSUBD, FNMADDD, FNMSUBD). */
/* NOTE(review): the conditional guarding this group is not */
/* visible; presumably it is the double-precision build -- confirm. */
182 #define FCLR(a) FCLRD(a)
183 #define FONE(a) FONED(a)
184 #define FMADD(a, b, c, d) FMADDD(a, b, c, d)
185 #define FMSUB(a, b, c, d) FMSUBD(a, b, c, d)
186 #define FNMADD(a, b, c, d) FNMADDD(a, b, c, d)
187 #define FNMSUB(a, b, c, d) FNMSUBD(a, b, c, d)
/* REALNAME is the symbol name used by the assembly prologue and */
/* epilogue text in this file. It is set to ASMNAME in one */
/* configuration and to ASMFNAME in the other. */
/* NOTE(review): the selecting conditional is not visible here. */
191 #define REALNAME ASMNAME
193 #define REALNAME ASMFNAME
201 .type REALNAME, #function; \
/* On Linux ELF targets, GNUSTACK expands to a directive that */
/* opens an empty .note.GNU-stack section of type @progbits. */
205 #if defined(__linux__) && defined(__ELF__)
206 #define GNUSTACK .section .note.GNU-stack,"",@progbits
212 .size REALNAME, .-REALNAME; \
/* Size constants: 32 << 20 is 32 MiB, 8 << 10 is 8 KiB and */
/* 4 << 20 is 4 MiB. */
223 #define BUFFER_SIZE (32 << 20)
226 #define PAGESIZE ( 8 << 10)
228 #define HUGE_PAGESIZE ( 4 << 20)
/* BASE_ADDRESS lies BUFFER_SIZE * MAX_CPU_NUMBER bytes below */
/* START_ADDRESS. START_ADDRESS and MAX_CPU_NUMBER are defined */
/* outside this excerpt. */
230 #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/* Fall back to MAP_ANON when MAP_ANONYMOUS is not already defined. */
232 #ifndef MAP_ANONYMOUS
233 #define MAP_ANONYMOUS MAP_ANON