1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/*
 * MB, WMB and RMB each expand to an inline-asm statement that emits a single
 * "nop" instruction; the three definitions are identical.
 * NOTE(review): by name these look like the full / write / read memory
 * barrier hooks, implemented as no-ops on this target - confirm against the
 * callers before relying on any ordering guarantee.
 */
42 #define MB __asm__ __volatile__ ("nop")
43 #define WMB __asm__ __volatile__ ("nop")
44 #define RMB __asm__ __volatile__ ("nop")
48 static void __inline blas_lock(volatile unsigned long *address){
53 while (*address) {YIELDING;};
/*
 * Flag macro with an empty expansion.
 * NOTE(review): presumably tells shared code that this header already
 * supplies blas_lock() - confirm where the flag is tested.
 */
62 #define BLAS_LOCK_DEFINED
64 static __inline unsigned long rpcc(void){
67 __asm__ __volatile__ ("rd %%tick, %0" : "=r" (clocks));
/* Define __BIG_ENDIAN__ (empty expansion) unless it is already defined. */
77 #ifndef __BIG_ENDIAN__
78 #define __BIG_ENDIAN__
/*
 * Flag macro with an empty expansion.
 * NOTE(review): the name suggests that some results are handed back through
 * memory rather than registers on this target - confirm at the point of use.
 */
83 #define RETURN_BY_STACK
/*
 * GET_IMAGE(res): inline asm that moves a floating-point register into res.
 * res is bound with the "=f" output constraint, and the "memory" clobber is
 * listed so the compiler treats the statement as touching memory.
 *
 * Two alternative definitions follow; only one of them may be active in a
 * given build.  This one uses fmovd and reads %f2.
 * NOTE(review): presumably the double-precision variant, fetching the
 * imaginary part of a complex return value - confirm against the callers.
 */
88 #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
/* Alternative definition: uses fmovs and reads %f1 instead. */
90 #define GET_IMAGE(res) __asm__ __volatile__("fmovs %%f1, %0" : "=f"(res) : : "memory")
/*
 * Flag macro with an empty expansion.
 * NOTE(review): its relationship to GET_IMAGE is not evident from this
 * definition - confirm where it is tested.
 */
93 #define GET_IMAGE_CANCEL
96 static __inline int blas_quickdivide(blasint x, blasint y){
/*
 * STACK_START / SAVESP come in two alternative pairs; only one pair may be
 * active in a given build.
 *
 * SAVESP expands to a "save %sp, -N, %sp" instruction, i.e. the new %sp is
 * the old %sp plus -N (N = 64 in this pair).
 * NOTE(review): STACK_START is presumably the offset from the stack/frame
 * pointer at which stack-passed data begins - confirm at the point of use.
 */
106 #define STACK_START 128
107 #define SAVESP save %sp, -64, %sp
/*
 * Alternative pair with N = 256.
 * NOTE(review): 2423 = 2047 + 376, which would be consistent with the
 * 2047-byte SPARC V9 stack bias being folded into the offset - confirm.
 */
109 #define STACK_START 2423
110 #define SAVESP save %sp, -256, %sp
/*
 * NOP: ORs %g1 with itself and writes the result back to %g1, so the
 * register value is left unchanged.
 */
113 #define NOP or %g1, %g1, %g1
/*
 * HALT: expands to a prefetch of address [%g0] with function code 5.
 * NOTE(review): the intended effect of this particular prefetch variant is
 * not evident from the definition - confirm against the ISA manual.
 */
143 #define HALT prefetch [%g0], 5
/*
 * FMADD / FMSUB / FNMSUB / FNMADD instruction family (S and D variants),
 * emitted as raw 32-bit words with .word instead of assembler mnemonics.
 * NOTE(review): presumably done because the assembler in use may not
 * recognise these mnemonics - confirm.
 *
 * All eight macros build the same word and differ only in the selector
 * value shifted to bit 5:
 *
 *     2    << 30    fixed
 *     rd   << 25
 *     0x37 << 19    fixed
 *     rs1  << 14
 *     rs3  <<  9
 *     sel  <<  5    1/2 = FMADD S/D,   5/6   = FMSUB S/D,
 *                   9/10 = FNMSUB S/D, 13/14 = FNMADD S/D
 *     rs2           low bits
 *
 * The arguments are used inside shift expressions, so they must be plain
 * integer register numbers, not register names.  They are not masked or
 * range-checked: a value wider than its field spills into the neighbouring
 * field.
 */
145 #define FMADDS(rs1, rs2, rs3, rd) \
146 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 1 << 5) | (rs2))
/* Same encoding as FMADDS with selector 2. */
148 #define FMADDD(rs1, rs2, rs3, rd) \
149 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 2 << 5) | (rs2))
/* Selector 5. */
151 #define FMSUBS(rs1, rs2, rs3, rd) \
152 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 5 << 5) | (rs2))
/* Selector 6. */
154 #define FMSUBD(rs1, rs2, rs3, rd) \
155 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 6 << 5) | (rs2))
/* Selector 9. */
157 #define FNMSUBS(rs1, rs2, rs3, rd) \
158 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | ( 9 << 5) | (rs2))
/* Selector 10. */
160 #define FNMSUBD(rs1, rs2, rs3, rd) \
161 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | (10 << 5) | (rs2))
/* Selector 13. */
163 #define FNMADDS(rs1, rs2, rs3, rd) \
164 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | (13 << 5) | (rs2))
/* Selector 14. */
166 #define FNMADDD(rs1, rs2, rs3, rd) \
167 .word ((2 << 30) | ((rd) << 25) | ( 0x37 << 19) | ((rs1) << 14) | ((rs3) << 9) | (14 << 5) | (rs2))
170 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x61 << 5))
173 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x60 << 5))
176 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x7f << 5))
179 .word ((2 << 30) | ((rd) << 25) | ( 0x36 << 19) | ( 0x7e << 5))
/*
 * Suffix-free aliases for the S/D-suffixed macros.  Two alternative sets
 * follow; only one set may be active in a given build.
 *
 * This set forwards every name to its S-suffixed counterpart.
 * NOTE(review): presumably the single-precision build - confirm against the
 * selecting preprocessor condition.
 */
182 #define FCLR(a) FCLRS(a)
183 #define FONE(a) FONES(a)
184 #define FMADD(a, b, c, d) FMADDS(a, b, c, d)
185 #define FMSUB(a, b, c, d) FMSUBS(a, b, c, d)
186 #define FNMADD(a, b, c, d) FNMADDS(a, b, c, d)
187 #define FNMSUB(a, b, c, d) FNMSUBS(a, b, c, d)
/*
 * Alternative set: forwards every name to its D-suffixed counterpart.
 * NOTE(review): presumably the double-precision build - confirm.
 */
189 #define FCLR(a) FCLRD(a)
190 #define FONE(a) FONED(a)
191 #define FMADD(a, b, c, d) FMADDD(a, b, c, d)
192 #define FMSUB(a, b, c, d) FMSUBD(a, b, c, d)
193 #define FNMADD(a, b, c, d) FNMADDD(a, b, c, d)
194 #define FNMSUB(a, b, c, d) FNMSUBD(a, b, c, d)
/*
 * REALNAME is the symbol name used by the .type / .size directives in this
 * file.  Two alternative definitions follow; only one may be active in a
 * given build.  This one resolves to ASMNAME, which is defined elsewhere.
 */
198 #define REALNAME ASMNAME
/* Alternative: resolve to ASMFNAME (also defined elsewhere). */
200 #define REALNAME ASMFNAME
208 .type REALNAME, #function; \
212 #if defined(__linux__) && defined(__ELF__)
/*
 * On Linux ELF builds GNUSTACK expands to a directive that opens a
 * .note.GNU-stack section with empty flags and type @progbits.
 * NOTE(review): GNU toolchains read this note section to decide whether an
 * object needs an executable stack; empty flags should mean "not needed" -
 * confirm against the binutils documentation.
 */
213 #define GNUSTACK .section .note.GNU-stack,"",@progbits
219 .size REALNAME, .-REALNAME; \
230 #define BUFFER_SIZE (32 << 20) /* 33554432; 32 MiB assuming the unit is bytes */
/* 8192; 8 KiB assuming the unit is bytes. */
233 #define PAGESIZE ( 8 << 10)
/* 4194304; 4 MiB assuming the unit is bytes. */
235 #define HUGE_PAGESIZE ( 4 << 20)
/*
 * BASE_ADDRESS: START_ADDRESS lowered by one BUFFER_SIZE for each of
 * MAX_CPU_NUMBER CPUs.  START_ADDRESS and MAX_CPU_NUMBER are defined
 * elsewhere.
 * NOTE(review): BUFFER_SIZE is an int expression (2^25), and the product is
 * evaluated before the subtraction.  If MAX_CPU_NUMBER is also an int, the
 * product overflows a 32-bit int once MAX_CPU_NUMBER reaches 64 - confirm
 * the types involved.
 */
237 #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/*
 * Compatibility fallback: when MAP_ANONYMOUS is not already defined, make it
 * an alias for MAP_ANON.
 */
239 #ifndef MAP_ANONYMOUS
240 #define MAP_ANONYMOUS MAP_ANON