Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / math / m_debug_util.h
1 /*
2  * Mesa 3-D graphics library
3  * Version:  6.1
4  *
5  * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Gareth Hughes
26  */
27
28 #ifndef __M_DEBUG_UTIL_H__
29 #define __M_DEBUG_UTIL_H__
30
31
32 #ifdef DEBUG_MATH  /* This code only used for debugging */
33
34
35 /* Comment this out to deactivate the cycle counter.
36  * NOTE: it works only on CPUs which know the 'rdtsc' instruction (586 or higher)
37  * (hope, you don't try to debug Mesa on a 386 ;)
 *
 * RUN_DEBUG_BENCHMARK is defined here only when the compiler defines
 * __GNUC__ and the target is i386 with USE_X86_ASM, or SPARC with
 * USE_SPARC_ASM.
 * NOTE(review): __x86_64__ is not part of this condition, so the x86-64
 * timing macros further down are only compiled if RUN_DEBUG_BENCHMARK is
 * defined by some other means - confirm whether that is intended.
38  */
39 #if defined(__GNUC__) && \
40     ((defined(__i386__) && defined(USE_X86_ASM)) || \
41      (defined(__sparc__) && defined(USE_SPARC_ASM)))
42 #define  RUN_DEBUG_BENCHMARK
43 #endif
44
/* Sizes and precision thresholds shared by the math debugging tests. */
45 #define TEST_COUNT              128     /* size of the tested vector array   */
46
/* NOTE(review): MAX_PRECISION - REQUIRED_PRECISION is 14, which does not
 * agree with the "allow 4 bits to miss" remark below - confirm which of the
 * value or the remark is stale.
 */
47 #define REQUIRED_PRECISION      10      /* allow 4 bits to miss              */
48 #define MAX_PRECISION           24      /* max. precision possible           */
49
50
51 #ifdef  RUN_DEBUG_BENCHMARK
52 /* Overhead of profiling counter in cycles.  Automatically adjusted to
53  * your machine at run time - counter initialization should give very
54  * consistent results.
55  */
56 extern long counter_overhead;
57
58 /* This is the value of the environment variable MESA_PROFILE, and is
59  * used to determine if we should benchmark the functions as well as
60  * verify their correctness.
61  */
62 extern char *mesa_profile;
63
64 /* Modify the number of tests if you like.
65  * We take the minimum of all results, because every error should be
66  * positive (time used by other processes, task switches etc).
67  * It is assumed that all calculations are done in the cache.
68  */
69
70 #if defined(__i386__)
71
72 #if 1 /* PPro, PII, PIII version */
73
74 /* Profiling on the P6 architecture requires a little more work, due to
75  * the internal out-of-order execution.  We must perform a serializing
76  * 'cpuid' instruction before and after the 'rdtsc' instructions to make
77  * sure no other uops are executed when we sample the timestamp counter.
 *
 * INIT_COUNTER() calibrates counter_overhead: it takes two back-to-back
 * timestamp samples eight times and keeps the smallest difference.
 * Only the low 32 bits of the timestamp (EAX) are stored.  EBX, which
 * 'cpuid' overwrites, is saved and restored by hand with push/pop instead
 * of being listed as a clobber.  LONG_MAX (<limits.h>) and the
 * counter_overhead variable must be visible where the macro is expanded.
 *
 * NOTE(review): the "=m" outputs are written while the pushed EBX is still
 * on the stack; if the compiler addresses cycle_tmp1/cycle_tmp2 relative to
 * ESP the stores would land 4 bytes off - confirm the build always uses a
 * frame pointer here.
78  */
79 #define  INIT_COUNTER()                                                 \
80    do {                                                                 \
81       int cycle_i;                                                      \
82       counter_overhead = LONG_MAX;                                      \
83       for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {                   \
84          long cycle_tmp1 = 0, cycle_tmp2 = 0;                           \
85          __asm__ __volatile__ ( "push %%ebx       \n"                   \
86                                 "xor %%eax, %%eax \n"                   \
87                                 "cpuid            \n"                   \
88                                 "rdtsc            \n"                   \
89                                 "mov %%eax, %0    \n"                   \
90                                 "xor %%eax, %%eax \n"                   \
91                                 "cpuid            \n"                   \
92                                 "pop %%ebx        \n"                   \
93                                 "push %%ebx       \n"                   \
94                                 "xor %%eax, %%eax \n"                   \
95                                 "cpuid            \n"                   \
96                                 "rdtsc            \n"                   \
97                                 "mov %%eax, %1    \n"                   \
98                                 "xor %%eax, %%eax \n"                   \
99                                 "cpuid            \n"                   \
100                                 "pop %%ebx        \n"                   \
101                                 : "=m" (cycle_tmp1), "=m" (cycle_tmp2)  \
102                                 : : "eax", "ecx", "edx" );              \
103          if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
104             counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
105          }                                                              \
106       }                                                                 \
107    } while (0)
108
/* BEGIN_RACE(x) ... END_RACE(x) bracket the code to be timed (P6 variant).
 *
 * BEGIN_RACE sets x to LONG_MAX, opens a 10-iteration for-loop and takes a
 * serialized timestamp sample into cycle_tmp1.  The matching END_RACE takes
 * the second sample, keeps the smallest difference seen so far in x, closes
 * the loop and finally subtracts counter_overhead from x.
 *
 * The two macros only make sense as a pair in the same scope: BEGIN_RACE
 * leaves a '{' open that END_RACE closes.  The loop variable cycle_i is not
 * declared here and has to be provided by the caller.  Neither macro is
 * wrapped in do/while, so they expand to several statements.
 */
109 #define  BEGIN_RACE(x)                                                  \
110    x = LONG_MAX;                                                        \
111    for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
112       long cycle_tmp1 = 0, cycle_tmp2 = 0;                              \
113       __asm__ __volatile__ ( "push %%ebx       \n"                      \
114                              "xor %%eax, %%eax \n"                      \
115                              "cpuid            \n"                      \
116                              "rdtsc            \n"                      \
117                              "mov %%eax, %0    \n"                      \
118                              "xor %%eax, %%eax \n"                      \
119                              "cpuid            \n"                      \
120                              "pop %%ebx        \n"                      \
121                              : "=m" (cycle_tmp1)                        \
122                              : : "eax", "ecx", "edx" );
123
/* Second half of the pair: uses cycle_tmp1/cycle_tmp2 declared by
 * BEGIN_RACE and closes the loop it opened.
 */
124 #define END_RACE(x)                                                     \
125       __asm__ __volatile__ ( "push %%ebx       \n"                      \
126                              "xor %%eax, %%eax \n"                      \
127                              "cpuid            \n"                      \
128                              "rdtsc            \n"                      \
129                              "mov %%eax, %0    \n"                      \
130                              "xor %%eax, %%eax \n"                      \
131                              "cpuid            \n"                      \
132                              "pop %%ebx        \n"                      \
133                              : "=m" (cycle_tmp2)                        \
134                              : : "eax", "ecx", "edx" );                 \
135       if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
136          x = cycle_tmp2 - cycle_tmp1;                                   \
137       }                                                                 \
138    }                                                                    \
139    x -= counter_overhead;
140
141 #else /* PPlain, PMMX version */
142
143 /* To ensure accurate results, we stall the pipelines with the
144  * non-pairable 'cdq' instruction.  This ensures all the code being
145  * profiled is complete when the 'rdtsc' instruction executes.
 *
 * This variant sits in the '#else' of an '#if 1', so it is currently never
 * compiled.  Unlike the other INIT_COUNTER() definitions in this file it
 * takes an argument: the smallest difference between two consecutive
 * 'rdtsc' samples over 32 iterations is stored in x.
 *
 * NOTE(review): the 'rdtsc' asm statements have outputs but are not marked
 * __volatile__, so the compiler may be allowed to merge or move them -
 * confirm before re-enabling this branch.
146  */
147 #define  INIT_COUNTER(x)                                                \
148    do {                                                                 \
149       int cycle_i;                                                      \
150       x = LONG_MAX;                                                     \
151       for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {                  \
152          long cycle_tmp1, cycle_tmp2, dummy;                            \
153          __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );               \
154          __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );               \
155          __asm__ ( "cdq" );                                             \
156          __asm__ ( "cdq" );                                             \
157          __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );         \
158          __asm__ ( "cdq" );                                             \
159          __asm__ ( "cdq" );                                             \
160          __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );         \
161          if ( x > (cycle_tmp2 - cycle_tmp1) )                           \
162             x = cycle_tmp2 - cycle_tmp1;                                \
163       }                                                                 \
164    } while (0)
165
/* BEGIN_RACE(x) ... END_RACE(x), Pentium / Pentium MMX variant (currently
 * not compiled, see the enclosing '#if 1').
 *
 * BEGIN_RACE sets x to LONG_MAX, opens a 16-iteration loop and samples the
 * timestamp into cycle_tmp1; END_RACE samples again into cycle_tmp2, keeps
 * the smallest difference in x, closes the loop and subtracts
 * counter_overhead.  cycle_tmp1, cycle_tmp2 and dummy are declared by
 * BEGIN_RACE and reused by END_RACE; cycle_i must be declared by the caller.
 *
 * Note that END_RACE subtracts the global counter_overhead, while this
 * variant's INIT_COUNTER(x) stores its result in whatever x it is given.
 */
166 #define  BEGIN_RACE(x)                                                  \
167    x = LONG_MAX;                                                        \
168    for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                     \
169       long cycle_tmp1, cycle_tmp2, dummy;                               \
170       __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );                  \
171       __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );                  \
172       __asm__ ( "cdq" );                                                \
173       __asm__ ( "cdq" );                                                \
174       __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );
175
176
177 #define END_RACE(x)                                                     \
178       __asm__ ( "cdq" );                                                \
179       __asm__ ( "cdq" );                                                \
180       __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );            \
181       if ( x > (cycle_tmp2 - cycle_tmp1) )                              \
182          x = cycle_tmp2 - cycle_tmp1;                                   \
183    }                                                                    \
184    x -= counter_overhead;
185
186 #endif
187
188 #elif defined(__x86_64__)
189
/* rdtscll(val): read the CPU timestamp counter into the lvalue (val).
 *
 * 'rdtsc' returns the counter split over EAX (low half) and EDX (high
 * half); the two halves are recombined into one integer and assigned to
 * (val).
 *
 * The temporaries carry a macro-specific prefix so that they cannot shadow
 * the caller's argument: with plain names such as 'a' or 'd', rdtscll(a)
 * would silently assign to the macro's own local.  The high half is widened
 * to unsigned long long before shifting so that the shift by 32 is well
 * defined regardless of how wide 'unsigned long' is.
 */
#define rdtscll(val) do { \
     unsigned int rdtscll_lo_, rdtscll_hi_; \
     __asm__ volatile("rdtsc" : "=a" (rdtscll_lo_), "=d" (rdtscll_hi_)); \
     (val) = ((unsigned long long)rdtscll_lo_) | \
             (((unsigned long long)rdtscll_hi_) << 32); \
} while(0)
195
196 /* Copied from i386 PIII version
 *
 * Unlike that version, the counter is sampled with rdtscll() and no
 * serializing 'cpuid' is issued around the reads.  Two consecutive samples
 * are taken 16 times and the smallest difference is stored in
 * counter_overhead.  cycle_tmp1/cycle_tmp2 are unsigned long while
 * counter_overhead is declared long, so the comparison is carried out in
 * unsigned long.  LONG_MAX (<limits.h>) must be visible at the point of use.
 */
197 #define  INIT_COUNTER()                                                 \
198    do {                                                                 \
199       int cycle_i;                                                      \
200       counter_overhead = LONG_MAX;                                      \
201       for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                  \
202          unsigned long cycle_tmp1, cycle_tmp2;                          \
203          rdtscll(cycle_tmp1);                                           \
204          rdtscll(cycle_tmp2);                                           \
205          if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
206             counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
207          }                                                              \
208       }                                                                 \
209    } while (0)
210
211
/* BEGIN_RACE(x) ... END_RACE(x), x86-64 variant.
 *
 * BEGIN_RACE sets x to LONG_MAX, opens a 10-iteration loop and samples the
 * timestamp with rdtscll() into cycle_tmp1.  END_RACE samples again into
 * cycle_tmp2, keeps the smallest difference in x, closes the loop and then
 * subtracts counter_overhead from x.  The macros must be used as a pair in
 * the same scope, and the caller has to declare cycle_i.
 */
212 #define  BEGIN_RACE(x)                                                  \
213    x = LONG_MAX;                                                        \
214    for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
215       unsigned long cycle_tmp1, cycle_tmp2;                             \
216       rdtscll(cycle_tmp1);                                              \
217
218 #define END_RACE(x)                                                     \
219       rdtscll(cycle_tmp2);                                              \
220       if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
221          x = cycle_tmp2 - cycle_tmp1;                                   \
222       }                                                                 \
223    }                                                                    \
224    x -= counter_overhead;
225
226 #elif defined(__sparc__)
227
/* SPARC variant.
 *
 * INIT_COUNTER() does not measure anything: it sets counter_overhead to the
 * fixed value 5.
 *
 * BEGIN_RACE(x) ... END_RACE(x) time the enclosed code over 10 iterations
 * and leave the smallest tick difference, minus counter_overhead, in x.
 * The 'rd %tick' instructions are emitted as raw .word encodings whose
 * destination registers are %l0 and %l1 (per the comments next to them),
 * which is why cycle_tmp1 and cycle_tmp2 are pinned to those registers.
 * cycle_i must be declared by the caller, and the two macros must be used
 * as a pair because BEGIN_RACE opens the loop that END_RACE closes.
 */
228 #define  INIT_COUNTER() \
229          do { counter_overhead = 5; } while(0)
230
231 #define  BEGIN_RACE(x)                                                        \
232 x = LONG_MAX;                                                                 \
233 for (cycle_i = 0; cycle_i <10; cycle_i++) {                                   \
234    register long cycle_tmp1 __asm__("l0");                                    \
235    register long cycle_tmp2 __asm__("l1");                                    \
236    /* rd %tick, %l0 */                                                        \
237    __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));  /*  save timestamp   */
238
239 #define END_RACE(x)                                                           \
240    /* rd %tick, %l1 */                                                        \
241    __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));             \
242    if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;              \
243 }                                                                             \
244 x -= counter_overhead;
245
246 #else
/* Reached when RUN_DEBUG_BENCHMARK is defined but none of the i386, x86-64
 * or SPARC branches above applies.
 * NOTE(review): the message names RUN_XFORM_BENCHMARK, while the macro that
 * enables this section is RUN_DEBUG_BENCHMARK - the name looks stale.
 */
247 #error Your processor is not supported for RUN_XFORM_BENCHMARK
248 #endif
249
250 #else
251
/* Benchmarking disabled (RUN_DEBUG_BENCHMARK not defined): the race macros
 * expand to nothing, so the code between them simply runs once.
 * INIT_COUNTER is not defined in this configuration.
 */
252 #define BEGIN_RACE(x)
253 #define END_RACE(x)
254
255 #endif
256
257
258 /* =============================================================
259  * Helper functions
260  */
261
262 static GLfloat rnd( void )
263 {
264    GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
265    GLfloat gran = (GLfloat)(1 << 13);
266
267    f = (GLfloat)(GLint)(f * gran) / gran;
268
269    return f * 2.0 - 1.0;
270 }
271
272 static int significand_match( GLfloat a, GLfloat b )
273 {
274    GLfloat d = a - b;
275    int a_ex, b_ex, d_ex;
276
277    if ( d == 0.0F ) {
278       return MAX_PRECISION;   /* Exact match */
279    }
280
281    if ( a == 0.0F || b == 0.0F ) {
282       /* It would probably be better to check if the
283        * non-zero number is denormalized and return
284        * the index of the highest set bit here.
285        */
286       return 0;
287    }
288
289    FREXPF( a, &a_ex );
290    FREXPF( b, &b_ex );
291    FREXPF( d, &d_ex );
292
293    if ( a_ex < b_ex ) {
294       return a_ex - d_ex;
295    } else {
296       return b_ex - d_ex;
297    }
298 }
299
/* Small named integer constants.
 * NOTE(review): the names suggest "zero", "one", "minus one" and "variable"
 * entries - presumably templates for test matrix elements; confirm against
 * the files that use them.
 */
enum {
   NEG = -1,
   NIL =  0,
   ONE =  1,
   VAR =  2
};
301
/* Ensure our arrays are correctly aligned.
 *
 * ALIGN16(type, array) expands to a declaration of 'array' with element
 * type (and optional storage class) 'type', e.g.
 *
 *    ALIGN16(static GLfloat, v[TEST_COUNT][4]);
 *
 * Where the compiler offers an alignment extension the object is aligned
 * to 16 bytes.  On compilers without one the macro still expands to a
 * plain 'type array' declaration, so code using it keeps compiling, only
 * without the alignment guarantee.
 */
#if defined(__GNUC__)
#  define ALIGN16(type, array)  type array __attribute__ ((aligned (16)))
#elif defined(_MSC_VER)
   /* __declspec belongs at the start of the declaration, not after the
    * declarator.
    */
#  define ALIGN16(type, array)  __declspec(align(16)) type array
#elif defined(__WATCOMC__)
   /* Watcom does not support an alignment request: declare unaligned. */
#  define ALIGN16(type, array)  type array
#elif defined(__xlC__)
#  define ALIGN16(type, array)  type __align (16) array
#else
#  warning "ALIGN16 will not 16-byte align!\n"
   /* Keep the function-like form so ALIGN16(type, array) still yields a
    * valid declaration.
    */
#  define ALIGN16(type, array)  type array
#endif
316
317
318 #endif /* DEBUG_MATH */
319
320 #endif /* __M_DEBUG_UTIL_H__ */