5 // Copyright (c) 2011 Apple Inc.
10 #include "LinearMath/btScalar.h"
11 #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
14 #include "Test_mindot.h"
21 #include <LinearMath/btVector3.h>
24 // reference code for testing purposes
25 static long mindot_ref( const btSimdFloat4 *vertices,
// Size limits for the test buffer: MAX_SIZE is 2^MAX_LOG2_SIZE elements (512 when MAX_LOG2_SIZE is 9).
// NOTE(review): MAX_LOG2_SIZE appears twice with the same value in this view; presumably the two
// definitions belong to different branches of a preprocessor conditional that is not shown — confirm.
31 #define MAX_LOG2_SIZE 9
33 #define MAX_LOG2_SIZE 9
35 #define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Test data setup: allocate MAX_SIZE four-float elements and fill them with test values.
40 // Init an array flanked by guard pages
41 btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
42 float *fp = (float*) data;
// The vector that every element is dotted against in the tests below.
44 btVector3 localScaling( 0.1f, 0.2f, 0.3f);
// x, y and z of each element receive RANDF_16 converted to int32_t (and back to float on the store);
// the fourth float is set to NaN.
49 for( i = 0; i < MAX_SIZE; i++ )
51 fp[4*i] = (int32_t) RANDF_16;
52 fp[4*i+1] = (int32_t) RANDF_16;
53 fp[4*i+2] = (int32_t) RANDF_16;
54 fp[4*i+3] = BT_NAN; // w channel NaN
57 float correctDot, testDot;
// From here on fp refers to localScaling instead of the data buffer; it is read as fp[0..2]
// when the diagnostic messages recompute dot products.
// NOTE(review): the cast has no '&', so it presumably relies on a conversion provided by btVector3 — confirm.
58 fp = (float*) localScaling;
// Largest relative difference seen between the reference and the tested dot product.
59 float maxRelativeError = 0.f;
// Correctness sweep: for every array length from 1 to MAX_SIZE and every slot in that array,
// push one element far below the others, then check that localScaling.minDot() agrees with
// mindot_ref() on both the reported index and the reported dot product.
61 for( size = 1; size <= MAX_SIZE; size++ )
// Use the last `size` elements of the buffer, so the tested range ends exactly at the end of the allocation.
63 float *in = (float*)(data + MAX_SIZE - size);
66 for( position = 0; position < size; position++ )
// Save the element at `position`, then subtract LARGE_FLOAT17 from all four of its floats.
// The "Biggest not found" check below expects minDot() to report this slot.
68 float *biggest = in + position * 4;
69 float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
70 biggest[0] -= LARGE_FLOAT17;
71 biggest[1] -= LARGE_FLOAT17;
72 biggest[2] -= LARGE_FLOAT17;
73 biggest[3] -= LARGE_FLOAT17;
// Run the reference implementation and the implementation under test on the same data.
77 correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
78 test = localScaling.minDot( (btVector3*) in, size, testDot);
79 if( test < 0 || test >= size )
81 vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
86 vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
87 fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
88 fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
91 if( test != position )
93 vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
94 fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
95 fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
99 if( correctDot != testDot )
101 float relativeError = btFabs((testDot - correctDot) / correctDot);
// Tolerance on the relative difference between the two dot products. It must be a small
// fraction (1e-6): a threshold of 1e6 would only fire when the results differ by a factor of
// a million, so wrong dot products would go unreported as errors.
102 if (relativeError>1e-6)
104 vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
105 fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
106 fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
// Keep the largest relative difference seen; it is printed as a warning after the sweep.
110 if (maxRelativeError < relativeError)
112 maxRelativeError = relativeError;
// Put the saved four floats (16 bytes) back so the next position starts from the original data.
118 memcpy( biggest, old, 16 );
122 if (maxRelativeError)
124 printf("Warning: relative error = %e\n", maxRelativeError);
// Timing passes. Each array holds one result per measured size: slots 1..32 are indexed
// directly by sizes 1..32, and slots from 33 upward are used for the power-of-two sizes
// 64..MAX_SIZE (MAX_LOG2_SIZE-5 of them).
126 uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
127 uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
// The timing runs read from the start of the buffer.
129 float *in = (float*) data;
// Scalar reference, sizes 1..32: 100 repetitions of LOOPCOUNT calls each. The total is
// accumulated and the fastest repetition is tracked; gReportAverageTimes == 0 selects the
// fastest one.
// NOTE(review): the initial value of bestTime and the branch structure around the "/= 100"
// (presumably an else that yields the average) are on lines not shown in this view — confirm.
130 for( size = 1; size <= 32; size++ )
132 uint64_t startTime, bestTime, currentTime;
135 scalarTimes[size] = 0;
136 for (j = 0; j < 100; j++) {
137 startTime = ReadTicks();
138 for( k = 0; k < LOOPCOUNT; k++ )
// Results are summed into `correct` so the calls have an observable effect.
139 correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
140 currentTime = ReadTicks() - startTime;
141 scalarTimes[size] += currentTime;
142 if( currentTime < bestTime )
143 bestTime = currentTime;
145 if( 0 == gReportAverageTimes )
146 scalarTimes[size] = bestTime;
148 scalarTimes[size] /= 100;
// Scalar reference, power-of-two sizes from 64 to MAX_SIZE; timep points at the slot being filled.
// NOTE(review): the code that advances timep and stores the final value is not shown in this view.
151 uint64_t *timep = &scalarTimes[33];
152 for( size = 64; size <= MAX_SIZE; size *= 2 )
154 uint64_t startTime, bestTime, currentTime;
158 for (j = 0; j < 100; j++) {
159 startTime = ReadTicks();
160 for( k = 0; k < LOOPCOUNT; k++ )
161 correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
162 currentTime = ReadTicks() - startTime;
163 timep[0] += currentTime;
164 if( currentTime < bestTime )
165 bestTime = currentTime;
167 if( 0 == gReportAverageTimes )
// Implementation under test (localScaling.minDot), sizes 1..32: same measurement scheme as above.
175 for( size = 1; size <= 32; size++ )
177 uint64_t startTime, bestTime, currentTime;
180 vectorTimes[size] = 0;
181 for (j = 0; j < 100; j++) {
182 startTime = ReadTicks();
183 for( k = 0; k < LOOPCOUNT; k++ )
// Results are summed into `test`, which is compared against `correct` after the report.
184 test += localScaling.minDot( (btVector3*) in, size, testDot);
185 currentTime = ReadTicks() - startTime;
186 vectorTimes[size] += currentTime;
187 if( currentTime < bestTime )
188 bestTime = currentTime;
190 if( 0 == gReportAverageTimes )
191 vectorTimes[size] = bestTime;
193 vectorTimes[size] /= 100;
// Implementation under test, power-of-two sizes from 64 to MAX_SIZE.
196 timep = &vectorTimes[33];
197 for( size = 64; size <= MAX_SIZE; size *= 2 )
199 uint64_t startTime, bestTime, currentTime;
203 for (j = 0; j < 100; j++) {
204 startTime = ReadTicks();
205 for( k = 0; k < LOOPCOUNT; k++ )
206 test += localScaling.minDot( (btVector3*) in, size, testDot);
207 currentTime = ReadTicks() - startTime;
208 timep[0] += currentTime;
209 if( currentTime < bestTime )
210 bestTime = currentTime;
212 if( 0 == gReportAverageTimes )
// Report: one row per measured size, showing TicksToCycles(time) / LOOPCOUNT for the scalar
// reference and for the implementation under test.
221 vlog( " size\t scalar\t vector\n" );
222 for( size = 1; size <= 32; size++ )
223 vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
// Rows for the power-of-two sizes.
// NOTE(review): `index` is defined on a line not shown in this view; presumably it selects the
// slot at 33 and above that matches `size` — confirm.
225 for( size = 64; size <= MAX_SIZE; size *= 2 )
227 vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
// `test` and `correct` hold the sums of the indices returned during the timing loops; reading
// them here gives those loops an observable result.
231 // Useless check to make sure that the timing loops are not optimized away
232 if( test != correct )
233 vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
242 static long mindot_ref( const btSimdFloat4 *vertices,
248 const float *dp = (const float*) vertices;
249 float minDot = BT_INFINITY;
253 for( i = 0; i < count; i++ )
255 float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2]; dp += 4;