Test/Source/Tests/Test_mindot.cpp

   1 //
   2 //  Test_mindot.cpp
   3 //  BulletTest
   4 //
   5 //  Copyright (c) 2011 Apple Inc.
   6 //
   7
   8
   9
  10 #include "LinearMath/btScalar.h"
  11 #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
  12
  13
  14 #include "Test_mindot.h"
  15 #include "vector.h"
  16 #include "Utils.h"
  17 #include "main.h"
  18 #include <math.h>
  19 #include <string.h>
  20
  21 #include <LinearMath/btVector3.h>
  22
  23
  24 // reference code for testing purposes
  25 static long mindot_ref(    const btSimdFloat4 *vertices,
  26                        float *vec,
  27                        size_t count,
  28                        float *dotResult );
  29
  30 #ifdef __arm__
  31     #define MAX_LOG2_SIZE   9
  32 #else
  33     #define MAX_LOG2_SIZE   9
  34 #endif
  35 #define MAX_SIZE        (1U << MAX_LOG2_SIZE)
  36 #define LOOPCOUNT 100
  37
  38 int Test_mindot(void)
  39 {
  40     // Init an array flanked by guard pages
  41     btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
  42     float *fp = (float*) data;
  43     long correct, test;
  44     btVector3 localScaling( 0.1f, 0.2f, 0.3f);
  45     size_t size;
  46
  47     // Init the data
  48     size_t i;
  49     for( i = 0; i < MAX_SIZE; i++ )
  50     {
  51         fp[4*i] = (int32_t) RANDF_16;
  52         fp[4*i+1] = (int32_t) RANDF_16;
  53         fp[4*i+2] = (int32_t) RANDF_16;
  54         fp[4*i+3] = BT_NAN;     // w channel NaN
  55     }
  56
  57     float correctDot, testDot;
  58     fp = (float*) localScaling;
  59         float maxRelativeError = 0.f;
  60
  61     for( size = 1; size <= MAX_SIZE; size++ )
  62     {
  63         float *in = (float*)(data + MAX_SIZE - size);
  64         size_t position;
  65
  66         for( position = 0; position < size; position++ )
  67         {
  68             float *biggest = in + position * 4;
  69             float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
  70             biggest[0] -= LARGE_FLOAT17;
  71             biggest[1] -= LARGE_FLOAT17;
  72             biggest[2] -= LARGE_FLOAT17;
  73             biggest[3] -= LARGE_FLOAT17;
  74
  75             correctDot = BT_NAN;
  76             testDot = BT_NAN;
  77             correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
  78             test = localScaling.minDot( (btVector3*) in, size, testDot);
  79             if( test < 0 || test >= size )
  80             {
  81                 vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
  82                 continue;
  83             }
  84             if( correct != test )
  85                         {
  86                 vlog( "Error @ %ld: index misreported! *%ld vs %ld  (*%f, %f)\n", size, correct, test,
  87                        fp[0] * in[4*correct] + fp[1] * in[4*correct+1]  + fp[2] * in[4*correct+2],
  88                        fp[0] * in[4*test] + fp[1] * in[4*test+1]  + fp[2] * in[4*test+2] );
  89                                 return 1;
  90                         }
  91             if( test != position )
  92                         {
  93                 vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
  94                        fp[0] * in[4*test] + fp[1] * in[4*test+1]  + fp[2] * in[4*test+2],
  95                        fp[0] * in[4*position] + fp[1] * in[4*position+1]  + fp[2] * in[4*position+2]  );
  96                                 return 1;
  97                         }
  98
  99             if( correctDot != testDot )
 100                         {
 101                                 float relativeError = btFabs((testDot - correctDot) / correctDot);
 102                                 if (relativeError>1e6)
 103                                 {
 104                                         vlog( "Error @ %ld: dotpr misreported! *%f vs %f    (*%f, %f)\n", size, correctDot, testDot,
 105                                                    fp[0] * in[4*correct] + fp[1] * in[4*correct+1]  + fp[2] * in[4*correct+2],
 106                                                    fp[0] * in[4*test] + fp[1] * in[4*test+1]  + fp[2] * in[4*test+2]  );
 107                                         return 1;
 108                                 } else
 109                                 {
 110                                         if (maxRelativeError < relativeError)
 111                                         {
 112                                                 maxRelativeError = relativeError;
 113                                         }
 114                                 }
 115                         }
 116
 117
 118             memcpy( biggest, old, 16 );
 119         }
 120     }
 121
 122         if (maxRelativeError)
 123         {
 124                 printf("Warning: relative error = %e\n", maxRelativeError);
 125         }
 126     uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
 127     uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
 128     size_t j, k;
 129     float *in = (float*) data;
 130     for( size = 1; size <= 32; size++ )
 131     {
 132         uint64_t startTime, bestTime, currentTime;
 133
 134         bestTime = -1LL;
 135         scalarTimes[size] = 0;
 136         for (j = 0; j < 100; j++) {
 137             startTime = ReadTicks();
 138             for( k = 0; k < LOOPCOUNT; k++ )
 139                 correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
 140             currentTime = ReadTicks() - startTime;
 141             scalarTimes[size] += currentTime;
 142             if( currentTime < bestTime )
 143                 bestTime = currentTime;
 144         }
 145         if( 0 == gReportAverageTimes )
 146             scalarTimes[size] = bestTime;
 147         else
 148             scalarTimes[size] /= 100;
 149     }
 150
 151     uint64_t *timep = &scalarTimes[33];
 152     for( size = 64; size <= MAX_SIZE; size *= 2 )
 153     {
 154         uint64_t startTime, bestTime, currentTime;
 155
 156         bestTime = -1LL;
 157         timep[0] =0;
 158         for (j = 0; j < 100; j++) {
 159             startTime = ReadTicks();
 160             for( k = 0; k < LOOPCOUNT; k++ )
 161                 correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
 162             currentTime = ReadTicks() - startTime;
 163             timep[0] += currentTime;
 164             if( currentTime < bestTime )
 165                 bestTime = currentTime;
 166         }
 167         if( 0 == gReportAverageTimes )
 168             timep[0] = bestTime;
 169         else
 170             timep[0] /= 100;
 171
 172         timep++;
 173     }
 174
 175     for( size = 1; size <= 32; size++ )
 176     {
 177         uint64_t startTime, bestTime, currentTime;
 178
 179         bestTime = -1LL;
 180         vectorTimes[size] = 0;
 181         for (j = 0; j < 100; j++) {
 182             startTime = ReadTicks();
 183             for( k = 0; k < LOOPCOUNT; k++ )
 184                 test += localScaling.minDot( (btVector3*) in, size, testDot);
 185             currentTime = ReadTicks() - startTime;
 186             vectorTimes[size] += currentTime;
 187             if( currentTime < bestTime )
 188                 bestTime = currentTime;
 189         }
 190         if( 0 == gReportAverageTimes )
 191             vectorTimes[size] = bestTime;
 192         else
 193             vectorTimes[size] /= 100;
 194     }
 195
 196     timep = &vectorTimes[33];
 197     for( size = 64; size <= MAX_SIZE; size *= 2 )
 198     {
 199         uint64_t startTime, bestTime, currentTime;
 200
 201         bestTime = -1LL;
 202         timep[0] =0;
 203         for (j = 0; j < 100; j++) {
 204             startTime = ReadTicks();
 205             for( k = 0; k < LOOPCOUNT; k++ )
 206                 test += localScaling.minDot( (btVector3*) in, size, testDot);
 207             currentTime = ReadTicks() - startTime;
 208             timep[0] += currentTime;
 209             if( currentTime < bestTime )
 210                 bestTime = currentTime;
 211         }
 212         if( 0 == gReportAverageTimes )
 213             timep[0] = bestTime;
 214         else
 215             timep[0] /= 100;
 216
 217         timep++;
 218     }
 219
 220     vlog( "Timing:\n" );
 221     vlog( " size\t    scalar\t    vector\n" );
 222     for( size = 1; size <= 32; size++ )
 223         vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
 224     size_t index = 33;
 225     for( size = 64; size <= MAX_SIZE; size *= 2 )
 226     {
 227         vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
 228         index++;
 229     }
 230
 231     // Useless check to make sure that the timing loops are not optimized away
 232     if( test != correct )
 233         vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
 234
 235     GuardFree(data);
 236
 237     return 0;
 238 }
 239
 240
 241
 242 static long mindot_ref(    const btSimdFloat4 *vertices,
 243                        float *vec,
 244                        size_t count,
 245                        float *dotResult )
 246 {
 247
 248     const float *dp = (const float*) vertices;
 249     float  minDot = BT_INFINITY;
 250     long i = 0;
 251     long ptIndex = -1;
 252
 253     for( i = 0; i < count; i++ )
 254     {
 255         float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2];   dp += 4;
 256
 257         if( dot < minDot )
 258         {
 259             minDot = dot;
 260             ptIndex = i;
 261         }
 262     }
 263
 264     *dotResult = minDot;
 265
 266     return ptIndex;
 267 }
 268
 269 #endif //BT_USE_SSE