Test/Source/Tests/Test_btDbvt.cpp

   1 //
   2 //  Test_btDbvt.cpp
   3 //  BulletTest
   4 //
   5 //  Copyright (c) 2011 Apple Inc., Inc.
   6 //
   7
   8
   9
  10 #include "LinearMath/btScalar.h"
  11 #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
  12
  13 #include "Test_btDbvt.h"
  14 #include "vector.h"
  15 #include "Utils.h"
  16 #include "main.h"
  17 #include <math.h>
  18 #include <string.h>
  19
  20 #include <BulletCollision/BroadphaseCollision/btDbvt.h>
  21
  22 // reference code for testing purposes
  23 SIMD_FORCE_INLINE bool Intersect_ref( btDbvtAabbMm& a,  btDbvtAabbMm& b)
  24 {
  25     return(     (a.tMins().x()<=b.tMaxs().x())&&
  26            (a.tMaxs().x()>=b.tMins().x())&&
  27            (a.tMins().y()<=b.tMaxs().y())&&
  28            (a.tMaxs().y()>=b.tMins().y())&&
  29            (a.tMins().z()<=b.tMaxs().z())&&
  30            (a.tMaxs().z()>=b.tMins().z()));
  31
  32    }
  33
  34
  35 SIMD_FORCE_INLINE btScalar      Proximity_ref(   btDbvtAabbMm& a,
  36                                                                    btDbvtAabbMm& b)
  37 {
  38         const btVector3 d=(a.tMins()+a.tMaxs())-(b.tMins()+b.tMaxs());
  39         return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z()));
  40 }
  41
  42
  43
  44 SIMD_FORCE_INLINE int                   Select_ref(      btDbvtAabbMm& o,
  45                                                             btDbvtAabbMm& a,
  46                                                             btDbvtAabbMm& b)
  47 {
  48         return(Proximity_ref(o,a)<Proximity_ref(o,b)?0:1);
  49 }
  50
  51
  52 SIMD_FORCE_INLINE void          Merge_ref(       btDbvtAabbMm& a,
  53                                                            btDbvtAabbMm& b,
  54                                                           btDbvtAabbMm& r)
  55 {
  56     //
  57     //Changing '3' into '4' to compare with the vector code which changes all 4 floats.
  58     //Erwin: don't do this because the 4th component is ignore and not computed on non-vector code (there is no NEON version and scalar is just 3 components)
  59     //
  60         for(int i=0;i<3;++i)
  61         {
  62                 if(a.tMins().m_floats[i]<b.tMins().m_floats[i])
  63             r.tMins().m_floats[i] = a.tMins().m_floats[i];
  64         else
  65             r.tMins().m_floats[i] = b.tMins().m_floats[i];
  66
  67
  68         if(a.tMaxs().m_floats[i]>b.tMaxs().m_floats[i])
  69             r.tMaxs().m_floats[i]=a.tMaxs().m_floats[i];
  70         else
  71             r.tMaxs().m_floats[i]=b.tMaxs().m_floats[i];
  72         }
  73 }
  74 /*
  75 [0]     float32_t       0.0318338
  76 [1]     float32_t       0.0309355
  77 [2]     float32_t       0.93264
  78 [3]     float32_t       0.88788
  79
  80 [0]     float32_t       0.59133
  81 [1]     float32_t       0.478779
  82 [2]     float32_t       0.833354
  83 [3]     float32_t       0.186335
  84
  85 [0]     float32_t       0.242578
  86 [1]     float32_t       0.0134696
  87 [2]     float32_t       0.383139
  88 [3]     float32_t       0.414653
  89
  90 [0]     float32_t       0.067769
  91 [1]     float32_t       0.993127
  92 [2]     float32_t       0.484308
  93 [3]     float32_t       0.765338
  94 */
  95
  96 #define LOOPCOUNT 1000
  97 #define NUM_CYCLES 10000
  98 #define DATA_SIZE 1024
  99
 100 int Test_btDbvt(void)
 101 {
 102     btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE];
 103     btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE];
 104
 105     int i;
 106
 107     bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE];
 108     int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE];
 109
 110
 111     for (i = 0; i < DATA_SIZE; i++)
 112     {
 113         a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
 114         a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
 115         a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
 116         a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
 117
 118         a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
 119         a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
 120         a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
 121         a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
 122
 123         b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
 124         b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
 125         b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
 126         b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
 127
 128         b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
 129         b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
 130         b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
 131         b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
 132
 133         c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
 134         c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
 135         c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
 136         c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
 137
 138         c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
 139         c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
 140         c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
 141         c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
 142
 143
 144         a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0];
 145         a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1];
 146         a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2];
 147         a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3];
 148
 149         a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0];
 150         a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1];
 151         a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2];
 152         a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3];
 153
 154         b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0];
 155         b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1];
 156         b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2];
 157         b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3];
 158
 159         b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0];
 160         b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1];
 161         b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2];
 162         b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3];
 163
 164         c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0];
 165         c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1];
 166         c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2];
 167         c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3];
 168
 169         c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0];
 170         c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1];
 171         c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2];
 172         c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3];
 173
 174     }
 175
 176
 177 #if 1
 178     for (i = 0; i < DATA_SIZE; i++)
 179     {
 180
 181         Intersect_Test_Res[i] = Intersect(a[i], b[i]);
 182         Intersect_Ref_Res[i]  = Intersect_ref(a_ref[i], b_ref[i]);
 183
 184         if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
 185         {
 186             printf("Diff on %d\n", i);
 187
 188             printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
 189             printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
 190             printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
 191             printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
 192
 193             printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
 194             printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
 195             printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
 196             printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
 197         }
 198     }
 199 #endif
 200
 201     uint64_t scalarTime;
 202     uint64_t vectorTime;
 203     size_t j;
 204
 205
 206     ////////////////////////////////////
 207     //
 208     // Time and Test Intersect
 209     //
 210     ////////////////////////////////////
 211         {
 212         uint64_t startTime, bestTime, currentTime;
 213
 214         bestTime = -1LL;
 215         scalarTime = 0;
 216         for (j = 0; j < NUM_CYCLES; j++)
 217                 {
 218             startTime = ReadTicks();
 219
 220
 221             for (i = 0; i < DATA_SIZE; i++)
 222             {
 223                 Intersect_Ref_Res[i]  = Intersect_ref(a_ref[i], b_ref[i]);
 224             }
 225
 226                         currentTime = ReadTicks() - startTime;
 227             scalarTime += currentTime;
 228             if( currentTime < bestTime )
 229                 bestTime = currentTime;
 230         }
 231         if( 0 == gReportAverageTimes )
 232             scalarTime = bestTime;
 233         else
 234             scalarTime /= NUM_CYCLES;
 235     }
 236
 237     {
 238         uint64_t startTime, bestTime, currentTime;
 239
 240         bestTime = -1LL;
 241         vectorTime = 0;
 242         for (j = 0; j < NUM_CYCLES; j++)
 243                 {
 244             startTime = ReadTicks();
 245
 246             for (i = 0; i < DATA_SIZE; i++)
 247             {
 248                 Intersect_Test_Res[i] = Intersect(a[i], b[i]);
 249             }
 250
 251                         currentTime = ReadTicks() - startTime;
 252             vectorTime += currentTime;
 253             if( currentTime < bestTime )
 254                 bestTime = currentTime;
 255         }
 256         if( 0 == gReportAverageTimes )
 257             vectorTime = bestTime;
 258         else
 259             vectorTime /= NUM_CYCLES;
 260     }
 261
 262     vlog( "Intersect Timing:\n" );
 263     vlog( "     \t    scalar\t    vector\n" );
 264     vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
 265
 266     //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
 267
 268     for (i = 0; i < DATA_SIZE; i++)
 269     {
 270         if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
 271         {
 272             printf("Intersect fail at %d\n", i);
 273                         return 1;
 274         }
 275     }
 276
 277     ////////////////////////////////////
 278     //
 279     // Time and Test Merge
 280     //
 281     ////////////////////////////////////
 282         {
 283         uint64_t startTime, bestTime, currentTime;
 284
 285         bestTime = -1LL;
 286         scalarTime = 0;
 287         for (j = 0; j < NUM_CYCLES; j++)
 288                 {
 289             startTime = ReadTicks();
 290
 291
 292             for (i = 0; i < DATA_SIZE; i++)
 293             {
 294                 Merge_ref(a_ref[i], b_ref[i], c_ref[i]);
 295             }
 296
 297                         currentTime = ReadTicks() - startTime;
 298             scalarTime += currentTime;
 299             if( currentTime < bestTime )
 300                 bestTime = currentTime;
 301         }
 302         if( 0 == gReportAverageTimes )
 303             scalarTime = bestTime;
 304         else
 305             scalarTime /= NUM_CYCLES;
 306     }
 307
 308
 309     {
 310         uint64_t startTime, bestTime, currentTime;
 311
 312         bestTime = -1LL;
 313         vectorTime = 0;
 314         for (j = 0; j < NUM_CYCLES; j++)
 315                 {
 316             startTime = ReadTicks();
 317
 318             for (i = 0; i < DATA_SIZE; i++)
 319             {
 320                 Merge(a[i], b[i], c[i]);
 321             }
 322
 323                         currentTime = ReadTicks() - startTime;
 324             vectorTime += currentTime;
 325             if( currentTime < bestTime )
 326                 bestTime = currentTime;
 327         }
 328         if( 0 == gReportAverageTimes )
 329             vectorTime = bestTime;
 330         else
 331             vectorTime /= NUM_CYCLES;
 332     }
 333
 334     vlog( "Merge Timing:\n" );
 335     vlog( "     \t    scalar\t    vector\n" );
 336     vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
 337
 338     //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
 339     /*
 340  c  [0] float32_t       0.00455523
 341     [1] float32_t       0.559712
 342     [2] float32_t       0.0795838
 343     [3] float32_t       0.10182
 344
 345 c_ref
 346     [0] float32_t       0.00455523
 347     [1] float32_t       0.559712
 348     [2] float32_t       0.0795838
 349     [3] float32_t       0.552081
 350
 351
 352 c   [0] float32_t       0.829904
 353     [1] float32_t       0.692891
 354     [2] float32_t       0.961654
 355     [3] float32_t       0.666956
 356
 357  c_ref
 358     [0] float32_t       0.829904
 359     [1] float32_t       0.692891
 360     [2] float32_t       0.961654
 361     [3] float32_t       0.522878
 362     */
 363     for (i = 0; i < DATA_SIZE; i++)
 364     {
 365         //ignore 4th component because it is not computed in all code-paths
 366         if( (fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) ||
 367            (fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) ||
 368            (fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) ||
 369           // (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) ||
 370            (fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) ||
 371            (fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) ||
 372            (fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001)
 373           //|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001)
 374            )
 375
 376
 377         //if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3]))
 378         {
 379             printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
 380
 381             printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
 382             printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
 383             printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
 384             printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
 385             printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
 386             printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
 387
 388             printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
 389             printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
 390             printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
 391             printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
 392             printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
 393             printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
 394                         return 1;
 395
 396                 }
 397
 398     }
 399
 400     ////////////////////////////////////
 401     //
 402     // Time and Test Select
 403     //
 404     ////////////////////////////////////
 405         {
 406         uint64_t startTime, bestTime, currentTime;
 407
 408         bestTime = -1LL;
 409         scalarTime = 0;
 410         for (j = 0; j < NUM_CYCLES; j++)
 411                 {
 412             startTime = ReadTicks();
 413
 414
 415             for (i = 0; i < DATA_SIZE; i++)
 416             {
 417                 Select_Ref_Res[i]  = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
 418             }
 419
 420                         currentTime = ReadTicks() - startTime;
 421             scalarTime += currentTime;
 422             if( currentTime < bestTime )
 423                 bestTime = currentTime;
 424         }
 425         if( 0 == gReportAverageTimes )
 426             scalarTime = bestTime;
 427         else
 428             scalarTime /= NUM_CYCLES;
 429     }
 430
 431     {
 432         uint64_t startTime, bestTime, currentTime;
 433
 434         bestTime = -1LL;
 435         vectorTime = 0;
 436         for (j = 0; j < NUM_CYCLES; j++)
 437                 {
 438             startTime = ReadTicks();
 439
 440             for (i = 0; i < DATA_SIZE; i++)
 441             {
 442                 Select_Test_Res[i] = Select(a[i], b[i], c[i]);
 443             }
 444
 445                         currentTime = ReadTicks() - startTime;
 446             vectorTime += currentTime;
 447             if( currentTime < bestTime )
 448                 bestTime = currentTime;
 449         }
 450         if( 0 == gReportAverageTimes )
 451             vectorTime = bestTime;
 452         else
 453             vectorTime /= NUM_CYCLES;
 454     }
 455
 456     vlog( "Select Timing:\n" );
 457     vlog( "     \t    scalar\t    vector\n" );
 458     vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
 459
 460     //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
 461
 462     for (i = 0; i < DATA_SIZE; i++)
 463     {
 464         Select_Ref_Res[i]  = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
 465         Select_Test_Res[i] = Select(a[i], b[i], c[i]);
 466
 467         if(Select_Test_Res[i] != Select_Ref_Res[i])
 468         {
 469             printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
 470
 471             printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
 472             printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
 473             printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
 474             printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
 475             printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
 476             printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
 477
 478             printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
 479             printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
 480             printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
 481             printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
 482             printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
 483             printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
 484                         return 1;
 485                 }
 486
 487     }
 488
 489     return 0;
 490 }
 491 #endif
 492
 493
 494
 495