Imported Upstream version 2.81
[platform/upstream/libbullet.git] / Test / Source / Tests / Test_dot3.cpp
1 //
2 //  Test_dot3.cpp
3 //  BulletTest
4 //
5 //  Copyright (c) 2011 Apple Inc.
6 //
7
8
9 #include "LinearMath/btScalar.h"
10 #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
11
12
13 #include "Test_dot3.h"
14 #include "vector.h"
15 #include "Utils.h"
16 #include "main.h"
17 #include <math.h>
18 #include <string.h>
19
20 #include <LinearMath/btVector3.h>
21
22 // reference code for testing purposes
23 static btVector3 dot3_ref( const btVector3 &,  const btVector3 &,   const btVector3 &,   const btVector3 &);
24 static btVector3 dot3_ref( const btVector3 &v, const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
25 {
26     return btVector3( v.dot(v1), v.dot(v2), v.dot(v3));
27 }
28
29 /*
30 SIMD_FORCE_INLINE int operator!=(const btVector3 &s, const btVector3 &v)
31 {
32 #ifdef __SSE__
33     __m128 test = _mm_cmpneq_ps( s.mVec128, v.mVec128 );
34     return (_mm_movemask_ps( test ) & 7) != 0;
35 #elif defined __ARM_NEON_H
36     uint32x4_t test = vandq_u32( vceqq_f32( s.mVec128, v.mVec128 ), (uint32x4_t){-1,-1,-1,0});
37     uint32x2_t t = vpadd_u32( vget_low_u32(test), vget_high_u32(test));
38     t = vpadd_u32(t, t);
39     return -3 != (int32_t) vget_lane_u32(t, 0);
40 #else
41     return  s.m_floats[0] != v.m_floats[0] ||
42     s.m_floats[1] != v.m_floats[1] ||
43     s.m_floats[2] != v.m_floats[2];
44 #endif
45 }
46 */
47  
48
49
50 #define LOOPCOUNT 1000
51 #define NUM_CYCLES 10000
52
53 int Test_dot3(void)
54 {
55     btVector3 v, v1, v2, v3;
56     
57 #define DATA_SIZE 1024
58     
59         btVector3 vec3_arr[DATA_SIZE];
60         btVector3 vec3_arr1[DATA_SIZE];
61         btVector3 vec3_arr2[DATA_SIZE];
62         btVector3 vec3_arr3[DATA_SIZE];
63     btVector3 res_arr[DATA_SIZE];
64     
65     uint64_t scalarTime;
66     uint64_t vectorTime;
67     size_t j, k;
68     btVector3 correct, test;
69     
70         for( k = 0; k < DATA_SIZE; k++ )
71         {
72         
73         vec3_arr[k]  = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
74         vec3_arr1[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
75         vec3_arr2[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN ));
76         vec3_arr3[k] = btVector3( btAssign128( RANDF, RANDF, RANDF, BT_NAN));
77
78                 correct = dot3_ref(vec3_arr[k], vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
79                 test = vec3_arr[k].dot3( vec3_arr1[k], vec3_arr2[k], vec3_arr3[k]);
80         
81                 if( correct != test )
82                 {
83                         vlog( "Error (%ld) - dot3 result error! *{%a, %a, %a, %a} != {%a, %a, %a, %a} \n", k,
84                    correct.x(), correct.y(), correct.z(), correct.w(),
85                    test.x(), test.y(), test.z(), test.w() );
86             
87                         return 1;
88                 }
89     }
90     
91     
92         {
93         uint64_t startTime, bestTime, currentTime;
94         
95         bestTime = -1LL;
96         scalarTime = 0;
97         for (j = 0; j < NUM_CYCLES; j++) 
98                 {
99             startTime = ReadTicks();
100             for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
101                         {
102                                 size_t k32 = (k & (DATA_SIZE-1)); 
103                 res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
104                                 res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
105                                 res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
106                                 res_arr[k32] = dot3_ref( vec3_arr[k32], vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); 
107                         }
108                         currentTime = ReadTicks() - startTime;
109             scalarTime += currentTime;
110             if( currentTime < bestTime )
111                 bestTime = currentTime;
112         }
113         if( 0 == gReportAverageTimes )
114             scalarTime = bestTime;        
115         else
116             scalarTime /= NUM_CYCLES;
117     }
118     
119     {
120         uint64_t startTime, bestTime, currentTime;
121         
122         bestTime = -1LL;
123         vectorTime = 0;
124         for (j = 0; j < NUM_CYCLES; j++) 
125                 {
126             startTime = ReadTicks();
127             for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
128                         {
129                                 size_t k32 = (k & (DATA_SIZE-1)); 
130                 res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
131                                 res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
132                                 res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); k32++;
133                                 res_arr[k32] = vec3_arr[k32].dot3( vec3_arr1[k32], vec3_arr2[k32], vec3_arr3[k32]); 
134                         }
135                         currentTime = ReadTicks() - startTime;
136             vectorTime += currentTime;
137             if( currentTime < bestTime )
138                 bestTime = currentTime;
139         }
140         if( 0 == gReportAverageTimes )
141             vectorTime = bestTime;        
142         else
143             vectorTime /= NUM_CYCLES;
144     }
145     
146     vlog( "Timing:\n" );
147     vlog( "     \t    scalar\t    vector\n" );
148     vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
149     
150     return 0;
151 }
152
153 #endif // BT_USE_SSE_IN_API || BT_USE_NEON