2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
// Precision selection: btVector3Data aliases the serialization struct that
// matches the build's btScalar width (double vs float).
// NOTE(review): the `#else` between original lines 27 and 29 appears to have
// been dropped by this extraction — as written, a double-precision build would
// see both #define pairs. Confirm against the upstream header.
23 #include "btAlignedAllocator.h"
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btVector3Data btVector3DoubleData
27 #define btVector3DataName "btVector3DoubleData"
29 #define btVector3Data btVector3FloatData
30 #define btVector3DataName "btVector3FloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
// SIMD support macros and constants.
// BT_SHUFFLE packs four 2-bit lane indices into the immediate expected by
// _mm_shuffle_ps (x selects lane 0 of the result, w lane 3).
// The *Mask constants are bit masks: btvAbsMask clears the sign bit of all
// four lanes, btv3Abs* clears sign bits of x/y/z only, btvFFF0Mask keeps
// x/y/z and zeroes w.
// NOTE(review): intermediate lines (e.g. an #elif/#else for the NEON branch
// around original line 67) are missing from this extraction — the SSE consts
// (lines 58-61) and NEON consts (lines 67-70) were presumably in separate
// preprocessor branches; confirm against upstream.
33 #if defined BT_USE_SSE
35 //typedef uint32_t __m128i __attribute__ ((vector_size(16)));
38 #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
42 #define BT_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))
43 //#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
44 #define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
45 #define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
46 #define bt_splat_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
48 #define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
49 #define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
50 #define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
51 #define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
52 #define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
53 #define btvxyzMaskf btvFFF0fMask
54 #define btvAbsfMask btCastiTo128f(btvAbsMask)
// SSE constant vectors: negative-zero mask (sign bits), (1,1,1,0), 0.5 and
// 1.5 splats used by the Newton-Raphson rsqrt refinement in normalize().
58 const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
59 const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
60 const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
61 const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
// NEON equivalents of the masks above.
67 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
68 const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
69 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
70 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
74 /**@brief btVector3 can be used to represent 3D points and vectors.
75  * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
76  * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
// Class is 16-byte aligned so the 4-float storage can live in one SIMD register.
78 ATTRIBUTE_ALIGNED16(class) btVector3
82 BT_DECLARE_ALIGNED_ALLOCATOR();
// Cell/SPU build: storage is reinterpreted as vec_float4 through m_floats.
84 #if defined (__SPU__) && defined (__CELLOS_LV2__)
87 SIMD_FORCE_INLINE const vec_float4& get128() const
89 return *((const vec_float4*)&m_floats[0]);
92 #else //__CELLOS_LV2__ __SPU__
93 #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
// SSE/NEON builds expose the raw 128-bit register (mVec128, declared in a
// missing line of this extraction) through get128/set128.
// NOTE(review): the bodies of get128/set128 (original lines ~99-104) are
// missing here — confirm against upstream.
98 SIMD_FORCE_INLINE btSimdFloat4 get128() const
102 SIMD_FORCE_INLINE void set128(btSimdFloat4 v128)
// Scalar fallback storage: x, y, z, w.
107 btScalar m_floats[4];
109 #endif //__CELLOS_LV2__ __SPU__
113 /**@brief No initialization constructor */
114 SIMD_FORCE_INLINE btVector3()
121 /**@brief Constructor from scalars
// Builds (x, y, z, 0); w is always zeroed by this constructor.
126 SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
131 m_floats[3] = btScalar(0.f);
134 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
// SIMD builds add register-based construction, copy and assignment so the
// compiler can keep the value in one vector register.
136 SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
142 SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
144 mVec128 = rhs.mVec128;
147 // Assignment Operator
148 SIMD_FORCE_INLINE btVector3&
149 operator=(const btVector3& v)
155 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
157 /**@brief Add a vector to this one
158  * @param The vector to add to this one */
// Each compound operator has three paths: SSE intrinsic, NEON intrinsic,
// and a scalar fallback that touches only x/y/z (w is left alone).
// NOTE(review): the `#else` before each scalar branch and the closing
// `#endif`/`return *this;` lines are missing from this extraction.
159 SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
161 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
162 mVec128 = _mm_add_ps(mVec128, v.mVec128);
163 #elif defined(BT_USE_NEON)
164 mVec128 = vaddq_f32(mVec128, v.mVec128);
166 m_floats[0] += v.m_floats[0];
167 m_floats[1] += v.m_floats[1];
168 m_floats[2] += v.m_floats[2];
174 /**@brief Subtract a vector from this one
175  * @param The vector to subtract */
176 SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
178 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
179 mVec128 = _mm_sub_ps(mVec128, v.mVec128);
180 #elif defined(BT_USE_NEON)
181 mVec128 = vsubq_f32(mVec128, v.mVec128);
183 m_floats[0] -= v.m_floats[0];
184 m_floats[1] -= v.m_floats[1];
185 m_floats[2] -= v.m_floats[2];
190 /**@brief Scale the vector
191  * @param s Scale factor */
192 SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
194 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// Splat s into x/y/z lanes, leaving lane 3 zero, then one vector multiply.
195 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
196 vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
197 mVec128 = _mm_mul_ps(mVec128, vs);
198 #elif defined(BT_USE_NEON)
199 mVec128 = vmulq_n_f32(mVec128, s);
208 /**@brief Inversely scale the vector
209  * @param s Scale factor to divide by */
// Division is implemented as multiplication by the reciprocal; the disabled
// SSE path is kept for reference with the author's note that it isn't faster.
210 SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
212 btFullAssert(s != btScalar(0.0));
214 #if 0 //defined(BT_USE_SSE_IN_API)
215 // this code is not faster !
216 __m128 vs = _mm_load_ss(&s);
217 vs = _mm_div_ss(v1110, vs);
218 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
220 mVec128 = _mm_mul_ps(mVec128, vs);
224 return *this *= btScalar(1.0) / s;
228 /**@brief Return the dot product
229  * @param v The other vector in the dot product */
// 3-component dot product (the w lane does not contribute: the SSE path sums
// only lanes 0/1/2 via shuffles, the scalar path sums three products).
230 SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
232 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
233 __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
234 __m128 z = _mm_movehl_ps(vd, vd);
235 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
236 vd = _mm_add_ss(vd, y);
237 vd = _mm_add_ss(vd, z);
238 return _mm_cvtss_f32(vd);
239 #elif defined(BT_USE_NEON)
// NOTE(review): vpadd of the low pair yields x+y in lane 0; adding
// vget_high adds the z lane. Assumes w product is not accumulated into
// lane 0 — consistent with the SSE path.
240 float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
241 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
242 x = vadd_f32(x, vget_high_f32(vd));
243 return vget_lane_f32(x, 0);
245 return m_floats[0] * v.m_floats[0] +
246 m_floats[1] * v.m_floats[1] +
247 m_floats[2] * v.m_floats[2];
251 /**@brief Return the length of the vector squared */
252 SIMD_FORCE_INLINE btScalar length2() const
257 /**@brief Return the length of the vector */
258 SIMD_FORCE_INLINE btScalar length() const
260 return btSqrt(length2());
263 /**@brief Return the distance squared between the ends of this and another vector
264  * This is symantically treating the vector like a point */
// Defined out-of-line further down in this header.
265 SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
267 /**@brief Return the distance between the ends of this and another vector
268  * This is symantically treating the vector like a point */
269 SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
// Normalize with protection against zero-length vectors: first divide by the
// largest absolute component (rescaling into a well-conditioned range), then
// divide by the resulting length.
// NOTE(review): the branch taken when absVec[maxIndex] == 0 (original lines
// ~279-282) is missing from this extraction — upstream sets a fallback unit
// vector there; confirm.
271 SIMD_FORCE_INLINE btVector3& safeNormalize()
273 btVector3 absVec = this->absolute();
274 int maxIndex = absVec.maxAxis();
275 if (absVec[maxIndex]>0)
277 *this /= absVec[maxIndex];
278 return *this /= length();
284 /**@brief Normalize this vector
285  * x^2 + y^2 + z^2 = 1 */
// No zero-length guard here (unlike safeNormalize) — caller must ensure a
// non-zero vector.
286 SIMD_FORCE_INLINE btVector3& normalize()
288 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// Horizontal sum of squares into lane 0, as in dot().
290 __m128 vd = _mm_mul_ps(mVec128, mVec128);
291 __m128 z = _mm_movehl_ps(vd, vd);
292 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
293 vd = _mm_add_ss(vd, y);
294 vd = _mm_add_ss(vd, z);
// Exact path: sqrt then divide, splat 1/len to x/y/z lanes (lane 3 zeroed).
297 vd = _mm_sqrt_ss(vd);
298 vd = _mm_div_ss(v1110, vd);
299 vd = bt_splat_ps(vd, 0x80);
300 mVec128 = _mm_mul_ps(mVec128, vd);
// Alternative fast path kept below: rsqrt estimate refined by one
// Newton-Raphson step y1 = y0*(1.5 - 0.5*x*y0^2).
// NOTE(review): the preprocessor lines separating the two SSE variants are
// missing from this extraction.
303 // NR step 1/sqrt(x) - vd is x, y is output
304 y = _mm_rsqrt_ss(vd); // estimate
308 vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
310 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
311 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
312 z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
314 y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
316 y = bt_splat_ps(y, 0x80);
317 mVec128 = _mm_mul_ps(mVec128, y);
324 return *this /= length();
328 /**@brief Return a normalized version of this vector */
329 SIMD_FORCE_INLINE btVector3 normalized() const;
331 /**@brief Return a rotated version of this vector
332  * @param wAxis The axis to rotate about
333  * @param angle The angle to rotate by */
334 SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
336 /**@brief Return the angle between this and another vector
337  * @param v The other vector */
// acos of the normalized dot product; asserts both vectors are non-zero.
338 SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
340 btScalar s = btSqrt(length2() * v.length2());
341 btFullAssert(s != btScalar(0.0));
342 return btAcos(dot(v) / s);
345 /**@brief Return a vector will the absolute values of each element */
// SSE clears sign bits of x/y/z only (btv3AbsfMask keeps w's sign); NEON's
// vabsq_f32 clears all four lanes. Scalar fallback (partially missing from
// this extraction) applies btFabs per component.
346 SIMD_FORCE_INLINE btVector3 absolute() const
348 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
349 return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
350 #elif defined(BT_USE_NEON)
351 return btVector3(vabsq_f32(mVec128));
356 btFabs(m_floats[2]));
360 /**@brief Return the cross product between this and another vector
361  * @param v The other vector */
// Standard shuffle-based cross product: with T = this.yzx and V = v.yzx,
// (V*this - T*v).yzx equals this x v.
362 SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
364 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
367 T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
368 V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
370 V = _mm_mul_ps(V, mVec128);
371 T = _mm_mul_ps(T, v.mVec128);
372 V = _mm_sub_ps(V, T);
374 V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
376 #elif defined(BT_USE_NEON)
378 // form (Y, Z, X, _) of mVec128 and v.mVec128
379 float32x2_t Tlow = vget_low_f32(mVec128);
380 float32x2_t Vlow = vget_low_f32(v.mVec128);
381 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
382 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
384 V = vmulq_f32(V, mVec128);
385 T = vmulq_f32(T, v.mVec128);
387 Vlow = vget_low_f32(V);
388 // form (Y, Z, X, _);
389 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
// Clear the w lane so the result is a clean (x, y, z, 0).
390 V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
// Scalar fallback: textbook component-wise cross product.
395 m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
396 m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
397 m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
// Scalar triple product this . (v1 x v2): the SIMD paths compute v1 x v2
// with the same shuffle scheme as cross(), then horizontally sum the
// product with *this as in dot().
401 SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
403 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
405 __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
406 __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
408 V = _mm_mul_ps(V, v1.mVec128);
409 T = _mm_mul_ps(T, v2.mVec128);
410 V = _mm_sub_ps(V, T);
412 V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
415 V = _mm_mul_ps(V, mVec128);
416 __m128 z = _mm_movehl_ps(V, V);
417 __m128 y = _mm_shuffle_ps(V, V, 0x55);
418 V = _mm_add_ss(V, y);
419 V = _mm_add_ss(V, z);
420 return _mm_cvtss_f32(V);
422 #elif defined(BT_USE_NEON)
425 // form (Y, Z, X, _) of mVec128 and v.mVec128
426 float32x2_t Tlow = vget_low_f32(v1.mVec128);
427 float32x2_t Vlow = vget_low_f32(v2.mVec128);
428 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
429 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
431 V = vmulq_f32(V, v1.mVec128);
432 T = vmulq_f32(T, v2.mVec128);
434 Vlow = vget_low_f32(V);
435 // form (Y, Z, X, _);
436 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
439 V = vmulq_f32(mVec128, V);
440 float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
441 x = vadd_f32(x, vget_high_f32(V));
442 return vget_lane_f32(x, 0);
// Scalar fallback: determinant expansion along the first row.
445 m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
446 m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
447 m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
451 /**@brief Return the axis with the smallest value
452  * Note return values are 0,1,2 for x, y, or z */
// Ties resolve toward the later axis (strict < comparisons).
453 SIMD_FORCE_INLINE int minAxis() const
455 return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
458 /**@brief Return the axis with the largest value
459  * Note return values are 0,1,2 for x, y, or z */
460 SIMD_FORCE_INLINE int maxAxis() const
462 return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
// Axis least aligned with this vector (smallest |component|).
465 SIMD_FORCE_INLINE int furthestAxis() const
467 return absolute().minAxis();
// Axis most aligned with this vector (largest |component|).
470 SIMD_FORCE_INLINE int closestAxis() const
472 return absolute().maxAxis();
// Set *this to the linear interpolation (1-rt)*v0 + rt*v1.
// NOTE(review): the line assigning tmp3 to mVec128 after original line 486
// is missing from this extraction.
476 SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
478 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
479 __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
480 btScalar s = btScalar(1.0) - rt;
481 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
482 vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
483 __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
484 vrt = bt_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
485 __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
486 __m128 tmp3 = _mm_add_ps(r0,r1);
488 #elif defined(BT_USE_NEON)
// NEON uses the algebraically-equivalent form v0 + rt*(v1 - v0).
489 mVec128 = vsubq_f32(v1.mVec128, v0.mVec128);
490 mVec128 = vmulq_n_f32(mVec128, rt);
491 mVec128 = vaddq_f32(mVec128, v0.mVec128);
493 btScalar s = btScalar(1.0) - rt;
494 m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
495 m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
496 m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
497 //don't do the unused w component
498 //		m_co[3] = s * v0[3] + rt * v1[3];
502 /**@brief Return the linear interpolation between this and another vector
503  * @param v The other vector
504  * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
// All three paths compute this + t*(v - this).
505 SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
507 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
508 __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
509 vt = bt_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
510 __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
511 vl = _mm_mul_ps(vl, vt);
512 vl = _mm_add_ps(vl, mVec128);
514 return btVector3(vl);
515 #elif defined(BT_USE_NEON)
516 float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
517 vl = vmulq_n_f32(vl, t);
518 vl = vaddq_f32(vl, mVec128);
520 return btVector3(vl);
523 btVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
524 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
525 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
529 /**@brief Elementwise multiply this vector by the other
530  * @param v The other vector */
// Component-wise (Hadamard) product; scalar path leaves w untouched.
531 SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
533 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
534 mVec128 = _mm_mul_ps(mVec128, v.mVec128);
535 #elif defined(BT_USE_NEON)
536 mVec128 = vmulq_f32(mVec128, v.mVec128);
538 m_floats[0] *= v.m_floats[0];
539 m_floats[1] *= v.m_floats[1];
540 m_floats[2] *= v.m_floats[2];
// Component accessors. Getters return const references into m_floats;
// setters assign by value. getX/x etc. are synonyms.
545 /**@brief Return the x value */
546 SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
547 /**@brief Return the y value */
548 SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
549 /**@brief Return the z value */
550 SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
551 /**@brief Set the x value */
552 SIMD_FORCE_INLINE void	setX(btScalar _x) { m_floats[0] = _x;};
553 /**@brief Set the y value */
554 SIMD_FORCE_INLINE void	setY(btScalar _y) { m_floats[1] = _y;};
555 /**@brief Set the z value */
556 SIMD_FORCE_INLINE void	setZ(btScalar _z) { m_floats[2] = _z;};
557 /**@brief Set the w value */
558 SIMD_FORCE_INLINE void	setW(btScalar _w) { m_floats[3] = _w;};
559 /**@brief Return the x value */
560 SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
561 /**@brief Return the y value */
562 SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
563 /**@brief Return the z value */
564 SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
565 /**@brief Return the w value */
566 SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
568 //SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i];	}
569 //SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
570 ///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
571 SIMD_FORCE_INLINE operator btScalar *() { return &m_floats[0]; }
572 SIMD_FORCE_INLINE operator const btScalar *() const { return &m_floats[0]; }
// Exact (bitwise-float) equality of all four lanes, including w.
// SSE: compare per-lane, then require all four sign/mask bits set (0xf).
574 SIMD_FORCE_INLINE bool operator==(const btVector3& other) const
576 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
577 return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
579 return ((m_floats[3]==other.m_floats[3]) &&
580 (m_floats[2]==other.m_floats[2]) &&
581 (m_floats[1]==other.m_floats[1]) &&
582 (m_floats[0]==other.m_floats[0]));
586 SIMD_FORCE_INLINE bool operator!=(const btVector3& other) const
588 return !(*this == other);
591 /**@brief Set each element to the max of the current values and the values of another btVector3
592  * @param other The other btVector3 to compare with
// Component-wise max including the w lane (scalar path uses other.w()).
594 SIMD_FORCE_INLINE void setMax(const btVector3& other)
596 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
597 mVec128 = _mm_max_ps(mVec128, other.mVec128);
598 #elif defined(BT_USE_NEON)
599 mVec128 = vmaxq_f32(mVec128, other.mVec128);
601 btSetMax(m_floats[0], other.m_floats[0]);
602 btSetMax(m_floats[1], other.m_floats[1]);
603 btSetMax(m_floats[2], other.m_floats[2]);
604 btSetMax(m_floats[3], other.w());
608 /**@brief Set each element to the min of the current values and the values of another btVector3
609  * @param other The other btVector3 to compare with
// Component-wise min including the w lane.
611 SIMD_FORCE_INLINE void setMin(const btVector3& other)
613 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
614 mVec128 = _mm_min_ps(mVec128, other.mVec128);
615 #elif defined(BT_USE_NEON)
616 mVec128 = vminq_f32(mVec128, other.mVec128);
618 btSetMin(m_floats[0], other.m_floats[0]);
619 btSetMin(m_floats[1], other.m_floats[1]);
620 btSetMin(m_floats[2], other.m_floats[2]);
621 btSetMin(m_floats[3], other.w());
// Assign (x, y, z) and zero w.
// NOTE(review): the assignments to m_floats[0..2] (original lines ~627-629)
// are missing from this extraction.
625 SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
630 m_floats[3] = btScalar(0.f);
// Writes the three rows of the skew-symmetric cross-product matrix [v]x
// into v0/v1/v2, i.e. (*this).cross(u) == M*u. Scalar rows visible below:
// (0,-z,y), (z,0,-x), (-y,x,0).
633 void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
635 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
637 __m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
638 __m128 V0 = _mm_xor_ps(btvMzeroMask, V);
639 __m128 V2 = _mm_movelh_ps(V0, V);
641 __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
643 V0 = _mm_shuffle_ps(V0, V, 0xDB);
644 V2 = _mm_shuffle_ps(V2, V, 0xF9);
650 v0->setValue(0.		,-z()		,y());
651 v1->setValue(z()	,0.			,-x());
652 v2->setValue(-y()	,x()	,0.);
// setZero: all four lanes cleared. (The declaration line for setZero is
// missing from this extraction; the bodies below are its three paths.)
658 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
659 mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
660 #elif defined(BT_USE_NEON)
661 int32x4_t vi = vdupq_n_s32(0);
662 mVec128 = vreinterpretq_f32_s32(vi);
664 setValue(btScalar(0.),btScalar(0.),btScalar(0.));
// Exact zero test on x/y/z only (w ignored).
668 SIMD_FORCE_INLINE bool isZero() const
670 return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
// Near-zero test: squared length below SIMD_EPSILON.
673 SIMD_FORCE_INLINE bool fuzzyZero() const
675 return length2() < SIMD_EPSILON;
// Serialization hooks; btVector3Data is the precision-selected struct from
// the top of this header. Implementations are at the bottom of the file.
678 SIMD_FORCE_INLINE void	serialize(struct	btVector3Data& dataOut) const;
680 SIMD_FORCE_INLINE void	deSerialize(const struct	btVector3Data& dataIn);
682 SIMD_FORCE_INLINE void	serializeFloat(struct	btVector3FloatData& dataOut) const;
684 SIMD_FORCE_INLINE void	deSerializeFloat(const struct	btVector3FloatData& dataIn);
686 SIMD_FORCE_INLINE void	serializeDouble(struct	btVector3DoubleData& dataOut) const;
688 SIMD_FORCE_INLINE void	deSerializeDouble(const struct	btVector3DoubleData& dataIn);
690 /**@brief returns index of maximum dot product between this and vectors in array[]
691  * @param array The other vectors
692  * @param array_count The number of other vectors
693  * @param dotOut The maximum dot product */
694 SIMD_FORCE_INLINE long maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
696 /**@brief returns index of minimum dot product between this and vectors in array[]
697  * @param array The other vectors
698  * @param array_count The number of other vectors
699  * @param dotOut The minimum dot product */
700 SIMD_FORCE_INLINE long minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
702 /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
// Three dot products in one pass: multiplies *this against each of v0/v1/v2,
// then transposes/horizontally-sums so lane i of the result is dot(vi).
703 SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
705 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
707 __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
708 __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
709 __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
710 __m128 b0 = _mm_unpacklo_ps( a0, a1 );
711 __m128 b1 = _mm_unpackhi_ps( a0, a1 );
712 __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
713 __m128 r  = _mm_movelh_ps( b0, b2 );
714 r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
// Mask out a2's w product before folding in the z terms.
715 a2 = _mm_and_ps( a2, btvxyzMaskf);
716 r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
719 #elif defined(BT_USE_NEON)
720 static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
721 float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
722 float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
723 float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
724 float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
725 a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
726 float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
727 float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
728 return btVector3( vcombine_f32(b0, b1) );
// Scalar fallback: three independent dot products.
730 return btVector3( dot(v0), dot(v1), dot(v2));
// Free binary operators on btVector3. Each has SSE / NEON / scalar paths;
// the scalar paths construct from three components (w becomes 0 via the
// three-scalar constructor).
735 /**@brief Return the sum of two vectors (Point symantics)*/
736 SIMD_FORCE_INLINE btVector3
737 operator+(const btVector3& v1, const btVector3& v2)
739 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
740 return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
741 #elif defined(BT_USE_NEON)
742 return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
745 v1.m_floats[0] + v2.m_floats[0],
746 v1.m_floats[1] + v2.m_floats[1],
747 v1.m_floats[2] + v2.m_floats[2]);
751 /**@brief Return the elementwise product of two vectors */
752 SIMD_FORCE_INLINE btVector3
753 operator*(const btVector3& v1, const btVector3& v2)
755 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
756 return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
757 #elif defined(BT_USE_NEON)
758 return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
761 v1.m_floats[0] * v2.m_floats[0],
762 v1.m_floats[1] * v2.m_floats[1],
763 v1.m_floats[2] * v2.m_floats[2]);
767 /**@brief Return the difference between two vectors */
768 SIMD_FORCE_INLINE btVector3
769 operator-(const btVector3& v1, const btVector3& v2)
771 #if (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
773 //	without _mm_and_ps this code causes slowdown in Concave moving
// The FFF0 mask scrubs the w lane of the result (see author's note above).
774 __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
775 return btVector3(_mm_and_ps(r, btvFFF0fMask));
776 #elif defined(BT_USE_NEON)
777 float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
778 return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
781 v1.m_floats[0] - v2.m_floats[0],
782 v1.m_floats[1] - v2.m_floats[1],
783 v1.m_floats[2] - v2.m_floats[2]);
787 /**@brief Return the negative of the vector */
// SSE negates by xor-ing the sign bits, then zeroes w; NEON xors all lanes.
788 SIMD_FORCE_INLINE btVector3
789 operator-(const btVector3& v)
791 #if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
792 __m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
793 return btVector3(_mm_and_ps(r, btvFFF0fMask));
794 #elif defined(BT_USE_NEON)
795 return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
797 return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
801 /**@brief Return the vector scaled by s */
// Scalar scaling; SSE splats s to x/y/z lanes (w stays 0), NEON masks w
// after the multiply.
802 SIMD_FORCE_INLINE btVector3
803 operator*(const btVector3& v, const btScalar& s)
805 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
806 __m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
807 vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
808 return btVector3(_mm_mul_ps(v.mVec128, vs));
809 #elif defined(BT_USE_NEON)
810 float32x4_t r = vmulq_n_f32(v.mVec128, s);
811 return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
813 return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
817 /**@brief Return the vector scaled by s */
// Commuted overload; forwards to operator*(v, s) (body missing from this
// extraction — presumably `return v * s;`).
818 SIMD_FORCE_INLINE btVector3
819 operator*(const btScalar& s, const btVector3& v)
824 /**@brief Return the vector inversely scaled by s */
// Implemented as multiplication by 1/s; disabled SSE variant kept for
// reference with the author's "not faster" note.
825 SIMD_FORCE_INLINE btVector3
826 operator/(const btVector3& v, const btScalar& s)
828 btFullAssert(s != btScalar(0.0));
829 #if 0 //defined(BT_USE_SSE_IN_API)
830 // this code is not faster !
831 __m128	vs = _mm_load_ss(&s);
832 vs = _mm_div_ss(v1110, vs);
833 vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
835 return btVector3(_mm_mul_ps(v.mVec128, vs));
837 return v * (btScalar(1.0) / s);
841 /**@brief Return the vector inversely scaled by s */
// Component-wise division. NEON path uses vrecpe + two Newton-Raphson
// refinement steps (vrecps) instead of a hardware divide.
842 SIMD_FORCE_INLINE btVector3
843 operator/(const btVector3& v1, const btVector3& v2)
845 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
846 __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
847 vec = _mm_and_ps(vec, btvFFF0fMask);
848 return btVector3(vec);
849 #elif defined(BT_USE_NEON)
850 float32x4_t x, y, v, m;
855 v = vrecpeq_f32(y);			// v ~ 1/y
856 m = vrecpsq_f32(y, v);		// m = (2-v*y)
857 v = vmulq_f32(v, m);		// vv = v*m ~~ 1/y
858 m = vrecpsq_f32(y, v);		// mm = (2-vv*y)
859 v = vmulq_f32(v, x);		// x*vv
860 v = vmulq_f32(v, m);		// (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
865 v1.m_floats[0] / v2.m_floats[0],
866 v1.m_floats[1] / v2.m_floats[1],
867 v1.m_floats[2] / v2.m_floats[2]);
// Free-function convenience wrappers around the corresponding btVector3
// member functions. (Several bodies are missing single return lines in this
// extraction; the visible ones all forward directly.)
871 /**@brief Return the dot product between two vectors */
872 SIMD_FORCE_INLINE btScalar
873 btDot(const btVector3& v1, const btVector3& v2)
879 /**@brief Return the distance squared between two vectors */
880 SIMD_FORCE_INLINE btScalar
881 btDistance2(const btVector3& v1, const btVector3& v2)
883 return v1.distance2(v2);
887 /**@brief Return the distance between two vectors */
888 SIMD_FORCE_INLINE btScalar
889 btDistance(const btVector3& v1, const btVector3& v2)
891 return v1.distance(v2);
894 /**@brief Return the angle between two vectors */
895 SIMD_FORCE_INLINE btScalar
896 btAngle(const btVector3& v1, const btVector3& v2)
901 /**@brief Return the cross product of two vectors */
902 SIMD_FORCE_INLINE btVector3
903 btCross(const btVector3& v1, const btVector3& v2)
908 SIMD_FORCE_INLINE btScalar
909 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
911 return v1.triple(v2, v3);
914 /**@brief Return the linear interpolation between two vectors
915  * @param v1 One vector
916  * @param v2 The other vector
917  * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
918 SIMD_FORCE_INLINE btVector3
919 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
921 return v1.lerp(v2, t);
// Out-of-line member definitions (declared earlier in the class body).
926 SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
928 return (v - *this).length2();
931 SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
933 return (v - *this).length();
// SSE path normalizes a copy in-place (reusing the fast normalize());
// scalar path divides by length.
936 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
938 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
939 btVector3 norm = *this;
941 return norm.normalize();
943 return *this / length();
// Rodrigues-style rotation about a unit axis: decompose into the component
// o along the axis (unchanged) and the perpendicular pair (x, y), then
// recombine as o + x*cos + y*sin. Scalar path below makes this explicit.
947 SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
949 // wAxis must be a unit lenght vector
951 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
953 __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
954 btScalar ssin = btSin( _angle );
955 __m128 C = wAxis.cross( mVec128 ).mVec128;
956 O = _mm_and_ps(O, btvFFF0fMask);
957 btScalar scos = btCos( _angle );
959 __m128 vsin = _mm_load_ss(&ssin);	//	(S 0 0 0)
960 __m128 vcos = _mm_load_ss(&scos);	//	(S 0 0 0)
// Horizontal-sum O to get dot(wAxis, *this) replicated in x/y/z.
962 __m128 Y = bt_pshufd_ps(O, 0xC9);	//	(Y Z X 0)
963 __m128 Z = bt_pshufd_ps(O, 0xD2);	//	(Z X Y 0)
964 O = _mm_add_ps(O, Y);
965 vsin = bt_pshufd_ps(vsin, 0x80);	//	(S S S 0)
966 O = _mm_add_ps(O, Z);
967 vcos = bt_pshufd_ps(vcos, 0x80);	//	(S S S 0)
970 O = O * wAxis.mVec128;
971 __m128 X = mVec128 - O;
// NOTE(review): the SSE lines combining X*cos + C*sin into the result
// (original lines ~972-976) are missing from this extraction.
979 btVector3 o = wAxis * wAxis.dot( *this );
980 btVector3 _x = *this - o;
983 _y = wAxis.cross( *this );
985 return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
// Index of the array element with the largest dot product against *this;
// the dot value is written to dotOut. On SSE/NEON builds, arrays at or above
// scalar_cutoff are handed to the out-of-line _maxdot_large kernel; smaller
// arrays use the scalar loop.
// NOTE(review): several lines are missing from this extraction (the loop
// body that tracks the best index, the return of that index, and #else/#endif
// lines) — the control flow shown here is incomplete; confirm upstream.
989 SIMD_FORCE_INLINE long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
991 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
992 #if defined _WIN32 || defined (BT_USE_SSE)
993 const long scalar_cutoff = 10;
994 long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
995 #elif defined BT_USE_NEON
996 const long scalar_cutoff = 4;
997 extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
999 if( array_count < scalar_cutoff )
1002 #endif//BT_USE_SSE || BT_USE_NEON
1004 btScalar maxDot = -SIMD_INFINITY;
1007 for( i = 0; i < array_count; i++ )
1009 btScalar dot = array[i].dot(*this);
1021 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1022 return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
// Mirror of maxDot for the minimum; same structure and same missing lines.
// Note the NEON case's #else here raises "#error unhandled arch!".
1026 SIMD_FORCE_INLINE long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1028 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1029 #if defined BT_USE_SSE
1030 const long scalar_cutoff = 10;
1031 long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1032 #elif defined BT_USE_NEON
1033 const long scalar_cutoff = 4;
1034 extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1036 #error unhandled arch!
1039 if( array_count < scalar_cutoff )
1040 #endif//BT_USE_SSE || BT_USE_NEON
1042 btScalar minDot = SIMD_INFINITY;
1046 for( i = 0; i < array_count; i++ )
1048 btScalar dot = array[i].dot(*this);
1061 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1062 return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
// 4-component variant layered on btVector3's existing 4-float storage; adds
// w-aware construction, absolute4, and 4-lane min/max axis queries.
1067 class btVector4 : public btVector3
1071 SIMD_FORCE_INLINE btVector4() {}
1074 SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1075 : btVector3(_x,_y,_z)
1080 #if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
1081 SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
1086 SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
1088 mVec128 = rhs.mVec128;
1091 SIMD_FORCE_INLINE btVector4&
1092 operator=(const btVector4& v)
1094 mVec128 = v.mVec128;
1097 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
// Absolute value of all FOUR lanes (btvAbsfMask, unlike btVector3::absolute
// which preserves w's sign in the SSE path).
1099 SIMD_FORCE_INLINE btVector4 absolute4() const
1101 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
1102 return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
1103 #elif defined(BT_USE_NEON)
1104 return btVector4(vabsq_f32(mVec128));
1107 btFabs(m_floats[0]),
1108 btFabs(m_floats[1]),
1109 btFabs(m_floats[2]),
1110 btFabs(m_floats[3]));
1115 btScalar	getW() const { return m_floats[3];}
// Index (0-3) of the largest component; linear scan with running max.
// NOTE(review): the lines assigning the winning index (and the final return)
// are missing from this extraction.
1118 SIMD_FORCE_INLINE int maxAxis4() const
1121 btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
1122 if (m_floats[0] > maxVal)
1125 maxVal = m_floats[0];
1127 if (m_floats[1] > maxVal)
1130 maxVal = m_floats[1];
1132 if (m_floats[2] > maxVal)
1135 maxVal =m_floats[2];
1137 if (m_floats[3] > maxVal)
1140 maxVal = m_floats[3];
// Index (0-3) of the smallest component; same structure as maxAxis4.
1147 SIMD_FORCE_INLINE int minAxis4() const
1150 btScalar minVal = btScalar(BT_LARGE_FLOAT);
1151 if (m_floats[0] < minVal)
1154 minVal = m_floats[0];
1156 if (m_floats[1] < minVal)
1159 minVal = m_floats[1];
1161 if (m_floats[2] < minVal)
1164 minVal =m_floats[2];
1166 if (m_floats[3] < minVal)
1169 minVal = m_floats[3];
// Axis (0-3) whose absolute component is largest.
1176 SIMD_FORCE_INLINE int closestAxis4() const
1178 return absolute4().maxAxis4();
// Remaining btVector4 members; bodies are largely missing from this
// extraction (only doc comments and the 4-arg setValue signature survive).
1184 /**@brief Set x,y,z and zero w
1185  * @param x Value of x
1186  * @param y Value of y
1187  * @param z Value of z
1191 /*		void getValue(btScalar *m) const
1198 /**@brief Set the values
1199  * @param x Value of x
1200  * @param y Value of y
1201  * @param z Value of z
1202  * @param w Value of w
1204 SIMD_FORCE_INLINE void	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1216 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
// Byte-reverses one btScalar from sourceVal into destVal; the double branch
// handles 8 bytes, the float branch 4. (The per-byte assignment lines are
// missing from this extraction.)
1217 SIMD_FORCE_INLINE void	btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1219 #ifdef BT_USE_DOUBLE_PRECISION
1220 unsigned char* dest = (unsigned char*) &destVal;
1221 unsigned char* src  = (unsigned char*) &sourceVal;
1231 unsigned char* dest = (unsigned char*) &destVal;
1232 unsigned char* src  = (unsigned char*) &sourceVal;
1237 #endif //BT_USE_DOUBLE_PRECISION
1239 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
// Swaps all four components (including w) element-by-element.
1240 SIMD_FORCE_INLINE void	btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
1242 for (int i=0;i<4;i++)
1244 btSwapScalarEndian(sourceVec[i],destVec[i]);
1249 ///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
// In-place variant: swaps into a temporary, then copies back.
1250 SIMD_FORCE_INLINE void	btUnSwapVector3Endian(btVector3& vector)
1253 btVector3	swappedVec;
1254 for (int i=0;i<4;i++)
1256 btSwapScalarEndian(vector[i],swappedVec[i]);
1258 vector = swappedVec;
// Builds two vectors p, q orthogonal to plane normal n. The branch on |n.z|
// vs SIMDSQRT12 (1/sqrt(2)) avoids a degenerate cross product when n is
// nearly parallel to one plane.
// NOTE(review): the template<class T> line preceding this function and the
// p/q assignment lines inside both branches are missing from this
// extraction; confirm against upstream.
1262 SIMD_FORCE_INLINE void	btPlaneSpace1 (const T& n, T& p, T& q)
1264 if (btFabs(n[2]) > SIMDSQRT12) {
1265 // choose p in y-z plane
1266 btScalar a = n[1]*n[1] + n[2]*n[2];
1267 btScalar k = btRecipSqrt (a);
1277 // choose p in x-y plane
1278 btScalar a = n[0]*n[0] + n[1]*n[1];
1279 btScalar k = btRecipSqrt (a);
// Plain-old-data serialization structs (members — float/double m_floats[4]
// arrays — are missing from this extraction) and the btVector3
// serialize/deSerialize implementations. All copy 4 components, including w.
1291 struct	btVector3FloatData
1296 struct	btVector3DoubleData
1302 SIMD_FORCE_INLINE	void	btVector3::serializeFloat(struct	btVector3FloatData& dataOut) const
1304 ///could also do a memcpy, check if it is worth it
1305 for (int i=0;i<4;i++)
1306 dataOut.m_floats[i] = float(m_floats[i]);
1309 SIMD_FORCE_INLINE void	btVector3::deSerializeFloat(const struct	btVector3FloatData& dataIn)
1311 for (int i=0;i<4;i++)
1312 m_floats[i] = btScalar(dataIn.m_floats[i]);
1316 SIMD_FORCE_INLINE	void	btVector3::serializeDouble(struct	btVector3DoubleData& dataOut) const
1318 ///could also do a memcpy, check if it is worth it
1319 for (int i=0;i<4;i++)
1320 dataOut.m_floats[i] = double(m_floats[i]);
1323 SIMD_FORCE_INLINE void	btVector3::deSerializeDouble(const struct	btVector3DoubleData& dataIn)
1325 for (int i=0;i<4;i++)
1326 m_floats[i] = btScalar(dataIn.m_floats[i]);
// Precision-neutral variants using the btVector3Data alias.
1330 SIMD_FORCE_INLINE	void	btVector3::serialize(struct	btVector3Data& dataOut) const
1332 ///could also do a memcpy, check if it is worth it
1333 for (int i=0;i<4;i++)
1334 dataOut.m_floats[i] = m_floats[i];
1337 SIMD_FORCE_INLINE void	btVector3::deSerialize(const struct	btVector3Data& dataIn)
1339 for (int i=0;i<4;i++)
1340 m_floats[i] = dataIn.m_floats[i];
1343 #endif //BT_VECTOR3_H