2 Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
21 #include "b3AlignedAllocator.h"
23 #ifdef B3_USE_DOUBLE_PRECISION
24 #define b3Vector3Data b3Vector3DoubleData
25 #define b3Vector3DataName "b3Vector3DoubleData"
27 #define b3Vector3Data b3Vector3FloatData
28 #define b3Vector3DataName "b3Vector3FloatData"
29 #endif //B3_USE_DOUBLE_PRECISION
31 #if defined B3_USE_SSE
33 //typedef uint32_t __m128i __attribute__ ((vector_size(16)));
36 #pragma warning(disable : 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
39 #define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff)
40 //#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
41 #define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
42 #define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3))
43 #define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i))
45 #define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
46 #define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
47 #define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
48 #define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
49 #define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
50 #define b3vxyzMaskf b3vFFF0fMask
51 #define b3vAbsfMask b3CastiTo128f(b3vAbsMask)
53 const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
54 const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
55 const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
56 const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
62 const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
63 const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
64 const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
65 const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
72 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
73 //#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
74 inline b3Vector3 b3MakeVector3(b3SimdFloat4 v);
75 inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec);
78 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z);
79 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
80 inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
82 /**@brief b3Vector3 can be used to represent 3D points and vectors.
83 * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
84 * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
86 B3_ATTRIBUTE_ALIGNED16(class)
90 #if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
110 B3_DECLARE_ALIGNED_ALLOCATOR();
112 #if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
114 /*B3_FORCE_INLINE b3Vector3()
119 B3_FORCE_INLINE b3SimdFloat4 get128() const
123 B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
130 /**@brief Add a vector to this one
131 * @param v The vector to add to this one */
132 B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
134 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
135 mVec128 = _mm_add_ps(mVec128, v.mVec128);
136 #elif defined(B3_USE_NEON)
137 mVec128 = vaddq_f32(mVec128, v.mVec128);
139 m_floats[0] += v.m_floats[0];
140 m_floats[1] += v.m_floats[1];
141 m_floats[2] += v.m_floats[2];
146 /**@brief Subtract a vector from this one
147 * @param v The vector to subtract */
148 B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v)
150 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
151 mVec128 = _mm_sub_ps(mVec128, v.mVec128);
152 #elif defined(B3_USE_NEON)
153 mVec128 = vsubq_f32(mVec128, v.mVec128);
155 m_floats[0] -= v.m_floats[0];
156 m_floats[1] -= v.m_floats[1];
157 m_floats[2] -= v.m_floats[2];
162 /**@brief Scale the vector
163 * @param s Scale factor */
164 B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
166 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
167 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
168 vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
169 mVec128 = _mm_mul_ps(mVec128, vs);
170 #elif defined(B3_USE_NEON)
171 mVec128 = vmulq_n_f32(mVec128, s);
180 /**@brief Inversely scale the vector
181 * @param s Scale factor to divide by */
182 B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s)
184 b3FullAssert(s != b3Scalar(0.0));
186 #if 0 //defined(B3_USE_SSE_IN_API)
187 // this code is not faster !
188 __m128 vs = _mm_load_ss(&s);
189 vs = _mm_div_ss(b3v1110, vs);
190 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
192 mVec128 = _mm_mul_ps(mVec128, vs);
196 return *this *= b3Scalar(1.0) / s;
200 /**@brief Return the dot product
201 * @param v The other vector in the dot product */
202 B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
204 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
205 __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
206 __m128 z = _mm_movehl_ps(vd, vd);
207 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
208 vd = _mm_add_ss(vd, y);
209 vd = _mm_add_ss(vd, z);
210 return _mm_cvtss_f32(vd);
211 #elif defined(B3_USE_NEON)
212 float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
213 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
214 x = vadd_f32(x, vget_high_f32(vd));
215 return vget_lane_f32(x, 0);
217 return m_floats[0] * v.m_floats[0] +
218 m_floats[1] * v.m_floats[1] +
219 m_floats[2] * v.m_floats[2];
223 /**@brief Return the length of the vector squared */
224 B3_FORCE_INLINE b3Scalar length2() const
229 /**@brief Return the length of the vector */
230 B3_FORCE_INLINE b3Scalar length() const
232 return b3Sqrt(length2());
235 /**@brief Return the distance squared between the ends of this and another vector
236 * This is semantically treating the vector like a point */
237 B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
239 /**@brief Return the distance between the ends of this and another vector
240 * This is semantically treating the vector like a point */
241 B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
243 B3_FORCE_INLINE b3Vector3& safeNormalize()
245 b3Scalar l2 = length2();
246 //triNormal.normalize();
247 if (l2 >= B3_EPSILON * B3_EPSILON)
249 (*this) /= b3Sqrt(l2);
258 /**@brief Normalize this vector
259 * x^2 + y^2 + z^2 = 1 */
260 B3_FORCE_INLINE b3Vector3& normalize()
262 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
264 __m128 vd = _mm_mul_ps(mVec128, mVec128);
265 __m128 z = _mm_movehl_ps(vd, vd);
266 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
267 vd = _mm_add_ss(vd, y);
268 vd = _mm_add_ss(vd, z);
271 vd = _mm_sqrt_ss(vd);
272 vd = _mm_div_ss(b3v1110, vd);
273 vd = b3_splat_ps(vd, 0x80);
274 mVec128 = _mm_mul_ps(mVec128, vd);
277 // NR step 1/sqrt(x) - vd is x, y is output
278 y = _mm_rsqrt_ss(vd); // estimate
282 vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
284 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
285 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
286 z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
288 y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
290 y = b3_splat_ps(y, 0x80);
291 mVec128 = _mm_mul_ps(mVec128, y);
297 return *this /= length();
301 /**@brief Return a normalized version of this vector */
302 B3_FORCE_INLINE b3Vector3 normalized() const;
304 /**@brief Return a rotated version of this vector
305 * @param wAxis The axis to rotate about
306 * @param angle The angle to rotate by */
307 B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const;
309 /**@brief Return the angle between this and another vector
310 * @param v The other vector */
311 B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const
313 b3Scalar s = b3Sqrt(length2() * v.length2());
314 b3FullAssert(s != b3Scalar(0.0));
315 return b3Acos(dot(v) / s);
318 /**@brief Return a vector with the absolute values of each element */
319 B3_FORCE_INLINE b3Vector3 absolute() const
321 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
322 return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
323 #elif defined(B3_USE_NEON)
324 return b3Vector3(vabsq_f32(mVec128));
326 return b3MakeVector3(
329 b3Fabs(m_floats[2]));
333 /**@brief Return the cross product between this and another vector
334 * @param v The other vector */
335 B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
337 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
340 T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
341 V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
343 V = _mm_mul_ps(V, mVec128);
344 T = _mm_mul_ps(T, v.mVec128);
345 V = _mm_sub_ps(V, T);
347 V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
348 return b3MakeVector3(V);
349 #elif defined(B3_USE_NEON)
351 // form (Y, Z, X, _) of mVec128 and v.mVec128
352 float32x2_t Tlow = vget_low_f32(mVec128);
353 float32x2_t Vlow = vget_low_f32(v.mVec128);
354 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
355 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
357 V = vmulq_f32(V, mVec128);
358 T = vmulq_f32(T, v.mVec128);
360 Vlow = vget_low_f32(V);
361 // form (Y, Z, X, _);
362 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
363 V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask);
367 return b3MakeVector3(
368 m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
369 m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
370 m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
374 B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const
376 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
378 __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
379 __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
381 V = _mm_mul_ps(V, v1.mVec128);
382 T = _mm_mul_ps(T, v2.mVec128);
383 V = _mm_sub_ps(V, T);
385 V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
388 V = _mm_mul_ps(V, mVec128);
389 __m128 z = _mm_movehl_ps(V, V);
390 __m128 y = _mm_shuffle_ps(V, V, 0x55);
391 V = _mm_add_ss(V, y);
392 V = _mm_add_ss(V, z);
393 return _mm_cvtss_f32(V);
395 #elif defined(B3_USE_NEON)
398 // form (Y, Z, X, _) of mVec128 and v.mVec128
399 float32x2_t Tlow = vget_low_f32(v1.mVec128);
400 float32x2_t Vlow = vget_low_f32(v2.mVec128);
401 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
402 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
404 V = vmulq_f32(V, v1.mVec128);
405 T = vmulq_f32(T, v2.mVec128);
407 Vlow = vget_low_f32(V);
408 // form (Y, Z, X, _);
409 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
412 V = vmulq_f32(mVec128, V);
413 float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
414 x = vadd_f32(x, vget_high_f32(V));
415 return vget_lane_f32(x, 0);
417 return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
418 m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
419 m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
423 /**@brief Return the axis with the smallest value
424 * Note return values are 0,1,2 for x, y, or z */
425 B3_FORCE_INLINE int minAxis() const
427 return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
430 /**@brief Return the axis with the largest value
431 * Note return values are 0,1,2 for x, y, or z */
432 B3_FORCE_INLINE int maxAxis() const
434 return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
437 B3_FORCE_INLINE int furthestAxis() const
439 return absolute().minAxis();
442 B3_FORCE_INLINE int closestAxis() const
444 return absolute().maxAxis();
447 B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
449 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
450 __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
451 b3Scalar s = b3Scalar(1.0) - rt;
452 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
453 vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
454 __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
455 vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
456 __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
457 __m128 tmp3 = _mm_add_ps(r0, r1);
459 #elif defined(B3_USE_NEON)
460 float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
461 vl = vmulq_n_f32(vl, rt);
462 mVec128 = vaddq_f32(vl, v0.mVec128);
464 b3Scalar s = b3Scalar(1.0) - rt;
465 m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
466 m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
467 m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
468 //don't do the unused w component
469 // m_co[3] = s * v0[3] + rt * v1[3];
473 /**@brief Return the linear interpolation between this and another vector
474 * @param v The other vector
475 * @param t The ratio of this to v (t = 0 => return this, t=1 => return other) */
476 B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const
478 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
479 __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
480 vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
481 __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
482 vl = _mm_mul_ps(vl, vt);
483 vl = _mm_add_ps(vl, mVec128);
485 return b3MakeVector3(vl);
486 #elif defined(B3_USE_NEON)
487 float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
488 vl = vmulq_n_f32(vl, t);
489 vl = vaddq_f32(vl, mVec128);
491 return b3Vector3(vl);
493 return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
494 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
495 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
499 /**@brief Elementwise multiply this vector by the other
500 * @param v The other vector */
501 B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
503 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
504 mVec128 = _mm_mul_ps(mVec128, v.mVec128);
505 #elif defined(B3_USE_NEON)
506 mVec128 = vmulq_f32(mVec128, v.mVec128);
508 m_floats[0] *= v.m_floats[0];
509 m_floats[1] *= v.m_floats[1];
510 m_floats[2] *= v.m_floats[2];
515 /**@brief Return the x value (read-only reference to m_floats[0]) */
516 B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
517 /**@brief Return the y value (read-only reference to m_floats[1]) */
518 B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
519 /**@brief Return the z value (read-only reference to m_floats[2]) */
520 B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
521 /**@brief Return the w value (read-only reference to m_floats[3], the fourth component) */
522 B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
524 /**@brief Set the x value (writes m_floats[0] only; the other components are left untouched)
525 * @param _x New value for the x component */
525 B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; }
526 /**@brief Set the y value (writes m_floats[1] only; the other components are left untouched)
527 * @param _y New value for the y component */
527 B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; }
528 /**@brief Set the z value (writes m_floats[2] only; the other components are left untouched)
529 * @param _z New value for the z component */
529 B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; }
530 /**@brief Set the w value (writes m_floats[3] only; the other components are left untouched)
531 * @param _w New value for the w component */
531 B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; }
533 //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
534 //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
535 ///operator b3Scalar*() replaces operator[], using implicit conversion: indexing a vector (v[i]) goes through the pointer to m_floats[0] returned here. We added operator != and operator == to avoid pointer comparisons.
536 B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
537 B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }  ///< const overload: read-only pointer to m_floats[0]
539 B3_FORCE_INLINE bool operator==(const b3Vector3& other) const
541 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
542 return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
544 return ((m_floats[3] == other.m_floats[3]) &&
545 (m_floats[2] == other.m_floats[2]) &&
546 (m_floats[1] == other.m_floats[1]) &&
547 (m_floats[0] == other.m_floats[0]));
551 B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const
553 return !(*this == other);
556 /**@brief Set each element to the max of the current values and the values of another b3Vector3
557 * @param other The other b3Vector3 to compare with
559 B3_FORCE_INLINE void setMax(const b3Vector3& other)
561 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
562 mVec128 = _mm_max_ps(mVec128, other.mVec128);
563 #elif defined(B3_USE_NEON)
564 mVec128 = vmaxq_f32(mVec128, other.mVec128);
566 b3SetMax(m_floats[0], other.m_floats[0]);
567 b3SetMax(m_floats[1], other.m_floats[1]);
568 b3SetMax(m_floats[2], other.m_floats[2]);
569 b3SetMax(m_floats[3], other.m_floats[3]);
573 /**@brief Set each element to the min of the current values and the values of another b3Vector3
574 * @param other The other b3Vector3 to compare with
576 B3_FORCE_INLINE void setMin(const b3Vector3& other)
578 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
579 mVec128 = _mm_min_ps(mVec128, other.mVec128);
580 #elif defined(B3_USE_NEON)
581 mVec128 = vminq_f32(mVec128, other.mVec128);
583 b3SetMin(m_floats[0], other.m_floats[0]);
584 b3SetMin(m_floats[1], other.m_floats[1]);
585 b3SetMin(m_floats[2], other.m_floats[2]);
586 b3SetMin(m_floats[3], other.m_floats[3]);
590 B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
595 m_floats[3] = b3Scalar(0.f);
598 void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const
600 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
602 __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask);
603 __m128 V0 = _mm_xor_ps(b3vMzeroMask, V);
604 __m128 V2 = _mm_movelh_ps(V0, V);
606 __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
608 V0 = _mm_shuffle_ps(V0, V, 0xDB);
609 V2 = _mm_shuffle_ps(V2, V, 0xF9);
615 v0->setValue(0., -getZ(), getY());
616 v1->setValue(getZ(), 0., -getX());
617 v2->setValue(-getY(), getX(), 0.);
623 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
624 mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
625 #elif defined(B3_USE_NEON)
626 int32x4_t vi = vdupq_n_s32(0);
627 mVec128 = vreinterpretq_f32_s32(vi);
629 setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));
633 B3_FORCE_INLINE bool isZero() const
635 return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0);
638 B3_FORCE_INLINE bool fuzzyZero() const
640 return length2() < B3_EPSILON;
643 B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const;
645 B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn);
647 B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const;
649 B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn);
651 B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const;
653 B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn);
655 /**@brief returns index of maximum dot product between this and vectors in array[]
656 * @param array The other vectors
657 * @param array_count The number of other vectors
658 * @param dotOut The maximum dot product */
659 B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
661 /**@brief returns index of minimum dot product between this and vectors in array[]
662 * @param array The other vectors
663 * @param array_count The number of other vectors
664 * @param dotOut The minimum dot product */
665 B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
667 /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */
668 B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const
670 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
672 __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
673 __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
674 __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
675 __m128 b0 = _mm_unpacklo_ps(a0, a1);
676 __m128 b1 = _mm_unpackhi_ps(a0, a1);
677 __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
678 __m128 r = _mm_movelh_ps(b0, b2);
679 r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
680 a2 = _mm_and_ps(a2, b3vxyzMaskf);
681 r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1))));
682 return b3MakeVector3(r);
684 #elif defined(B3_USE_NEON)
685 static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
686 float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
687 float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
688 float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
689 float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
690 a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
691 float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
692 float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
693 return b3Vector3(vcombine_f32(b0, b1));
695 return b3MakeVector3(dot(v0), dot(v1), dot(v2));
700 /**@brief Return the sum of two vectors (Point semantics)*/
701 B3_FORCE_INLINE b3Vector3
702 operator+(const b3Vector3& v1, const b3Vector3& v2)
704 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
705 return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
706 #elif defined(B3_USE_NEON)
707 return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
709 return b3MakeVector3(
710 v1.m_floats[0] + v2.m_floats[0],
711 v1.m_floats[1] + v2.m_floats[1],
712 v1.m_floats[2] + v2.m_floats[2]);
716 /**@brief Return the elementwise product of two vectors */
717 B3_FORCE_INLINE b3Vector3
718 operator*(const b3Vector3& v1, const b3Vector3& v2)
720 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
721 return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
722 #elif defined(B3_USE_NEON)
723 return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
725 return b3MakeVector3(
726 v1.m_floats[0] * v2.m_floats[0],
727 v1.m_floats[1] * v2.m_floats[1],
728 v1.m_floats[2] * v2.m_floats[2]);
732 /**@brief Return the difference between two vectors */
733 B3_FORCE_INLINE b3Vector3
734 operator-(const b3Vector3& v1, const b3Vector3& v2)
736 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
738 // without _mm_and_ps this code causes slowdown in Concave moving
739 __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
740 return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
741 #elif defined(B3_USE_NEON)
742 float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
743 return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
745 return b3MakeVector3(
746 v1.m_floats[0] - v2.m_floats[0],
747 v1.m_floats[1] - v2.m_floats[1],
748 v1.m_floats[2] - v2.m_floats[2]);
752 /**@brief Return the negative of the vector */
753 B3_FORCE_INLINE b3Vector3
754 operator-(const b3Vector3& v)
756 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
757 __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask);
758 return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
759 #elif defined(B3_USE_NEON)
760 return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
762 return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
766 /**@brief Return the vector scaled by s */
767 B3_FORCE_INLINE b3Vector3
768 operator*(const b3Vector3& v, const b3Scalar& s)
770 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
771 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
772 vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
773 return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
774 #elif defined(B3_USE_NEON)
775 float32x4_t r = vmulq_n_f32(v.mVec128, s);
776 return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
778 return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
782 /**@brief Return the vector scaled by s */
783 B3_FORCE_INLINE b3Vector3
784 operator*(const b3Scalar& s, const b3Vector3& v)
789 /**@brief Return the vector inversely scaled by s */
790 B3_FORCE_INLINE b3Vector3
791 operator/(const b3Vector3& v, const b3Scalar& s)
793 b3FullAssert(s != b3Scalar(0.0));
794 #if 0 //defined(B3_USE_SSE_IN_API)
795 // this code is not faster !
796 __m128 vs = _mm_load_ss(&s);
797 vs = _mm_div_ss(b3v1110, vs);
798 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
800 return b3Vector3(_mm_mul_ps(v.mVec128, vs));
802 return v * (b3Scalar(1.0) / s);
806 /**@brief Return the elementwise quotient of two vectors (each component of v1 divided by the matching component of v2) */
807 B3_FORCE_INLINE b3Vector3
808 operator/(const b3Vector3& v1, const b3Vector3& v2)
810 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
811 __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
812 vec = _mm_and_ps(vec, b3vFFF0fMask);
813 return b3MakeVector3(vec);
814 #elif defined(B3_USE_NEON)
815 float32x4_t x, y, v, m;
820 v = vrecpeq_f32(y); // v ~ 1/y
821 m = vrecpsq_f32(y, v); // m = (2-v*y)
822 v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
823 m = vrecpsq_f32(y, v); // mm = (2-vv*y)
824 v = vmulq_f32(v, x); // x*vv
825 v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
829 return b3MakeVector3(
830 v1.m_floats[0] / v2.m_floats[0],
831 v1.m_floats[1] / v2.m_floats[1],
832 v1.m_floats[2] / v2.m_floats[2]);
836 /**@brief Return the dot product between two vectors */
837 B3_FORCE_INLINE b3Scalar
838 b3Dot(const b3Vector3& v1, const b3Vector3& v2)
843 /**@brief Return the distance squared between two vectors */
844 B3_FORCE_INLINE b3Scalar
845 b3Distance2(const b3Vector3& v1, const b3Vector3& v2)
847 return v1.distance2(v2);
850 /**@brief Return the distance between two vectors */
851 B3_FORCE_INLINE b3Scalar
852 b3Distance(const b3Vector3& v1, const b3Vector3& v2)
854 return v1.distance(v2);
857 /**@brief Return the angle between two vectors */
858 B3_FORCE_INLINE b3Scalar
859 b3Angle(const b3Vector3& v1, const b3Vector3& v2)
864 /**@brief Return the cross product of two vectors */
865 B3_FORCE_INLINE b3Vector3
866 b3Cross(const b3Vector3& v1, const b3Vector3& v2)
871 B3_FORCE_INLINE b3Scalar
872 b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3)
874 return v1.triple(v2, v3);
877 /**@brief Return the linear interpolation between two vectors
878 * @param v1 One vector
879 * @param v2 The other vector
880 * @param t The ratio of this to v (t = 0 => return v1, t=1 => return v2) */
881 B3_FORCE_INLINE b3Vector3
882 b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
884 return v1.lerp(v2, t);
887 B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
889 return (v - *this).length2();
892 B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
894 return (v - *this).length();
897 B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
899 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
900 b3Vector3 norm = *this;
902 return norm.normalize();
904 return *this / length();
908 B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const
910 // wAxis must be a unit length vector
912 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
914 __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
915 b3Scalar ssin = b3Sin(_angle);
916 __m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128;
917 O = _mm_and_ps(O, b3vFFF0fMask);
918 b3Scalar scos = b3Cos(_angle);
920 __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
921 __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
923 __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0)
924 __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0)
925 O = _mm_add_ps(O, Y);
926 vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0)
927 O = _mm_add_ps(O, Z);
928 vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0)
931 O = O * wAxis.mVec128;
932 __m128 X = mVec128 - O;
938 return b3MakeVector3(O);
940 b3Vector3 o = wAxis * wAxis.dot(*this);
941 b3Vector3 _x = *this - o;
944 _y = wAxis.cross(*this);
946 return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle));
950 B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
952 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
953 #if defined _WIN32 || defined(B3_USE_SSE)
954 const long scalar_cutoff = 10;
955 long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
956 #elif defined B3_USE_NEON
957 const long scalar_cutoff = 4;
958 extern long (*b3_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);  // NEON: function pointer; the name must match the b3_maxdot_large call at the end of maxDot(), mirroring b3_mindot_large in minDot()
960 if (array_count < scalar_cutoff)
963 #endif //B3_USE_SSE || B3_USE_NEON
965 b3Scalar maxDot = -B3_INFINITY;
968 for (i = 0; i < array_count; i++)
970 b3Scalar dot = array[i].dot(*this);
979 b3Assert(ptIndex >= 0);
987 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
988 return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
992 B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
994 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
995 #if defined B3_USE_SSE
996 const long scalar_cutoff = 10;
997 long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
998 #elif defined B3_USE_NEON
999 const long scalar_cutoff = 4;
1000 extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
1002 #error unhandled arch!
1005 if (array_count < scalar_cutoff)
1006 #endif //B3_USE_SSE || B3_USE_NEON
1008 b3Scalar minDot = B3_INFINITY;
1012 for (i = 0; i < array_count; i++)
1014 b3Scalar dot = array[i].dot(*this);
1027 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
1028 return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
1032 class b3Vector4 : public b3Vector3
1035 B3_FORCE_INLINE b3Vector4 absolute4() const
1037 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
1038 return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
1039 #elif defined(B3_USE_NEON)
1040 return b3Vector4(vabsq_f32(mVec128));
1042 return b3MakeVector4(
1043 b3Fabs(m_floats[0]),
1044 b3Fabs(m_floats[1]),
1045 b3Fabs(m_floats[2]),
1046 b3Fabs(m_floats[3]));
1050 b3Scalar getW() const { return m_floats[3]; }  ///< Return the w value (m_floats[3]) by value; the b3Vector3 getter of the same name returns a const reference instead
1052 B3_FORCE_INLINE int maxAxis4() const
1055 b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT);
1056 if (m_floats[0] > maxVal)
1059 maxVal = m_floats[0];
1061 if (m_floats[1] > maxVal)
1064 maxVal = m_floats[1];
1066 if (m_floats[2] > maxVal)
1069 maxVal = m_floats[2];
1071 if (m_floats[3] > maxVal)
1079 B3_FORCE_INLINE int minAxis4() const
1082 b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT);
1083 if (m_floats[0] < minVal)
1086 minVal = m_floats[0];
1088 if (m_floats[1] < minVal)
1091 minVal = m_floats[1];
1093 if (m_floats[2] < minVal)
1096 minVal = m_floats[2];
1098 if (m_floats[3] < minVal)
1101 minVal = m_floats[3];
1107 B3_FORCE_INLINE int closestAxis4() const
1109 return absolute4().maxAxis4();
1112 /**@brief Set x,y,z and zero w
1113 * @param x Value of x
1114 * @param y Value of y
1115 * @param z Value of z
1118 /* void getValue(b3Scalar *m) const
1125 /**@brief Set the values
1126 * @param x Value of x
1127 * @param y Value of y
1128 * @param z Value of z
1129 * @param w Value of w
1131 B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
1140 ///b3SwapScalarEndian swaps scalar endianness, useful for network and cross-platform serialization
1141 B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
1143 #ifdef B3_USE_DOUBLE_PRECISION
1144 unsigned char* dest = (unsigned char*)&destVal;
1145 unsigned char* src = (unsigned char*)&sourceVal;
1155 unsigned char* dest = (unsigned char*)&destVal;
1156 unsigned char* src = (unsigned char*)&sourceVal;
1161 #endif //B3_USE_DOUBLE_PRECISION
1163 ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
1164 B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
1166 for (int i = 0; i < 4; i++)
1168 b3SwapScalarEndian(sourceVec[i], destVec[i]);
1172 ///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
1173 B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector)
1175 b3Vector3 swappedVec;
1176 for (int i = 0; i < 4; i++)
1178 b3SwapScalarEndian(vector[i], swappedVec[i]);
1180 vector = swappedVec;
1184 B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q)
1186 if (b3Fabs(n[2]) > B3_SQRT12)
1188 // choose p in y-z plane
1189 b3Scalar a = n[1] * n[1] + n[2] * n[2];
1190 b3Scalar k = b3RecipSqrt(a);
1196 q[1] = -n[0] * p[2];
1201 // choose p in x-y plane
1202 b3Scalar a = n[0] * n[0] + n[1] * n[1];
1203 b3Scalar k = b3RecipSqrt(a);
1208 q[0] = -n[2] * p[1];
1214 struct b3Vector3FloatData
1219 struct b3Vector3DoubleData
1224 B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const
1226 ///could also do a memcpy, check if it is worth it
1227 for (int i = 0; i < 4; i++)
1228 dataOut.m_floats[i] = float(m_floats[i]);
1231 B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn)
1233 for (int i = 0; i < 4; i++)
1234 m_floats[i] = b3Scalar(dataIn.m_floats[i]);
1237 B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const
1239 ///could also do a memcpy, check if it is worth it
1240 for (int i = 0; i < 4; i++)
1241 dataOut.m_floats[i] = double(m_floats[i]);
1244 B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn)
1246 for (int i = 0; i < 4; i++)
1247 m_floats[i] = b3Scalar(dataIn.m_floats[i]);
1250 B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const
1252 ///could also do a memcpy, check if it is worth it
1253 for (int i = 0; i < 4; i++)
1254 dataOut.m_floats[i] = m_floats[i];
1257 B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn)
1259 for (int i = 0; i < 4; i++)
1260 m_floats[i] = dataIn.m_floats[i];
1263 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z)
1266 tmp.setValue(x, y, z);
1270 inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
1273 tmp.setValue(x, y, z);
1278 inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
1281 tmp.setValue(x, y, z, w);
1285 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
1287 inline b3Vector3 b3MakeVector3(b3SimdFloat4 v)
1294 inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
1303 #endif //B3_VECTOR3_H