2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
15 #ifndef BT_SIMD__QUATERNION_H_
16 #define BT_SIMD__QUATERNION_H_
18 #include "btVector3.h"
19 #include "btQuadWord.h"
21 #ifdef BT_USE_DOUBLE_PRECISION
22 #define btQuaternionData btQuaternionDoubleData
23 #define btQuaternionDataName "btQuaternionDoubleData"
25 #define btQuaternionData btQuaternionFloatData
26 #define btQuaternionDataName "btQuaternionFloatData"
27 #endif //BT_USE_DOUBLE_PRECISION
// SIMD sign-mask constants used by the quaternion SSE/NEON code paths below.
// Note: _mm_set_ps lists lanes in (w, z, y, x) order, so the SSE macros and
// the NEON initializer lists below describe the same per-lane signs.
31 //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
32 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
36 #if defined(BT_USE_SSE)
// vQInv: XOR mask that flips the sign of x, y, z (used for the conjugate/inverse).
38 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
// vPPPM: XOR mask that flips the sign of the last (w) lane only.
39 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
41 #elif defined(BT_USE_NEON)
43 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
44 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
48 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
49 class btQuaternion : public btQuadWord
52 	/**@brief No initialization constructor */
55 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
// SIMD builds: construct, copy and assign through the raw 128-bit register
// (mVec128 inherited from btQuadWord) instead of per-component copies.
57 	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
63 	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
65 		mVec128 = rhs.mVec128;
68 	// Assignment Operator
69 	SIMD_FORCE_INLINE btQuaternion&
70 	operator=(const btQuaternion& v)
79 //	template <typename btScalar>
80 //	explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
81 	/**@brief Constructor from scalars */
82 	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
83 		: btQuadWord(_x, _y, _z, _w)
86 	/**@brief Axis angle Constructor
87 	 * @param axis The axis which the rotation is around
88 	 * @param angle The magnitude of the rotation in Radians */
89 	btQuaternion(const btVector3& _axis, const btScalar& _angle)
91 		setRotation(_axis, _angle);
93 	/**@brief Constructor from Euler angles
94 	 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
95 	 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
96 	 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
97 	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
// Axis convention is chosen at compile time via BT_EULER_DEFAULT_ZYX.
99 #ifndef BT_EULER_DEFAULT_ZYX
100 		setEuler(yaw, pitch, roll);
102 		setEulerZYX(yaw, pitch, roll);
105 	/**@brief Set the rotation using axis angle notation
106 	 * @param axis The axis around which to rotate (need not be unit length; it is normalized here)
107 	 * @param angle The magnitude of the rotation in Radians */
108 	void setRotation(const btVector3& axis, const btScalar& _angle)
// Dividing by the axis length normalizes it; a zero-length axis would divide
// by zero and is rejected by the assert below.
110 		btScalar d = axis.length();
111 		btAssert(d != btScalar(0.0));
112 		btScalar s = btSin(_angle * btScalar(0.5)) / d;
// Standard axis-angle form: (sin(a/2) * unit_axis, cos(a/2)).
113 		setValue(axis.x() * s, axis.y() * s, axis.z() * s,
114 				 btCos(_angle * btScalar(0.5)));
116 	/**@brief Set the quaternion using Euler angles
117 	 * @param yaw Angle around Y
118 	 * @param pitch Angle around X
119 	 * @param roll Angle around Z */
120 	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
// Compose the three half-angle rotations directly into quaternion components.
122 		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
123 		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
124 		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
125 		btScalar cosYaw = btCos(halfYaw);
126 		btScalar sinYaw = btSin(halfYaw);
127 		btScalar cosPitch = btCos(halfPitch);
128 		btScalar sinPitch = btSin(halfPitch);
129 		btScalar cosRoll = btCos(halfRoll);
130 		btScalar sinRoll = btSin(halfRoll);
131 		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
132 				 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
133 				 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
134 				 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
136 	/**@brief Set the quaternion using euler angles
137 	 * @param yaw Angle around Z
138 	 * @param pitch Angle around Y
139 	 * @param roll Angle around X */
140 	void setEulerZYX(const btScalar& yawZ, const btScalar& pitchY, const btScalar& rollX)
// Same construction as setEuler, but with the Z-Y-X axis convention
// (the inverse of what getEulerZYX below decomposes).
142 		btScalar halfYaw = btScalar(yawZ) * btScalar(0.5);
143 		btScalar halfPitch = btScalar(pitchY) * btScalar(0.5);
144 		btScalar halfRoll = btScalar(rollX) * btScalar(0.5);
145 		btScalar cosYaw = btCos(halfYaw);
146 		btScalar sinYaw = btSin(halfYaw);
147 		btScalar cosPitch = btCos(halfPitch);
148 		btScalar sinPitch = btSin(halfPitch);
149 		btScalar cosRoll = btCos(halfRoll);
150 		btScalar sinRoll = btSin(halfRoll);
151 		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,  //x
152 				 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,  //y
153 				 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,  //z
154 				 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);  //formerly yzx
157 	/**@brief Get the euler angles from this quaternion
158 	 * @param yaw Angle around Z
159 	 * @param pitch Angle around Y
160 	 * @param roll Angle around X */
161 	void getEulerZYX(btScalar& yawZ, btScalar& pitchY, btScalar& rollX) const
// Squared components of (x, y, z, w); sarg is sin(pitchY).
168 		sqx = m_floats[0] * m_floats[0];
169 		sqy = m_floats[1] * m_floats[1];
170 		sqz = m_floats[2] * m_floats[2];
171 		squ = m_floats[3] * m_floats[3];
172 		sarg = btScalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
174 		// If the pitch angle is PI/2 or -PI/2, we can only compute
175 		// the sum roll + yaw. However, any combination that gives
176 		// the right sum will produce the correct orientation, so we
177 		// set rollX = 0 and compute yawZ.
// 0.99999 is the gimbal-lock tolerance: beyond it btAsin would be
// numerically unstable, so the degenerate branches are taken instead.
178 		if (sarg <= -btScalar(0.99999))
180 			pitchY = btScalar(-0.5) * SIMD_PI;
182 			yawZ = btScalar(2) * btAtan2(m_floats[0], -m_floats[1]);
184 		else if (sarg >= btScalar(0.99999))
186 			pitchY = btScalar(0.5) * SIMD_PI;
188 			yawZ = btScalar(2) * btAtan2(-m_floats[0], m_floats[1]);
// General (non-degenerate) case.
192 			pitchY = btAsin(sarg);
193 			rollX = btAtan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
194 			yawZ = btAtan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
198 	/**@brief Add two quaternions
199 	 * @param q The quaternion to add to this one */
200 	SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
// Component-wise add; SSE/NEON do it in one vector instruction.
202 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
203 		mVec128 = _mm_add_ps(mVec128, q.mVec128);
204 #elif defined(BT_USE_NEON)
205 		mVec128 = vaddq_f32(mVec128, q.mVec128);
207 		m_floats[0] += q.x();
208 		m_floats[1] += q.y();
209 		m_floats[2] += q.z();
210 		m_floats[3] += q.m_floats[3];
215 	/**@brief Subtract out a quaternion
216 	 * @param q The quaternion to subtract from this one */
217 	btQuaternion& operator-=(const btQuaternion& q)
// Component-wise subtract, mirroring operator+= above.
219 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
220 		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
221 #elif defined(BT_USE_NEON)
222 		mVec128 = vsubq_f32(mVec128, q.mVec128);
224 		m_floats[0] -= q.x();
225 		m_floats[1] -= q.y();
226 		m_floats[2] -= q.z();
227 		m_floats[3] -= q.m_floats[3];
232 	/**@brief Scale this quaternion
233 	 * @param s The scalar to scale by */
234 	btQuaternion& operator*=(const btScalar& s)
236 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
// Broadcast the scalar to all four lanes, then one vector multiply.
237 		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
238 		vs = bt_pshufd_ps(vs, 0);     //	(S S S S)
239 		mVec128 = _mm_mul_ps(mVec128, vs);
240 #elif defined(BT_USE_NEON)
241 		mVec128 = vmulq_n_f32(mVec128, s);
251 	/**@brief Multiply this quaternion by q on the right
252 	 * @param q The other quaternion
253 	 * Equivalent to this = this * q */
254 	btQuaternion& operator*=(const btQuaternion& q)
// SIMD paths compute the Hamilton product as AB0 + AB1 + AB2 - AB3, where
// each ABn is a lane-shuffled product; vPPPM flips the sign of the w lane
// of the AB1+AB2 partial sum. The scalar fallback at the bottom is the
// reference formula.
256 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
257 		__m128 vQ2 = q.get128();
259 		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0, 1, 2, 0));
260 		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));
264 		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 1));
265 		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
269 		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2, 0, 1, 2));
270 		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
272 		B1 = B1 * B2;  //	A3 *= B3
274 		mVec128 = bt_splat_ps(mVec128, 3);  //	A0
275 		mVec128 = mVec128 * vQ2;            //	A0 * B0
277 		A1 = A1 + A2;             //	AB12
278 		mVec128 = mVec128 - B1;   //	AB03 = AB0 - AB3
279 		A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
280 		mVec128 = mVec128 + A1;      //	AB03 + AB12
282 #elif defined(BT_USE_NEON)
284 		float32x4_t vQ1 = mVec128;
285 		float32x4_t vQ2 = q.get128();
286 		float32x4_t A0, A1, B1, A2, B2, A3, B3;
287 		float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
291 		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
294 		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
297 		vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
299 		vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
301 		vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
302 		vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
304 		A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
305 		B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
307 		A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
308 		B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
310 		A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
311 		B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
313 		A1 = vmulq_f32(A1, B1);
314 		A2 = vmulq_f32(A2, B2);
315 		A3 = vmulq_f32(A3, B3);                          //	A3 *= B3
316 		A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
318 		A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
319 		A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
321 		//	change the sign of the last element
322 		A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
323 		A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
// Scalar fallback: textbook quaternion product components.
328 			m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
329 			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
330 			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
331 			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
335 	/**@brief Return the dot product between this quaternion and another
336 	 * @param q The other quaternion */
337 	btScalar dot(const btQuaternion& q) const
339 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
// Multiply lane-wise, then horizontally add the four products.
342 		vd = _mm_mul_ps(mVec128, q.mVec128);
344 		__m128 t = _mm_movehl_ps(vd, vd);
345 		vd = _mm_add_ps(vd, t);
346 		t = _mm_shuffle_ps(vd, vd, 0x55);
347 		vd = _mm_add_ss(vd, t);
349 		return _mm_cvtss_f32(vd);
350 #elif defined(BT_USE_NEON)
351 		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
352 		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
354 		return vget_lane_f32(x, 0);
// Scalar fallback: x*x' + y*y' + z*z' + w*w'.
356 		return m_floats[0] * q.x() +
357 			   m_floats[1] * q.y() +
358 			   m_floats[2] * q.z() +
359 			   m_floats[3] * q.m_floats[3];
363 	/**@brief Return the length squared of the quaternion */
364 	btScalar length2() const
369 	/**@brief Return the length of the quaternion */
370 	btScalar length() const
372 		return btSqrt(length2());
// safeNormalize: only normalizes when the squared length exceeds
// SIMD_EPSILON, avoiding division by (near-)zero for degenerate values.
374 	btQuaternion& safeNormalize()
376 		btScalar l2 = length2();
377 		if (l2 > SIMD_EPSILON)
383 	/**@brief Normalize the quaternion
384 	 * Such that x^2 + y^2 + z^2 +w^2 = 1 */
385 	btQuaternion& normalize()
387 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
// Horizontal dot product of the quaternion with itself...
390 		vd = _mm_mul_ps(mVec128, mVec128);
392 		__m128 t = _mm_movehl_ps(vd, vd);
393 		vd = _mm_add_ps(vd, t);
394 		t = _mm_shuffle_ps(vd, vd, 0x55);
395 		vd = _mm_add_ss(vd, t);
// ...then scale by 1/sqrt(len2). Uses an exact sqrt + divide (not the
// approximate rsqrt instruction) for full precision.
397 		vd = _mm_sqrt_ss(vd);
398 		vd = _mm_div_ss(vOnes, vd);
399 		vd = bt_pshufd_ps(vd, 0);  // splat
400 		mVec128 = _mm_mul_ps(mVec128, vd);
// Non-SSE fallback: divide through by the length.
404 		return *this /= length();
408 	/**@brief Return a scaled version of this quaternion
409 	 * @param s The scale factor */
410 	SIMD_FORCE_INLINE btQuaternion
411 	operator*(const btScalar& s) const
413 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
414 		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
415 		vs = bt_pshufd_ps(vs, 0x00);  //	(S S S S)
417 		return btQuaternion(_mm_mul_ps(mVec128, vs));
418 #elif defined(BT_USE_NEON)
419 		return btQuaternion(vmulq_n_f32(mVec128, s));
421 		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
425 	/**@brief Return an inversely scaled version of this quaternion
426 	 * @param s The inverse scale factor */
427 	btQuaternion operator/(const btScalar& s) const
// Division by zero is a caller error, caught by the assert in debug builds.
429 		btAssert(s != btScalar(0.0));
430 		return *this * (btScalar(1.0) / s);
433 	/**@brief Inversely scale this quaternion
434 	 * @param s The scale factor */
435 	btQuaternion& operator/=(const btScalar& s)
437 		btAssert(s != btScalar(0.0));
438 		return *this *= btScalar(1.0) / s;
441 	/**@brief Return a normalized version of this quaternion */
442 	btQuaternion normalized() const
444 		return *this / length();
446 	/**@brief Return the ***half*** angle between this quaternion and the other
447 	 * @param q The other quaternion */
448 	btScalar angle(const btQuaternion& q) const
// acos of the normalized dot product; s == 0 (zero-length input) is a
// caller error caught by the assert.
450 		btScalar s = btSqrt(length2() * q.length2());
451 		btAssert(s != btScalar(0.0));
452 		return btAcos(dot(q) / s);
455 	/**@brief Return the angle between this quaternion and the other along the shortest path
456 	 * @param q The other quaternion */
457 	btScalar angleShortestPath(const btQuaternion& q) const
459 		btScalar s = btSqrt(length2() * q.length2());
460 		btAssert(s != btScalar(0.0));
// Negate q when the dot product is negative so the angle is measured
// along the shorter of the two great-circle arcs.
461 		if (dot(q) < 0)  // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
462 			return btAcos(dot(-q) / s) * btScalar(2.0);
464 			return btAcos(dot(q) / s) * btScalar(2.0);
467 	/**@brief Return the angle [0, 2Pi] of rotation represented by this quaternion */
468 	btScalar getAngle() const
// NOTE(review): assumes a unit quaternion; w slightly outside [-1, 1] from
// float round-off would make btAcos return NaN — confirm callers normalize.
470 		btScalar s = btScalar(2.) * btAcos(m_floats[3]);
474 	/**@brief Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path */
475 	btScalar getAngleShortestPath() const
// Use -w when w is negative, which maps the angle into [0, Pi].
478 		if (m_floats[3] >= 0)
479 			s = btScalar(2.) * btAcos(m_floats[3]);
481 			s = btScalar(2.) * btAcos(-m_floats[3]);
485 	/**@brief Return the axis of the rotation represented by this quaternion */
486 	btVector3 getAxis() const
488 		btScalar s_squared = 1.f - m_floats[3] * m_floats[3];
490 		if (s_squared < btScalar(10.) * SIMD_EPSILON)  //Check for divide by zero
491 			return btVector3(1.0, 0.0, 0.0);           // Arbitrary
492 		btScalar s = 1.f / btSqrt(s_squared);
493 		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
496 	/**@brief Return the inverse of this quaternion */
497 	btQuaternion inverse() const
// Computes the conjugate (negated x, y, z; w unchanged), which equals the
// true inverse only for unit-length quaternions. The SIMD paths XOR with
// the vQInv sign mask defined near the top of this header.
499 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
500 		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
501 #elif defined(BT_USE_NEON)
502 		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
504 		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
508 	/**@brief Return the sum of this quaternion and the other
509 	 * @param q2 The other quaternion */
510 	SIMD_FORCE_INLINE btQuaternion
511 	operator+(const btQuaternion& q2) const
513 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
514 		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
515 #elif defined(BT_USE_NEON)
516 		return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
518 		const btQuaternion& q1 = *this;
519 		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
523 	/**@brief Return the difference between this quaternion and the other
524 	 * @param q2 The other quaternion */
525 	SIMD_FORCE_INLINE btQuaternion
526 	operator-(const btQuaternion& q2) const
528 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
529 		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
530 #elif defined(BT_USE_NEON)
531 		return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
533 		const btQuaternion& q1 = *this;
534 		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
538 	/**@brief Return the negative of this quaternion
539 	 * This simply negates each element */
540 	SIMD_FORCE_INLINE btQuaternion operator-() const
// SIMD paths XOR all four lanes with a negative-zero mask to flip signs.
542 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
543 		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
544 #elif defined(BT_USE_NEON)
545 		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask));
547 		const btQuaternion& q2 = *this;
548 		return btQuaternion(-q2.x(), -q2.y(), -q2.z(), -q2.m_floats[3]);
551 	/**@todo document this and its use
// Selects between qd and its negation: presumably diff = *this - qd and
// sum = *this + qd (computed in lines not shown here), so the comparison
// picks whichever sign of qd lies farther from *this — TODO confirm. */
552 	SIMD_FORCE_INLINE btQuaternion farthest(const btQuaternion& qd) const
554 		btQuaternion diff, sum;
557 		if (diff.dot(diff) > sum.dot(sum))
562 	/**@todo document this and its use
// Mirror of farthest(): picks whichever sign of qd is nearer to *this. */
563 	SIMD_FORCE_INLINE btQuaternion nearest(const btQuaternion& qd) const
565 		btQuaternion diff, sum;
568 		if (diff.dot(diff) < sum.dot(sum))
573 	/**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
574 	 * @param q The other quaternion to interpolate with
575 	 * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
576 	 * Slerp interpolates assuming constant velocity.  */
577 	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
579 		const btScalar magnitude = btSqrt(length2() * q.length2());
580 		btAssert(magnitude > btScalar(0));
582 		const btScalar product = dot(q) / magnitude;
583 		const btScalar absproduct = btFabs(product);
// When the quaternions are (nearly) identical, sin(theta) -> 0 and the
// slerp weights become ill-conditioned; the (elided) else-branch handles it.
585 		if (absproduct < btScalar(1.0 - SIMD_EPSILON))
587 			// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
588 			const btScalar theta = btAcos(absproduct);
589 			const btScalar d = btSin(theta);
590 			btAssert(d > btScalar(0));
// The sign flip routes interpolation along the shorter arc when
// dot(q) < 0 (q and -q represent the same rotation).
592 			const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
593 			const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
594 			const btScalar s1 = btSin(sign * t * theta) / d;
597 				(m_floats[0] * s0 + q.x() * s1),
598 				(m_floats[1] * s0 + q.y() * s1),
599 				(m_floats[2] * s0 + q.z() * s1),
600 				(m_floats[3] * s0 + q.w() * s1));
// Identity rotation (0, 0, 0, 1); function-local static, constructed once.
608 	static const btQuaternion& getIdentity()
610 		static const btQuaternion identityQuat(btScalar(0.), btScalar(0.), btScalar(0.), btScalar(1.));
614 	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
// Serialization interface; implementations follow the data-struct
// definitions at the bottom of this header.
616 	SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
618 	SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionFloatData& dataIn);
620 	SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionDoubleData& dataIn);
622 	SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
624 	SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn);
626 	SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
628 	SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn);
631 /**@brief Return the product of two quaternions */
632 SIMD_FORCE_INLINE btQuaternion
633 operator*(const btQuaternion& q1, const btQuaternion& q2)
// Free-function Hamilton product; same AB0 + AB1 + AB2 - AB3 scheme as the
// member operator*=, with the scalar reference formula at the bottom.
635 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
636 	__m128 vQ1 = q1.get128();
637 	__m128 vQ2 = q2.get128();
638 	__m128 A0, A1, B1, A2, B2;
640 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0));  // X Y  z x     //  vtrn
641 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));  // W W  W X     // vdup vext
645 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));  // Y Z  X Y     // vext
646 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));  // z x  Y Y     // vtrn vdup
650 	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));  // z x Y Z      // vtrn vext
651 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));  // Y Z x z      // vext vtrn
653 	B1 = B1 * B2;  //	A3 *= B3
655 	A0 = bt_splat_ps(vQ1, 3);  //	A0
656 	A0 = A0 * vQ2;             //	A0 * B0
658 	A1 = A1 + A2;  //	AB12
659 	A0 = A0 - B1;  //	AB03 = AB0 - AB3
661 	A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
662 	A0 = A0 + A1;                //	AB03 + AB12
664 	return btQuaternion(A0);
666 #elif defined(BT_USE_NEON)
668 	float32x4_t vQ1 = q1.get128();
669 	float32x4_t vQ2 = q2.get128();
670 	float32x4_t A0, A1, B1, A2, B2, A3, B3;
671 	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
675 	tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
678 	tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
681 	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
683 	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
685 	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
686 	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
688 	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
689 	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
691 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
692 	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
694 	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
695 	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
697 	A1 = vmulq_f32(A1, B1);
698 	A2 = vmulq_f32(A2, B2);
699 	A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
700 	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
702 	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
703 	A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
705 	//	change the sign of the last element
706 	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
707 	A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
709 	return btQuaternion(A0);
// Scalar fallback: textbook quaternion product.
713 		q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
714 		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
715 		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
716 		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
// Product of a quaternion and a vector: treats w as the pure quaternion
// (w.x, w.y, w.z, 0), so the result's w component is the negated dot product
// (see the scalar fallback at the bottom). Used by quatRotate().
720 SIMD_FORCE_INLINE btQuaternion
721 operator*(const btQuaternion& q, const btVector3& w)
723 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
724 	__m128 vQ1 = q.get128();
725 	__m128 vQ2 = w.get128();
726 	__m128 A1, B1, A2, B2, A3, B3;
728 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3, 3, 3, 0));
729 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0, 1, 2, 0));
733 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
734 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
738 	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
739 	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
741 	A3 = A3 * B3;  //	A3 *= B3
743 	A1 = A1 + A2;                //	AB12
744 	A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
745 	A1 = A1 - A3;                //	AB123 = AB12 - AB3
747 	return btQuaternion(A1);
749 #elif defined(BT_USE_NEON)
751 	float32x4_t vQ1 = q.get128();
752 	float32x4_t vQ2 = w.get128();
753 	float32x4_t A1, B1, A2, B2, A3, B3;
754 	float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
756 	vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
760 	tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
763 	tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
767 	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
769 	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
770 	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
772 	A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);  // W W  W X
773 	B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                     // X Y  z x
775 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
776 	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
778 	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
779 	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
781 	A1 = vmulq_f32(A1, B1);
782 	A2 = vmulq_f32(A2, B2);
783 	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
785 	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
787 	//	change the sign of the last element
788 	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
790 	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
792 	return btQuaternion(A1);
// Scalar fallback: q * (w, 0) expanded component-wise.
796 		q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
797 		q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
798 		q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
799 		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
// Product of a vector and a quaternion: treats w as the pure quaternion
// (w.x, w.y, w.z, 0) multiplied on the left. Also used by quatRotate().
803 SIMD_FORCE_INLINE btQuaternion
804 operator*(const btVector3& w, const btQuaternion& q)
806 #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
807 	__m128 vQ1 = w.get128();
808 	__m128 vQ2 = q.get128();
809 	__m128 A1, B1, A2, B2, A3, B3;
811 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0));  // X Y  z x
812 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));  // W W  W X
816 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
817 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
821 	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
822 	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
824 	A3 = A3 * B3;  //	A3 *= B3
826 	A1 = A1 + A2;                //	AB12
827 	A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
828 	A1 = A1 - A3;                //	AB123 = AB12 - AB3
830 	return btQuaternion(A1);
832 #elif defined(BT_USE_NEON)
834 	float32x4_t vQ1 = w.get128();
835 	float32x4_t vQ2 = q.get128();
836 	float32x4_t A1, B1, A2, B2, A3, B3;
837 	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
842 	tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
845 	tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
848 	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
850 	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
852 	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
853 	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
855 	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
856 	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
858 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
859 	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
861 	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
862 	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
864 	A1 = vmulq_f32(A1, B1);
865 	A2 = vmulq_f32(A2, B2);
866 	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
868 	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
870 	//	change the sign of the last element
871 	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
873 	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
875 	return btQuaternion(A1);
// Scalar fallback: (w, 0) * q expanded component-wise.
879 		+w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
880 		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
881 		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
882 		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
// Thin free-function wrappers forwarding to the corresponding member functions.
886 /**@brief Calculate the dot product between two quaternions */
887 SIMD_FORCE_INLINE btScalar
888 dot(const btQuaternion& q1, const btQuaternion& q2)
893 /**@brief Return the length of a quaternion */
894 SIMD_FORCE_INLINE btScalar
895 length(const btQuaternion& q)
900 /**@brief Return the angle between two quaternions*/
901 SIMD_FORCE_INLINE btScalar
902 btAngle(const btQuaternion& q1, const btQuaternion& q2)
907 /**@brief Return the inverse of a quaternion*/
908 SIMD_FORCE_INLINE btQuaternion
909 inverse(const btQuaternion& q)
914 /**@brief Return the result of spherical linear interpolation between two quaternions
915 * @param q1 The first quaternion
916 * @param q2 The second quaternion
917 * @param t The ratio between q1 and q2.  t = 0 returns q1, t=1 returns q2
918 * Slerp assumes constant velocity between positions. */
919 SIMD_FORCE_INLINE btQuaternion
920 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
922 	return q1.slerp(q2, t);
// Rotate vector v by quaternion `rotation`: computes rotation * v *
// rotation.inverse() and returns the vector part (the w lane is masked off
// in the SIMD paths). Assumes `rotation` is unit length, since inverse()
// is the conjugate.
925 SIMD_FORCE_INLINE btVector3
926 quatRotate(const btQuaternion& rotation, const btVector3& v)
928 	btQuaternion q = rotation * v;
929 	q *= rotation.inverse();
930 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
931 	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
932 #elif defined(BT_USE_NEON)
933 	return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
935 	return btVector3(q.getX(), q.getY(), q.getZ());
939 SIMD_FORCE_INLINE btQuaternion
940 shortestArcQuat(const btVector3& v0, const btVector3& v1)  // Game Programming Gems 2.10. make sure v0,v1 are normalized
942 	btVector3 c = v0.cross(v1);
943 	btScalar d = v0.dot(v1);
// Nearly antiparallel vectors: the cross product vanishes, so any axis
// perpendicular to v0 gives a valid 180-degree rotation.
945 	if (d < -1.0 + SIMD_EPSILON)
948 		btPlaneSpace1(v0, n, unused);
949 		return btQuaternion(n.x(), n.y(), n.z(), 0.0f);  // just pick any vector that is orthogonal to v0
// General case: half-angle construction, s = 2*cos(theta/2).
952 	btScalar s = btSqrt((1.0f + d) * 2.0f);
953 	btScalar rs = 1.0f / s;
955 	return btQuaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
// Variant that normalizes its (mutable) inputs before delegating.
958 SIMD_FORCE_INLINE btQuaternion
959 shortestArcQuatNormalize2(btVector3& v0, btVector3& v1)
963 	return shortestArcQuat(v0, v1);
// Plain-old-data mirrors of btQuaternion used by the serialization
// functions below; float and double layouts are kept separate so files
// written by either precision build can be read back.
966 struct btQuaternionFloatData
971 struct btQuaternionDoubleData
// Serialization implementations: each copies the four components between
// this quaternion and the POD data struct, converting precision as needed.
976 SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const
978 	///could also do a memcpy, check if it is worth it
979 	for (int i = 0; i < 4; i++)
980 		dataOut.m_floats[i] = float(m_floats[i]);
983 SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn)
985 	for (int i = 0; i < 4; i++)
986 		m_floats[i] = btScalar(dataIn.m_floats[i]);
989 SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const
991 	///could also do a memcpy, check if it is worth it
992 	for (int i = 0; i < 4; i++)
993 		dataOut.m_floats[i] = double(m_floats[i]);
996 SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn)
998 	for (int i = 0; i < 4; i++)
999 		m_floats[i] = btScalar(dataIn.m_floats[i]);
// serialize()/deSerialize() use btQuaternionData, which the macros at the
// top of this header alias to the float or double struct depending on
// BT_USE_DOUBLE_PRECISION.
1002 SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const
1004 	///could also do a memcpy, check if it is worth it
1005 	for (int i = 0; i < 4; i++)
1006 		dataOut.m_floats[i] = m_floats[i];
1009 SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionFloatData& dataIn)
1011 	for (int i = 0; i < 4; i++)
1012 		m_floats[i] = (btScalar)dataIn.m_floats[i];
1015 SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionDoubleData& dataIn)
1017 	for (int i = 0; i < 4; i++)
1018 		m_floats[i] = (btScalar)dataIn.m_floats[i];
1021 #endif //BT_SIMD__QUATERNION_H_