2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
21 #include "btVector3.h"
22 #include "btQuadWord.h"
// All-ones vector; used as the numerator when computing 1/length in normalize() (SSE path).
30 const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
34 #if defined(BT_USE_SSE) || defined(BT_USE_NEON)
// Sign-bit masks combined with XOR to flip component signs in the SIMD code paths:
// vQInv negates x, y, z (quaternion conjugate/inverse); vPPPM negates only the w lane.
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
37 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
41 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
42 class btQuaternion : public btQuadWord {
44 /**@brief No initialization constructor */
47 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
49 SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
55 SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
57 mVec128 = rhs.mVec128;
60 // Assignment Operator
61 SIMD_FORCE_INLINE btQuaternion&
62 operator=(const btQuaternion& v)
71 // template <typename btScalar>
72 // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
73 /**@brief Constructor from scalars */
74 btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
75 : btQuadWord(_x, _y, _z, _w)
77 /**@brief Axis angle Constructor
78 * @param axis The axis which the rotation is around
79 * @param angle The magnitude of the rotation around the angle (Radians) */
80 btQuaternion(const btVector3& _axis, const btScalar& _angle)
82 setRotation(_axis, _angle);
84 /**@brief Constructor from Euler angles
85 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
86 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
87 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
88 btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
90 #ifndef BT_EULER_DEFAULT_ZYX
91 setEuler(yaw, pitch, roll);
93 setEulerZYX(yaw, pitch, roll);
96 /**@brief Set the rotation using axis angle notation
97 * @param axis The axis around which to rotate (need not be unit length; it is normalized here)
98 * @param angle The magnitude of the rotation in Radians */
99 void setRotation(const btVector3& axis, const btScalar& _angle)
101 btScalar d = axis.length();
102 btAssert(d != btScalar(0.0));
// s = sin(angle/2)/|axis| so the stored (x,y,z) = normalized axis * sin(angle/2), w = cos(angle/2).
103 btScalar s = btSin(_angle * btScalar(0.5)) / d;
104 setValue(axis.x() * s, axis.y() * s, axis.z() * s,
105 btCos(_angle * btScalar(0.5)));
107 /**@brief Set the quaternion using Euler angles
108 * @param yaw Angle around Y
109 * @param pitch Angle around X
110 * @param roll Angle around Z */
111 void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
113 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
114 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
115 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
116 btScalar cosYaw = btCos(halfYaw);
117 btScalar sinYaw = btSin(halfYaw);
118 btScalar cosPitch = btCos(halfPitch);
119 btScalar sinPitch = btSin(halfPitch);
120 btScalar cosRoll = btCos(halfRoll);
121 btScalar sinRoll = btSin(halfRoll);
// Compose the three half-angle rotations into the quaternion components (x, y, z, w).
122 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
123 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
124 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
125 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
127 /**@brief Set the quaternion using euler angles
128 * @param yaw Angle around Z
129 * @param pitch Angle around Y
130 * @param roll Angle around X */
131 void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
133 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
134 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
135 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
136 btScalar cosYaw = btCos(halfYaw);
137 btScalar sinYaw = btSin(halfYaw);
138 btScalar cosPitch = btCos(halfPitch);
139 btScalar sinPitch = btSin(halfPitch);
140 btScalar cosRoll = btCos(halfRoll);
141 btScalar sinRoll = btSin(halfRoll);
// Same construction as setEuler, but with the ZYX axis convention (yaw=Z, pitch=Y, roll=X).
142 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
143 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
144 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
145 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
147 /**@brief Add two quaternions
148 * @param q The quaternion to add to this one */
149 SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
151 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
152 mVec128 = _mm_add_ps(mVec128, q.mVec128);
153 #elif defined(BT_USE_NEON)
154 mVec128 = vaddq_f32(mVec128, q.mVec128);
// Scalar fallback: component-wise addition.
156 m_floats[0] += q.x();
157 m_floats[1] += q.y();
158 m_floats[2] += q.z();
159 m_floats[3] += q.m_floats[3];
164 /**@brief Subtract out a quaternion
165 * @param q The quaternion to subtract from this one */
166 btQuaternion& operator-=(const btQuaternion& q)
168 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
169 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
170 #elif defined(BT_USE_NEON)
171 mVec128 = vsubq_f32(mVec128, q.mVec128);
// Scalar fallback: component-wise subtraction.
173 m_floats[0] -= q.x();
174 m_floats[1] -= q.y();
175 m_floats[2] -= q.z();
176 m_floats[3] -= q.m_floats[3];
181 /**@brief Scale this quaternion
182 * @param s The scalar to scale by */
183 btQuaternion& operator*=(const btScalar& s)
185 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
186 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
187 vs = bt_pshufd_ps(vs, 0); // (S S S S)
188 mVec128 = _mm_mul_ps(mVec128, vs);
189 #elif defined(BT_USE_NEON)
190 mVec128 = vmulq_n_f32(mVec128, s);
200 /**@brief Multiply this quaternion by q on the right
201 * @param q The other quaternion
202 * Equivalent to this = this * q */
203 btQuaternion& operator*=(const btQuaternion& q)
205 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// SSE Hamilton product: build shuffled operand pairs (A1*B1 + A2*B2 - A3*B3 plus the
// w-splat term A0*B0), with vPPPM flipping the sign of the w lane of the AB12 sum.
206 __m128 vQ2 = q.get128();
208 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
209 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
213 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
214 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
218 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
219 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
221 B1 = B1 * B2; // A3 *= B3
223 mVec128 = bt_splat_ps(mVec128, 3); // A0
224 mVec128 = mVec128 * vQ2; // A0 * B0
226 A1 = A1 + A2; // AB12
227 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
228 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
229 mVec128 = mVec128+ A1; // AB03 + AB12
231 #elif defined(BT_USE_NEON)
// NEON variant of the same product, using vtrn/vext lane rearrangements
// instead of SSE shuffles.
233 float32x4_t vQ1 = mVec128;
234 float32x4_t vQ2 = q.get128();
235 float32x4_t A0, A1, B1, A2, B2, A3, B3;
236 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
240 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
243 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
246 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
248 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
250 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
251 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
253 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
254 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
256 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
257 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
259 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
260 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
262 A1 = vmulq_f32(A1, B1);
263 A2 = vmulq_f32(A2, B2);
264 A3 = vmulq_f32(A3, B3); // A3 *= B3
265 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
267 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
268 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
270 // change the sign of the last element
271 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
272 A0 = vaddq_f32(A0, A1); // AB03 + AB12
// Scalar fallback: the standard Hamilton product, component by component.
277 m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
278 m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
279 m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
280 m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
284 /**@brief Return the dot product between this quaternion and another
285 * @param q The other quaternion
286 * @return x1*x2 + y1*y2 + z1*z2 + w1*w2 */
286 btScalar dot(const btQuaternion& q) const
288 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
291 vd = _mm_mul_ps(mVec128, q.mVec128);
// Horizontal add of the four products down into lane 0.
293 __m128 t = _mm_movehl_ps(vd, vd);
294 vd = _mm_add_ps(vd, t);
295 t = _mm_shuffle_ps(vd, vd, 0x55);
296 vd = _mm_add_ss(vd, t);
298 return _mm_cvtss_f32(vd);
299 #elif defined(BT_USE_NEON)
// NEON: pairwise adds reduce the four products to a single lane.
300 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
301 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
303 return vget_lane_f32(x, 0);
305 return m_floats[0] * q.x() +
306 m_floats[1] * q.y() +
307 m_floats[2] * q.z() +
308 m_floats[3] * q.m_floats[3];
312 /**@brief Return the length squared of the quaternion */
313 btScalar length2() const
318 /**@brief Return the length of the quaternion */
319 btScalar length() const
321 return btSqrt(length2());
324 /**@brief Normalize the quaternion
325 * Such that x^2 + y^2 + z^2 +w^2 = 1 */
326 btQuaternion& normalize()
328 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// Squared length reduced into lane 0 (same horizontal-add pattern as dot()).
331 vd = _mm_mul_ps(mVec128, mVec128);
333 __m128 t = _mm_movehl_ps(vd, vd);
334 vd = _mm_add_ps(vd, t);
335 t = _mm_shuffle_ps(vd, vd, 0x55);
336 vd = _mm_add_ss(vd, t);
// 1/sqrt(length2), splatted to all lanes, then scales the quaternion in place.
338 vd = _mm_sqrt_ss(vd);
339 vd = _mm_div_ss(vOnes, vd);
340 vd = bt_pshufd_ps(vd, 0); // splat
341 mVec128 = _mm_mul_ps(mVec128, vd);
// Non-SSE fallback: divide by the Euclidean length.
345 return *this /= length();
349 /**@brief Return a scaled version of this quaternion
350 * @param s The scale factor */
351 SIMD_FORCE_INLINE btQuaternion
352 operator*(const btScalar& s) const
354 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
355 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
356 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
358 return btQuaternion(_mm_mul_ps(mVec128, vs));
359 #elif defined(BT_USE_NEON)
360 return btQuaternion(vmulq_n_f32(mVec128, s));
362 return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
366 /**@brief Return an inversely scaled version of this quaternion
367 * @param s The inverse scale factor (must be non-zero) */
368 btQuaternion operator/(const btScalar& s) const
370 btAssert(s != btScalar(0.0));
371 return *this * (btScalar(1.0) / s);
374 /**@brief Inversely scale this quaternion
375 * @param s The scale factor (must be non-zero) */
376 btQuaternion& operator/=(const btScalar& s)
378 btAssert(s != btScalar(0.0));
379 return *this *= btScalar(1.0) / s;
382 /**@brief Return a normalized version of this quaternion */
383 btQuaternion normalized() const
385 return *this / length();
387 /**@brief Return the angle between this quaternion and the other
388 * @param q The other quaternion */
389 btScalar angle(const btQuaternion& q) const
// Normalize the dot product by both lengths so acos gets a value in [-1, 1].
391 btScalar s = btSqrt(length2() * q.length2());
392 btAssert(s != btScalar(0.0));
393 return btAcos(dot(q) / s);
395 /**@brief Return the angle of rotation represented by this quaternion */
396 btScalar getAngle() const
// angle = 2*acos(w); assumes a unit quaternion so that w = cos(angle/2).
398 btScalar s = btScalar(2.) * btAcos(m_floats[3]);
402 /**@brief Return the axis of the rotation represented by this quaternion */
403 btVector3 getAxis() const
// sin^2(angle/2) = 1 - w^2; near zero the axis is undefined, so return an arbitrary unit axis.
405 btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
407 if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
408 return btVector3(1.0, 0.0, 0.0); // Arbitrary
409 btScalar s = 1.f/btSqrt(s_squared);
410 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
413 /**@brief Return the inverse of this quaternion (the conjugate: x,y,z negated, w kept) */
414 btQuaternion inverse() const
416 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// XOR with vQInv flips the sign bits of x, y and z only.
417 return btQuaternion(_mm_xor_ps(mVec128, vQInv));
418 #elif defined(BT_USE_NEON)
419 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
421 return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
425 /**@brief Return the sum of this quaternion and the other
426 * @param q2 The other quaternion */
427 SIMD_FORCE_INLINE btQuaternion
428 operator+(const btQuaternion& q2) const
430 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
431 return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
432 #elif defined(BT_USE_NEON)
433 return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
435 const btQuaternion& q1 = *this;
436 return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
440 /**@brief Return the difference between this quaternion and the other
441 * @param q2 The other quaternion */
442 SIMD_FORCE_INLINE btQuaternion
443 operator-(const btQuaternion& q2) const
445 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
446 return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
447 #elif defined(BT_USE_NEON)
448 return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
450 const btQuaternion& q1 = *this;
451 return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
455 /**@brief Return the negative of this quaternion
456 * This simply negates each element */
457 SIMD_FORCE_INLINE btQuaternion operator-() const
459 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// XOR with the all-lanes sign mask negates every component.
460 return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
461 #elif defined(BT_USE_NEON)
462 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) )
464 const btQuaternion& q2 = *this;
465 return btQuaternion( - q2.x(), - q2.y(), - q2.z(), - q2.m_floats[3]);
468 /**@todo document this and its use */
// NOTE(review): compares diff vs. sum magnitudes to choose between qd and its negation
// (q and -q encode the same rotation); presumably diff = *this - qd and sum = *this + qd — verify.
469 SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
471 btQuaternion diff,sum;
474 if( diff.dot(diff) > sum.dot(sum) )
479 /**@todo document this and its use */
// Counterpart of farthest(): picks the representation closer to this quaternion.
480 SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
482 btQuaternion diff,sum;
485 if( diff.dot(diff) < sum.dot(sum) )
491 /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
492 * @param q The other quaternion to interpolate with
493 * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
494 * Slerp interpolates assuming constant velocity. */
495 btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
497 btScalar magnitude = btSqrt(length2() * q.length2());
498 btAssert(magnitude > btScalar(0));
500 btScalar product = dot(q) / magnitude;
// |product| == 1 means the quaternions are (anti)parallel; interpolation is skipped then.
501 if (btFabs(product) != btScalar(1))
503 // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
504 const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
506 const btScalar theta = btAcos(sign * product);
507 const btScalar s1 = btSin(sign * t * theta);
508 const btScalar d = btScalar(1.0) / btSin(theta);
509 const btScalar s0 = btSin((btScalar(1.0) - t) * theta);
// result = (this*sin((1-t)*theta) + q*sin(t*theta)) / sin(theta), per-component.
512 (m_floats[0] * s0 + q.x() * s1) * d,
513 (m_floats[1] * s0 + q.y() * s1) * d,
514 (m_floats[2] * s0 + q.z() * s1) * d,
515 (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
// Identity rotation (0, 0, 0, 1), constructed once and shared.
523 static const btQuaternion& getIdentity()
525 static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
529 SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
538 /**@brief Return the product of two quaternions (Hamilton product q1 * q2) */
539 SIMD_FORCE_INLINE btQuaternion
540 operator*(const btQuaternion& q1, const btQuaternion& q2)
542 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// Same SSE shuffle scheme as btQuaternion::operator*=, but non-destructive:
// accumulate A0*B0 + A1*B1 + A2*B2 - A3*B3 with the w lane sign-corrected via vPPPM.
543 __m128 vQ1 = q1.get128();
544 __m128 vQ2 = q2.get128();
545 __m128 A0, A1, B1, A2, B2;
547 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
548 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
552 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
553 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
557 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
558 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
560 B1 = B1 * B2; // A3 *= B3
562 A0 = bt_splat_ps(vQ1, 3); // A0
563 A0 = A0 * vQ2; // A0 * B0
565 A1 = A1 + A2; // AB12
566 A0 = A0 - B1; // AB03 = AB0 - AB3
568 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
569 A0 = A0 + A1; // AB03 + AB12
571 return btQuaternion(A0);
573 #elif defined(BT_USE_NEON)
// NEON variant using vtrn/vext lane rearrangements.
575 float32x4_t vQ1 = q1.get128();
576 float32x4_t vQ2 = q2.get128();
577 float32x4_t A0, A1, B1, A2, B2, A3, B3;
578 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
582 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
585 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
588 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
590 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
592 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
593 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
595 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
596 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
598 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
599 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
601 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
602 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
604 A1 = vmulq_f32(A1, B1);
605 A2 = vmulq_f32(A2, B2);
606 A3 = vmulq_f32(A3, B3); // A3 *= B3
607 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
609 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
610 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
612 // change the sign of the last element
613 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
614 A0 = vaddq_f32(A0, A1); // AB03 + AB12
616 return btQuaternion(A0);
// Scalar fallback: the Hamilton product written out explicitly.
620 q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
621 q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
622 q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
623 q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
/**@brief Return the product q * w, treating the vector as a pure quaternion (w component = 0);
 * building block for quatRotate(). */
627 SIMD_FORCE_INLINE btQuaternion
628 operator*(const btQuaternion& q, const btVector3& w)
630 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
631 __m128 vQ1 = q.get128();
632 __m128 vQ2 = w.get128();
633 __m128 A1, B1, A2, B2, A3, B3;
635 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
636 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
640 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
641 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
645 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
646 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
648 A3 = A3 * B3; // A3 *= B3
650 A1 = A1 + A2; // AB12
651 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
652 A1 = A1 - A3; // AB123 = AB12 - AB3
654 return btQuaternion(A1);
656 #elif defined(BT_USE_NEON)
658 float32x4_t vQ1 = q.get128();
659 float32x4_t vQ2 = w.get128();
660 float32x4_t A1, B1, A2, B2, A3, B3;
661 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
663 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
667 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
670 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
674 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
676 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
677 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
679 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
680 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
682 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
683 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
685 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
686 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
688 A1 = vmulq_f32(A1, B1);
689 A2 = vmulq_f32(A2, B2);
690 A3 = vmulq_f32(A3, B3); // A3 *= B3
692 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
694 // change the sign of the last element
695 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
697 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
699 return btQuaternion(A1);
// Scalar fallback: Hamilton product with the vector's w term absent.
703 q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
704 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
705 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
706 -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
/**@brief Return the product w * q, treating the vector as a pure quaternion (w component = 0). */
710 SIMD_FORCE_INLINE btQuaternion
711 operator*(const btVector3& w, const btQuaternion& q)
713 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
714 __m128 vQ1 = w.get128();
715 __m128 vQ2 = q.get128();
716 __m128 A1, B1, A2, B2, A3, B3;
718 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
719 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X
723 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
724 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
728 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
729 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
731 A3 = A3 * B3; // A3 *= B3
733 A1 = A1 + A2; // AB12
734 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
735 A1 = A1 - A3; // AB123 = AB12 - AB3
737 return btQuaternion(A1);
739 #elif defined(BT_USE_NEON)
741 float32x4_t vQ1 = w.get128();
742 float32x4_t vQ2 = q.get128();
743 float32x4_t A1, B1, A2, B2, A3, B3;
744 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
749 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
752 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
755 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
757 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
759 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
760 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
762 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
763 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
765 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
766 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
768 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
769 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
771 A1 = vmulq_f32(A1, B1);
772 A2 = vmulq_f32(A2, B2);
773 A3 = vmulq_f32(A3, B3); // A3 *= B3
775 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
777 // change the sign of the last element
778 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
780 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
782 return btQuaternion(A1);
// Scalar fallback: Hamilton product with the vector's w term absent.
786 +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
787 +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
788 +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
789 -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
793 /**@brief Calculate the dot product between two quaternions */
794 SIMD_FORCE_INLINE btScalar
795 dot(const btQuaternion& q1, const btQuaternion& q2)
801 /**@brief Return the length of a quaternion */
802 SIMD_FORCE_INLINE btScalar
803 length(const btQuaternion& q)
808 /**@brief Return the angle between two quaternions*/
809 SIMD_FORCE_INLINE btScalar
810 btAngle(const btQuaternion& q1, const btQuaternion& q2)
815 /**@brief Return the inverse of a quaternion*/
816 SIMD_FORCE_INLINE btQuaternion
817 inverse(const btQuaternion& q)
822 /**@brief Return the result of spherical linear interpolation between two quaternions
823 * @param q1 The first quaternion
824 * @param q2 The second quaternion
825 * @param t The ratio between q1 and q2. t = 0 return q1, t=1 returns q2
826 * Slerp assumes constant velocity between positions. */
827 SIMD_FORCE_INLINE btQuaternion
828 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
// Free-function convenience wrapper over the member slerp.
830 return q1.slerp(q2, t);
/**@brief Rotate a vector by a quaternion: computes rotation * v * rotation.inverse(). */
833 SIMD_FORCE_INLINE btVector3
834 quatRotate(const btQuaternion& rotation, const btVector3& v)
836 btQuaternion q = rotation * v;
837 q *= rotation.inverse();
838 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// Mask off the w lane so only the rotated (x, y, z) survives in the result vector.
839 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
840 #elif defined(BT_USE_NEON)
841 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
843 return btVector3(q.getX(),q.getY(),q.getZ());
847 SIMD_FORCE_INLINE btQuaternion
848 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
850 btVector3 c = v0.cross(v1);
851 btScalar d = v0.dot(v1);
// Vectors nearly opposite: the rotation axis is degenerate, so pick any axis orthogonal to v0.
853 if (d < -1.0 + SIMD_EPSILON)
856 btPlaneSpace1(v0,n,unused);
857 return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
// General case: xyz = cross/(2*cos(theta/2)), w = cos(theta/2), using s = 2*cos(theta/2).
860 btScalar s = btSqrt((1.0f + d) * 2.0f);
861 btScalar rs = 1.0f / s;
863 return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
// Variant that normalizes its (mutable) inputs before delegating to shortestArcQuat.
866 SIMD_FORCE_INLINE btQuaternion
867 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
871 return shortestArcQuat(v0,v1);
874 #endif //BT_SIMD__QUATERNION_H_