2 Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
15 #ifndef B3_SIMD__QUATERNION_H_
16 #define B3_SIMD__QUATERNION_H_
18 #include "b3Vector3.h"
19 #include "b3QuadWord.h"
23 const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
27 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
29 const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
30 const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
34 /**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
35 class b3Quaternion : public b3QuadWord
38 /**@brief No initialization constructor */
41 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
43 B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
49 B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
51 mVec128 = rhs.mVec128;
54 // Assignment Operator
55 B3_FORCE_INLINE b3Quaternion&
56 operator=(const b3Quaternion& v)
65 // template <typename b3Scalar>
66 // explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
67 /**@brief Constructor from scalars */
68 b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
69 : b3QuadWord(_x, _y, _z, _w)
71 //b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
73 /**@brief Axis angle Constructor
74 * @param axis The axis which the rotation is around
75 * @param angle The magnitude of the rotation around the angle (Radians) */
76 b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
78 setRotation(_axis, _angle);
80 /**@brief Constructor from Euler angles
81 * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
82 * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
83 * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
84 b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
86 #ifndef B3_EULER_DEFAULT_ZYX
87 setEuler(yaw, pitch, roll);
89 setEulerZYX(yaw, pitch, roll);
92 /**@brief Set the rotation using axis angle notation
93 * @param axis The axis around which to rotate
94 * @param angle The magnitude of the rotation in Radians */
95 void setRotation(const b3Vector3& axis1, const b3Scalar& _angle)
97 b3Vector3 axis = axis1;
100 b3Scalar d = axis.length();
101 b3Assert(d != b3Scalar(0.0));
104 setValue(0, 0, 0, 1);
108 b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
109 setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
110 b3Cos(_angle * b3Scalar(0.5)));
113 /**@brief Set the quaternion using Euler angles
114 * @param yaw Angle around Y
115 * @param pitch Angle around X
116 * @param roll Angle around Z */
117 void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
119 b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
120 b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
121 b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
122 b3Scalar cosYaw = b3Cos(halfYaw);
123 b3Scalar sinYaw = b3Sin(halfYaw);
124 b3Scalar cosPitch = b3Cos(halfPitch);
125 b3Scalar sinPitch = b3Sin(halfPitch);
126 b3Scalar cosRoll = b3Cos(halfRoll);
127 b3Scalar sinRoll = b3Sin(halfRoll);
128 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
129 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
130 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
131 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
134 /**@brief Set the quaternion using euler angles
135 * @param yaw Angle around Z
136 * @param pitch Angle around Y
137 * @param roll Angle around X */
138 void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX)
140 b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);
141 b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);
142 b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);
143 b3Scalar cosYaw = b3Cos(halfYaw);
144 b3Scalar sinYaw = b3Sin(halfYaw);
145 b3Scalar cosPitch = b3Cos(halfPitch);
146 b3Scalar sinPitch = b3Sin(halfPitch);
147 b3Scalar cosRoll = b3Cos(halfRoll);
148 b3Scalar sinRoll = b3Sin(halfRoll);
149 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
150 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
151 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
152 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
156 /**@brief Get the euler angles from this quaternion
157 * @param yaw Angle around Z
158 * @param pitch Angle around Y
159 * @param roll Angle around X */
160 void getEulerZYX(b3Scalar& yawZ, b3Scalar& pitchY, b3Scalar& rollX) const
167 sqx = m_floats[0] * m_floats[0];
168 sqy = m_floats[1] * m_floats[1];
169 sqz = m_floats[2] * m_floats[2];
170 squ = m_floats[3] * m_floats[3];
171 rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
172 sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
173 pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
174 yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
177 /**@brief Add two quaternions
178 * @param q The quaternion to add to this one */
179 B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
181 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
182 mVec128 = _mm_add_ps(mVec128, q.mVec128);
183 #elif defined(B3_USE_NEON)
184 mVec128 = vaddq_f32(mVec128, q.mVec128);
186 m_floats[0] += q.getX();
187 m_floats[1] += q.getY();
188 m_floats[2] += q.getZ();
189 m_floats[3] += q.m_floats[3];
194 /**@brief Subtract out a quaternion
195 * @param q The quaternion to subtract from this one */
196 b3Quaternion& operator-=(const b3Quaternion& q)
198 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
199 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
200 #elif defined(B3_USE_NEON)
201 mVec128 = vsubq_f32(mVec128, q.mVec128);
203 m_floats[0] -= q.getX();
204 m_floats[1] -= q.getY();
205 m_floats[2] -= q.getZ();
206 m_floats[3] -= q.m_floats[3];
211 /**@brief Scale this quaternion
212 * @param s The scalar to scale by */
213 b3Quaternion& operator*=(const b3Scalar& s)
215 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
216 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
217 vs = b3_pshufd_ps(vs, 0); // (S S S S)
218 mVec128 = _mm_mul_ps(mVec128, vs);
219 #elif defined(B3_USE_NEON)
220 mVec128 = vmulq_n_f32(mVec128, s);
230 /**@brief Multiply this quaternion by q on the right
231 * @param q The other quaternion
232 * Equivilant to this = this * q */
233 b3Quaternion& operator*=(const b3Quaternion& q)
235 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
236 __m128 vQ2 = q.get128();
238 __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0));
239 __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));
243 __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1));
244 __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
248 B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2));
249 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
251 B1 = B1 * B2; // A3 *= B3
253 mVec128 = b3_splat_ps(mVec128, 3); // A0
254 mVec128 = mVec128 * vQ2; // A0 * B0
256 A1 = A1 + A2; // AB12
257 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
258 A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
259 mVec128 = mVec128 + A1; // AB03 + AB12
261 #elif defined(B3_USE_NEON)
263 float32x4_t vQ1 = mVec128;
264 float32x4_t vQ2 = q.get128();
265 float32x4_t A0, A1, B1, A2, B2, A3, B3;
266 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
270 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
273 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
276 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
278 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
280 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
281 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
283 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
284 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
286 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
287 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
289 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
290 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
292 A1 = vmulq_f32(A1, B1);
293 A2 = vmulq_f32(A2, B2);
294 A3 = vmulq_f32(A3, B3); // A3 *= B3
295 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
297 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
298 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
300 // change the sign of the last element
301 A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
302 A0 = vaddq_f32(A0, A1); // AB03 + AB12
307 m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
308 m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
309 m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
310 m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
314 /**@brief Return the dot product between this quaternion and another
315 * @param q The other quaternion */
316 b3Scalar dot(const b3Quaternion& q) const
318 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
321 vd = _mm_mul_ps(mVec128, q.mVec128);
323 __m128 t = _mm_movehl_ps(vd, vd);
324 vd = _mm_add_ps(vd, t);
325 t = _mm_shuffle_ps(vd, vd, 0x55);
326 vd = _mm_add_ss(vd, t);
328 return _mm_cvtss_f32(vd);
329 #elif defined(B3_USE_NEON)
330 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
331 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
333 return vget_lane_f32(x, 0);
335 return m_floats[0] * q.getX() +
336 m_floats[1] * q.getY() +
337 m_floats[2] * q.getZ() +
338 m_floats[3] * q.m_floats[3];
342 /**@brief Return the length squared of the quaternion */
343 b3Scalar length2() const
348 /**@brief Return the length of the quaternion */
349 b3Scalar length() const
351 return b3Sqrt(length2());
354 /**@brief Normalize the quaternion
355 * Such that x^2 + y^2 + z^2 +w^2 = 1 */
356 b3Quaternion& normalize()
358 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
361 vd = _mm_mul_ps(mVec128, mVec128);
363 __m128 t = _mm_movehl_ps(vd, vd);
364 vd = _mm_add_ps(vd, t);
365 t = _mm_shuffle_ps(vd, vd, 0x55);
366 vd = _mm_add_ss(vd, t);
368 vd = _mm_sqrt_ss(vd);
369 vd = _mm_div_ss(b3vOnes, vd);
370 vd = b3_pshufd_ps(vd, 0); // splat
371 mVec128 = _mm_mul_ps(mVec128, vd);
375 return *this /= length();
379 /**@brief Return a scaled version of this quaternion
380 * @param s The scale factor */
381 B3_FORCE_INLINE b3Quaternion
382 operator*(const b3Scalar& s) const
384 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
385 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
386 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
388 return b3Quaternion(_mm_mul_ps(mVec128, vs));
389 #elif defined(B3_USE_NEON)
390 return b3Quaternion(vmulq_n_f32(mVec128, s));
392 return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s);
396 /**@brief Return an inversely scaled versionof this quaternion
397 * @param s The inverse scale factor */
398 b3Quaternion operator/(const b3Scalar& s) const
400 b3Assert(s != b3Scalar(0.0));
401 return *this * (b3Scalar(1.0) / s);
404 /**@brief Inversely scale this quaternion
405 * @param s The scale factor */
406 b3Quaternion& operator/=(const b3Scalar& s)
408 b3Assert(s != b3Scalar(0.0));
409 return *this *= b3Scalar(1.0) / s;
412 /**@brief Return a normalized version of this quaternion */
413 b3Quaternion normalized() const
415 return *this / length();
417 /**@brief Return the angle between this quaternion and the other
418 * @param q The other quaternion */
419 b3Scalar angle(const b3Quaternion& q) const
421 b3Scalar s = b3Sqrt(length2() * q.length2());
422 b3Assert(s != b3Scalar(0.0));
423 return b3Acos(dot(q) / s);
425 /**@brief Return the angle of rotation represented by this quaternion */
426 b3Scalar getAngle() const
428 b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]);
432 /**@brief Return the axis of the rotation represented by this quaternion */
433 b3Vector3 getAxis() const
435 b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3];
437 if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
438 return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary
439 b3Scalar s = 1.f / b3Sqrt(s_squared);
440 return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
443 /**@brief Return the inverse of this quaternion */
444 b3Quaternion inverse() const
446 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
447 return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
448 #elif defined(B3_USE_NEON)
449 return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
451 return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
455 /**@brief Return the sum of this quaternion and the other
456 * @param q2 The other quaternion */
457 B3_FORCE_INLINE b3Quaternion
458 operator+(const b3Quaternion& q2) const
460 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
461 return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
462 #elif defined(B3_USE_NEON)
463 return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
465 const b3Quaternion& q1 = *this;
466 return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
470 /**@brief Return the difference between this quaternion and the other
471 * @param q2 The other quaternion */
472 B3_FORCE_INLINE b3Quaternion
473 operator-(const b3Quaternion& q2) const
475 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
476 return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
477 #elif defined(B3_USE_NEON)
478 return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
480 const b3Quaternion& q1 = *this;
481 return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
485 /**@brief Return the negative of this quaternion
486 * This simply negates each element */
487 B3_FORCE_INLINE b3Quaternion operator-() const
489 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
490 return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
491 #elif defined(B3_USE_NEON)
492 return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask));
494 const b3Quaternion& q2 = *this;
495 return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]);
498 /**@todo document this and it's use */
499 B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const
501 b3Quaternion diff, sum;
504 if (diff.dot(diff) > sum.dot(sum))
509 /**@todo document this and it's use */
510 B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const
512 b3Quaternion diff, sum;
515 if (diff.dot(diff) < sum.dot(sum))
520 /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
521 * @param q The other quaternion to interpolate with
522 * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
523 * Slerp interpolates assuming constant velocity. */
524 b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
526 b3Scalar magnitude = b3Sqrt(length2() * q.length2());
527 b3Assert(magnitude > b3Scalar(0));
529 b3Scalar product = dot(q) / magnitude;
530 if (b3Fabs(product) < b3Scalar(1))
532 // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
533 const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
535 const b3Scalar theta = b3Acos(sign * product);
536 const b3Scalar s1 = b3Sin(sign * t * theta);
537 const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
538 const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
541 (m_floats[0] * s0 + q.getX() * s1) * d,
542 (m_floats[1] * s0 + q.getY() * s1) * d,
543 (m_floats[2] * s0 + q.getZ() * s1) * d,
544 (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
552 static const b3Quaternion& getIdentity()
554 static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.));
558 B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
561 /**@brief Return the product of two quaternions */
562 B3_FORCE_INLINE b3Quaternion
563 operator*(const b3Quaternion& q1, const b3Quaternion& q2)
565 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
566 __m128 vQ1 = q1.get128();
567 __m128 vQ2 = q2.get128();
568 __m128 A0, A1, B1, A2, B2;
570 A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn
571 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext
575 A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext
576 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup
580 B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext
581 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn
583 B1 = B1 * B2; // A3 *= B3
585 A0 = b3_splat_ps(vQ1, 3); // A0
586 A0 = A0 * vQ2; // A0 * B0
588 A1 = A1 + A2; // AB12
589 A0 = A0 - B1; // AB03 = AB0 - AB3
591 A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
592 A0 = A0 + A1; // AB03 + AB12
594 return b3Quaternion(A0);
596 #elif defined(B3_USE_NEON)
598 float32x4_t vQ1 = q1.get128();
599 float32x4_t vQ2 = q2.get128();
600 float32x4_t A0, A1, B1, A2, B2, A3, B3;
601 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
605 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
608 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
611 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
613 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
615 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
616 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
618 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
619 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
621 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
622 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
624 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
625 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
627 A1 = vmulq_f32(A1, B1);
628 A2 = vmulq_f32(A2, B2);
629 A3 = vmulq_f32(A3, B3); // A3 *= B3
630 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
632 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
633 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
635 // change the sign of the last element
636 A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
637 A0 = vaddq_f32(A0, A1); // AB03 + AB12
639 return b3Quaternion(A0);
643 q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
644 q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
645 q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
646 q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
650 B3_FORCE_INLINE b3Quaternion
651 operator*(const b3Quaternion& q, const b3Vector3& w)
653 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
654 __m128 vQ1 = q.get128();
655 __m128 vQ2 = w.get128();
656 __m128 A1, B1, A2, B2, A3, B3;
658 A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0));
659 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0));
663 A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
664 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
668 A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
669 B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
671 A3 = A3 * B3; // A3 *= B3
673 A1 = A1 + A2; // AB12
674 A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
675 A1 = A1 - A3; // AB123 = AB12 - AB3
677 return b3Quaternion(A1);
679 #elif defined(B3_USE_NEON)
681 float32x4_t vQ1 = q.get128();
682 float32x4_t vQ2 = w.get128();
683 float32x4_t A1, B1, A2, B2, A3, B3;
684 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
686 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
690 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
693 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
697 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
699 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
700 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
702 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
703 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
705 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
706 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
708 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
709 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
711 A1 = vmulq_f32(A1, B1);
712 A2 = vmulq_f32(A2, B2);
713 A3 = vmulq_f32(A3, B3); // A3 *= B3
715 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
717 // change the sign of the last element
718 A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
720 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
722 return b3Quaternion(A1);
726 q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
727 q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
728 q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
729 -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
733 B3_FORCE_INLINE b3Quaternion
734 operator*(const b3Vector3& w, const b3Quaternion& q)
736 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
737 __m128 vQ1 = w.get128();
738 __m128 vQ2 = q.get128();
739 __m128 A1, B1, A2, B2, A3, B3;
741 A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x
742 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X
746 A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
747 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
751 A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
752 B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
754 A3 = A3 * B3; // A3 *= B3
756 A1 = A1 + A2; // AB12
757 A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
758 A1 = A1 - A3; // AB123 = AB12 - AB3
760 return b3Quaternion(A1);
762 #elif defined(B3_USE_NEON)
764 float32x4_t vQ1 = w.get128();
765 float32x4_t vQ2 = q.get128();
766 float32x4_t A1, B1, A2, B2, A3, B3;
767 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
772 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
775 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
778 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
780 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
782 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
783 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
785 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
786 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
788 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
789 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
791 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
792 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
794 A1 = vmulq_f32(A1, B1);
795 A2 = vmulq_f32(A2, B2);
796 A3 = vmulq_f32(A3, B3); // A3 *= B3
798 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
800 // change the sign of the last element
801 A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
803 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
805 return b3Quaternion(A1);
809 +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
810 +w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
811 +w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
812 -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
816 /**@brief Calculate the dot product between two quaternions */
817 B3_FORCE_INLINE b3Scalar
818 b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
823 /**@brief Return the length of a quaternion */
824 B3_FORCE_INLINE b3Scalar
825 b3Length(const b3Quaternion& q)
830 /**@brief Return the angle between two quaternions*/
831 B3_FORCE_INLINE b3Scalar
832 b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
837 /**@brief Return the inverse of a quaternion*/
838 B3_FORCE_INLINE b3Quaternion
839 b3Inverse(const b3Quaternion& q)
844 /**@brief Return the result of spherical linear interpolation betwen two quaternions
845 * @param q1 The first quaternion
846 * @param q2 The second quaternion
847 * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
848 * Slerp assumes constant velocity between positions. */
849 B3_FORCE_INLINE b3Quaternion
850 b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
852 return q1.slerp(q2, t);
855 B3_FORCE_INLINE b3Quaternion
856 b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
861 B3_FORCE_INLINE b3Quaternion
862 b3QuatNormalized(const b3Quaternion& orn)
864 return orn.normalized();
867 B3_FORCE_INLINE b3Vector3
868 b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
870 b3Quaternion q = rotation * v;
871 q *= rotation.inverse();
872 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
873 return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask));
874 #elif defined(B3_USE_NEON)
875 return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
877 return b3MakeVector3(q.getX(), q.getY(), q.getZ());
881 B3_FORCE_INLINE b3Quaternion
882 b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
884 b3Vector3 c = v0.cross(v1);
885 b3Scalar d = v0.dot(v1);
887 if (d < -1.0 + B3_EPSILON)
890 b3PlaneSpace1(v0, n, unused);
891 return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f); // just pick any vector that is orthogonal to v0
894 b3Scalar s = b3Sqrt((1.0f + d) * 2.0f);
895 b3Scalar rs = 1.0f / s;
897 return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
900 B3_FORCE_INLINE b3Quaternion
901 b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1)
905 return b3ShortestArcQuat(v0, v1);
908 #endif //B3_SIMD__QUATERNION_H_