dali-physics/third-party/bullet3/src/Bullet3Common/b3Quaternion.h

   1 /*
   2 Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
   3
   4 This software is provided 'as-is', without any express or implied warranty.
   5 In no event will the authors be held liable for any damages arising from the use of this software.
   6 Permission is granted to anyone to use this software for any purpose,
   7 including commercial applications, and to alter it and redistribute it freely,
   8 subject to the following restrictions:
   9
  10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  12 3. This notice may not be removed or altered from any source distribution.
  13 */
  14
  15 #ifndef B3_SIMD__QUATERNION_H_
  16 #define B3_SIMD__QUATERNION_H_
  17
  18 #include "b3Vector3.h"
  19 #include "b3QuadWord.h"
  20
  21 #ifdef B3_USE_SSE
  22
  23 const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
  24
  25 #endif
  26
  27 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
  28
  29 const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
  30 const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
  31
  32 #endif
  33
  34 /**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
  35 class b3Quaternion : public b3QuadWord
  36 {
  37 public:
  38         /**@brief No initialization constructor */
  39         b3Quaternion() {}
  40
  41 #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
  42         // Set Vector
  43         B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
  44         {
  45                 mVec128 = vec;
  46         }
  47
  48         // Copy constructor
  49         B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
  50         {
  51                 mVec128 = rhs.mVec128;
  52         }
  53
  54         // Assignment Operator
  55         B3_FORCE_INLINE b3Quaternion&
  56         operator=(const b3Quaternion& v)
  57         {
  58                 mVec128 = v.mVec128;
  59
  60                 return *this;
  61         }
  62
  63 #endif
  64
  65         //              template <typename b3Scalar>
  66         //              explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
  67         /**@brief Constructor from scalars */
  68         b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
  69                 : b3QuadWord(_x, _y, _z, _w)
  70         {
  71                 //b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
  72         }
  73         /**@brief Axis angle Constructor
  74    * @param axis The axis which the rotation is around
  75    * @param angle The magnitude of the rotation around the angle (Radians) */
  76         b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
  77         {
  78                 setRotation(_axis, _angle);
  79         }
  80         /**@brief Constructor from Euler angles
  81    * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
  82    * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
  83    * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
  84         b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
  85         {
  86 #ifndef B3_EULER_DEFAULT_ZYX
  87                 setEuler(yaw, pitch, roll);
  88 #else
  89                 setEulerZYX(yaw, pitch, roll);
  90 #endif
  91         }
  92         /**@brief Set the rotation using axis angle notation
  93    * @param axis The axis around which to rotate
  94    * @param angle The magnitude of the rotation in Radians */
  95         void setRotation(const b3Vector3& axis1, const b3Scalar& _angle)
  96         {
  97                 b3Vector3 axis = axis1;
  98                 axis.safeNormalize();
  99
 100                 b3Scalar d = axis.length();
 101                 b3Assert(d != b3Scalar(0.0));
 102                 if (d < B3_EPSILON)
 103                 {
 104                         setValue(0, 0, 0, 1);
 105                 }
 106                 else
 107                 {
 108                         b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
 109                         setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
 110                                 b3Cos(_angle * b3Scalar(0.5)));
 111                 }
 112         }
 113         /**@brief Set the quaternion using Euler angles
 114    * @param yaw Angle around Y
 115    * @param pitch Angle around X
 116    * @param roll Angle around Z */
 117         void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
 118         {
 119                 b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
 120                 b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
 121                 b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
 122                 b3Scalar cosYaw = b3Cos(halfYaw);
 123                 b3Scalar sinYaw = b3Sin(halfYaw);
 124                 b3Scalar cosPitch = b3Cos(halfPitch);
 125                 b3Scalar sinPitch = b3Sin(halfPitch);
 126                 b3Scalar cosRoll = b3Cos(halfRoll);
 127                 b3Scalar sinRoll = b3Sin(halfRoll);
 128                 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
 129                                  cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
 130                                  sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
 131                                  cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
 132         }
 133
 134         /**@brief Set the quaternion using euler angles
 135    * @param yaw Angle around Z
 136    * @param pitch Angle around Y
 137    * @param roll Angle around X */
 138         void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX)
 139         {
 140                 b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);
 141                 b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);
 142                 b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);
 143                 b3Scalar cosYaw = b3Cos(halfYaw);
 144                 b3Scalar sinYaw = b3Sin(halfYaw);
 145                 b3Scalar cosPitch = b3Cos(halfPitch);
 146                 b3Scalar sinPitch = b3Sin(halfPitch);
 147                 b3Scalar cosRoll = b3Cos(halfRoll);
 148                 b3Scalar sinRoll = b3Sin(halfRoll);
 149                 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,   //x
 150                                  cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,   //y
 151                                  cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,   //z
 152                                  cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);  //formerly yzx
 153                 normalize();
 154         }
 155
 156         /**@brief Get the euler angles from this quaternion
 157            * @param yaw Angle around Z
 158            * @param pitch Angle around Y
 159            * @param roll Angle around X */
 160         void getEulerZYX(b3Scalar& yawZ, b3Scalar& pitchY, b3Scalar& rollX) const
 161         {
 162                 b3Scalar squ;
 163                 b3Scalar sqx;
 164                 b3Scalar sqy;
 165                 b3Scalar sqz;
 166                 b3Scalar sarg;
 167                 sqx = m_floats[0] * m_floats[0];
 168                 sqy = m_floats[1] * m_floats[1];
 169                 sqz = m_floats[2] * m_floats[2];
 170                 squ = m_floats[3] * m_floats[3];
 171                 rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
 172                 sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
 173                 pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
 174                 yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
 175         }
 176
 177         /**@brief Add two quaternions
 178    * @param q The quaternion to add to this one */
 179         B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
 180         {
 181 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 182                 mVec128 = _mm_add_ps(mVec128, q.mVec128);
 183 #elif defined(B3_USE_NEON)
 184                 mVec128 = vaddq_f32(mVec128, q.mVec128);
 185 #else
 186                 m_floats[0] += q.getX();
 187                 m_floats[1] += q.getY();
 188                 m_floats[2] += q.getZ();
 189                 m_floats[3] += q.m_floats[3];
 190 #endif
 191                 return *this;
 192         }
 193
 194         /**@brief Subtract out a quaternion
 195    * @param q The quaternion to subtract from this one */
 196         b3Quaternion& operator-=(const b3Quaternion& q)
 197         {
 198 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 199                 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
 200 #elif defined(B3_USE_NEON)
 201                 mVec128 = vsubq_f32(mVec128, q.mVec128);
 202 #else
 203                 m_floats[0] -= q.getX();
 204                 m_floats[1] -= q.getY();
 205                 m_floats[2] -= q.getZ();
 206                 m_floats[3] -= q.m_floats[3];
 207 #endif
 208                 return *this;
 209         }
 210
 211         /**@brief Scale this quaternion
 212    * @param s The scalar to scale by */
 213         b3Quaternion& operator*=(const b3Scalar& s)
 214         {
 215 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 216                 __m128 vs = _mm_load_ss(&s);  //        (S 0 0 0)
 217                 vs = b3_pshufd_ps(vs, 0);     //        (S S S S)
 218                 mVec128 = _mm_mul_ps(mVec128, vs);
 219 #elif defined(B3_USE_NEON)
 220                 mVec128 = vmulq_n_f32(mVec128, s);
 221 #else
 222                 m_floats[0] *= s;
 223                 m_floats[1] *= s;
 224                 m_floats[2] *= s;
 225                 m_floats[3] *= s;
 226 #endif
 227                 return *this;
 228         }
 229
 230         /**@brief Multiply this quaternion by q on the right
 231    * @param q The other quaternion
 232    * Equivilant to this = this * q */
 233         b3Quaternion& operator*=(const b3Quaternion& q)
 234         {
 235 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 236                 __m128 vQ2 = q.get128();
 237
 238                 __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0));
 239                 __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));
 240
 241                 A1 = A1 * B1;
 242
 243                 __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1));
 244                 __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
 245
 246                 A2 = A2 * B2;
 247
 248                 B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2));
 249                 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
 250
 251                 B1 = B1 * B2;  //       A3 *= B3
 252
 253                 mVec128 = b3_splat_ps(mVec128, 3);  //  A0
 254                 mVec128 = mVec128 * vQ2;            //  A0 * B0
 255
 256                 A1 = A1 + A2;                  //       AB12
 257                 mVec128 = mVec128 - B1;        //       AB03 = AB0 - AB3
 258                 A1 = _mm_xor_ps(A1, b3vPPPM);  //       change sign of the last element
 259                 mVec128 = mVec128 + A1;        //       AB03 + AB12
 260
 261 #elif defined(B3_USE_NEON)
 262
 263                 float32x4_t vQ1 = mVec128;
 264                 float32x4_t vQ2 = q.get128();
 265                 float32x4_t A0, A1, B1, A2, B2, A3, B3;
 266                 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 267
 268                 {
 269                         float32x2x2_t tmp;
 270                         tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 271                         vQ1zx = tmp.val[0];
 272
 273                         tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 274                         vQ2zx = tmp.val[0];
 275                 }
 276                 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 277
 278                 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 279
 280                 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 281                 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 282
 283                 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
 284                 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 285
 286                 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 287                 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 288
 289                 A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 290                 B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 291
 292                 A1 = vmulq_f32(A1, B1);
 293                 A2 = vmulq_f32(A2, B2);
 294                 A3 = vmulq_f32(A3, B3);                           //    A3 *= B3
 295                 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //    A0 * B0
 296
 297                 A1 = vaddq_f32(A1, A2);  //     AB12 = AB1 + AB2
 298                 A0 = vsubq_f32(A0, A3);  //     AB03 = AB0 - AB3
 299
 300                 //      change the sign of the last element
 301                 A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 302                 A0 = vaddq_f32(A0, A1);  //     AB03 + AB12
 303
 304                 mVec128 = A0;
 305 #else
 306                 setValue(
 307                         m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
 308                         m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
 309                         m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
 310                         m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
 311 #endif
 312                 return *this;
 313         }
 314         /**@brief Return the dot product between this quaternion and another
 315    * @param q The other quaternion */
 316         b3Scalar dot(const b3Quaternion& q) const
 317         {
 318 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 319                 __m128 vd;
 320
 321                 vd = _mm_mul_ps(mVec128, q.mVec128);
 322
 323                 __m128 t = _mm_movehl_ps(vd, vd);
 324                 vd = _mm_add_ps(vd, t);
 325                 t = _mm_shuffle_ps(vd, vd, 0x55);
 326                 vd = _mm_add_ss(vd, t);
 327
 328                 return _mm_cvtss_f32(vd);
 329 #elif defined(B3_USE_NEON)
 330                 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
 331                 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
 332                 x = vpadd_f32(x, x);
 333                 return vget_lane_f32(x, 0);
 334 #else
 335                 return m_floats[0] * q.getX() +
 336                            m_floats[1] * q.getY() +
 337                            m_floats[2] * q.getZ() +
 338                            m_floats[3] * q.m_floats[3];
 339 #endif
 340         }
 341
 342         /**@brief Return the length squared of the quaternion */
 343         b3Scalar length2() const
 344         {
 345                 return dot(*this);
 346         }
 347
 348         /**@brief Return the length of the quaternion */
 349         b3Scalar length() const
 350         {
 351                 return b3Sqrt(length2());
 352         }
 353
 354         /**@brief Normalize the quaternion
 355    * Such that x^2 + y^2 + z^2 +w^2 = 1 */
 356         b3Quaternion& normalize()
 357         {
 358 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 359                 __m128 vd;
 360
 361                 vd = _mm_mul_ps(mVec128, mVec128);
 362
 363                 __m128 t = _mm_movehl_ps(vd, vd);
 364                 vd = _mm_add_ps(vd, t);
 365                 t = _mm_shuffle_ps(vd, vd, 0x55);
 366                 vd = _mm_add_ss(vd, t);
 367
 368                 vd = _mm_sqrt_ss(vd);
 369                 vd = _mm_div_ss(b3vOnes, vd);
 370                 vd = b3_pshufd_ps(vd, 0);  // splat
 371                 mVec128 = _mm_mul_ps(mVec128, vd);
 372
 373                 return *this;
 374 #else
 375                 return *this /= length();
 376 #endif
 377         }
 378
 379         /**@brief Return a scaled version of this quaternion
 380    * @param s The scale factor */
 381         B3_FORCE_INLINE b3Quaternion
 382         operator*(const b3Scalar& s) const
 383         {
 384 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 385                 __m128 vs = _mm_load_ss(&s);  //        (S 0 0 0)
 386                 vs = b3_pshufd_ps(vs, 0x00);  //        (S S S S)
 387
 388                 return b3Quaternion(_mm_mul_ps(mVec128, vs));
 389 #elif defined(B3_USE_NEON)
 390                 return b3Quaternion(vmulq_n_f32(mVec128, s));
 391 #else
 392                 return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s);
 393 #endif
 394         }
 395
 396         /**@brief Return an inversely scaled versionof this quaternion
 397    * @param s The inverse scale factor */
 398         b3Quaternion operator/(const b3Scalar& s) const
 399         {
 400                 b3Assert(s != b3Scalar(0.0));
 401                 return *this * (b3Scalar(1.0) / s);
 402         }
 403
 404         /**@brief Inversely scale this quaternion
 405    * @param s The scale factor */
 406         b3Quaternion& operator/=(const b3Scalar& s)
 407         {
 408                 b3Assert(s != b3Scalar(0.0));
 409                 return *this *= b3Scalar(1.0) / s;
 410         }
 411
 412         /**@brief Return a normalized version of this quaternion */
 413         b3Quaternion normalized() const
 414         {
 415                 return *this / length();
 416         }
 417         /**@brief Return the angle between this quaternion and the other
 418    * @param q The other quaternion */
 419         b3Scalar angle(const b3Quaternion& q) const
 420         {
 421                 b3Scalar s = b3Sqrt(length2() * q.length2());
 422                 b3Assert(s != b3Scalar(0.0));
 423                 return b3Acos(dot(q) / s);
 424         }
 425         /**@brief Return the angle of rotation represented by this quaternion */
 426         b3Scalar getAngle() const
 427         {
 428                 b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]);
 429                 return s;
 430         }
 431
 432         /**@brief Return the axis of the rotation represented by this quaternion */
 433         b3Vector3 getAxis() const
 434         {
 435                 b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3];
 436
 437                 if (s_squared < b3Scalar(10.) * B3_EPSILON)  //Check for divide by zero
 438                         return b3MakeVector3(1.0, 0.0, 0.0);     // Arbitrary
 439                 b3Scalar s = 1.f / b3Sqrt(s_squared);
 440                 return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
 441         }
 442
 443         /**@brief Return the inverse of this quaternion */
 444         b3Quaternion inverse() const
 445         {
 446 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 447                 return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
 448 #elif defined(B3_USE_NEON)
 449                 return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
 450 #else
 451                 return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
 452 #endif
 453         }
 454
 455         /**@brief Return the sum of this quaternion and the other
 456    * @param q2 The other quaternion */
 457         B3_FORCE_INLINE b3Quaternion
 458         operator+(const b3Quaternion& q2) const
 459         {
 460 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 461                 return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
 462 #elif defined(B3_USE_NEON)
 463                 return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
 464 #else
 465                 const b3Quaternion& q1 = *this;
 466                 return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
 467 #endif
 468         }
 469
 470         /**@brief Return the difference between this quaternion and the other
 471    * @param q2 The other quaternion */
 472         B3_FORCE_INLINE b3Quaternion
 473         operator-(const b3Quaternion& q2) const
 474         {
 475 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 476                 return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
 477 #elif defined(B3_USE_NEON)
 478                 return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
 479 #else
 480                 const b3Quaternion& q1 = *this;
 481                 return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
 482 #endif
 483         }
 484
 485         /**@brief Return the negative of this quaternion
 486    * This simply negates each element */
 487         B3_FORCE_INLINE b3Quaternion operator-() const
 488         {
 489 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 490                 return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
 491 #elif defined(B3_USE_NEON)
 492                 return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask));
 493 #else
 494                 const b3Quaternion& q2 = *this;
 495                 return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]);
 496 #endif
 497         }
 498         /**@todo document this and it's use */
 499         B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const
 500         {
 501                 b3Quaternion diff, sum;
 502                 diff = *this - qd;
 503                 sum = *this + qd;
 504                 if (diff.dot(diff) > sum.dot(sum))
 505                         return qd;
 506                 return (-qd);
 507         }
 508
 509         /**@todo document this and it's use */
 510         B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const
 511         {
 512                 b3Quaternion diff, sum;
 513                 diff = *this - qd;
 514                 sum = *this + qd;
 515                 if (diff.dot(diff) < sum.dot(sum))
 516                         return qd;
 517                 return (-qd);
 518         }
 519
 520         /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
 521    * @param q The other quaternion to interpolate with
 522    * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
 523    * Slerp interpolates assuming constant velocity.  */
 524         b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
 525         {
 526                 b3Scalar magnitude = b3Sqrt(length2() * q.length2());
 527                 b3Assert(magnitude > b3Scalar(0));
 528
 529                 b3Scalar product = dot(q) / magnitude;
 530                 if (b3Fabs(product) < b3Scalar(1))
 531                 {
 532                         // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
 533                         const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
 534
 535                         const b3Scalar theta = b3Acos(sign * product);
 536                         const b3Scalar s1 = b3Sin(sign * t * theta);
 537                         const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
 538                         const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
 539
 540                         return b3Quaternion(
 541                                 (m_floats[0] * s0 + q.getX() * s1) * d,
 542                                 (m_floats[1] * s0 + q.getY() * s1) * d,
 543                                 (m_floats[2] * s0 + q.getZ() * s1) * d,
 544                                 (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
 545                 }
 546                 else
 547                 {
 548                         return *this;
 549                 }
 550         }
 551
 552         static const b3Quaternion& getIdentity()
 553         {
 554                 static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.));
 555                 return identityQuat;
 556         }
 557
 558         B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
 559 };
 560
 561 /**@brief Return the product of two quaternions */
 562 B3_FORCE_INLINE b3Quaternion
 563 operator*(const b3Quaternion& q1, const b3Quaternion& q2)
 564 {
 565 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 566         __m128 vQ1 = q1.get128();
 567         __m128 vQ2 = q2.get128();
 568         __m128 A0, A1, B1, A2, B2;
 569
 570         A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0));  // X Y  z x     //      vtrn
 571         B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));  // W W  W X     // vdup vext
 572
 573         A1 = A1 * B1;
 574
 575         A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));  // Y Z  X Y     // vext
 576         B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));  // z x  Y Y     // vtrn vdup
 577
 578         A2 = A2 * B2;
 579
 580         B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));  // z x Y Z      // vtrn vext
 581         B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));  // Y Z x z      // vext vtrn
 582
 583         B1 = B1 * B2;  //       A3 *= B3
 584
 585         A0 = b3_splat_ps(vQ1, 3);  //   A0
 586         A0 = A0 * vQ2;             //   A0 * B0
 587
 588         A1 = A1 + A2;  //       AB12
 589         A0 = A0 - B1;  //       AB03 = AB0 - AB3
 590
 591         A1 = _mm_xor_ps(A1, b3vPPPM);  //       change sign of the last element
 592         A0 = A0 + A1;                  //       AB03 + AB12
 593
 594         return b3Quaternion(A0);
 595
 596 #elif defined(B3_USE_NEON)
 597
 598         float32x4_t vQ1 = q1.get128();
 599         float32x4_t vQ2 = q2.get128();
 600         float32x4_t A0, A1, B1, A2, B2, A3, B3;
 601         float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 602
 603         {
 604                 float32x2x2_t tmp;
 605                 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 606                 vQ1zx = tmp.val[0];
 607
 608                 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 609                 vQ2zx = tmp.val[0];
 610         }
 611         vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 612
 613         vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 614
 615         vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 616         vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 617
 618         A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
 619         B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 620
 621         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 622         B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 623
 624         A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 625         B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 626
 627         A1 = vmulq_f32(A1, B1);
 628         A2 = vmulq_f32(A2, B2);
 629         A3 = vmulq_f32(A3, B3);                           //    A3 *= B3
 630         A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //    A0 * B0
 631
 632         A1 = vaddq_f32(A1, A2);  //     AB12 = AB1 + AB2
 633         A0 = vsubq_f32(A0, A3);  //     AB03 = AB0 - AB3
 634
 635         //      change the sign of the last element
 636         A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 637         A0 = vaddq_f32(A0, A1);  //     AB03 + AB12
 638
 639         return b3Quaternion(A0);
 640
 641 #else
 642         return b3Quaternion(
 643                 q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
 644                 q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
 645                 q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
 646                 q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
 647 #endif
 648 }
 649
 650 B3_FORCE_INLINE b3Quaternion
 651 operator*(const b3Quaternion& q, const b3Vector3& w)
 652 {
 653 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 654         __m128 vQ1 = q.get128();
 655         __m128 vQ2 = w.get128();
 656         __m128 A1, B1, A2, B2, A3, B3;
 657
 658         A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0));
 659         B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0));
 660
 661         A1 = A1 * B1;
 662
 663         A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
 664         B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
 665
 666         A2 = A2 * B2;
 667
 668         A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
 669         B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
 670
 671         A3 = A3 * B3;  //       A3 *= B3
 672
 673         A1 = A1 + A2;                  //       AB12
 674         A1 = _mm_xor_ps(A1, b3vPPPM);  //       change sign of the last element
 675         A1 = A1 - A3;                  //       AB123 = AB12 - AB3
 676
 677         return b3Quaternion(A1);
 678
 679 #elif defined(B3_USE_NEON)
 680
 681         float32x4_t vQ1 = q.get128();
 682         float32x4_t vQ2 = w.get128();
 683         float32x4_t A1, B1, A2, B2, A3, B3;
 684         float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
 685
 686         vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
 687         {
 688                 float32x2x2_t tmp;
 689
 690                 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 691                 vQ2zx = tmp.val[0];
 692
 693                 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 694                 vQ1zx = tmp.val[0];
 695         }
 696
 697         vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 698
 699         vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 700         vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 701
 702         A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);  // W W  W X
 703         B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                     // X Y  z x
 704
 705         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 706         B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 707
 708         A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 709         B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 710
 711         A1 = vmulq_f32(A1, B1);
 712         A2 = vmulq_f32(A2, B2);
 713         A3 = vmulq_f32(A3, B3);  //     A3 *= B3
 714
 715         A1 = vaddq_f32(A1, A2);  //     AB12 = AB1 + AB2
 716
 717         //      change the sign of the last element
 718         A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 719
 720         A1 = vsubq_f32(A1, A3);  //     AB123 = AB12 - AB3
 721
 722         return b3Quaternion(A1);
 723
 724 #else
 725         return b3Quaternion(
 726                 q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
 727                 q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
 728                 q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
 729                 -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
 730 #endif
 731 }
 732
 733 B3_FORCE_INLINE b3Quaternion
 734 operator*(const b3Vector3& w, const b3Quaternion& q)
 735 {
 736 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 737         __m128 vQ1 = w.get128();
 738         __m128 vQ2 = q.get128();
 739         __m128 A1, B1, A2, B2, A3, B3;
 740
 741         A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0));  // X Y  z x
 742         B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));  // W W  W X
 743
 744         A1 = A1 * B1;
 745
 746         A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
 747         B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
 748
 749         A2 = A2 * B2;
 750
 751         A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
 752         B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
 753
 754         A3 = A3 * B3;  //       A3 *= B3
 755
 756         A1 = A1 + A2;                  //       AB12
 757         A1 = _mm_xor_ps(A1, b3vPPPM);  //       change sign of the last element
 758         A1 = A1 - A3;                  //       AB123 = AB12 - AB3
 759
 760         return b3Quaternion(A1);
 761
 762 #elif defined(B3_USE_NEON)
 763
 764         float32x4_t vQ1 = w.get128();
 765         float32x4_t vQ2 = q.get128();
 766         float32x4_t A1, B1, A2, B2, A3, B3;
 767         float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 768
 769         {
 770                 float32x2x2_t tmp;
 771
 772                 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 773                 vQ1zx = tmp.val[0];
 774
 775                 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 776                 vQ2zx = tmp.val[0];
 777         }
 778         vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 779
 780         vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 781
 782         vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 783         vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 784
 785         A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
 786         B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 787
 788         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 789         B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 790
 791         A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 792         B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 793
 794         A1 = vmulq_f32(A1, B1);
 795         A2 = vmulq_f32(A2, B2);
 796         A3 = vmulq_f32(A3, B3);  //     A3 *= B3
 797
 798         A1 = vaddq_f32(A1, A2);  //     AB12 = AB1 + AB2
 799
 800         //      change the sign of the last element
 801         A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 802
 803         A1 = vsubq_f32(A1, A3);  //     AB123 = AB12 - AB3
 804
 805         return b3Quaternion(A1);
 806
 807 #else
 808         return b3Quaternion(
 809                 +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
 810                 +w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
 811                 +w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
 812                 -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
 813 #endif
 814 }
 815
 816 /**@brief Calculate the dot product between two quaternions */
 817 B3_FORCE_INLINE b3Scalar
 818 b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
 819 {
 820         return q1.dot(q2);
 821 }
 822
 823 /**@brief Return the length of a quaternion */
 824 B3_FORCE_INLINE b3Scalar
 825 b3Length(const b3Quaternion& q)
 826 {
 827         return q.length();
 828 }
 829
 830 /**@brief Return the angle between two quaternions*/
 831 B3_FORCE_INLINE b3Scalar
 832 b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
 833 {
 834         return q1.angle(q2);
 835 }
 836
 837 /**@brief Return the inverse of a quaternion*/
 838 B3_FORCE_INLINE b3Quaternion
 839 b3Inverse(const b3Quaternion& q)
 840 {
 841         return q.inverse();
 842 }
 843
 844 /**@brief Return the result of spherical linear interpolation betwen two quaternions
 845  * @param q1 The first quaternion
 846  * @param q2 The second quaternion
 847  * @param t The ration between q1 and q2.  t = 0 return q1, t=1 returns q2
 848  * Slerp assumes constant velocity between positions. */
 849 B3_FORCE_INLINE b3Quaternion
 850 b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
 851 {
 852         return q1.slerp(q2, t);
 853 }
 854
 855 B3_FORCE_INLINE b3Quaternion
 856 b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
 857 {
 858         return rot0 * rot1;
 859 }
 860
 861 B3_FORCE_INLINE b3Quaternion
 862 b3QuatNormalized(const b3Quaternion& orn)
 863 {
 864         return orn.normalized();
 865 }
 866
 867 B3_FORCE_INLINE b3Vector3
 868 b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
 869 {
 870         b3Quaternion q = rotation * v;
 871         q *= rotation.inverse();
 872 #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 873         return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask));
 874 #elif defined(B3_USE_NEON)
 875         return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
 876 #else
 877         return b3MakeVector3(q.getX(), q.getY(), q.getZ());
 878 #endif
 879 }
 880
 881 B3_FORCE_INLINE b3Quaternion
 882 b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1)  // Game Programming Gems 2.10. make sure v0,v1 are normalized
 883 {
 884         b3Vector3 c = v0.cross(v1);
 885         b3Scalar d = v0.dot(v1);
 886
 887         if (d < -1.0 + B3_EPSILON)
 888         {
 889                 b3Vector3 n, unused;
 890                 b3PlaneSpace1(v0, n, unused);
 891                 return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f);  // just pick any vector that is orthogonal to v0
 892         }
 893
 894         b3Scalar s = b3Sqrt((1.0f + d) * 2.0f);
 895         b3Scalar rs = 1.0f / s;
 896
 897         return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
 898 }
 899
 900 B3_FORCE_INLINE b3Quaternion
 901 b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1)
 902 {
 903         v0.normalize();
 904         v1.normalize();
 905         return b3ShortestArcQuat(v0, v1);
 906 }
 907
 908 #endif  //B3_SIMD__QUATERNION_H_