src/LinearMath/btQuaternion.h

   1 /*
   2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
   3
   4 This software is provided 'as-is', without any express or implied warranty.
   5 In no event will the authors be held liable for any damages arising from the use of this software.
   6 Permission is granted to anyone to use this software for any purpose,
   7 including commercial applications, and to alter it and redistribute it freely,
   8 subject to the following restrictions:
   9
  10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  12 3. This notice may not be removed or altered from any source distribution.
  13 */
  14
  15
  16
  17 #ifndef BT_SIMD__QUATERNION_H_
  18 #define BT_SIMD__QUATERNION_H_
  19
  20
  21 #include "btVector3.h"
  22 #include "btQuadWord.h"
  23
  24
  25
  26
  27
  28 #ifdef BT_USE_SSE
  29
  30 const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
  31
  32 #endif
  33
  34 #if defined(BT_USE_SSE) || defined(BT_USE_NEON)
  35
  36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
  37 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
  38
  39 #endif
  40
  41 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
  42 class btQuaternion : public btQuadWord {
  43 public:
  44   /**@brief No initialization constructor */
  45         btQuaternion() {}
  46
  47 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
  48         // Set Vector
  49         SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
  50         {
  51                 mVec128 = vec;
  52         }
  53
  54         // Copy constructor
  55         SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
  56         {
  57                 mVec128 = rhs.mVec128;
  58         }
  59
  60         // Assignment Operator
  61         SIMD_FORCE_INLINE btQuaternion&
  62         operator=(const btQuaternion& v)
  63         {
  64                 mVec128 = v.mVec128;
  65
  66                 return *this;
  67         }
  68
  69 #endif
  70
  71         //              template <typename btScalar>
  72         //              explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
  73   /**@brief Constructor from scalars */
  74         btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
  75                 : btQuadWord(_x, _y, _z, _w)
  76         {}
  77   /**@brief Axis angle Constructor
  78    * @param axis The axis which the rotation is around
  79    * @param angle The magnitude of the rotation around the angle (Radians) */
  80         btQuaternion(const btVector3& _axis, const btScalar& _angle)
  81         {
  82                 setRotation(_axis, _angle);
  83         }
  84   /**@brief Constructor from Euler angles
  85    * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
  86    * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
  87    * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
  88         btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
  89         {
  90 #ifndef BT_EULER_DEFAULT_ZYX
  91                 setEuler(yaw, pitch, roll);
  92 #else
  93                 setEulerZYX(yaw, pitch, roll);
  94 #endif
  95         }
  96   /**@brief Set the rotation using axis angle notation
  97    * @param axis The axis around which to rotate
  98    * @param angle The magnitude of the rotation in Radians */
  99         void setRotation(const btVector3& axis, const btScalar& _angle)
 100         {
 101                 btScalar d = axis.length();
 102                 btAssert(d != btScalar(0.0));
 103                 btScalar s = btSin(_angle * btScalar(0.5)) / d;
 104                 setValue(axis.x() * s, axis.y() * s, axis.z() * s,
 105                         btCos(_angle * btScalar(0.5)));
 106         }
 107   /**@brief Set the quaternion using Euler angles
 108    * @param yaw Angle around Y
 109    * @param pitch Angle around X
 110    * @param roll Angle around Z */
 111         void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
 112         {
 113                 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
 114                 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
 115                 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
 116                 btScalar cosYaw = btCos(halfYaw);
 117                 btScalar sinYaw = btSin(halfYaw);
 118                 btScalar cosPitch = btCos(halfPitch);
 119                 btScalar sinPitch = btSin(halfPitch);
 120                 btScalar cosRoll = btCos(halfRoll);
 121                 btScalar sinRoll = btSin(halfRoll);
 122                 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
 123                         cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
 124                         sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
 125                         cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
 126         }
 127   /**@brief Set the quaternion using euler angles
 128    * @param yaw Angle around Z
 129    * @param pitch Angle around Y
 130    * @param roll Angle around X */
 131         void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
 132         {
 133                 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
 134                 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
 135                 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
 136                 btScalar cosYaw = btCos(halfYaw);
 137                 btScalar sinYaw = btSin(halfYaw);
 138                 btScalar cosPitch = btCos(halfPitch);
 139                 btScalar sinPitch = btSin(halfPitch);
 140                 btScalar cosRoll = btCos(halfRoll);
 141                 btScalar sinRoll = btSin(halfRoll);
 142                 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
 143                          cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
 144                          cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
 145                          cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
 146         }
 147   /**@brief Add two quaternions
 148    * @param q The quaternion to add to this one */
 149         SIMD_FORCE_INLINE       btQuaternion& operator+=(const btQuaternion& q)
 150         {
 151 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 152                 mVec128 = _mm_add_ps(mVec128, q.mVec128);
 153 #elif defined(BT_USE_NEON)
 154                 mVec128 = vaddq_f32(mVec128, q.mVec128);
 155 #else
 156                 m_floats[0] += q.x();
 157         m_floats[1] += q.y();
 158         m_floats[2] += q.z();
 159         m_floats[3] += q.m_floats[3];
 160 #endif
 161                 return *this;
 162         }
 163
 164   /**@brief Subtract out a quaternion
 165    * @param q The quaternion to subtract from this one */
 166         btQuaternion& operator-=(const btQuaternion& q)
 167         {
 168 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 169                 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
 170 #elif defined(BT_USE_NEON)
 171                 mVec128 = vsubq_f32(mVec128, q.mVec128);
 172 #else
 173                 m_floats[0] -= q.x();
 174         m_floats[1] -= q.y();
 175         m_floats[2] -= q.z();
 176         m_floats[3] -= q.m_floats[3];
 177 #endif
 178         return *this;
 179         }
 180
 181   /**@brief Scale this quaternion
 182    * @param s The scalar to scale by */
 183         btQuaternion& operator*=(const btScalar& s)
 184         {
 185 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 186                 __m128  vs = _mm_load_ss(&s);   //      (S 0 0 0)
 187                 vs = bt_pshufd_ps(vs, 0);       //      (S S S S)
 188                 mVec128 = _mm_mul_ps(mVec128, vs);
 189 #elif defined(BT_USE_NEON)
 190                 mVec128 = vmulq_n_f32(mVec128, s);
 191 #else
 192                 m_floats[0] *= s;
 193         m_floats[1] *= s;
 194         m_floats[2] *= s;
 195         m_floats[3] *= s;
 196 #endif
 197                 return *this;
 198         }
 199
 200   /**@brief Multiply this quaternion by q on the right
 201    * @param q The other quaternion
 202    * Equivilant to this = this * q */
 203         btQuaternion& operator*=(const btQuaternion& q)
 204         {
 205 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 206                 __m128 vQ2 = q.get128();
 207
 208                 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
 209                 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
 210
 211                 A1 = A1 * B1;
 212
 213                 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
 214                 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
 215
 216                 A2 = A2 * B2;
 217
 218                 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
 219                 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
 220
 221                 B1 = B1 * B2;   //      A3 *= B3
 222
 223                 mVec128 = bt_splat_ps(mVec128, 3);      //      A0
 224                 mVec128 = mVec128 * vQ2;        //      A0 * B0
 225
 226                 A1 = A1 + A2;   //      AB12
 227                 mVec128 = mVec128 - B1; //      AB03 = AB0 - AB3
 228                 A1 = _mm_xor_ps(A1, vPPPM);     //      change sign of the last element
 229                 mVec128 = mVec128+ A1;  //      AB03 + AB12
 230
 231 #elif defined(BT_USE_NEON)
 232
 233         float32x4_t vQ1 = mVec128;
 234         float32x4_t vQ2 = q.get128();
 235         float32x4_t A0, A1, B1, A2, B2, A3, B3;
 236         float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 237
 238         {
 239         float32x2x2_t tmp;
 240         tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
 241         vQ1zx = tmp.val[0];
 242
 243         tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
 244         vQ2zx = tmp.val[0];
 245         }
 246         vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 247
 248         vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 249
 250         vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 251         vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 252
 253         A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
 254         B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
 255
 256         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 257         B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 258
 259         A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
 260         B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
 261
 262         A1 = vmulq_f32(A1, B1);
 263         A2 = vmulq_f32(A2, B2);
 264         A3 = vmulq_f32(A3, B3); //      A3 *= B3
 265         A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //     A0 * B0
 266
 267         A1 = vaddq_f32(A1, A2); //      AB12 = AB1 + AB2
 268         A0 = vsubq_f32(A0, A3); //      AB03 = AB0 - AB3
 269
 270         //      change the sign of the last element
 271         A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 272         A0 = vaddq_f32(A0, A1); //      AB03 + AB12
 273
 274         mVec128 = A0;
 275 #else
 276                 setValue(
 277             m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
 278                         m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
 279                         m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
 280                         m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
 281 #endif
 282                 return *this;
 283         }
 284   /**@brief Return the dot product between this quaternion and another
 285    * @param q The other quaternion */
 286         btScalar dot(const btQuaternion& q) const
 287         {
 288 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 289                 __m128  vd;
 290
 291                 vd = _mm_mul_ps(mVec128, q.mVec128);
 292
 293         __m128 t = _mm_movehl_ps(vd, vd);
 294                 vd = _mm_add_ps(vd, t);
 295                 t = _mm_shuffle_ps(vd, vd, 0x55);
 296                 vd = _mm_add_ss(vd, t);
 297
 298         return _mm_cvtss_f32(vd);
 299 #elif defined(BT_USE_NEON)
 300                 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
 301                 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
 302                 x = vpadd_f32(x, x);
 303                 return vget_lane_f32(x, 0);
 304 #else
 305                 return  m_floats[0] * q.x() +
 306                 m_floats[1] * q.y() +
 307                 m_floats[2] * q.z() +
 308                 m_floats[3] * q.m_floats[3];
 309 #endif
 310         }
 311
 312   /**@brief Return the length squared of the quaternion */
 313         btScalar length2() const
 314         {
 315                 return dot(*this);
 316         }
 317
 318   /**@brief Return the length of the quaternion */
 319         btScalar length() const
 320         {
 321                 return btSqrt(length2());
 322         }
 323
 324   /**@brief Normalize the quaternion
 325    * Such that x^2 + y^2 + z^2 +w^2 = 1 */
 326         btQuaternion& normalize()
 327         {
 328 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 329                 __m128  vd;
 330
 331                 vd = _mm_mul_ps(mVec128, mVec128);
 332
 333         __m128 t = _mm_movehl_ps(vd, vd);
 334                 vd = _mm_add_ps(vd, t);
 335                 t = _mm_shuffle_ps(vd, vd, 0x55);
 336                 vd = _mm_add_ss(vd, t);
 337
 338                 vd = _mm_sqrt_ss(vd);
 339                 vd = _mm_div_ss(vOnes, vd);
 340         vd = bt_pshufd_ps(vd, 0); // splat
 341                 mVec128 = _mm_mul_ps(mVec128, vd);
 342
 343                 return *this;
 344 #else
 345                 return *this /= length();
 346 #endif
 347         }
 348
 349   /**@brief Return a scaled version of this quaternion
 350    * @param s The scale factor */
 351         SIMD_FORCE_INLINE btQuaternion
 352         operator*(const btScalar& s) const
 353         {
 354 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 355                 __m128  vs = _mm_load_ss(&s);   //      (S 0 0 0)
 356                 vs = bt_pshufd_ps(vs, 0x00);    //      (S S S S)
 357
 358                 return btQuaternion(_mm_mul_ps(mVec128, vs));
 359 #elif defined(BT_USE_NEON)
 360                 return btQuaternion(vmulq_n_f32(mVec128, s));
 361 #else
 362                 return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
 363 #endif
 364         }
 365
 366   /**@brief Return an inversely scaled versionof this quaternion
 367    * @param s The inverse scale factor */
 368         btQuaternion operator/(const btScalar& s) const
 369         {
 370                 btAssert(s != btScalar(0.0));
 371                 return *this * (btScalar(1.0) / s);
 372         }
 373
 374   /**@brief Inversely scale this quaternion
 375    * @param s The scale factor */
 376         btQuaternion& operator/=(const btScalar& s)
 377         {
 378                 btAssert(s != btScalar(0.0));
 379                 return *this *= btScalar(1.0) / s;
 380         }
 381
 382   /**@brief Return a normalized version of this quaternion */
 383         btQuaternion normalized() const
 384         {
 385                 return *this / length();
 386         }
 387   /**@brief Return the angle between this quaternion and the other
 388    * @param q The other quaternion */
 389         btScalar angle(const btQuaternion& q) const
 390         {
 391                 btScalar s = btSqrt(length2() * q.length2());
 392                 btAssert(s != btScalar(0.0));
 393                 return btAcos(dot(q) / s);
 394         }
 395   /**@brief Return the angle of rotation represented by this quaternion */
 396         btScalar getAngle() const
 397         {
 398                 btScalar s = btScalar(2.) * btAcos(m_floats[3]);
 399                 return s;
 400         }
 401
 402         /**@brief Return the axis of the rotation represented by this quaternion */
 403         btVector3 getAxis() const
 404         {
 405                 btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
 406
 407                 if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
 408                         return btVector3(1.0, 0.0, 0.0);  // Arbitrary
 409                 btScalar s = 1.f/btSqrt(s_squared);
 410                 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
 411         }
 412
 413         /**@brief Return the inverse of this quaternion */
 414         btQuaternion inverse() const
 415         {
 416 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 417                 return btQuaternion(_mm_xor_ps(mVec128, vQInv));
 418 #elif defined(BT_USE_NEON)
 419         return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
 420 #else
 421                 return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
 422 #endif
 423         }
 424
 425   /**@brief Return the sum of this quaternion and the other
 426    * @param q2 The other quaternion */
 427         SIMD_FORCE_INLINE btQuaternion
 428         operator+(const btQuaternion& q2) const
 429         {
 430 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 431                 return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
 432 #elif defined(BT_USE_NEON)
 433         return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
 434 #else
 435                 const btQuaternion& q1 = *this;
 436                 return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
 437 #endif
 438         }
 439
 440   /**@brief Return the difference between this quaternion and the other
 441    * @param q2 The other quaternion */
 442         SIMD_FORCE_INLINE btQuaternion
 443         operator-(const btQuaternion& q2) const
 444         {
 445 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 446                 return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
 447 #elif defined(BT_USE_NEON)
 448         return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
 449 #else
 450                 const btQuaternion& q1 = *this;
 451                 return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
 452 #endif
 453         }
 454
 455   /**@brief Return the negative of this quaternion
 456    * This simply negates each element */
 457         SIMD_FORCE_INLINE btQuaternion operator-() const
 458         {
 459 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 460                 return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
 461 #elif defined(BT_USE_NEON)
 462                 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
 463 #else
 464                 const btQuaternion& q2 = *this;
 465                 return btQuaternion( - q2.x(), - q2.y(),  - q2.z(),  - q2.m_floats[3]);
 466 #endif
 467         }
 468   /**@todo document this and it's use */
 469         SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
 470         {
 471                 btQuaternion diff,sum;
 472                 diff = *this - qd;
 473                 sum = *this + qd;
 474                 if( diff.dot(diff) > sum.dot(sum) )
 475                         return qd;
 476                 return (-qd);
 477         }
 478
 479         /**@todo document this and it's use */
 480         SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
 481         {
 482                 btQuaternion diff,sum;
 483                 diff = *this - qd;
 484                 sum = *this + qd;
 485                 if( diff.dot(diff) < sum.dot(sum) )
 486                         return qd;
 487                 return (-qd);
 488         }
 489
 490
 491   /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
 492    * @param q The other quaternion to interpolate with
 493    * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
 494    * Slerp interpolates assuming constant velocity.  */
 495         btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
 496         {
 497           btScalar magnitude = btSqrt(length2() * q.length2());
 498           btAssert(magnitude > btScalar(0));
 499
 500     btScalar product = dot(q) / magnitude;
 501     if (btFabs(product) != btScalar(1))
 502                 {
 503       // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
 504       const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
 505
 506       const btScalar theta = btAcos(sign * product);
 507       const btScalar s1 = btSin(sign * t * theta);
 508       const btScalar d = btScalar(1.0) / btSin(theta);
 509       const btScalar s0 = btSin((btScalar(1.0) - t) * theta);
 510
 511       return btQuaternion(
 512           (m_floats[0] * s0 + q.x() * s1) * d,
 513           (m_floats[1] * s0 + q.y() * s1) * d,
 514           (m_floats[2] * s0 + q.z() * s1) * d,
 515           (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
 516                 }
 517                 else
 518                 {
 519                         return *this;
 520                 }
 521         }
 522
 523         static const btQuaternion&      getIdentity()
 524         {
 525                 static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
 526                 return identityQuat;
 527         }
 528
 529         SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
 530
 531
 532 };
 533
 534
 535
 536
 537
 538 /**@brief Return the product of two quaternions */
 539 SIMD_FORCE_INLINE btQuaternion
 540 operator*(const btQuaternion& q1, const btQuaternion& q2)
 541 {
 542 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 543         __m128 vQ1 = q1.get128();
 544         __m128 vQ2 = q2.get128();
 545         __m128 A0, A1, B1, A2, B2;
 546
 547         A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y  z x     //      vtrn
 548         B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext
 549
 550         A1 = A1 * B1;
 551
 552         A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext
 553         B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup
 554
 555         A2 = A2 * B2;
 556
 557         B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext
 558         B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
 559
 560         B1 = B1 * B2;   //      A3 *= B3
 561
 562         A0 = bt_splat_ps(vQ1, 3);       //      A0
 563         A0 = A0 * vQ2;  //      A0 * B0
 564
 565         A1 = A1 + A2;   //      AB12
 566         A0 =  A0 - B1;  //      AB03 = AB0 - AB3
 567
 568     A1 = _mm_xor_ps(A1, vPPPM); //      change sign of the last element
 569         A0 = A0 + A1;   //      AB03 + AB12
 570
 571         return btQuaternion(A0);
 572
 573 #elif defined(BT_USE_NEON)
 574
 575         float32x4_t vQ1 = q1.get128();
 576         float32x4_t vQ2 = q2.get128();
 577         float32x4_t A0, A1, B1, A2, B2, A3, B3;
 578     float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 579
 580     {
 581     float32x2x2_t tmp;
 582     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
 583     vQ1zx = tmp.val[0];
 584
 585     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
 586     vQ2zx = tmp.val[0];
 587     }
 588     vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 589
 590     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 591
 592     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 593     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 594
 595     A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
 596     B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
 597
 598         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 599     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 600
 601     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
 602     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
 603
 604         A1 = vmulq_f32(A1, B1);
 605         A2 = vmulq_f32(A2, B2);
 606         A3 = vmulq_f32(A3, B3); //      A3 *= B3
 607         A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //     A0 * B0
 608
 609         A1 = vaddq_f32(A1, A2); //      AB12 = AB1 + AB2
 610         A0 = vsubq_f32(A0, A3); //      AB03 = AB0 - AB3
 611
 612     //  change the sign of the last element
 613     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 614         A0 = vaddq_f32(A0, A1); //      AB03 + AB12
 615
 616         return btQuaternion(A0);
 617
 618 #else
 619         return btQuaternion(
 620         q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
 621                 q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
 622                 q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
 623                 q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
 624 #endif
 625 }
 626
 627 SIMD_FORCE_INLINE btQuaternion
 628 operator*(const btQuaternion& q, const btVector3& w)
 629 {
 630 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 631         __m128 vQ1 = q.get128();
 632         __m128 vQ2 = w.get128();
 633         __m128 A1, B1, A2, B2, A3, B3;
 634
 635         A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
 636         B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
 637
 638         A1 = A1 * B1;
 639
 640         A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
 641         B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
 642
 643         A2 = A2 * B2;
 644
 645         A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
 646         B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
 647
 648         A3 = A3 * B3;   //      A3 *= B3
 649
 650         A1 = A1 + A2;   //      AB12
 651         A1 = _mm_xor_ps(A1, vPPPM);     //      change sign of the last element
 652     A1 = A1 - A3;       //      AB123 = AB12 - AB3
 653
 654         return btQuaternion(A1);
 655
 656 #elif defined(BT_USE_NEON)
 657
 658         float32x4_t vQ1 = q.get128();
 659         float32x4_t vQ2 = w.get128();
 660         float32x4_t A1, B1, A2, B2, A3, B3;
 661     float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
 662
 663     vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
 664     {
 665     float32x2x2_t tmp;
 666
 667     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
 668     vQ2zx = tmp.val[0];
 669
 670     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
 671     vQ1zx = tmp.val[0];
 672     }
 673
 674     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 675
 676     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 677     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 678
 679     A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X
 680     B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x
 681
 682         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 683     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 684
 685     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
 686     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
 687
 688         A1 = vmulq_f32(A1, B1);
 689         A2 = vmulq_f32(A2, B2);
 690         A3 = vmulq_f32(A3, B3); //      A3 *= B3
 691
 692         A1 = vaddq_f32(A1, A2); //      AB12 = AB1 + AB2
 693
 694     //  change the sign of the last element
 695     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 696
 697     A1 = vsubq_f32(A1, A3);     //      AB123 = AB12 - AB3
 698
 699         return btQuaternion(A1);
 700
 701 #else
 702         return btQuaternion(
 703          q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
 704                  q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
 705                  q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
 706                 -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
 707 #endif
 708 }
 709
 710 SIMD_FORCE_INLINE btQuaternion
 711 operator*(const btVector3& w, const btQuaternion& q)
 712 {
 713 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 714         __m128 vQ1 = w.get128();
 715         __m128 vQ2 = q.get128();
 716         __m128 A1, B1, A2, B2, A3, B3;
 717
 718         A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));  // X Y  z x
 719         B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));  // W W  W X
 720
 721         A1 = A1 * B1;
 722
 723         A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
 724         B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
 725
 726         A2 = A2 *B2;
 727
 728         A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
 729         B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
 730
 731         A3 = A3 * B3;   //      A3 *= B3
 732
 733         A1 = A1 + A2;   //      AB12
 734         A1 = _mm_xor_ps(A1, vPPPM);     //      change sign of the last element
 735         A1 = A1 - A3;   //      AB123 = AB12 - AB3
 736
 737         return btQuaternion(A1);
 738
 739 #elif defined(BT_USE_NEON)
 740
 741         float32x4_t vQ1 = w.get128();
 742         float32x4_t vQ2 = q.get128();
 743         float32x4_t  A1, B1, A2, B2, A3, B3;
 744     float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 745
 746     {
 747     float32x2x2_t tmp;
 748
 749     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
 750     vQ1zx = tmp.val[0];
 751
 752     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
 753     vQ2zx = tmp.val[0];
 754     }
 755     vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 756
 757     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 758
 759     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 760     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 761
 762     A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
 763     B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
 764
 765         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 766     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 767
 768     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
 769     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
 770
 771         A1 = vmulq_f32(A1, B1);
 772         A2 = vmulq_f32(A2, B2);
 773         A3 = vmulq_f32(A3, B3); //      A3 *= B3
 774
 775         A1 = vaddq_f32(A1, A2); //      AB12 = AB1 + AB2
 776
 777     //  change the sign of the last element
 778     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 779
 780     A1 = vsubq_f32(A1, A3);     //      AB123 = AB12 - AB3
 781
 782         return btQuaternion(A1);
 783
 784 #else
 785         return btQuaternion(
 786         +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
 787                 +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
 788                 +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
 789                 -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
 790 #endif
 791 }
 792
 793 /**@brief Calculate the dot product between two quaternions */
 794 SIMD_FORCE_INLINE btScalar
 795 dot(const btQuaternion& q1, const btQuaternion& q2)
 796 {
 797         return q1.dot(q2);
 798 }
 799
 800
 801 /**@brief Return the length of a quaternion */
 802 SIMD_FORCE_INLINE btScalar
 803 length(const btQuaternion& q)
 804 {
 805         return q.length();
 806 }
 807
 808 /**@brief Return the angle between two quaternions*/
 809 SIMD_FORCE_INLINE btScalar
 810 btAngle(const btQuaternion& q1, const btQuaternion& q2)
 811 {
 812         return q1.angle(q2);
 813 }
 814
 815 /**@brief Return the inverse of a quaternion*/
 816 SIMD_FORCE_INLINE btQuaternion
 817 inverse(const btQuaternion& q)
 818 {
 819         return q.inverse();
 820 }
 821
 822 /**@brief Return the result of spherical linear interpolation betwen two quaternions
 823  * @param q1 The first quaternion
 824  * @param q2 The second quaternion
 825  * @param t The ration between q1 and q2.  t = 0 return q1, t=1 returns q2
 826  * Slerp assumes constant velocity between positions. */
 827 SIMD_FORCE_INLINE btQuaternion
 828 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
 829 {
 830         return q1.slerp(q2, t);
 831 }
 832
 833 SIMD_FORCE_INLINE btVector3
 834 quatRotate(const btQuaternion& rotation, const btVector3& v)
 835 {
 836         btQuaternion q = rotation * v;
 837         q *= rotation.inverse();
 838 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 839         return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
 840 #elif defined(BT_USE_NEON)
 841     return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
 842 #else
 843         return btVector3(q.getX(),q.getY(),q.getZ());
 844 #endif
 845 }
 846
 847 SIMD_FORCE_INLINE btQuaternion
 848 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
 849 {
 850         btVector3 c = v0.cross(v1);
 851         btScalar  d = v0.dot(v1);
 852
 853         if (d < -1.0 + SIMD_EPSILON)
 854         {
 855                 btVector3 n,unused;
 856                 btPlaneSpace1(v0,n,unused);
 857                 return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
 858         }
 859
 860         btScalar  s = btSqrt((1.0f + d) * 2.0f);
 861         btScalar rs = 1.0f / s;
 862
 863         return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
 864 }
 865
 866 SIMD_FORCE_INLINE btQuaternion
 867 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
 868 {
 869         v0.normalize();
 870         v1.normalize();
 871         return shortestArcQuat(v0,v1);
 872 }
 873
 874 #endif //BT_SIMD__QUATERNION_H_
 875
 876
 877