src/LinearMath/btVector3.h

   1 /*
   2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
   3
   4 This software is provided 'as-is', without any express or implied warranty.
   5 In no event will the authors be held liable for any damages arising from the use of this software.
   6 Permission is granted to anyone to use this software for any purpose,
   7 including commercial applications, and to alter it and redistribute it freely,
   8 subject to the following restrictions:
   9
  10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  12 3. This notice may not be removed or altered from any source distribution.
  13 */
  14
  15
  16
  17 #ifndef BT_VECTOR3_H
  18 #define BT_VECTOR3_H
  19
  20 //#include <stdint.h>
  21 #include "btScalar.h"
  22 #include "btMinMax.h"
  23 #include "btAlignedAllocator.h"
  24
  25 #ifdef BT_USE_DOUBLE_PRECISION
  26 #define btVector3Data btVector3DoubleData
  27 #define btVector3DataName "btVector3DoubleData"
  28 #else
  29 #define btVector3Data btVector3FloatData
  30 #define btVector3DataName "btVector3FloatData"
  31 #endif //BT_USE_DOUBLE_PRECISION
  32
  33 #if defined BT_USE_SSE
  34
  35 //typedef  uint32_t __m128i __attribute__ ((vector_size(16)));
  36
  37 #ifdef _MSC_VER
  38 #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
  39 #endif
  40
  41
  42 #define BT_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))
  43 //#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
  44 #define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
  45 #define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
  46 #define bt_splat_ps( _a, _i )  bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
  47
  48 #define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
  49 #define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
  50 #define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
  51 #define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
  52 #define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
  53 #define btvxyzMaskf btvFFF0fMask
  54 #define btvAbsfMask btCastiTo128f(btvAbsMask)
  55
  56
  57
  58 const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
  59 const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
  60 const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
  61 const __m128 ATTRIBUTE_ALIGNED16(v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};
  62
  63 #endif
  64
  65 #ifdef BT_USE_NEON
  66
  67 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
  68 const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
  69 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
  70 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
  71
  72 #endif
  73
  74 /**@brief btVector3 can be used to represent 3D points and vectors.
  75  * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
  76  * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
  77  */
  78 ATTRIBUTE_ALIGNED16(class) btVector3
  79 {
  80 public:
  81
  82         BT_DECLARE_ALIGNED_ALLOCATOR();
  83
  84 #if defined (__SPU__) && defined (__CELLOS_LV2__)
  85                 btScalar        m_floats[4];
  86 public:
  87         SIMD_FORCE_INLINE const vec_float4&     get128() const
  88         {
  89                 return *((const vec_float4*)&m_floats[0]);
  90         }
  91 public:
  92 #else //__CELLOS_LV2__ __SPU__
  93     #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
  94         union {
  95             btSimdFloat4      mVec128;
  96             btScalar    m_floats[4];
  97         };
  98         SIMD_FORCE_INLINE       btSimdFloat4    get128() const
  99         {
 100             return mVec128;
 101         }
 102         SIMD_FORCE_INLINE       void    set128(btSimdFloat4 v128)
 103         {
 104             mVec128 = v128;
 105         }
 106     #else
 107         btScalar        m_floats[4];
 108     #endif
 109 #endif //__CELLOS_LV2__ __SPU__
 110
 111         public:
 112
 113   /**@brief No initialization constructor */
 114         SIMD_FORCE_INLINE btVector3()
 115         {
 116
 117         }
 118
 119
 120
 121   /**@brief Constructor from scalars
 122    * @param x X value
 123    * @param y Y value
 124    * @param z Z value
 125    */
 126         SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
 127         {
 128                 m_floats[0] = _x;
 129                 m_floats[1] = _y;
 130                 m_floats[2] = _z;
 131                 m_floats[3] = btScalar(0.f);
 132         }
 133
 134 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
 135         // Set Vector
 136         SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
 137         {
 138                 mVec128 = v;
 139         }
 140
 141         // Copy constructor
 142         SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
 143         {
 144                 mVec128 = rhs.mVec128;
 145         }
 146
 147         // Assignment Operator
 148         SIMD_FORCE_INLINE btVector3&
 149         operator=(const btVector3& v)
 150         {
 151                 mVec128 = v.mVec128;
 152
 153                 return *this;
 154         }
 155 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
 156
 157 /**@brief Add a vector to this one
 158  * @param The vector to add to this one */
 159         SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
 160         {
 161 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 162                 mVec128 = _mm_add_ps(mVec128, v.mVec128);
 163 #elif defined(BT_USE_NEON)
 164                 mVec128 = vaddq_f32(mVec128, v.mVec128);
 165 #else
 166                 m_floats[0] += v.m_floats[0];
 167                 m_floats[1] += v.m_floats[1];
 168                 m_floats[2] += v.m_floats[2];
 169 #endif
 170                 return *this;
 171         }
 172
 173
 174   /**@brief Subtract a vector from this one
 175    * @param The vector to subtract */
 176         SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
 177         {
 178 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 179                 mVec128 = _mm_sub_ps(mVec128, v.mVec128);
 180 #elif defined(BT_USE_NEON)
 181                 mVec128 = vsubq_f32(mVec128, v.mVec128);
 182 #else
 183                 m_floats[0] -= v.m_floats[0];
 184                 m_floats[1] -= v.m_floats[1];
 185                 m_floats[2] -= v.m_floats[2];
 186 #endif
 187                 return *this;
 188         }
 189
 190   /**@brief Scale the vector
 191    * @param s Scale factor */
 192         SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
 193         {
 194 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 195                 __m128  vs = _mm_load_ss(&s);   //      (S 0 0 0)
 196                 vs = bt_pshufd_ps(vs, 0x80);    //      (S S S 0.0)
 197                 mVec128 = _mm_mul_ps(mVec128, vs);
 198 #elif defined(BT_USE_NEON)
 199                 mVec128 = vmulq_n_f32(mVec128, s);
 200 #else
 201                 m_floats[0] *= s;
 202                 m_floats[1] *= s;
 203                 m_floats[2] *= s;
 204 #endif
 205                 return *this;
 206         }
 207
 208   /**@brief Inversely scale the vector
 209    * @param s Scale factor to divide by */
 210         SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
 211         {
 212                 btFullAssert(s != btScalar(0.0));
 213
 214 #if 0 //defined(BT_USE_SSE_IN_API)
 215 // this code is not faster !
 216                 __m128 vs = _mm_load_ss(&s);
 217                 vs = _mm_div_ss(v1110, vs);
 218                 vs = bt_pshufd_ps(vs, 0x00);    //      (S S S S)
 219
 220                 mVec128 = _mm_mul_ps(mVec128, vs);
 221
 222                 return *this;
 223 #else
 224                 return *this *= btScalar(1.0) / s;
 225 #endif
 226         }
 227
 228   /**@brief Return the dot product
 229    * @param v The other vector in the dot product */
 230         SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
 231         {
 232 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 233                 __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
 234                 __m128 z = _mm_movehl_ps(vd, vd);
 235                 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
 236                 vd = _mm_add_ss(vd, y);
 237                 vd = _mm_add_ss(vd, z);
 238                 return _mm_cvtss_f32(vd);
 239 #elif defined(BT_USE_NEON)
 240                 float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
 241                 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
 242                 x = vadd_f32(x, vget_high_f32(vd));
 243                 return vget_lane_f32(x, 0);
 244 #else
 245                 return  m_floats[0] * v.m_floats[0] +
 246                                 m_floats[1] * v.m_floats[1] +
 247                                 m_floats[2] * v.m_floats[2];
 248 #endif
 249         }
 250
 251   /**@brief Return the length of the vector squared */
 252         SIMD_FORCE_INLINE btScalar length2() const
 253         {
 254                 return dot(*this);
 255         }
 256
 257   /**@brief Return the length of the vector */
 258         SIMD_FORCE_INLINE btScalar length() const
 259         {
 260                 return btSqrt(length2());
 261         }
 262
 263   /**@brief Return the distance squared between the ends of this and another vector
 264    * This is symantically treating the vector like a point */
 265         SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
 266
 267   /**@brief Return the distance between the ends of this and another vector
 268    * This is symantically treating the vector like a point */
 269         SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
 270
 271         SIMD_FORCE_INLINE btVector3& safeNormalize()
 272         {
 273                 btVector3 absVec = this->absolute();
 274                 int maxIndex = absVec.maxAxis();
 275                 if (absVec[maxIndex]>0)
 276                 {
 277                         *this /= absVec[maxIndex];
 278                         return *this /= length();
 279                 }
 280                 setValue(1,0,0);
 281                 return *this;
 282         }
 283
 284   /**@brief Normalize this vector
 285    * x^2 + y^2 + z^2 = 1 */
 286         SIMD_FORCE_INLINE btVector3& normalize()
 287         {
 288 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 289         // dot product first
 290                 __m128 vd = _mm_mul_ps(mVec128, mVec128);
 291                 __m128 z = _mm_movehl_ps(vd, vd);
 292                 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
 293                 vd = _mm_add_ss(vd, y);
 294                 vd = _mm_add_ss(vd, z);
 295
 296         #if 0
 297         vd = _mm_sqrt_ss(vd);
 298                 vd = _mm_div_ss(v1110, vd);
 299                 vd = bt_splat_ps(vd, 0x80);
 300                 mVec128 = _mm_mul_ps(mVec128, vd);
 301         #else
 302
 303         // NR step 1/sqrt(x) - vd is x, y is output
 304         y = _mm_rsqrt_ss(vd); // estimate
 305
 306         //  one step NR
 307         z = v1_5;
 308         vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
 309         //x2 = vd;
 310         vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
 311         vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
 312         z = _mm_sub_ss(z, vd);  // 1.5 - vd * 0.5 * y0 * y0
 313
 314         y = _mm_mul_ss(y, z);   // y0 * (1.5 - vd * 0.5 * y0 * y0)
 315
 316                 y = bt_splat_ps(y, 0x80);
 317                 mVec128 = _mm_mul_ps(mVec128, y);
 318
 319         #endif
 320
 321
 322                 return *this;
 323 #else
 324                 return *this /= length();
 325 #endif
 326         }
 327
 328   /**@brief Return a normalized version of this vector */
 329         SIMD_FORCE_INLINE btVector3 normalized() const;
 330
 331   /**@brief Return a rotated version of this vector
 332    * @param wAxis The axis to rotate about
 333    * @param angle The angle to rotate by */
 334         SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
 335
 336   /**@brief Return the angle between this and another vector
 337    * @param v The other vector */
 338         SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
 339         {
 340                 btScalar s = btSqrt(length2() * v.length2());
 341                 btFullAssert(s != btScalar(0.0));
 342                 return btAcos(dot(v) / s);
 343         }
 344
 345   /**@brief Return a vector will the absolute values of each element */
 346         SIMD_FORCE_INLINE btVector3 absolute() const
 347         {
 348 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 349                 return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
 350 #elif defined(BT_USE_NEON)
 351                 return btVector3(vabsq_f32(mVec128));
 352 #else
 353                 return btVector3(
 354                         btFabs(m_floats[0]),
 355                         btFabs(m_floats[1]),
 356                         btFabs(m_floats[2]));
 357 #endif
 358         }
 359
 360   /**@brief Return the cross product between this and another vector
 361    * @param v The other vector */
 362         SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
 363         {
 364 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 365                 __m128  T, V;
 366
 367                 T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));      //      (Y Z X 0)
 368                 V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));    //      (Y Z X 0)
 369
 370                 V = _mm_mul_ps(V, mVec128);
 371                 T = _mm_mul_ps(T, v.mVec128);
 372                 V = _mm_sub_ps(V, T);
 373
 374                 V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
 375                 return btVector3(V);
 376 #elif defined(BT_USE_NEON)
 377                 float32x4_t T, V;
 378                 // form (Y, Z, X, _) of mVec128 and v.mVec128
 379                 float32x2_t Tlow = vget_low_f32(mVec128);
 380                 float32x2_t Vlow = vget_low_f32(v.mVec128);
 381                 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
 382                 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
 383
 384                 V = vmulq_f32(V, mVec128);
 385                 T = vmulq_f32(T, v.mVec128);
 386                 V = vsubq_f32(V, T);
 387                 Vlow = vget_low_f32(V);
 388                 // form (Y, Z, X, _);
 389                 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
 390                 V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
 391
 392                 return btVector3(V);
 393 #else
 394                 return btVector3(
 395                         m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
 396                         m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
 397                         m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
 398 #endif
 399         }
 400
 401         SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
 402         {
 403 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 404                 // cross:
 405                 __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));      //      (Y Z X 0)
 406                 __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));      //      (Y Z X 0)
 407
 408                 V = _mm_mul_ps(V, v1.mVec128);
 409                 T = _mm_mul_ps(T, v2.mVec128);
 410                 V = _mm_sub_ps(V, T);
 411
 412                 V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
 413
 414                 // dot:
 415                 V = _mm_mul_ps(V, mVec128);
 416                 __m128 z = _mm_movehl_ps(V, V);
 417                 __m128 y = _mm_shuffle_ps(V, V, 0x55);
 418                 V = _mm_add_ss(V, y);
 419                 V = _mm_add_ss(V, z);
 420                 return _mm_cvtss_f32(V);
 421
 422 #elif defined(BT_USE_NEON)
 423                 // cross:
 424                 float32x4_t T, V;
 425                 // form (Y, Z, X, _) of mVec128 and v.mVec128
 426                 float32x2_t Tlow = vget_low_f32(v1.mVec128);
 427                 float32x2_t Vlow = vget_low_f32(v2.mVec128);
 428                 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
 429                 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
 430
 431                 V = vmulq_f32(V, v1.mVec128);
 432                 T = vmulq_f32(T, v2.mVec128);
 433                 V = vsubq_f32(V, T);
 434                 Vlow = vget_low_f32(V);
 435                 // form (Y, Z, X, _);
 436                 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
 437
 438                 // dot:
 439                 V = vmulq_f32(mVec128, V);
 440                 float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
 441                 x = vadd_f32(x, vget_high_f32(V));
 442                 return vget_lane_f32(x, 0);
 443 #else
 444                 return
 445                         m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
 446                         m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
 447                         m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
 448 #endif
 449         }
 450
 451   /**@brief Return the axis with the smallest value
 452    * Note return values are 0,1,2 for x, y, or z */
 453         SIMD_FORCE_INLINE int minAxis() const
 454         {
 455                 return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
 456         }
 457
 458   /**@brief Return the axis with the largest value
 459    * Note return values are 0,1,2 for x, y, or z */
 460         SIMD_FORCE_INLINE int maxAxis() const
 461         {
 462                 return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
 463         }
 464
 465         SIMD_FORCE_INLINE int furthestAxis() const
 466         {
 467                 return absolute().minAxis();
 468         }
 469
 470         SIMD_FORCE_INLINE int closestAxis() const
 471         {
 472                 return absolute().maxAxis();
 473         }
 474
 475
 476         SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
 477         {
 478 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 479                 __m128  vrt = _mm_load_ss(&rt); //      (rt 0 0 0)
 480                 btScalar s = btScalar(1.0) - rt;
 481                 __m128  vs = _mm_load_ss(&s);   //      (S 0 0 0)
 482                 vs = bt_pshufd_ps(vs, 0x80);    //      (S S S 0.0)
 483                 __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
 484                 vrt = bt_pshufd_ps(vrt, 0x80);  //      (rt rt rt 0.0)
 485                 __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
 486                 __m128 tmp3 = _mm_add_ps(r0,r1);
 487                 mVec128 = tmp3;
 488 #elif defined(BT_USE_NEON)
 489                 mVec128 = vsubq_f32(v1.mVec128, v0.mVec128);
 490                 mVec128 = vmulq_n_f32(mVec128, rt);
 491                 mVec128 = vaddq_f32(mVec128, v0.mVec128);
 492 #else
 493                 btScalar s = btScalar(1.0) - rt;
 494                 m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
 495                 m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
 496                 m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
 497                 //don't do the unused w component
 498                 //              m_co[3] = s * v0[3] + rt * v1[3];
 499 #endif
 500         }
 501
 502   /**@brief Return the linear interpolation between this and another vector
 503    * @param v The other vector
 504    * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
 505         SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
 506         {
 507 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 508                 __m128  vt = _mm_load_ss(&t);   //      (t 0 0 0)
 509                 vt = bt_pshufd_ps(vt, 0x80);    //      (rt rt rt 0.0)
 510                 __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
 511                 vl = _mm_mul_ps(vl, vt);
 512                 vl = _mm_add_ps(vl, mVec128);
 513
 514                 return btVector3(vl);
 515 #elif defined(BT_USE_NEON)
 516                 float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
 517                 vl = vmulq_n_f32(vl, t);
 518                 vl = vaddq_f32(vl, mVec128);
 519
 520                 return btVector3(vl);
 521 #else
 522                 return
 523                         btVector3(      m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
 524                                                 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
 525                                                 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
 526 #endif
 527         }
 528
 529   /**@brief Elementwise multiply this vector by the other
 530    * @param v The other vector */
 531         SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
 532         {
 533 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 534                 mVec128 = _mm_mul_ps(mVec128, v.mVec128);
 535 #elif defined(BT_USE_NEON)
 536                 mVec128 = vmulq_f32(mVec128, v.mVec128);
 537 #else
 538                 m_floats[0] *= v.m_floats[0];
 539                 m_floats[1] *= v.m_floats[1];
 540                 m_floats[2] *= v.m_floats[2];
 541 #endif
 542                 return *this;
 543         }
 544
 545          /**@brief Return the x value */
 546                 SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
 547   /**@brief Return the y value */
 548                 SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
 549   /**@brief Return the z value */
 550                 SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
 551   /**@brief Set the x value */
 552                 SIMD_FORCE_INLINE void  setX(btScalar _x) { m_floats[0] = _x;};
 553   /**@brief Set the y value */
 554                 SIMD_FORCE_INLINE void  setY(btScalar _y) { m_floats[1] = _y;};
 555   /**@brief Set the z value */
 556                 SIMD_FORCE_INLINE void  setZ(btScalar _z) { m_floats[2] = _z;};
 557   /**@brief Set the w value */
 558                 SIMD_FORCE_INLINE void  setW(btScalar _w) { m_floats[3] = _w;};
 559   /**@brief Return the x value */
 560                 SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
 561   /**@brief Return the y value */
 562                 SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
 563   /**@brief Return the z value */
 564                 SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
 565   /**@brief Return the w value */
 566                 SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
 567
 568         //SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i]; }
 569         //SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
 570         ///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
 571         SIMD_FORCE_INLINE       operator       btScalar *()       { return &m_floats[0]; }
 572         SIMD_FORCE_INLINE       operator const btScalar *() const { return &m_floats[0]; }
 573
 574         SIMD_FORCE_INLINE       bool    operator==(const btVector3& other) const
 575         {
 576 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 577         return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
 578 #else
 579                 return ((m_floats[3]==other.m_floats[3]) &&
 580                 (m_floats[2]==other.m_floats[2]) &&
 581                 (m_floats[1]==other.m_floats[1]) &&
 582                 (m_floats[0]==other.m_floats[0]));
 583 #endif
 584         }
 585
 586         SIMD_FORCE_INLINE       bool    operator!=(const btVector3& other) const
 587         {
 588                 return !(*this == other);
 589         }
 590
 591   /**@brief Set each element to the max of the current values and the values of another btVector3
 592    * @param other The other btVector3 to compare with
 593    */
 594         SIMD_FORCE_INLINE void  setMax(const btVector3& other)
 595         {
 596 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 597                 mVec128 = _mm_max_ps(mVec128, other.mVec128);
 598 #elif defined(BT_USE_NEON)
 599                 mVec128 = vmaxq_f32(mVec128, other.mVec128);
 600 #else
 601                 btSetMax(m_floats[0], other.m_floats[0]);
 602                 btSetMax(m_floats[1], other.m_floats[1]);
 603                 btSetMax(m_floats[2], other.m_floats[2]);
 604                 btSetMax(m_floats[3], other.w());
 605 #endif
 606         }
 607
 608   /**@brief Set each element to the min of the current values and the values of another btVector3
 609    * @param other The other btVector3 to compare with
 610    */
 611         SIMD_FORCE_INLINE void  setMin(const btVector3& other)
 612         {
 613 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 614                 mVec128 = _mm_min_ps(mVec128, other.mVec128);
 615 #elif defined(BT_USE_NEON)
 616                 mVec128 = vminq_f32(mVec128, other.mVec128);
 617 #else
 618                 btSetMin(m_floats[0], other.m_floats[0]);
 619                 btSetMin(m_floats[1], other.m_floats[1]);
 620                 btSetMin(m_floats[2], other.m_floats[2]);
 621                 btSetMin(m_floats[3], other.w());
 622 #endif
 623         }
 624
 625         SIMD_FORCE_INLINE void  setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
 626         {
 627                 m_floats[0]=_x;
 628                 m_floats[1]=_y;
 629                 m_floats[2]=_z;
 630                 m_floats[3] = btScalar(0.f);
 631         }
 632
 633         void    getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
 634         {
 635 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 636
 637                 __m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
 638                 __m128 V0 = _mm_xor_ps(btvMzeroMask, V);
 639                 __m128 V2 = _mm_movelh_ps(V0, V);
 640
 641                 __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
 642
 643         V0 = _mm_shuffle_ps(V0, V, 0xDB);
 644                 V2 = _mm_shuffle_ps(V2, V, 0xF9);
 645
 646                 v0->mVec128 = V0;
 647                 v1->mVec128 = V1;
 648                 v2->mVec128 = V2;
 649 #else
 650                 v0->setValue(0.         ,-z()           ,y());
 651                 v1->setValue(z()        ,0.                     ,-x());
 652                 v2->setValue(-y()       ,x()    ,0.);
 653 #endif
 654         }
 655
 656         void setZero()
 657         {
 658 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 659                 mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
 660 #elif defined(BT_USE_NEON)
 661                 int32x4_t vi = vdupq_n_s32(0);
 662                 mVec128 = vreinterpretq_f32_s32(vi);
 663 #else
 664                 setValue(btScalar(0.),btScalar(0.),btScalar(0.));
 665 #endif
 666         }
 667
 668         SIMD_FORCE_INLINE bool isZero() const
 669         {
 670                 return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
 671         }
 672
 673         SIMD_FORCE_INLINE bool fuzzyZero() const
 674         {
 675                 return length2() < SIMD_EPSILON;
 676         }
 677
 678         SIMD_FORCE_INLINE       void    serialize(struct        btVector3Data& dataOut) const;
 679
 680         SIMD_FORCE_INLINE       void    deSerialize(const struct        btVector3Data& dataIn);
 681
 682         SIMD_FORCE_INLINE       void    serializeFloat(struct   btVector3FloatData& dataOut) const;
 683
 684         SIMD_FORCE_INLINE       void    deSerializeFloat(const struct   btVector3FloatData& dataIn);
 685
 686         SIMD_FORCE_INLINE       void    serializeDouble(struct  btVector3DoubleData& dataOut) const;
 687
 688         SIMD_FORCE_INLINE       void    deSerializeDouble(const struct  btVector3DoubleData& dataIn);
 689
 690         /**@brief returns index of maximum dot product between this and vectors in array[]
 691          * @param array The other vectors
 692          * @param array_count The number of other vectors
 693          * @param dotOut The maximum dot product */
 694         SIMD_FORCE_INLINE   long    maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
 695
 696         /**@brief returns index of minimum dot product between this and vectors in array[]
 697          * @param array The other vectors
 698          * @param array_count The number of other vectors
 699          * @param dotOut The minimum dot product */
 700         SIMD_FORCE_INLINE   long    minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
 701
 702     /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
 703     SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
 704     {
 705 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 706
 707         __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
 708         __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
 709         __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
 710         __m128 b0 = _mm_unpacklo_ps( a0, a1 );
 711         __m128 b1 = _mm_unpackhi_ps( a0, a1 );
 712         __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
 713         __m128 r = _mm_movelh_ps( b0, b2 );
 714         r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
 715         a2 = _mm_and_ps( a2, btvxyzMaskf);
 716         r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
 717         return btVector3(r);
 718
 719 #elif defined(BT_USE_NEON)
 720         static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
 721         float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
 722         float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
 723         float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
 724         float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
 725         a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
 726         float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
 727         float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
 728         return btVector3( vcombine_f32(b0, b1) );
 729 #else
 730                 return btVector3( dot(v0), dot(v1), dot(v2));
 731 #endif
 732     }
 733 };
 734
 735 /**@brief Return the sum of two vectors (Point symantics)*/
 736 SIMD_FORCE_INLINE btVector3
 737 operator+(const btVector3& v1, const btVector3& v2)
 738 {
 739 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 740         return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
 741 #elif defined(BT_USE_NEON)
 742         return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
 743 #else
 744         return btVector3(
 745                         v1.m_floats[0] + v2.m_floats[0],
 746                         v1.m_floats[1] + v2.m_floats[1],
 747                         v1.m_floats[2] + v2.m_floats[2]);
 748 #endif
 749 }
 750
 751 /**@brief Return the elementwise product of two vectors */
 752 SIMD_FORCE_INLINE btVector3
 753 operator*(const btVector3& v1, const btVector3& v2)
 754 {
 755 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 756         return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
 757 #elif defined(BT_USE_NEON)
 758         return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
 759 #else
 760         return btVector3(
 761                         v1.m_floats[0] * v2.m_floats[0],
 762                         v1.m_floats[1] * v2.m_floats[1],
 763                         v1.m_floats[2] * v2.m_floats[2]);
 764 #endif
 765 }
 766
 767 /**@brief Return the difference between two vectors */
 768 SIMD_FORCE_INLINE btVector3
 769 operator-(const btVector3& v1, const btVector3& v2)
 770 {
 771 #if (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
 772
 773         //      without _mm_and_ps this code causes slowdown in Concave moving
 774         __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
 775         return btVector3(_mm_and_ps(r, btvFFF0fMask));
 776 #elif defined(BT_USE_NEON)
 777         float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
 778         return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
 779 #else
 780         return btVector3(
 781                         v1.m_floats[0] - v2.m_floats[0],
 782                         v1.m_floats[1] - v2.m_floats[1],
 783                         v1.m_floats[2] - v2.m_floats[2]);
 784 #endif
 785 }
 786
 787 /**@brief Return the negative of the vector */
 788 SIMD_FORCE_INLINE btVector3
 789 operator-(const btVector3& v)
 790 {
 791 #if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
 792         __m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
 793         return btVector3(_mm_and_ps(r, btvFFF0fMask));
 794 #elif defined(BT_USE_NEON)
 795         return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
 796 #else
 797         return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
 798 #endif
 799 }
 800
 801 /**@brief Return the vector scaled by s */
 802 SIMD_FORCE_INLINE btVector3
 803 operator*(const btVector3& v, const btScalar& s)
 804 {
 805 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 806         __m128  vs = _mm_load_ss(&s);   //      (S 0 0 0)
 807         vs = bt_pshufd_ps(vs, 0x80);    //      (S S S 0.0)
 808         return btVector3(_mm_mul_ps(v.mVec128, vs));
 809 #elif defined(BT_USE_NEON)
 810         float32x4_t r = vmulq_n_f32(v.mVec128, s);
 811         return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
 812 #else
 813         return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
 814 #endif
 815 }
 816
 817 /**@brief Return the vector scaled by s */
 818 SIMD_FORCE_INLINE btVector3
 819 operator*(const btScalar& s, const btVector3& v)
 820 {
 821         return v * s;
 822 }
 823
 824 /**@brief Return the vector inversely scaled by s */
 825 SIMD_FORCE_INLINE btVector3
 826 operator/(const btVector3& v, const btScalar& s)
 827 {
 828         btFullAssert(s != btScalar(0.0));
 829 #if 0 //defined(BT_USE_SSE_IN_API)
 830 // this code is not faster !
 831         __m128 vs = _mm_load_ss(&s);
 832     vs = _mm_div_ss(v1110, vs);
 833         vs = bt_pshufd_ps(vs, 0x00);    //      (S S S S)
 834
 835         return btVector3(_mm_mul_ps(v.mVec128, vs));
 836 #else
 837         return v * (btScalar(1.0) / s);
 838 #endif
 839 }
 840
 841 /**@brief Return the vector inversely scaled by s */
 842 SIMD_FORCE_INLINE btVector3
 843 operator/(const btVector3& v1, const btVector3& v2)
 844 {
 845 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
 846         __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
 847         vec = _mm_and_ps(vec, btvFFF0fMask);
 848         return btVector3(vec);
 849 #elif defined(BT_USE_NEON)
 850         float32x4_t x, y, v, m;
 851
 852         x = v1.mVec128;
 853         y = v2.mVec128;
 854
 855         v = vrecpeq_f32(y);                     // v ~ 1/y
 856         m = vrecpsq_f32(y, v);          // m = (2-v*y)
 857         v = vmulq_f32(v, m);            // vv = v*m ~~ 1/y
 858         m = vrecpsq_f32(y, v);          // mm = (2-vv*y)
 859         v = vmulq_f32(v, x);            // x*vv
 860         v = vmulq_f32(v, m);            // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
 861
 862         return btVector3(v);
 863 #else
 864         return btVector3(
 865                         v1.m_floats[0] / v2.m_floats[0],
 866                         v1.m_floats[1] / v2.m_floats[1],
 867                         v1.m_floats[2] / v2.m_floats[2]);
 868 #endif
 869 }
 870
 871 /**@brief Return the dot product between two vectors */
 872 SIMD_FORCE_INLINE btScalar
 873 btDot(const btVector3& v1, const btVector3& v2)
 874 {
 875         return v1.dot(v2);
 876 }
 877
 878
 879 /**@brief Return the distance squared between two vectors */
 880 SIMD_FORCE_INLINE btScalar
 881 btDistance2(const btVector3& v1, const btVector3& v2)
 882 {
 883         return v1.distance2(v2);
 884 }
 885
 886
 887 /**@brief Return the distance between two vectors */
 888 SIMD_FORCE_INLINE btScalar
 889 btDistance(const btVector3& v1, const btVector3& v2)
 890 {
 891         return v1.distance(v2);
 892 }
 893
 894 /**@brief Return the angle between two vectors */
 895 SIMD_FORCE_INLINE btScalar
 896 btAngle(const btVector3& v1, const btVector3& v2)
 897 {
 898         return v1.angle(v2);
 899 }
 900
 901 /**@brief Return the cross product of two vectors */
 902 SIMD_FORCE_INLINE btVector3
 903 btCross(const btVector3& v1, const btVector3& v2)
 904 {
 905         return v1.cross(v2);
 906 }
 907
 908 SIMD_FORCE_INLINE btScalar
 909 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
 910 {
 911         return v1.triple(v2, v3);
 912 }
 913
 914 /**@brief Return the linear interpolation between two vectors
 915  * @param v1 One vector
 916  * @param v2 The other vector
 917  * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
 918 SIMD_FORCE_INLINE btVector3
 919 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
 920 {
 921         return v1.lerp(v2, t);
 922 }
 923
 924
 925
 926 SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
 927 {
 928         return (v - *this).length2();
 929 }
 930
 931 SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
 932 {
 933         return (v - *this).length();
 934 }
 935
 936 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
 937 {
 938 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 939         btVector3 norm = *this;
 940
 941         return norm.normalize();
 942 #else
 943         return *this / length();
 944 #endif
 945 }
 946
 947 SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
 948 {
 949         // wAxis must be a unit lenght vector
 950
 951 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 952
 953     __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
 954         btScalar ssin = btSin( _angle );
 955     __m128 C = wAxis.cross( mVec128 ).mVec128;
 956         O = _mm_and_ps(O, btvFFF0fMask);
 957     btScalar scos = btCos( _angle );
 958
 959         __m128 vsin = _mm_load_ss(&ssin);       //      (S 0 0 0)
 960     __m128 vcos = _mm_load_ss(&scos);   //      (S 0 0 0)
 961
 962         __m128 Y = bt_pshufd_ps(O, 0xC9);       //      (Y Z X 0)
 963         __m128 Z = bt_pshufd_ps(O, 0xD2);       //      (Z X Y 0)
 964         O = _mm_add_ps(O, Y);
 965         vsin = bt_pshufd_ps(vsin, 0x80);        //      (S S S 0)
 966         O = _mm_add_ps(O, Z);
 967     vcos = bt_pshufd_ps(vcos, 0x80);    //      (S S S 0)
 968
 969     vsin = vsin * C;
 970         O = O * wAxis.mVec128;
 971         __m128 X = mVec128 - O;
 972
 973     O = O + vsin;
 974         vcos = vcos * X;
 975         O = O + vcos;
 976
 977         return btVector3(O);
 978 #else
 979         btVector3 o = wAxis * wAxis.dot( *this );
 980         btVector3 _x = *this - o;
 981         btVector3 _y;
 982
 983         _y = wAxis.cross( *this );
 984
 985         return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
 986 #endif
 987 }
 988
 989 SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
 990 {
 991 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
 992     #if defined _WIN32 || defined (BT_USE_SSE)
 993         const long scalar_cutoff = 10;
 994         long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
 995     #elif defined BT_USE_NEON
 996         const long scalar_cutoff = 4;
 997         extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
 998     #endif
 999     if( array_count < scalar_cutoff )
1000 #else
1001
1002 #endif//BT_USE_SSE || BT_USE_NEON
1003     {
1004         btScalar maxDot = -SIMD_INFINITY;
1005         int i = 0;
1006         int ptIndex = -1;
1007         for( i = 0; i < array_count; i++ )
1008         {
1009             btScalar dot = array[i].dot(*this);
1010
1011             if( dot > maxDot )
1012             {
1013                 maxDot = dot;
1014                 ptIndex = i;
1015             }
1016         }
1017
1018         dotOut = maxDot;
1019         return ptIndex;
1020     }
1021 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1022     return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1023 #endif
1024 }
1025
1026 SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1027 {
1028 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1029     #if defined BT_USE_SSE
1030         const long scalar_cutoff = 10;
1031         long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1032     #elif defined BT_USE_NEON
1033         const long scalar_cutoff = 4;
1034         extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1035     #else
1036         #error unhandled arch!
1037     #endif
1038
1039     if( array_count < scalar_cutoff )
1040 #endif//BT_USE_SSE || BT_USE_NEON
1041     {
1042         btScalar  minDot = SIMD_INFINITY;
1043         int i = 0;
1044         int ptIndex = -1;
1045
1046         for( i = 0; i < array_count; i++ )
1047         {
1048             btScalar dot = array[i].dot(*this);
1049
1050             if( dot < minDot )
1051             {
1052                 minDot = dot;
1053                 ptIndex = i;
1054             }
1055         }
1056
1057         dotOut = minDot;
1058
1059         return ptIndex;
1060     }
1061 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1062     return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1063 #endif
1064 }
1065
1066
1067 class btVector4 : public btVector3
1068 {
1069 public:
1070
1071         SIMD_FORCE_INLINE btVector4() {}
1072
1073
1074         SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1075                 : btVector3(_x,_y,_z)
1076         {
1077                 m_floats[3] = _w;
1078         }
1079
1080 #if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
1081         SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
1082         {
1083                 mVec128 = vec;
1084         }
1085
1086         SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
1087         {
1088                 mVec128 = rhs.mVec128;
1089         }
1090
1091         SIMD_FORCE_INLINE btVector4&
1092         operator=(const btVector4& v)
1093         {
1094                 mVec128 = v.mVec128;
1095                 return *this;
1096         }
1097 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1098
1099         SIMD_FORCE_INLINE btVector4 absolute4() const
1100         {
1101 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
1102                 return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
1103 #elif defined(BT_USE_NEON)
1104                 return btVector4(vabsq_f32(mVec128));
1105 #else
1106                 return btVector4(
1107                         btFabs(m_floats[0]),
1108                         btFabs(m_floats[1]),
1109                         btFabs(m_floats[2]),
1110                         btFabs(m_floats[3]));
1111 #endif
1112         }
1113
1114
1115         btScalar        getW() const { return m_floats[3];}
1116
1117
1118                 SIMD_FORCE_INLINE int maxAxis4() const
1119         {
1120                 int maxIndex = -1;
1121                 btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
1122                 if (m_floats[0] > maxVal)
1123                 {
1124                         maxIndex = 0;
1125                         maxVal = m_floats[0];
1126                 }
1127                 if (m_floats[1] > maxVal)
1128                 {
1129                         maxIndex = 1;
1130                         maxVal = m_floats[1];
1131                 }
1132                 if (m_floats[2] > maxVal)
1133                 {
1134                         maxIndex = 2;
1135                         maxVal =m_floats[2];
1136                 }
1137                 if (m_floats[3] > maxVal)
1138                 {
1139                         maxIndex = 3;
1140                         maxVal = m_floats[3];
1141                 }
1142
1143                 return maxIndex;
1144         }
1145
1146
1147         SIMD_FORCE_INLINE int minAxis4() const
1148         {
1149                 int minIndex = -1;
1150                 btScalar minVal = btScalar(BT_LARGE_FLOAT);
1151                 if (m_floats[0] < minVal)
1152                 {
1153                         minIndex = 0;
1154                         minVal = m_floats[0];
1155                 }
1156                 if (m_floats[1] < minVal)
1157                 {
1158                         minIndex = 1;
1159                         minVal = m_floats[1];
1160                 }
1161                 if (m_floats[2] < minVal)
1162                 {
1163                         minIndex = 2;
1164                         minVal =m_floats[2];
1165                 }
1166                 if (m_floats[3] < minVal)
1167                 {
1168                         minIndex = 3;
1169                         minVal = m_floats[3];
1170                 }
1171
1172                 return minIndex;
1173         }
1174
1175
1176         SIMD_FORCE_INLINE int closestAxis4() const
1177         {
1178                 return absolute4().maxAxis4();
1179         }
1180
1181
1182
1183
1184   /**@brief Set x,y,z and zero w
1185    * @param x Value of x
1186    * @param y Value of y
1187    * @param z Value of z
1188    */
1189
1190
1191 /*              void getValue(btScalar *m) const
1192                 {
1193                         m[0] = m_floats[0];
1194                         m[1] = m_floats[1];
1195                         m[2] =m_floats[2];
1196                 }
1197 */
1198 /**@brief Set the values
1199    * @param x Value of x
1200    * @param y Value of y
1201    * @param z Value of z
1202    * @param w Value of w
1203    */
1204                 SIMD_FORCE_INLINE void  setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1205                 {
1206                         m_floats[0]=_x;
1207                         m_floats[1]=_y;
1208                         m_floats[2]=_z;
1209                         m_floats[3]=_w;
1210                 }
1211
1212
1213 };
1214
1215
1216 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
1217 SIMD_FORCE_INLINE void  btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1218 {
1219         #ifdef BT_USE_DOUBLE_PRECISION
1220         unsigned char* dest = (unsigned char*) &destVal;
1221         unsigned char* src  = (unsigned char*) &sourceVal;
1222         dest[0] = src[7];
1223     dest[1] = src[6];
1224     dest[2] = src[5];
1225     dest[3] = src[4];
1226     dest[4] = src[3];
1227     dest[5] = src[2];
1228     dest[6] = src[1];
1229     dest[7] = src[0];
1230 #else
1231         unsigned char* dest = (unsigned char*) &destVal;
1232         unsigned char* src  = (unsigned char*) &sourceVal;
1233         dest[0] = src[3];
1234     dest[1] = src[2];
1235     dest[2] = src[1];
1236     dest[3] = src[0];
1237 #endif //BT_USE_DOUBLE_PRECISION
1238 }
1239 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
1240 SIMD_FORCE_INLINE void  btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
1241 {
1242         for (int i=0;i<4;i++)
1243         {
1244                 btSwapScalarEndian(sourceVec[i],destVec[i]);
1245         }
1246
1247 }
1248
1249 ///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
1250 SIMD_FORCE_INLINE void  btUnSwapVector3Endian(btVector3& vector)
1251 {
1252
1253         btVector3       swappedVec;
1254         for (int i=0;i<4;i++)
1255         {
1256                 btSwapScalarEndian(vector[i],swappedVec[i]);
1257         }
1258         vector = swappedVec;
1259 }
1260
1261 template <class T>
1262 SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
1263 {
1264   if (btFabs(n[2]) > SIMDSQRT12) {
1265     // choose p in y-z plane
1266     btScalar a = n[1]*n[1] + n[2]*n[2];
1267     btScalar k = btRecipSqrt (a);
1268     p[0] = 0;
1269         p[1] = -n[2]*k;
1270         p[2] = n[1]*k;
1271     // set q = n x p
1272     q[0] = a*k;
1273         q[1] = -n[0]*p[2];
1274         q[2] = n[0]*p[1];
1275   }
1276   else {
1277     // choose p in x-y plane
1278     btScalar a = n[0]*n[0] + n[1]*n[1];
1279     btScalar k = btRecipSqrt (a);
1280     p[0] = -n[1]*k;
1281         p[1] = n[0]*k;
1282         p[2] = 0;
1283     // set q = n x p
1284     q[0] = -n[2]*p[1];
1285         q[1] = n[2]*p[0];
1286         q[2] = a*k;
1287   }
1288 }
1289
1290
1291 struct  btVector3FloatData
1292 {
1293         float   m_floats[4];
1294 };
1295
1296 struct  btVector3DoubleData
1297 {
1298         double  m_floats[4];
1299
1300 };
1301
1302 SIMD_FORCE_INLINE       void    btVector3::serializeFloat(struct        btVector3FloatData& dataOut) const
1303 {
1304         ///could also do a memcpy, check if it is worth it
1305         for (int i=0;i<4;i++)
1306                 dataOut.m_floats[i] = float(m_floats[i]);
1307 }
1308
1309 SIMD_FORCE_INLINE void  btVector3::deSerializeFloat(const struct        btVector3FloatData& dataIn)
1310 {
1311         for (int i=0;i<4;i++)
1312                 m_floats[i] = btScalar(dataIn.m_floats[i]);
1313 }
1314
1315
1316 SIMD_FORCE_INLINE       void    btVector3::serializeDouble(struct       btVector3DoubleData& dataOut) const
1317 {
1318         ///could also do a memcpy, check if it is worth it
1319         for (int i=0;i<4;i++)
1320                 dataOut.m_floats[i] = double(m_floats[i]);
1321 }
1322
1323 SIMD_FORCE_INLINE void  btVector3::deSerializeDouble(const struct       btVector3DoubleData& dataIn)
1324 {
1325         for (int i=0;i<4;i++)
1326                 m_floats[i] = btScalar(dataIn.m_floats[i]);
1327 }
1328
1329
1330 SIMD_FORCE_INLINE       void    btVector3::serialize(struct     btVector3Data& dataOut) const
1331 {
1332         ///could also do a memcpy, check if it is worth it
1333         for (int i=0;i<4;i++)
1334                 dataOut.m_floats[i] = m_floats[i];
1335 }
1336
1337 SIMD_FORCE_INLINE void  btVector3::deSerialize(const struct     btVector3Data& dataIn)
1338 {
1339         for (int i=0;i<4;i++)
1340                 m_floats[i] = dataIn.m_floats[i];
1341 }
1342
1343 #endif //BT_VECTOR3_H