2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
19 #include "btVector3.h"
20 #include "btQuaternion.h"
#ifdef BT_USE_SSE
//const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
// Sign mask that flips only the first SIMD lane (-0 xor'd in lane 0).
const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
#endif

#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
// Rows of the 3x3 identity matrix as SIMD vectors (w lane kept zero).
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif
// Select the serialization struct matching the build's btScalar precision.
#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif  //BT_USE_DOUBLE_PRECISION
41 /**@brief The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with btQuaternion, btTransform and btVector3.
42 * Make sure to only include a pure orthogonal matrix without scaling. */
43 ATTRIBUTE_ALIGNED16(class) btMatrix3x3 {
45 ///Data storage for the matrix, each vector is a row of the matrix
49 /** @brief No initializaion constructor */
52 // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
54 /**@brief Constructor from Quaternion */
55 explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
57 template <typename btScalar>
58 Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
60 setEulerYPR(yaw, pitch, roll);
63 /** @brief Constructor with row major formatting */
64 btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
65 const btScalar& yx, const btScalar& yy, const btScalar& yz,
66 const btScalar& zx, const btScalar& zy, const btScalar& zz)
73 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
74 SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
81 SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
89 SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
91 m_el[0].mVec128 = rhs.m_el[0].mVec128;
92 m_el[1].mVec128 = rhs.m_el[1].mVec128;
93 m_el[2].mVec128 = rhs.m_el[2].mVec128;
96 // Assignment Operator
97 SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
99 m_el[0].mVec128 = m.m_el[0].mVec128;
100 m_el[1].mVec128 = m.m_el[1].mVec128;
101 m_el[2].mVec128 = m.m_el[2].mVec128;
108 /** @brief Copy constructor */
109 SIMD_FORCE_INLINE btMatrix3x3 (const btMatrix3x3& other)
111 m_el[0] = other.m_el[0];
112 m_el[1] = other.m_el[1];
113 m_el[2] = other.m_el[2];
116 /** @brief Assignment Operator */
117 SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
119 m_el[0] = other.m_el[0];
120 m_el[1] = other.m_el[1];
121 m_el[2] = other.m_el[2];
127 /** @brief Get a column of the matrix as a vector
128 * @param i Column number 0 indexed */
129 SIMD_FORCE_INLINE btVector3 getColumn(int i) const
131 return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
135 /** @brief Get a row of the matrix as a vector
136 * @param i Row number 0 indexed */
137 SIMD_FORCE_INLINE const btVector3& getRow(int i) const
139 btFullAssert(0 <= i && i < 3);
143 /** @brief Get a mutable reference to a row of the matrix as a vector
144 * @param i Row number 0 indexed */
145 SIMD_FORCE_INLINE btVector3& operator[](int i)
147 btFullAssert(0 <= i && i < 3);
151 /** @brief Get a const reference to a row of the matrix as a vector
152 * @param i Row number 0 indexed */
153 SIMD_FORCE_INLINE const btVector3& operator[](int i) const
155 btFullAssert(0 <= i && i < 3);
159 /** @brief Multiply by the target matrix on the right
160 * @param m Rotation matrix to be applied
161 * Equivilant to this = this * m */
162 btMatrix3x3& operator*=(const btMatrix3x3& m);
164 /** @brief Adds by the target matrix on the right
165 * @param m matrix to be applied
166 * Equivilant to this = this + m */
167 btMatrix3x3& operator+=(const btMatrix3x3& m);
169 /** @brief Substractss by the target matrix on the right
170 * @param m matrix to be applied
171 * Equivilant to this = this - m */
172 btMatrix3x3& operator-=(const btMatrix3x3& m);
174 /** @brief Set from the rotational part of a 4x4 OpenGL matrix
175 * @param m A pointer to the beginning of the array of scalars*/
176 void setFromOpenGLSubMatrix(const btScalar *m)
178 m_el[0].setValue(m[0],m[4],m[8]);
179 m_el[1].setValue(m[1],m[5],m[9]);
180 m_el[2].setValue(m[2],m[6],m[10]);
183 /** @brief Set the values of the matrix explicitly (row major)
185 * @param xy Top Middle
186 * @param xz Top Right
187 * @param yx Middle Left
188 * @param yy Middle Middle
189 * @param yz Middle Right
190 * @param zx Bottom Left
191 * @param zy Bottom Middle
192 * @param zz Bottom Right*/
193 void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
194 const btScalar& yx, const btScalar& yy, const btScalar& yz,
195 const btScalar& zx, const btScalar& zy, const btScalar& zz)
197 m_el[0].setValue(xx,xy,xz);
198 m_el[1].setValue(yx,yy,yz);
199 m_el[2].setValue(zx,zy,zz);
202 /** @brief Set the matrix from a quaternion
203 * @param q The Quaternion to match */
204 void setRotation(const btQuaternion& q)
206 btScalar d = q.length2();
207 btFullAssert(d != btScalar(0.0));
208 btScalar s = btScalar(2.0) / d;
210 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
211 __m128 vs, Q = q.get128();
212 __m128i Qi = btCastfTo128i(Q);
215 __m128 V11, V21, V31;
216 __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
217 __m128i NQi = btCastfTo128i(NQ);
219 V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
220 V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
221 V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
222 V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
224 V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
225 V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
226 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
234 V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
235 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
236 V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
237 Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
238 Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
240 vs = _mm_load_ss(&s);
248 vs = bt_splat3_ps(vs, 0);
262 btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
263 btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
264 btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
265 btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
267 btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
268 xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
269 xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
274 /** @brief Set the matrix from euler angles using YPR around YXZ respectively
275 * @param yaw Yaw about Y axis
276 * @param pitch Pitch about X axis
277 * @param roll Roll about Z axis
279 void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
281 setEulerZYX(roll, pitch, yaw);
284 /** @brief Set the matrix from euler angles YPR around ZYX axes
285 * @param eulerX Roll about X axis
286 * @param eulerY Pitch around Y axis
287 * @param eulerZ Yaw aboud Z axis
289 * These angles are used to produce a rotation matrix. The euler
290 * angles are applied in ZYX order. I.e a vector is first rotated
291 * about X then Y and then Z
293 void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
294 ///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
295 btScalar ci ( btCos(eulerX));
296 btScalar cj ( btCos(eulerY));
297 btScalar ch ( btCos(eulerZ));
298 btScalar si ( btSin(eulerX));
299 btScalar sj ( btSin(eulerY));
300 btScalar sh ( btSin(eulerZ));
301 btScalar cc = ci * ch;
302 btScalar cs = ci * sh;
303 btScalar sc = si * ch;
304 btScalar ss = si * sh;
306 setValue(cj * ch, sj * sc - cs, sj * cc + ss,
307 cj * sh, sj * ss + cc, sj * cs - sc,
308 -sj, cj * si, cj * ci);
311 /**@brief Set the matrix to the identity */
314 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
319 setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
320 btScalar(0.0), btScalar(1.0), btScalar(0.0),
321 btScalar(0.0), btScalar(0.0), btScalar(1.0));
325 static const btMatrix3x3& getIdentity()
327 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
328 static const btMatrix3x3
329 identityMatrix(v1000, v0100, v0010);
331 static const btMatrix3x3
333 btScalar(1.0), btScalar(0.0), btScalar(0.0),
334 btScalar(0.0), btScalar(1.0), btScalar(0.0),
335 btScalar(0.0), btScalar(0.0), btScalar(1.0));
337 return identityMatrix;
340 /**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
341 * @param m The array to be filled */
342 void getOpenGLSubMatrix(btScalar *m) const
344 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
345 __m128 v0 = m_el[0].mVec128;
346 __m128 v1 = m_el[1].mVec128;
347 __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
348 __m128 *vm = (__m128 *)m;
351 v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
353 vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
354 v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
356 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
357 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
358 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
363 #elif defined(BT_USE_NEON)
364 // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
365 static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
366 float32x4_t *vm = (float32x4_t *)m;
367 float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
368 float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
369 float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
370 float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
371 float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
372 float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 m[0] = btScalar(m_el[0].x());
379 m[1] = btScalar(m_el[1].x());
380 m[2] = btScalar(m_el[2].x());
381 m[3] = btScalar(0.0);
382 m[4] = btScalar(m_el[0].y());
383 m[5] = btScalar(m_el[1].y());
384 m[6] = btScalar(m_el[2].y());
385 m[7] = btScalar(0.0);
386 m[8] = btScalar(m_el[0].z());
387 m[9] = btScalar(m_el[1].z());
388 m[10] = btScalar(m_el[2].z());
389 m[11] = btScalar(0.0);
393 /**@brief Get the matrix represented as a quaternion
394 * @param q The quaternion which will be set */
395 void getRotation(btQuaternion& q) const
397 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
398 btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
406 if (trace > btScalar(0.0))
408 x = trace + btScalar(1.0);
410 temp.f[0]=m_el[2].y() - m_el[1].z();
411 temp.f[1]=m_el[0].z() - m_el[2].x();
412 temp.f[2]=m_el[1].x() - m_el[0].y();
414 //temp.f[3]= s * btScalar(0.5);
419 if(m_el[0].x() < m_el[1].y())
421 if( m_el[1].y() < m_el[2].z() )
422 { i = 2; j = 0; k = 1; }
424 { i = 1; j = 2; k = 0; }
428 if( m_el[0].x() < m_el[2].z())
429 { i = 2; j = 0; k = 1; }
431 { i = 0; j = 1; k = 2; }
434 x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
436 temp.f[3] = (m_el[k][j] - m_el[j][k]);
437 temp.f[j] = (m_el[j][i] + m_el[i][j]);
438 temp.f[k] = (m_el[k][i] + m_el[i][k]);
440 //temp.f[i] = s * btScalar(0.5);
445 s = btScalar(0.5) / s;
449 btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
453 if (trace > btScalar(0.0))
455 btScalar s = btSqrt(trace + btScalar(1.0));
456 temp[3]=(s * btScalar(0.5));
457 s = btScalar(0.5) / s;
459 temp[0]=((m_el[2].y() - m_el[1].z()) * s);
460 temp[1]=((m_el[0].z() - m_el[2].x()) * s);
461 temp[2]=((m_el[1].x() - m_el[0].y()) * s);
465 int i = m_el[0].x() < m_el[1].y() ?
466 (m_el[1].y() < m_el[2].z() ? 2 : 1) :
467 (m_el[0].x() < m_el[2].z() ? 2 : 0);
471 btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
472 temp[i] = s * btScalar(0.5);
473 s = btScalar(0.5) / s;
475 temp[3] = (m_el[k][j] - m_el[j][k]) * s;
476 temp[j] = (m_el[j][i] + m_el[i][j]) * s;
477 temp[k] = (m_el[k][i] + m_el[i][k]) * s;
479 q.setValue(temp[0],temp[1],temp[2],temp[3]);
483 /**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
484 * @param yaw Yaw around Y axis
485 * @param pitch Pitch around X axis
486 * @param roll around Z axis */
487 void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
490 // first use the normal calculus
491 yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
492 pitch = btScalar(btAsin(-m_el[2].x()));
493 roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
495 // on pitch = +/-HalfPI
496 if (btFabs(pitch)==SIMD_HALF_PI)
511 /**@brief Get the matrix represented as euler angles around ZYX
512 * @param yaw Yaw around X axis
513 * @param pitch Pitch around Y axis
514 * @param roll around X axis
515 * @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values*/
516 void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
526 Euler euler_out2; //second solution
527 //get the pointer to the raw data
529 // Check that pitch is not at a singularity
530 if (btFabs(m_el[2].x()) >= 1)
535 // From difference of angles formula
536 btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
537 if (m_el[2].x() > 0) //gimbal locked up
539 euler_out.pitch = SIMD_PI / btScalar(2.0);
540 euler_out2.pitch = SIMD_PI / btScalar(2.0);
541 euler_out.roll = euler_out.pitch + delta;
542 euler_out2.roll = euler_out.pitch + delta;
544 else // gimbal locked down
546 euler_out.pitch = -SIMD_PI / btScalar(2.0);
547 euler_out2.pitch = -SIMD_PI / btScalar(2.0);
548 euler_out.roll = -euler_out.pitch + delta;
549 euler_out2.roll = -euler_out.pitch + delta;
554 euler_out.pitch = - btAsin(m_el[2].x());
555 euler_out2.pitch = SIMD_PI - euler_out.pitch;
557 euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
558 m_el[2].z()/btCos(euler_out.pitch));
559 euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
560 m_el[2].z()/btCos(euler_out2.pitch));
562 euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
563 m_el[0].x()/btCos(euler_out.pitch));
564 euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
565 m_el[0].x()/btCos(euler_out2.pitch));
568 if (solution_number == 1)
571 pitch = euler_out.pitch;
572 roll = euler_out.roll;
576 yaw = euler_out2.yaw;
577 pitch = euler_out2.pitch;
578 roll = euler_out2.roll;
582 /**@brief Create a scaled copy of the matrix
583 * @param s Scaling vector The elements of the vector will scale each column */
585 btMatrix3x3 scaled(const btVector3& s) const
587 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
588 return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
591 m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
592 m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
593 m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
597 /**@brief Return the determinant of the matrix */
598 btScalar determinant() const;
599 /**@brief Return the adjoint of the matrix */
600 btMatrix3x3 adjoint() const;
601 /**@brief Return the matrix with all values non negative */
602 btMatrix3x3 absolute() const;
603 /**@brief Return the transpose of the matrix */
604 btMatrix3x3 transpose() const;
605 /**@brief Return the inverse of the matrix */
606 btMatrix3x3 inverse() const;
608 btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
609 btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
611 SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const
613 return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
615 SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const
617 return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
619 SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const
621 return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
625 /**@brief diagonalizes this matrix by the Jacobi method.
626 * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
627 * coordinate system, i.e., old_this = rot * new_this * rot^T.
628 * @param threshold See iteration
629 * @param iteration The iteration stops when all off-diagonal elements are less than the threshold multiplied
630 * by the sum of the absolute values of the diagonal, or when maxSteps have been executed.
632 * Note that this matrix is assumed to be symmetric.
634 void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
637 for (int step = maxSteps; step > 0; step--)
639 // find off-diagonal element [p][q] with largest magnitude
643 btScalar max = btFabs(m_el[0][1]);
644 btScalar v = btFabs(m_el[0][2]);
651 v = btFabs(m_el[1][2]);
660 btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
663 if (max <= SIMD_EPSILON * t)
670 // compute Jacobi rotation J which leads to a zero for element [p][q]
671 btScalar mpq = m_el[p][q];
672 btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
673 btScalar theta2 = theta * theta;
676 if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
678 t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
679 : 1 / (theta - btSqrt(1 + theta2));
680 cos = 1 / btSqrt(1 + t * t);
685 // approximation for large theta-value, i.e., a nearly diagonal matrix
686 t = 1 / (theta * (2 + btScalar(0.5) / theta2));
687 cos = 1 - btScalar(0.5) * t * t;
691 // apply rotation to matrix (this = J^T * this * J)
692 m_el[p][q] = m_el[q][p] = 0;
693 m_el[p][p] -= t * mpq;
694 m_el[q][q] += t * mpq;
695 btScalar mrp = m_el[r][p];
696 btScalar mrq = m_el[r][q];
697 m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
698 m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
700 // apply rotation to rot (rot = rot * J)
701 for (int i = 0; i < 3; i++)
703 btVector3& row = rot[i];
706 row[p] = cos * mrp - sin * mrq;
707 row[q] = cos * mrq + sin * mrp;
715 /**@brief Calculate the matrix cofactor
716 * @param r1 The first row to use for calculating the cofactor
717 * @param c1 The first column to use for calculating the cofactor
718 * @param r1 The second row to use for calculating the cofactor
719 * @param c1 The second column to use for calculating the cofactor
720 * See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
722 btScalar cofac(int r1, int c1, int r2, int c2) const
724 return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
727 void serialize(struct btMatrix3x3Data& dataOut) const;
729 void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
731 void deSerialize(const struct btMatrix3x3Data& dataIn);
733 void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
735 void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
740 SIMD_FORCE_INLINE btMatrix3x3&
741 btMatrix3x3::operator*=(const btMatrix3x3& m)
743 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
744 __m128 rv00, rv01, rv02;
745 __m128 rv10, rv11, rv12;
746 __m128 rv20, rv21, rv22;
747 __m128 mv0, mv1, mv2;
749 rv02 = m_el[0].mVec128;
750 rv12 = m_el[1].mVec128;
751 rv22 = m_el[2].mVec128;
753 mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
754 mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
755 mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
758 rv00 = bt_splat_ps(rv02, 0);
759 rv01 = bt_splat_ps(rv02, 1);
760 rv02 = bt_splat_ps(rv02, 2);
762 rv00 = _mm_mul_ps(rv00, mv0);
763 rv01 = _mm_mul_ps(rv01, mv1);
764 rv02 = _mm_mul_ps(rv02, mv2);
767 rv10 = bt_splat_ps(rv12, 0);
768 rv11 = bt_splat_ps(rv12, 1);
769 rv12 = bt_splat_ps(rv12, 2);
771 rv10 = _mm_mul_ps(rv10, mv0);
772 rv11 = _mm_mul_ps(rv11, mv1);
773 rv12 = _mm_mul_ps(rv12, mv2);
776 rv20 = bt_splat_ps(rv22, 0);
777 rv21 = bt_splat_ps(rv22, 1);
778 rv22 = bt_splat_ps(rv22, 2);
780 rv20 = _mm_mul_ps(rv20, mv0);
781 rv21 = _mm_mul_ps(rv21, mv1);
782 rv22 = _mm_mul_ps(rv22, mv2);
784 rv00 = _mm_add_ps(rv00, rv01);
785 rv10 = _mm_add_ps(rv10, rv11);
786 rv20 = _mm_add_ps(rv20, rv21);
788 m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
789 m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
790 m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
792 #elif defined(BT_USE_NEON)
794 float32x4_t rv0, rv1, rv2;
795 float32x4_t v0, v1, v2;
796 float32x4_t mv0, mv1, mv2;
798 v0 = m_el[0].mVec128;
799 v1 = m_el[1].mVec128;
800 v2 = m_el[2].mVec128;
802 mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
803 mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
804 mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
806 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
807 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
808 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
810 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
811 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
812 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
814 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
815 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
816 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
818 m_el[0].mVec128 = rv0;
819 m_el[1].mVec128 = rv1;
820 m_el[2].mVec128 = rv2;
823 m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
824 m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
825 m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
830 SIMD_FORCE_INLINE btMatrix3x3&
831 btMatrix3x3::operator+=(const btMatrix3x3& m)
833 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
834 m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
835 m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
836 m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
839 m_el[0][0]+m.m_el[0][0],
840 m_el[0][1]+m.m_el[0][1],
841 m_el[0][2]+m.m_el[0][2],
842 m_el[1][0]+m.m_el[1][0],
843 m_el[1][1]+m.m_el[1][1],
844 m_el[1][2]+m.m_el[1][2],
845 m_el[2][0]+m.m_el[2][0],
846 m_el[2][1]+m.m_el[2][1],
847 m_el[2][2]+m.m_el[2][2]);
852 SIMD_FORCE_INLINE btMatrix3x3
853 operator*(const btMatrix3x3& m, const btScalar & k)
855 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
856 __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
858 _mm_mul_ps(m[0].mVec128, vk),
859 _mm_mul_ps(m[1].mVec128, vk),
860 _mm_mul_ps(m[2].mVec128, vk));
861 #elif defined(BT_USE_NEON)
863 vmulq_n_f32(m[0].mVec128, k),
864 vmulq_n_f32(m[1].mVec128, k),
865 vmulq_n_f32(m[2].mVec128, k));
868 m[0].x()*k,m[0].y()*k,m[0].z()*k,
869 m[1].x()*k,m[1].y()*k,m[1].z()*k,
870 m[2].x()*k,m[2].y()*k,m[2].z()*k);
874 SIMD_FORCE_INLINE btMatrix3x3
875 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
877 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
879 m1[0].mVec128 + m2[0].mVec128,
880 m1[1].mVec128 + m2[1].mVec128,
881 m1[2].mVec128 + m2[2].mVec128);
898 SIMD_FORCE_INLINE btMatrix3x3
899 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
901 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
903 m1[0].mVec128 - m2[0].mVec128,
904 m1[1].mVec128 - m2[1].mVec128,
905 m1[2].mVec128 - m2[2].mVec128);
923 SIMD_FORCE_INLINE btMatrix3x3&
924 btMatrix3x3::operator-=(const btMatrix3x3& m)
926 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
927 m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
928 m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
929 m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
932 m_el[0][0]-m.m_el[0][0],
933 m_el[0][1]-m.m_el[0][1],
934 m_el[0][2]-m.m_el[0][2],
935 m_el[1][0]-m.m_el[1][0],
936 m_el[1][1]-m.m_el[1][1],
937 m_el[1][2]-m.m_el[1][2],
938 m_el[2][0]-m.m_el[2][0],
939 m_el[2][1]-m.m_el[2][1],
940 m_el[2][2]-m.m_el[2][2]);
946 SIMD_FORCE_INLINE btScalar
947 btMatrix3x3::determinant() const
949 return btTriple((*this)[0], (*this)[1], (*this)[2]);
953 SIMD_FORCE_INLINE btMatrix3x3
954 btMatrix3x3::absolute() const
956 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
958 _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
959 _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
960 _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
961 #elif defined(BT_USE_NEON)
963 (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
964 (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
965 (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
968 btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
969 btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
970 btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
974 SIMD_FORCE_INLINE btMatrix3x3
975 btMatrix3x3::transpose() const
977 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
978 __m128 v0 = m_el[0].mVec128;
979 __m128 v1 = m_el[1].mVec128;
980 __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
983 v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
985 vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
986 v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
988 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
989 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
990 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
993 return btMatrix3x3( v0, v1, v2 );
994 #elif defined(BT_USE_NEON)
995 // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
996 static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
997 float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
998 float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
999 float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1000 float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1001 float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1002 float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1003 return btMatrix3x3( v0, v1, v2 );
1005 return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1006 m_el[0].y(), m_el[1].y(), m_el[2].y(),
1007 m_el[0].z(), m_el[1].z(), m_el[2].z());
1011 SIMD_FORCE_INLINE btMatrix3x3
1012 btMatrix3x3::adjoint() const
1014 return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1015 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1016 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1019 SIMD_FORCE_INLINE btMatrix3x3
1020 btMatrix3x3::inverse() const
1022 btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1023 btScalar det = (*this)[0].dot(co);
1024 btFullAssert(det != btScalar(0.0));
1025 btScalar s = btScalar(1.0) / det;
1026 return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1027 co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1028 co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1031 SIMD_FORCE_INLINE btMatrix3x3
1032 btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
1034 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1036 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1037 __m128 row = m_el[0].mVec128;
1038 __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1039 __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1040 __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1041 __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1042 __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1043 __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1044 row = m_el[1].mVec128;
1045 r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1046 r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1047 r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1048 row = m_el[2].mVec128;
1049 r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1050 r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1051 r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1052 return btMatrix3x3( r0, r1, r2 );
1054 #elif defined BT_USE_NEON
1056 static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
1057 float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1058 float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1059 float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1060 float32x4_t row = m_el[0].mVec128;
1061 float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1062 float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1063 float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1064 row = m_el[1].mVec128;
1065 r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1066 r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1067 r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1068 row = m_el[2].mVec128;
1069 r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1070 r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1071 r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1072 return btMatrix3x3( r0, r1, r2 );
1075 m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1076 m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1077 m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1078 m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1079 m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1080 m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1081 m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1082 m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1083 m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1087 SIMD_FORCE_INLINE btMatrix3x3
1088 btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
1090 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1091 __m128 a0 = m_el[0].mVec128;
1092 __m128 a1 = m_el[1].mVec128;
1093 __m128 a2 = m_el[2].mVec128;
1095 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1096 __m128 mx = mT[0].mVec128;
1097 __m128 my = mT[1].mVec128;
1098 __m128 mz = mT[2].mVec128;
1100 __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1101 __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1102 __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1103 r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1104 r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1105 r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1106 r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1107 r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1108 r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1109 return btMatrix3x3( r0, r1, r2);
1111 #elif defined BT_USE_NEON
1112 float32x4_t a0 = m_el[0].mVec128;
1113 float32x4_t a1 = m_el[1].mVec128;
1114 float32x4_t a2 = m_el[2].mVec128;
1116 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1117 float32x4_t mx = mT[0].mVec128;
1118 float32x4_t my = mT[1].mVec128;
1119 float32x4_t mz = mT[2].mVec128;
1121 float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1122 float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1123 float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1124 r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1125 r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1126 r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1127 r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1128 r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1129 r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1130 return btMatrix3x3( r0, r1, r2 );
1134 m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1135 m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1136 m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1140 SIMD_FORCE_INLINE btVector3
1141 operator*(const btMatrix3x3& m, const btVector3& v)
1143 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1144 return v.dot3(m[0], m[1], m[2]);
1146 return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1151 SIMD_FORCE_INLINE btVector3
1152 operator*(const btVector3& v, const btMatrix3x3& m)
1154 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1156 const __m128 vv = v.mVec128;
1158 __m128 c0 = bt_splat_ps( vv, 0);
1159 __m128 c1 = bt_splat_ps( vv, 1);
1160 __m128 c2 = bt_splat_ps( vv, 2);
1162 c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1163 c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1164 c0 = _mm_add_ps(c0, c1);
1165 c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1167 return btVector3(_mm_add_ps(c0, c2));
1168 #elif defined(BT_USE_NEON)
1169 const float32x4_t vv = v.mVec128;
1170 const float32x2_t vlo = vget_low_f32(vv);
1171 const float32x2_t vhi = vget_high_f32(vv);
1173 float32x4_t c0, c1, c2;
1175 c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1176 c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1177 c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1179 c0 = vmulq_lane_f32(c0, vlo, 0);
1180 c1 = vmulq_lane_f32(c1, vlo, 1);
1181 c2 = vmulq_lane_f32(c2, vhi, 0);
1182 c0 = vaddq_f32(c0, c1);
1183 c0 = vaddq_f32(c0, c2);
1185 return btVector3(c0);
1187 return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1191 SIMD_FORCE_INLINE btMatrix3x3
1192 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1194 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1196 __m128 m10 = m1[0].mVec128;
1197 __m128 m11 = m1[1].mVec128;
1198 __m128 m12 = m1[2].mVec128;
1200 __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1202 __m128 c0 = bt_splat_ps( m10, 0);
1203 __m128 c1 = bt_splat_ps( m11, 0);
1204 __m128 c2 = bt_splat_ps( m12, 0);
1206 c0 = _mm_mul_ps(c0, m2v);
1207 c1 = _mm_mul_ps(c1, m2v);
1208 c2 = _mm_mul_ps(c2, m2v);
1210 m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1212 __m128 c0_1 = bt_splat_ps( m10, 1);
1213 __m128 c1_1 = bt_splat_ps( m11, 1);
1214 __m128 c2_1 = bt_splat_ps( m12, 1);
1216 c0_1 = _mm_mul_ps(c0_1, m2v);
1217 c1_1 = _mm_mul_ps(c1_1, m2v);
1218 c2_1 = _mm_mul_ps(c2_1, m2v);
1220 m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1222 c0 = _mm_add_ps(c0, c0_1);
1223 c1 = _mm_add_ps(c1, c1_1);
1224 c2 = _mm_add_ps(c2, c2_1);
1226 m10 = bt_splat_ps( m10, 2);
1227 m11 = bt_splat_ps( m11, 2);
1228 m12 = bt_splat_ps( m12, 2);
1230 m10 = _mm_mul_ps(m10, m2v);
1231 m11 = _mm_mul_ps(m11, m2v);
1232 m12 = _mm_mul_ps(m12, m2v);
1234 c0 = _mm_add_ps(c0, m10);
1235 c1 = _mm_add_ps(c1, m11);
1236 c2 = _mm_add_ps(c2, m12);
1238 return btMatrix3x3(c0, c1, c2);
1240 #elif defined(BT_USE_NEON)
1242 float32x4_t rv0, rv1, rv2;
1243 float32x4_t v0, v1, v2;
1244 float32x4_t mv0, mv1, mv2;
1250 mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1251 mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1252 mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1254 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1255 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1256 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1258 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1259 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1260 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1262 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1263 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1264 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1266 return btMatrix3x3(rv0, rv1, rv2);
1270 m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1271 m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1272 m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1277 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1279 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1280 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1281 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1282 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1283 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1284 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1285 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1286 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1287 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1291 /**@brief Equality operator between two matrices
1292 * It will test all elements are equal. */
1293 SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
1295 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1299 c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1300 c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1301 c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1303 c0 = _mm_and_ps(c0, c1);
1304 c0 = _mm_and_ps(c0, c2);
1306 return (0x7 == _mm_movemask_ps((__m128)c0));
1309 ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1310 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1311 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1315 ///for serialization
1316 struct btMatrix3x3FloatData
1318 btVector3FloatData m_el[3];
1321 ///for serialization
1322 struct btMatrix3x3DoubleData
1324 btVector3DoubleData m_el[3];
1330 SIMD_FORCE_INLINE void btMatrix3x3::serialize(struct btMatrix3x3Data& dataOut) const
1332 for (int i=0;i<3;i++)
1333 m_el[i].serialize(dataOut.m_el[i]);
1336 SIMD_FORCE_INLINE void btMatrix3x3::serializeFloat(struct btMatrix3x3FloatData& dataOut) const
1338 for (int i=0;i<3;i++)
1339 m_el[i].serializeFloat(dataOut.m_el[i]);
1343 SIMD_FORCE_INLINE void btMatrix3x3::deSerialize(const struct btMatrix3x3Data& dataIn)
1345 for (int i=0;i<3;i++)
1346 m_el[i].deSerialize(dataIn.m_el[i]);
1349 SIMD_FORCE_INLINE void btMatrix3x3::deSerializeFloat(const struct btMatrix3x3FloatData& dataIn)
1351 for (int i=0;i<3;i++)
1352 m_el[i].deSerializeFloat(dataIn.m_el[i]);
1355 SIMD_FORCE_INLINE void btMatrix3x3::deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn)
1357 for (int i=0;i<3;i++)
1358 m_el[i].deSerializeDouble(dataIn.m_el[i]);
1361 #endif //BT_MATRIX3x3_H