2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
15 #ifndef BT_MATRIX3x3_H
16 #define BT_MATRIX3x3_H
18 #include "btVector3.h"
19 #include "btQuaternion.h"
23 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
24 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
25 #define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))
28 #if defined(BT_USE_SSE)
29 #define v0000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
30 #define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
31 #define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
32 #define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0000) = {0.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
37 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
40 #ifdef BT_USE_DOUBLE_PRECISION
41 #define btMatrix3x3Data btMatrix3x3DoubleData
43 #define btMatrix3x3Data btMatrix3x3FloatData
44 #endif //BT_USE_DOUBLE_PRECISION
46 /**@brief The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with btQuaternion, btTransform and btVector3.
47 * Make sure to only include a pure orthogonal matrix without scaling. */
48 ATTRIBUTE_ALIGNED16(class)
51 ///Data storage for the matrix, each vector is a row of the matrix
55 /** @brief No initializaion constructor */
58 // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
/**@brief Constructor from Quaternion
 * @param q The rotation to initialize from; delegates to setRotation(q) */
explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
63 template <typename btScalar>
64 Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
66 setEulerYPR(yaw, pitch, roll);
/** @brief Constructor with row major formatting
 * @param xx..zz The nine matrix elements, given row by row
 * (constructor body elided in this view — presumably forwards to setValue; confirm in full source) */
btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
const btScalar& yx, const btScalar& yy, const btScalar& yz,
const btScalar& zx, const btScalar& zy, const btScalar& zz)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
// SIMD builds: rows are constructed/copied directly as 128-bit vectors.
SIMD_FORCE_INLINE btMatrix3x3(const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2)
SIMD_FORCE_INLINE btMatrix3x3(const btVector3& v0, const btVector3& v1, const btVector3& v2)
// Copy constructor: copies each row's raw SIMD register (includes the unused w lane).
SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
m_el[0].mVec128 = rhs.m_el[0].mVec128;
m_el[1].mVec128 = rhs.m_el[1].mVec128;
m_el[2].mVec128 = rhs.m_el[2].mVec128;
// Assignment Operator (SIMD path): register-wise row copy.
SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
m_el[0].mVec128 = m.m_el[0].mVec128;
m_el[1].mVec128 = m.m_el[1].mVec128;
m_el[2].mVec128 = m.m_el[2].mVec128;
/** @brief Copy constructor (non-SIMD path): element-wise row copy */
SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& other)
m_el[0] = other.m_el[0];
m_el[1] = other.m_el[1];
m_el[2] = other.m_el[2];
/** @brief Assignment Operator (non-SIMD path) */
SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
m_el[0] = other.m_el[0];
m_el[1] = other.m_el[1];
m_el[2] = other.m_el[2];
/** @brief Constructor from three row vectors (non-SIMD path; body elided in this view) */
SIMD_FORCE_INLINE btMatrix3x3(const btVector3& v0, const btVector3& v1, const btVector3& v2)
/** @brief Get a column of the matrix as a vector
 * @param i Column number 0 indexed
 * @return a new vector — columns are not stored contiguously, so this gathers
 * element i from each stored row */
SIMD_FORCE_INLINE btVector3 getColumn(int i) const
return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
/** @brief Get a row of the matrix as a vector (const reference, no copy)
 * @param i Row number 0 indexed */
SIMD_FORCE_INLINE const btVector3& getRow(int i) const
btFullAssert(0 <= i && i < 3);
/** @brief Get a mutable reference to a row of the matrix as a vector
 * @param i Row number 0 indexed */
SIMD_FORCE_INLINE btVector3& operator[](int i)
btFullAssert(0 <= i && i < 3);
/** @brief Get a const reference to a row of the matrix as a vector
 * @param i Row number 0 indexed */
SIMD_FORCE_INLINE const btVector3& operator[](int i) const
btFullAssert(0 <= i && i < 3);
/** @brief Multiply by the target matrix on the right
 * @param m Rotation matrix to be applied
 * Equivalent to this = this * m */
btMatrix3x3& operator*=(const btMatrix3x3& m);
/** @brief Adds the target matrix
 * @param m matrix to be applied
 * Equivalent to this = this + m */
btMatrix3x3& operator+=(const btMatrix3x3& m);
/** @brief Subtracts the target matrix
 * @param m matrix to be applied
 * Equivalent to this = this - m */
btMatrix3x3& operator-=(const btMatrix3x3& m);
/** @brief Set from the rotational part of a 4x4 OpenGL matrix
 * @param m A pointer to the beginning of the array of scalars
 * (OpenGL is column-major; elements 0..10 are read, translation/perspective ignored) */
void setFromOpenGLSubMatrix(const btScalar* m)
m_el[0].setValue(m[0], m[4], m[8]);
m_el[1].setValue(m[1], m[5], m[9]);
m_el[2].setValue(m[2], m[6], m[10]);
/** @brief Set the values of the matrix explicitly (row major)
 * @param xx Top Left
 * @param xy Top Middle
 * @param xz Top Right
 * @param yx Middle Left
 * @param yy Middle Middle
 * @param yz Middle Right
 * @param zx Bottom Left
 * @param zy Bottom Middle
 * @param zz Bottom Right*/
void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
const btScalar& yx, const btScalar& yy, const btScalar& yz,
const btScalar& zx, const btScalar& zy, const btScalar& zz)
m_el[0].setValue(xx, xy, xz);
m_el[1].setValue(yx, yy, yz);
m_el[2].setValue(zx, zy, zz);
/** @brief Set the matrix from a quaternion
 * @param q The Quaternion to match
 * Uses the standard quaternion-to-rotation-matrix expansion; s = 2/|q|^2 makes
 * the result correct even for non-unit quaternions (q must be non-zero). */
void setRotation(const btQuaternion& q)
btScalar d = q.length2();
btFullAssert(d != btScalar(0.0));
btScalar s = btScalar(2.0) / d;
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
// SSE path: builds sign-flipped shuffles of (x,y,z,w) so the three matrix rows
// can be formed with vector multiplies/adds. (Several intermediate steps are
// elided in this view.)
__m128 vs, Q = q.get128();
__m128i Qi = btCastfTo128i(Q);
__m128 V11, V21, V31;
__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);  // negate all lanes of q
__m128i NQi = btCastfTo128i(NQ);
V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));  // Y X Z W
V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));                 // -X -X Y W
V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));  // Z Y X W
V1 = _mm_xor_ps(V1, vMPPP);  // change the sign of the first element
V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));  // Y Y X W
V21 = _mm_unpackhi_ps(Q, Q);                                         // Z Z W W
V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));                 // X Z -X -W
V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));  // -Z -W Y W
V21 = _mm_xor_ps(V21, vMPPP);  // change the sign of the first element
V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));  // W W -Y -W
V31 = _mm_xor_ps(V31, vMPPP);  // change the sign of the first element
Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));  // -W -Z -X -W
Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));   // Y X Y W
vs = _mm_load_ss(&s);
vs = bt_splat3_ps(vs, 0);  // broadcast s into x,y,z lanes
// Scalar path: classic expansion of R(q) with s = 2/|q|^2.
btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
/** @brief Set the matrix from euler angles using YPR around YXZ respectively
 * @param yaw Yaw about Y axis
 * @param pitch Pitch about X axis
 * @param roll Roll about Z axis
 */
void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
// YPR(yaw, pitch, roll) is the same rotation as ZYX(roll, pitch, yaw):
// just a re-ordering of the arguments to setEulerZYX.
setEulerZYX(roll, pitch, yaw);
/** @brief Set the matrix from euler angles YPR around ZYX axes
 * @param eulerX Roll about X axis
 * @param eulerY Pitch around Y axis
 * @param eulerZ Yaw about Z axis
 *
 * These angles are used to produce a rotation matrix. The euler
 * angles are applied in ZYX order. I.e a vector is first rotated
 * about X then Y and then Z
 */
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
// c/s prefixes: i = X angle, j = Y angle, h = Z angle (heading).
btScalar ci(btCos(eulerX));
btScalar cj(btCos(eulerY));
btScalar ch(btCos(eulerZ));
btScalar si(btSin(eulerX));
btScalar sj(btSin(eulerY));
btScalar sh(btSin(eulerZ));
// Products reused across several matrix entries below.
btScalar cc = ci * ch;
btScalar cs = ci * sh;
btScalar sc = si * ch;
btScalar ss = si * sh;
// Closed form of Rz(eulerZ) * Ry(eulerY) * Rx(eulerX), row major.
setValue(cj * ch, sj * sc - cs, sj * cc + ss,
cj * sh, sj * ss + cc, sj * cs - sc,
-sj, cj * si, cj * ci);
/**@brief Set the matrix to the identity */
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
// SIMD path elided in this view (assigns the v1000/v0100/v0010 constants).
setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
btScalar(0.0), btScalar(1.0), btScalar(0.0),
btScalar(0.0), btScalar(0.0), btScalar(1.0));
/**@brief Set the matrix to all zeros (note: NOT the identity — every element is cleared) */
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
// SIMD path elided in this view (assigns the v0000 constant to each row).
setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0),
btScalar(0.0), btScalar(0.0), btScalar(0.0),
btScalar(0.0), btScalar(0.0), btScalar(0.0));
/**@brief Return a shared, lazily-initialized identity matrix.
 * The returned reference is to a function-local static; do not modify it. */
static const btMatrix3x3& getIdentity()
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
static const btMatrix3x3
identityMatrix(v1000, v0100, v0010);
static const btMatrix3x3
btScalar(1.0), btScalar(0.0), btScalar(0.0),
btScalar(0.0), btScalar(1.0), btScalar(0.0),
btScalar(0.0), btScalar(0.0), btScalar(1.0));
return identityMatrix;
/**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
 * @param m The array to be filled (column-major; elements 0..11 are written,
 * the 4th element of each column is set to 0) */
void getOpenGLSubMatrix(btScalar * m) const
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
// SSE path: transpose the three rows into columns with unpack/shuffle,
// zeroing the w lane so m[3]/m[7]/m[11] come out as 0.
__m128 v0 = m_el[0].mVec128;
__m128 v1 = m_el[1].mVec128;
__m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
__m128* vm = (__m128*)m;      // NOTE(review): assumes m is 16-byte aligned for the vector stores — confirm caller contract
v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0
vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1
v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0
#elif defined(BT_USE_NEON)
// note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
float32x4_t* vm = (float32x4_t*)m;
float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0 }, {y2 0}
float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0
// Scalar path: write each stored row down a GL column.
m[0] = btScalar(m_el[0].x());
m[1] = btScalar(m_el[1].x());
m[2] = btScalar(m_el[2].x());
m[3] = btScalar(0.0);
m[4] = btScalar(m_el[0].y());
m[5] = btScalar(m_el[1].y());
m[6] = btScalar(m_el[2].y());
m[7] = btScalar(0.0);
m[8] = btScalar(m_el[0].z());
m[9] = btScalar(m_el[1].z());
m[10] = btScalar(m_el[2].z());
m[11] = btScalar(0.0);
/**@brief Get the matrix represented as a quaternion
 * @param q The quaternion which will be set
 * Uses the standard trace-based matrix-to-quaternion conversion: when the trace
 * is positive, w is derived from it directly; otherwise the largest diagonal
 * element selects which of x/y/z is computed first for numerical stability.
 * Assumes the matrix is a pure rotation (orthonormal, no scaling). */
void getRotation(btQuaternion & q) const
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
// SIMD variant: same algorithm, assembling components into a temp union
// before a single store (several steps elided in this view).
btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
if (trace > btScalar(0.0))
x = trace + btScalar(1.0);
temp.f[0] = m_el[2].y() - m_el[1].z();
temp.f[1] = m_el[0].z() - m_el[2].x();
temp.f[2] = m_el[1].x() - m_el[0].y();
//temp.f[3]= s * btScalar(0.5);
// Pick i = index of the largest diagonal element; j, k follow cyclically.
if (m_el[0].x() < m_el[1].y())
if (m_el[1].y() < m_el[2].z())
if (m_el[0].x() < m_el[2].z())
x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
temp.f[3] = (m_el[k][j] - m_el[j][k]);
temp.f[j] = (m_el[j][i] + m_el[i][j]);
temp.f[k] = (m_el[k][i] + m_el[i][k]);
//temp.f[i] = s * btScalar(0.5);
s = btScalar(0.5) / s;
// Scalar variant of the same trace-based conversion.
btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
if (trace > btScalar(0.0))
btScalar s = btSqrt(trace + btScalar(1.0));
temp[3] = (s * btScalar(0.5));
s = btScalar(0.5) / s;
temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
temp[i] = s * btScalar(0.5);
s = btScalar(0.5) / s;
temp[3] = (m_el[k][j] - m_el[j][k]) * s;
temp[j] = (m_el[j][i] + m_el[i][j]) * s;
temp[k] = (m_el[k][i] + m_el[i][k]) * s;
q.setValue(temp[0], temp[1], temp[2], temp[3]);
/**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
 * @param yaw Yaw around Y axis
 * @param pitch Pitch around X axis
 * @param roll Roll around Z axis */
void getEulerYPR(btScalar & yaw, btScalar & pitch, btScalar & roll) const
// first use the normal calculus
yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
pitch = btScalar(btAsin(-m_el[2].x()));
roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
// on pitch = +/-HalfPI (gimbal lock): special-case handling follows in the
// full source — the branch body is elided in this view.
if (btFabs(pitch) == SIMD_HALF_PI)
/**@brief Get the matrix represented as euler angles around ZYX
 * @param yaw Yaw around Z axis
 * @param pitch Pitch around Y axis
 * @param roll Roll around X axis
 * @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values*/
void getEulerZYX(btScalar & yaw, btScalar & pitch, btScalar & roll, unsigned int solution_number = 1) const
euler_out2;  //second solution
//get the pointer to the raw data
// Check that pitch is not at a singularity (|m20| >= 1 means gimbal lock)
if (btFabs(m_el[2].x()) >= 1)
// At the singularity yaw is indeterminate; it is fixed (to 0 in the full
// source) and only the roll/pitch pair is solved.
// From difference of angles formula
btScalar delta = btAtan2(m_el[0].x(), m_el[0].z());
if (m_el[2].x() > 0)  //gimbal locked up
euler_out.pitch = SIMD_PI / btScalar(2.0);
euler_out2.pitch = SIMD_PI / btScalar(2.0);
euler_out.roll = euler_out.pitch + delta;
euler_out2.roll = euler_out.pitch + delta;
else  // gimbal locked down
euler_out.pitch = -SIMD_PI / btScalar(2.0);
euler_out2.pitch = -SIMD_PI / btScalar(2.0);
euler_out.roll = -euler_out.pitch + delta;
euler_out2.roll = -euler_out.pitch + delta;
// Non-singular case: two valid (pitch, roll, yaw) triples exist;
// the second uses the supplementary pitch angle.
euler_out.pitch = -btAsin(m_el[2].x());
euler_out2.pitch = SIMD_PI - euler_out.pitch;
euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
m_el[2].z() / btCos(euler_out.pitch));
euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
m_el[2].z() / btCos(euler_out2.pitch));
euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
m_el[0].x() / btCos(euler_out.pitch));
euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
m_el[0].x() / btCos(euler_out2.pitch));
// Return whichever of the two solutions the caller asked for.
if (solution_number == 1)
pitch = euler_out.pitch;
roll = euler_out.roll;
yaw = euler_out2.yaw;
pitch = euler_out2.pitch;
roll = euler_out2.roll;
/**@brief Create a scaled copy of the matrix
 * @param s Scaling vector The elements of the vector will scale each column */
btMatrix3x3 scaled(const btVector3& s) const
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
// Component-wise row * s is exactly per-column scaling.
return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
/**@brief Return the determinant of the matrix */
btScalar determinant() const;
/**@brief Return the adjoint of the matrix */
btMatrix3x3 adjoint() const;
/**@brief Return the matrix with all values non negative */
btMatrix3x3 absolute() const;
/**@brief Return the transpose of the matrix */
btMatrix3x3 transpose() const;
/**@brief Return the inverse of the matrix (caller must ensure it is invertible) */
btMatrix3x3 inverse() const;
/// Solve A * x = b, where b is a column vector. This is more efficient
/// than computing the inverse in one-shot cases.
///Solve33 is from Box2d, thanks to Erin Catto,
/// Uses Cramer's rule via scalar triple products of the columns.
btVector3 solve33(const btVector3& b) const
btVector3 col1 = getColumn(0);
btVector3 col2 = getColumn(1);
btVector3 col3 = getColumn(2);
btScalar det = btDot(col1, btCross(col2, col3));
if (btFabs(det) > SIMD_EPSILON)
// NOTE(review): the lines below multiply by det, so det must have been
// replaced by its reciprocal (det = 1/det) on a line elided from this
// view — confirm against the full source.
x[0] = det * btDot(b, btCross(col2, col3));
x[1] = det * btDot(col1, btCross(b, col3));
x[2] = det * btDot(col1, btCross(col2, b));
btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
/**@brief Dot product of v with column 0 of this matrix (i.e. transpose(this) row 0 . v) */
SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const
return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
/**@brief Dot product of v with column 1 of this matrix */
SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const
return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
/**@brief Dot product of v with column 2 of this matrix */
SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const
return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
///extractRotation is from "A robust method to extract the rotational part of deformations"
///See http://dl.acm.org/citation.cfm?doid=2994258.2994269
///decomposes a matrix A in a orthogonal matrix R and a
///symmetric matrix S:
///note that R can include both rotation and scaling.
/// @param q in/out: initial guess on entry, extracted rotation on exit
/// @param tolerance convergence threshold on the incremental rotation angle
/// @param maxIter iteration cap for the fixed-point loop
SIMD_FORCE_INLINE void extractRotation(btQuaternion & q, btScalar tolerance = 1.0e-9, int maxIter = 100)
const btMatrix3x3& A = *this;
// Iteratively rotate R toward A; omega is the correction axis*angle per step
// (loop body partially elided in this view).
for (iter = 0; iter < maxIter; iter++)
btVector3 omega = (R.getColumn(0).cross(A.getColumn(0)) + R.getColumn(1).cross(A.getColumn(1)) + R.getColumn(2).cross(A.getColumn(2))) * (btScalar(1.0) / btFabs(R.getColumn(0).dot(A.getColumn(0)) + R.getColumn(1).dot(A.getColumn(1)) + R.getColumn(2).dot(A.getColumn(2))) +
q = btQuaternion(btVector3((btScalar(1.0) / w) * omega), w) *
/**@brief diagonalizes this matrix by the Jacobi method.
 * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
 * coordinate system, i.e., old_this = rot * new_this * rot^T.
 * @param threshold See iteration
 * @param maxSteps The iteration stops when all off-diagonal elements are less than the threshold multiplied
 * by the sum of the absolute values of the diagonal, or when maxSteps have been executed.
 *
 * Note that this matrix is assumed to be symmetric.
 */
void diagonalize(btMatrix3x3 & rot, btScalar threshold, int maxSteps)
for (int step = maxSteps; step > 0; step--)
// find off-diagonal element [p][q] with largest magnitude
btScalar max = btFabs(m_el[0][1]);
btScalar v = btFabs(m_el[0][2]);
v = btFabs(m_el[1][2]);
// Convergence test: stop when the largest off-diagonal element is
// negligible relative to the diagonal magnitude.
btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
if (max <= SIMD_EPSILON * t)
// compute Jacobi rotation J which leads to a zero for element [p][q]
btScalar mpq = m_el[p][q];
btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
btScalar theta2 = theta * theta;
if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
// Standard Jacobi tangent formula, sign chosen for stability.
t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
: 1 / (theta - btSqrt(1 + theta2));
cos = 1 / btSqrt(1 + t * t);
// approximation for large theta-value, i.e., a nearly diagonal matrix
t = 1 / (theta * (2 + btScalar(0.5) / theta2));
cos = 1 - btScalar(0.5) * t * t;
// apply rotation to matrix (this = J^T * this * J)
m_el[p][q] = m_el[q][p] = 0;
m_el[p][p] -= t * mpq;
m_el[q][q] += t * mpq;
btScalar mrp = m_el[r][p];
btScalar mrq = m_el[r][q];
m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
// apply rotation to rot (rot = rot * J)
for (int i = 0; i < 3; i++)
btVector3& row = rot[i];
row[p] = cos * mrp - sin * mrq;
row[q] = cos * mrq + sin * mrp;
/**@brief Calculate the matrix cofactor
 * @param r1 The first row to use for calculating the cofactor
 * @param c1 The first column to use for calculating the cofactor
 * @param r2 The second row to use for calculating the cofactor
 * @param c2 The second column to use for calculating the cofactor
 * See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
 * @return the 2x2 determinant m[r1][c1]*m[r2][c2] - m[r1][c2]*m[r2][c1]
 */
btScalar cofac(int r1, int c1, int r2, int c2) const
return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
/**@brief Serialize into the precision-dependent data layout (float or double build) */
void serialize(struct btMatrix3x3Data & dataOut) const;
/**@brief Serialize into the float layout regardless of build precision */
void serializeFloat(struct btMatrix3x3FloatData & dataOut) const;
/**@brief Restore from the precision-dependent data layout */
void deSerialize(const struct btMatrix3x3Data& dataIn);
/**@brief Restore from float data (converting if this is a double build) */
void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
/**@brief Restore from double data (converting if this is a float build) */
void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
/**@brief In-place matrix multiply: this = this * m (see class declaration). */
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator*=(const btMatrix3x3& m)
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
// SSE path: each result row i is sum_k this[i][k] * m[k], computed by
// broadcasting each element of row i and multiplying against m's rows.
__m128 rv00, rv01, rv02;
__m128 rv10, rv11, rv12;
__m128 rv20, rv21, rv22;
__m128 mv0, mv1, mv2;
rv02 = m_el[0].mVec128;
rv12 = m_el[1].mVec128;
rv22 = m_el[2].mVec128;
// Mask off the w lane of m's rows so it cannot pollute the sums.
mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
// Row 0
rv00 = bt_splat_ps(rv02, 0);
rv01 = bt_splat_ps(rv02, 1);
rv02 = bt_splat_ps(rv02, 2);
rv00 = _mm_mul_ps(rv00, mv0);
rv01 = _mm_mul_ps(rv01, mv1);
rv02 = _mm_mul_ps(rv02, mv2);
// Row 1
rv10 = bt_splat_ps(rv12, 0);
rv11 = bt_splat_ps(rv12, 1);
rv12 = bt_splat_ps(rv12, 2);
rv10 = _mm_mul_ps(rv10, mv0);
rv11 = _mm_mul_ps(rv11, mv1);
rv12 = _mm_mul_ps(rv12, mv2);
// Row 2
rv20 = bt_splat_ps(rv22, 0);
rv21 = bt_splat_ps(rv22, 1);
rv22 = bt_splat_ps(rv22, 2);
rv20 = _mm_mul_ps(rv20, mv0);
rv21 = _mm_mul_ps(rv21, mv1);
rv22 = _mm_mul_ps(rv22, mv2);
rv00 = _mm_add_ps(rv00, rv01);
rv10 = _mm_add_ps(rv10, rv11);
rv20 = _mm_add_ps(rv20, rv21);
m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
#elif defined(BT_USE_NEON)
// NEON path: same row-broadcast scheme using multiply-accumulate by lane.
float32x4_t rv0, rv1, rv2;
float32x4_t v0, v1, v2;
float32x4_t mv0, mv1, mv2;
v0 = m_el[0].mVec128;
v1 = m_el[1].mVec128;
v2 = m_el[2].mVec128;
mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
m_el[0].mVec128 = rv0;
m_el[1].mVec128 = rv1;
m_el[2].mVec128 = rv2;
// Scalar path: result[i][j] = row i of this dotted with column j of m.
m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
/**@brief In-place element-wise addition: this = this + m. */
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator+=(const btMatrix3x3& m)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
// Scalar path: per-element sums.
m_el[0][0] + m.m_el[0][0],
m_el[0][1] + m.m_el[0][1],
m_el[0][2] + m.m_el[0][2],
m_el[1][0] + m.m_el[1][0],
m_el[1][1] + m.m_el[1][1],
m_el[1][2] + m.m_el[1][2],
m_el[2][0] + m.m_el[2][0],
m_el[2][1] + m.m_el[2][1],
m_el[2][2] + m.m_el[2][2]);
/**@brief Return m with every element scaled by k. */
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m, const btScalar& k)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
// Broadcast k across all four lanes, then one multiply per row.
__m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
_mm_mul_ps(m[0].mVec128, vk),
_mm_mul_ps(m[1].mVec128, vk),
_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
vmulq_n_f32(m[0].mVec128, k),
vmulq_n_f32(m[1].mVec128, k),
vmulq_n_f32(m[2].mVec128, k));
m[0].x() * k, m[0].y() * k, m[0].z() * k,
m[1].x() * k, m[1].y() * k, m[1].z() * k,
m[2].x() * k, m[2].y() * k, m[2].z() * k);
/**@brief Return the element-wise sum m1 + m2. */
SIMD_FORCE_INLINE btMatrix3x3
operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
m1[0].mVec128 + m2[0].mVec128,
m1[1].mVec128 + m2[1].mVec128,
m1[2].mVec128 + m2[2].mVec128);
// Scalar path elided in this view except the final element.
m1[2][2] + m2[2][2]);
/**@brief Return the element-wise difference m1 - m2. */
SIMD_FORCE_INLINE btMatrix3x3
operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
m1[0].mVec128 - m2[0].mVec128,
m1[1].mVec128 - m2[1].mVec128,
m1[2].mVec128 - m2[2].mVec128);
// Scalar path elided in this view except the final element.
m1[2][2] - m2[2][2]);
/**@brief In-place element-wise subtraction: this = this - m. */
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator-=(const btMatrix3x3& m)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
// Scalar path: per-element differences.
m_el[0][0] - m.m_el[0][0],
m_el[0][1] - m.m_el[0][1],
m_el[0][2] - m.m_el[0][2],
m_el[1][0] - m.m_el[1][0],
m_el[1][1] - m.m_el[1][1],
m_el[1][2] - m.m_el[1][2],
m_el[2][0] - m.m_el[2][0],
m_el[2][1] - m.m_el[2][1],
m_el[2][2] - m.m_el[2][2]);
/**@brief Determinant as the scalar triple product of the three rows. */
SIMD_FORCE_INLINE btScalar
btMatrix3x3::determinant() const
return btTriple((*this)[0], (*this)[1], (*this)[2]);
/**@brief Return a copy with every element replaced by its absolute value. */
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::absolute() const
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
// Clearing the sign bit with a mask is |x| for IEEE floats.
_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
/**@brief Return the transpose. SIMD paths zero the w lane of each result row. */
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transpose() const
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
// SSE 3x3 transpose via unpack/shuffle.
__m128 v0 = m_el[0].mVec128;
__m128 v1 = m_el[1].mVec128;
__m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0
vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1
v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0
return btMatrix3x3(v0, v1, v2);
#elif defined(BT_USE_NEON)
// note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0 }, {y2 0}
float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0
return btMatrix3x3(v0, v1, v2);
// Scalar path: rows of the result are the columns of this.
return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
m_el[0].y(), m_el[1].y(), m_el[2].y(),
m_el[0].z(), m_el[1].z(), m_el[2].z());
/**@brief Adjugate (classical adjoint): matrix of cofactors, transposed. */
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::adjoint() const
return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
/**@brief Inverse via the adjugate divided by the determinant.
 * Asserts (debug builds only) that the matrix is invertible; a singular
 * matrix in release builds divides by zero. */
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::inverse() const
// co holds the first column of cofactors; row0 . co is the determinant.
btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
btScalar det = (*this)[0].dot(co);
//btFullAssert(det != btScalar(0.0));
btAssert(det != btScalar(0.0));
btScalar s = btScalar(1.0) / det;
return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
/**@brief Return transpose(this) * m without materializing the transpose. */
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
// SSE path: accumulate m's rows weighted by the broadcast elements of each
// row of this (w lanes masked to zero first).
// static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
__m128 row = m_el[0].mVec128;
__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
row = m_el[1].mVec128;
r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
row = m_el[2].mVec128;
r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
// NEON path: same accumulation using multiply-accumulate by lane.
static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
float32x4_t row = m_el[0].mVec128;
float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
row = m_el[1].mVec128;
r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
row = m_el[2].mVec128;
r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
return btMatrix3x3(r0, r1, r2);
// Scalar path: result[i][j] = column i of this . column j of m.
m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1161 SIMD_FORCE_INLINE btMatrix3x3
1162 btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
1164 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1165 __m128 a0 = m_el[0].mVec128;
1166 __m128 a1 = m_el[1].mVec128;
1167 __m128 a2 = m_el[2].mVec128;
1169 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1170 __m128 mx = mT[0].mVec128;
1171 __m128 my = mT[1].mVec128;
1172 __m128 mz = mT[2].mVec128;
1174 __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1175 __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1176 __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1177 r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1178 r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1179 r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1180 r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1181 r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1182 r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1183 return btMatrix3x3(r0, r1, r2);
1185 #elif defined BT_USE_NEON
1186 float32x4_t a0 = m_el[0].mVec128;
1187 float32x4_t a1 = m_el[1].mVec128;
1188 float32x4_t a2 = m_el[2].mVec128;
1190 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1191 float32x4_t mx = mT[0].mVec128;
1192 float32x4_t my = mT[1].mVec128;
1193 float32x4_t mz = mT[2].mVec128;
1195 float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
1196 float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
1197 float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
1198 r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
1199 r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
1200 r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
1201 r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
1202 r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
1203 r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
1204 return btMatrix3x3(r0, r1, r2);
1208 m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1209 m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1210 m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1214 SIMD_FORCE_INLINE btVector3
1215 operator*(const btMatrix3x3& m, const btVector3& v)
1217 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
1218 return v.dot3(m[0], m[1], m[2]);
1220 return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1224 SIMD_FORCE_INLINE btVector3
1225 operator*(const btVector3& v, const btMatrix3x3& m)
1227 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1229 const __m128 vv = v.mVec128;
1231 __m128 c0 = bt_splat_ps(vv, 0);
1232 __m128 c1 = bt_splat_ps(vv, 1);
1233 __m128 c2 = bt_splat_ps(vv, 2);
1235 c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
1236 c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
1237 c0 = _mm_add_ps(c0, c1);
1238 c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));
1240 return btVector3(_mm_add_ps(c0, c2));
1241 #elif defined(BT_USE_NEON)
1242 const float32x4_t vv = v.mVec128;
1243 const float32x2_t vlo = vget_low_f32(vv);
1244 const float32x2_t vhi = vget_high_f32(vv);
1246 float32x4_t c0, c1, c2;
1248 c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1249 c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1250 c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1252 c0 = vmulq_lane_f32(c0, vlo, 0);
1253 c1 = vmulq_lane_f32(c1, vlo, 1);
1254 c2 = vmulq_lane_f32(c2, vhi, 0);
1255 c0 = vaddq_f32(c0, c1);
1256 c0 = vaddq_f32(c0, c2);
1258 return btVector3(c0);
1260 return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1264 SIMD_FORCE_INLINE btMatrix3x3
1265 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1267 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1269 __m128 m10 = m1[0].mVec128;
1270 __m128 m11 = m1[1].mVec128;
1271 __m128 m12 = m1[2].mVec128;
1273 __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1275 __m128 c0 = bt_splat_ps(m10, 0);
1276 __m128 c1 = bt_splat_ps(m11, 0);
1277 __m128 c2 = bt_splat_ps(m12, 0);
1279 c0 = _mm_mul_ps(c0, m2v);
1280 c1 = _mm_mul_ps(c1, m2v);
1281 c2 = _mm_mul_ps(c2, m2v);
1283 m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1285 __m128 c0_1 = bt_splat_ps(m10, 1);
1286 __m128 c1_1 = bt_splat_ps(m11, 1);
1287 __m128 c2_1 = bt_splat_ps(m12, 1);
1289 c0_1 = _mm_mul_ps(c0_1, m2v);
1290 c1_1 = _mm_mul_ps(c1_1, m2v);
1291 c2_1 = _mm_mul_ps(c2_1, m2v);
1293 m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1295 c0 = _mm_add_ps(c0, c0_1);
1296 c1 = _mm_add_ps(c1, c1_1);
1297 c2 = _mm_add_ps(c2, c2_1);
1299 m10 = bt_splat_ps(m10, 2);
1300 m11 = bt_splat_ps(m11, 2);
1301 m12 = bt_splat_ps(m12, 2);
1303 m10 = _mm_mul_ps(m10, m2v);
1304 m11 = _mm_mul_ps(m11, m2v);
1305 m12 = _mm_mul_ps(m12, m2v);
1307 c0 = _mm_add_ps(c0, m10);
1308 c1 = _mm_add_ps(c1, m11);
1309 c2 = _mm_add_ps(c2, m12);
1311 return btMatrix3x3(c0, c1, c2);
1313 #elif defined(BT_USE_NEON)
1315 float32x4_t rv0, rv1, rv2;
1316 float32x4_t v0, v1, v2;
1317 float32x4_t mv0, mv1, mv2;
1323 mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1324 mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1325 mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1327 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1328 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1329 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1331 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1332 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1333 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1335 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1336 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1337 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1339 return btMatrix3x3(rv0, rv1, rv2);
1343 m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
1344 m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
1345 m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
1350 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1352 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1353 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1354 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1355 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1356 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1357 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1358 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1359 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1360 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1364 /**@brief Equality operator between two matrices
1365 * It will test all elements are equal. */
1366 SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
1368 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1372 c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1373 c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1374 c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1376 c0 = _mm_and_ps(c0, c1);
1377 c0 = _mm_and_ps(c0, c2);
1379 int m = _mm_movemask_ps((__m128)c0);
1380 return (0x7 == (m & 0x7));
1383 return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1384 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1385 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
1389 ///for serialization
1390 struct btMatrix3x3FloatData
1392 btVector3FloatData m_el[3];
1395 ///for serialization
1396 struct btMatrix3x3DoubleData
1398 btVector3DoubleData m_el[3];
1401 SIMD_FORCE_INLINE void btMatrix3x3::serialize(struct btMatrix3x3Data& dataOut) const
1403 for (int i = 0; i < 3; i++)
1404 m_el[i].serialize(dataOut.m_el[i]);
1407 SIMD_FORCE_INLINE void btMatrix3x3::serializeFloat(struct btMatrix3x3FloatData& dataOut) const
1409 for (int i = 0; i < 3; i++)
1410 m_el[i].serializeFloat(dataOut.m_el[i]);
1413 SIMD_FORCE_INLINE void btMatrix3x3::deSerialize(const struct btMatrix3x3Data& dataIn)
1415 for (int i = 0; i < 3; i++)
1416 m_el[i].deSerialize(dataIn.m_el[i]);
1419 SIMD_FORCE_INLINE void btMatrix3x3::deSerializeFloat(const struct btMatrix3x3FloatData& dataIn)
1421 for (int i = 0; i < 3; i++)
1422 m_el[i].deSerializeFloat(dataIn.m_el[i]);
1425 SIMD_FORCE_INLINE void btMatrix3x3::deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn)
1427 for (int i = 0; i < 3; i++)
1428 m_el[i].deSerializeDouble(dataIn.m_el[i]);
1431 #endif //BT_MATRIX3x3_H