2 Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
5 Redistribution and use in source and binary forms,
6 with or without modification, are permitted provided that the
7 following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the Sony Computer Entertainment Inc nor the names
14 of its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
31 #ifndef _VECTORMATH_MAT_AOS_CPP_H
32 #define _VECTORMATH_MAT_AOS_CPP_H
34 namespace Vectormath {
37 //-----------------------------------------------------------------------------
39 // for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
41 #define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
42 #define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
43 #define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
44 #define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
45 #define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })
46 #define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
47 #define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
48 #define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
49 #define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
50 #define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
51 #define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
52 #define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
53 #define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
54 #define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
55 #define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
56 #define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
57 #define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
58 #define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
59 #define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
60 #define _VECTORMATH_PI_OVER_2 1.570796327f
62 //-----------------------------------------------------------------------------
65 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Matrix3 & mat )
72 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( float scalar )
74 mCol0 = Vector3( scalar );
75 mCol1 = Vector3( scalar );
76 mCol2 = Vector3( scalar );
79 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const floatInVec &scalar )
81 mCol0 = Vector3( scalar );
82 mCol1 = Vector3( scalar );
83 mCol2 = Vector3( scalar );
86 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Quat &unitQuat )
88 __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
89 __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
90 VM_ATTRIBUTE_ALIGN16 unsigned int sx[4] = {0xffffffff, 0, 0, 0};
91 VM_ATTRIBUTE_ALIGN16 unsigned int sz[4] = {0, 0, 0xffffffff, 0};
92 __m128 select_x = _mm_load_ps((float *)sx);
93 __m128 select_z = _mm_load_ps((float *)sz);
95 xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() );
96 wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) );
97 yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) );
98 zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) );
99 yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
100 zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
102 tmp0 = _mm_mul_ps( yzxw_2, wwww ); // tmp0 = 2yw, 2zw, 2xw, 2w2
103 tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) ); // tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
104 tmp2 = _mm_mul_ps( yzxw, xyzw_2 ); // tmp2 = 2xy, 2yz, 2xz, 2w2
105 tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 ); // tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
106 tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) ); // tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
107 tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) ); // tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
109 tmp3 = vec_sel( tmp0, tmp1, select_x );
110 tmp4 = vec_sel( tmp1, tmp2, select_x );
111 tmp5 = vec_sel( tmp2, tmp0, select_x );
112 mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
113 mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
114 mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
117 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 )
124 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol0( const Vector3 &_col0 )
130 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol1( const Vector3 &_col1 )
136 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol2( const Vector3 &_col2 )
142 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol( int col, const Vector3 &vec )
144 *(&mCol0 + col) = vec;
148 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setRow( int row, const Vector3 &vec )
150 mCol0.setElem( row, vec.getElem( 0 ) );
151 mCol1.setElem( row, vec.getElem( 1 ) );
152 mCol2.setElem( row, vec.getElem( 2 ) );
156 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, float val )
158 (*this)[col].setElem(row, val);
162 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val )
165 tmpV3_0 = this->getCol( col );
166 tmpV3_0.setElem( row, val );
167 this->setCol( col, tmpV3_0 );
171 VECTORMATH_FORCE_INLINE const floatInVec Matrix3::getElem( int col, int row ) const
173 return this->getCol( col ).getElem( row );
176 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol0( ) const
181 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol1( ) const
186 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol2( ) const
191 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol( int col ) const
193 return *(&mCol0 + col);
196 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getRow( int row ) const
198 return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
201 VECTORMATH_FORCE_INLINE Vector3 & Matrix3::operator []( int col )
203 return *(&mCol0 + col);
206 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator []( int col ) const
208 return *(&mCol0 + col);
211 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator =( const Matrix3 & mat )
219 VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat )
221 __m128 tmp0, tmp1, res0, res1, res2;
222 tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
223 tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
224 res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
225 //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
226 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
227 res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
228 res1 = vec_sel(res1, mat.getCol1().get128(), select_y);
229 //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
230 res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
231 res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y);
239 VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat )
241 __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
242 tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
243 tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
244 tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
245 dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
246 dot = vec_splat( dot, 0 );
247 invdet = recipf4( dot );
248 tmp3 = vec_mergeh( tmp0, tmp2 );
249 tmp4 = vec_mergel( tmp0, tmp2 );
250 inv0 = vec_mergeh( tmp3, tmp1 );
251 //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
252 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
253 inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
254 inv1 = vec_sel(inv1, tmp1, select_y);
255 //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
256 inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
257 inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
258 inv0 = vec_mul( inv0, invdet );
259 inv1 = vec_mul( inv1, invdet );
260 inv2 = vec_mul( inv2, invdet );
268 VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat )
270 return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
273 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
276 ( mCol0 + mat.mCol0 ),
277 ( mCol1 + mat.mCol1 ),
278 ( mCol2 + mat.mCol2 )
282 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
285 ( mCol0 - mat.mCol0 ),
286 ( mCol1 - mat.mCol1 ),
287 ( mCol2 - mat.mCol2 )
291 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
297 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
303 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( ) const
312 VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat )
315 absPerElem( mat.getCol0() ),
316 absPerElem( mat.getCol1() ),
317 absPerElem( mat.getCol2() )
321 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( float scalar ) const
323 return *this * floatInVec(scalar);
326 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const
335 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( float scalar )
337 return *this *= floatInVec(scalar);
340 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const floatInVec &scalar )
342 *this = *this * scalar;
346 VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat )
348 return floatInVec(scalar) * mat;
351 VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat )
356 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator *( const Vector3 &vec ) const
359 __m128 xxxx, yyyy, zzzz;
360 xxxx = vec_splat( vec.get128(), 0 );
361 yyyy = vec_splat( vec.get128(), 1 );
362 zzzz = vec_splat( vec.get128(), 2 );
363 res = vec_mul( mCol0.get128(), xxxx );
364 res = vec_madd( mCol1.get128(), yyyy, res );
365 res = vec_madd( mCol2.get128(), zzzz, res );
366 return Vector3( res );
369 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
372 ( *this * mat.mCol0 ),
373 ( *this * mat.mCol1 ),
374 ( *this * mat.mCol2 )
378 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
384 VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
387 mulPerElem( mat0.getCol0(), mat1.getCol0() ),
388 mulPerElem( mat0.getCol1(), mat1.getCol1() ),
389 mulPerElem( mat0.getCol2(), mat1.getCol2() )
393 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::identity( )
402 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( float radians )
404 return rotationX( floatInVec(radians) );
407 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( const floatInVec &radians )
409 __m128 s, c, res1, res2;
411 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
412 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
413 zero = _mm_setzero_ps();
414 sincosf4( radians.get128(), &s, &c );
415 res1 = vec_sel( zero, c, select_y );
416 res1 = vec_sel( res1, s, select_z );
417 res2 = vec_sel( zero, negatef4(s), select_y );
418 res2 = vec_sel( res2, c, select_z );
426 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( float radians )
428 return rotationY( floatInVec(radians) );
431 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( const floatInVec &radians )
433 __m128 s, c, res0, res2;
435 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
436 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
437 zero = _mm_setzero_ps();
438 sincosf4( radians.get128(), &s, &c );
439 res0 = vec_sel( zero, c, select_x );
440 res0 = vec_sel( res0, negatef4(s), select_z );
441 res2 = vec_sel( zero, s, select_x );
442 res2 = vec_sel( res2, c, select_z );
450 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( float radians )
452 return rotationZ( floatInVec(radians) );
455 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( const floatInVec &radians )
457 __m128 s, c, res0, res1;
459 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
460 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
461 zero = _mm_setzero_ps();
462 sincosf4( radians.get128(), &s, &c );
463 res0 = vec_sel( zero, c, select_x );
464 res0 = vec_sel( res0, s, select_y );
465 res1 = vec_sel( zero, negatef4(s), select_x );
466 res1 = vec_sel( res1, c, select_y );
474 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ )
476 __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
477 angles = Vector4( radiansXYZ, 0.0f ).get128();
478 sincosf4( angles, &s, &c );
479 negS = negatef4( s );
480 Z0 = vec_mergel( c, s );
481 Z1 = vec_mergel( negS, c );
482 VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
483 Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
484 Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
485 Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
486 X0 = vec_splat( s, 0 );
487 X1 = vec_splat( c, 0 );
488 tmp = vec_mul( Z0, Y1 );
490 Vector3( vec_mul( Z0, Y0 ) ),
491 Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
492 Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) )
496 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec )
498 return rotation( floatInVec(radians), unitVec );
501 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
503 __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
504 axis = unitVec.get128();
505 sincosf4( radians.get128(), &s, &c );
506 xxxx = vec_splat( axis, 0 );
507 yyyy = vec_splat( axis, 1 );
508 zzzz = vec_splat( axis, 2 );
509 oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
510 axisS = vec_mul( axis, s );
511 negAxisS = negatef4( axisS );
512 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
513 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
514 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
515 //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
516 tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
517 tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
518 //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
519 tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
520 //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
521 tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
522 tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
523 tmp0 = vec_sel( tmp0, c, select_x );
524 tmp1 = vec_sel( tmp1, c, select_y );
525 tmp2 = vec_sel( tmp2, c, select_z );
527 Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
528 Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
529 Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
533 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const Quat &unitQuat )
535 return Matrix3( unitQuat );
538 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::scale( const Vector3 &scaleVec )
540 __m128 zero = _mm_setzero_ps();
541 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
542 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
543 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
545 Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
546 Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
547 Vector3( vec_sel( zero, scaleVec.get128(), select_z ) )
551 VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec )
554 ( mat.getCol0() * scaleVec.getX( ) ),
555 ( mat.getCol1() * scaleVec.getY( ) ),
556 ( mat.getCol2() * scaleVec.getZ( ) )
560 VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat )
563 mulPerElem( mat.getCol0(), scaleVec ),
564 mulPerElem( mat.getCol1(), scaleVec ),
565 mulPerElem( mat.getCol2(), scaleVec )
569 VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
572 select( mat0.getCol0(), mat1.getCol0(), select1 ),
573 select( mat0.getCol1(), mat1.getCol1(), select1 ),
574 select( mat0.getCol2(), mat1.getCol2(), select1 )
578 VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 )
581 select( mat0.getCol0(), mat1.getCol0(), select1 ),
582 select( mat0.getCol1(), mat1.getCol1(), select1 ),
583 select( mat0.getCol2(), mat1.getCol2(), select1 )
587 #ifdef _VECTORMATH_DEBUG
589 VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat )
591 print( mat.getRow( 0 ) );
592 print( mat.getRow( 1 ) );
593 print( mat.getRow( 2 ) );
596 VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name )
598 printf("%s:\n", name);
604 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix4 & mat )
612 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( float scalar )
614 mCol0 = Vector4( scalar );
615 mCol1 = Vector4( scalar );
616 mCol2 = Vector4( scalar );
617 mCol3 = Vector4( scalar );
620 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const floatInVec &scalar )
622 mCol0 = Vector4( scalar );
623 mCol1 = Vector4( scalar );
624 mCol2 = Vector4( scalar );
625 mCol3 = Vector4( scalar );
628 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Transform3 & mat )
630 mCol0 = Vector4( mat.getCol0(), 0.0f );
631 mCol1 = Vector4( mat.getCol1(), 0.0f );
632 mCol2 = Vector4( mat.getCol2(), 0.0f );
633 mCol3 = Vector4( mat.getCol3(), 1.0f );
636 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 )
644 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec )
646 mCol0 = Vector4( mat.getCol0(), 0.0f );
647 mCol1 = Vector4( mat.getCol1(), 0.0f );
648 mCol2 = Vector4( mat.getCol2(), 0.0f );
649 mCol3 = Vector4( translateVec, 1.0f );
652 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec )
655 mat = Matrix3( unitQuat );
656 mCol0 = Vector4( mat.getCol0(), 0.0f );
657 mCol1 = Vector4( mat.getCol1(), 0.0f );
658 mCol2 = Vector4( mat.getCol2(), 0.0f );
659 mCol3 = Vector4( translateVec, 1.0f );
662 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol0( const Vector4 &_col0 )
668 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol1( const Vector4 &_col1 )
674 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol2( const Vector4 &_col2 )
680 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol3( const Vector4 &_col3 )
686 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol( int col, const Vector4 &vec )
688 *(&mCol0 + col) = vec;
692 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setRow( int row, const Vector4 &vec )
694 mCol0.setElem( row, vec.getElem( 0 ) );
695 mCol1.setElem( row, vec.getElem( 1 ) );
696 mCol2.setElem( row, vec.getElem( 2 ) );
697 mCol3.setElem( row, vec.getElem( 3 ) );
701 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, float val )
703 (*this)[col].setElem(row, val);
707 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val )
710 tmpV3_0 = this->getCol( col );
711 tmpV3_0.setElem( row, val );
712 this->setCol( col, tmpV3_0 );
716 VECTORMATH_FORCE_INLINE const floatInVec Matrix4::getElem( int col, int row ) const
718 return this->getCol( col ).getElem( row );
721 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol0( ) const
726 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol1( ) const
731 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol2( ) const
736 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol3( ) const
741 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol( int col ) const
743 return *(&mCol0 + col);
746 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getRow( int row ) const
748 return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
751 VECTORMATH_FORCE_INLINE Vector4 & Matrix4::operator []( int col )
753 return *(&mCol0 + col);
756 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator []( int col ) const
758 return *(&mCol0 + col);
761 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator =( const Matrix4 & mat )
770 VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat )
772 __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
773 tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
774 tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
775 tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
776 tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
777 res0 = vec_mergeh( tmp0, tmp1 );
778 res1 = vec_mergel( tmp0, tmp1 );
779 res2 = vec_mergeh( tmp2, tmp3 );
780 res3 = vec_mergel( tmp2, tmp3 );
790 static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
791 static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000};
792 static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f};
794 VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat )
797 __m128 r1,r2,r3,tt,tt2;
799 __m128 trns0,trns1,trns2,trns3;
801 __m128 _L1 = mat.getCol0().get128();
802 __m128 _L2 = mat.getCol1().get128();
803 __m128 _L3 = mat.getCol2().get128();
804 __m128 _L4 = mat.getCol3().get128();
805 // Calculating the minterms for the first line.
807 // _mm_ror_ps is just a macro using _mm_shuffle_ps().
808 tt = _L4; tt2 = _mm_ror_ps(_L3,1);
809 Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3'dot V4
810 Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3'dot V4"
811 Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3' dot V4^
813 r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3" dot V4^ - V3^ dot V4"
814 r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^ dot V4' - V3' dot V4^
815 r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3' dot V4" - V3" dot V4'
818 Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1);
819 Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
820 Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
822 // Calculating the determinant.
823 Det = _mm_mul_ps(sum,_L1);
824 Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
826 const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
827 const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
829 __m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
831 // Calculating the minterms of the second line (using previous results).
832 tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1);
833 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
834 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
835 __m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
837 // Testing the determinant.
838 Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
840 // Calculating the minterms of the third line.
841 tt = _mm_ror_ps(_L1,1);
842 Va = _mm_mul_ps(tt,Vb); // V1' dot V2"
843 Vb = _mm_mul_ps(tt,Vc); // V1' dot V2^
844 Vc = _mm_mul_ps(tt,_L2); // V1' dot V2
846 r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V1" dot V2^ - V1^ dot V2"
847 r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V1^ dot V2' - V1' dot V2^
848 r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V1' dot V2" - V1" dot V2'
850 tt = _mm_ror_ps(_L4,1); sum = _mm_mul_ps(tt,r1);
851 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
852 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
853 __m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
855 // Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
856 RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
857 RDet = _mm_shuffle_ps(RDet,RDet,0x00);
859 // Devide the first 12 minterms with the determinant.
860 mtL1 = _mm_mul_ps(mtL1, RDet);
861 mtL2 = _mm_mul_ps(mtL2, RDet);
862 mtL3 = _mm_mul_ps(mtL3, RDet);
864 // Calculate the minterms of the forth line and devide by the determinant.
865 tt = _mm_ror_ps(_L3,1); sum = _mm_mul_ps(tt,r1);
866 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
867 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
868 __m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
869 mtL4 = _mm_mul_ps(mtL4, RDet);
871 // Now we just have to transpose the minterms matrix.
872 trns0 = _mm_unpacklo_ps(mtL1,mtL2);
873 trns1 = _mm_unpacklo_ps(mtL3,mtL4);
874 trns2 = _mm_unpackhi_ps(mtL1,mtL2);
875 trns3 = _mm_unpackhi_ps(mtL3,mtL4);
876 _L1 = _mm_movelh_ps(trns0,trns1);
877 _L2 = _mm_movehl_ps(trns1,trns0);
878 _L3 = _mm_movelh_ps(trns2,trns3);
879 _L4 = _mm_movehl_ps(trns3,trns2);
889 VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat )
891 Transform3 affineMat;
892 affineMat.setCol0( mat.getCol0().getXYZ( ) );
893 affineMat.setCol1( mat.getCol1().getXYZ( ) );
894 affineMat.setCol2( mat.getCol2().getXYZ( ) );
895 affineMat.setCol3( mat.getCol3().getXYZ( ) );
896 return Matrix4( inverse( affineMat ) );
899 VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat )
901 Transform3 affineMat;
902 affineMat.setCol0( mat.getCol0().getXYZ( ) );
903 affineMat.setCol1( mat.getCol1().getXYZ( ) );
904 affineMat.setCol2( mat.getCol2().getXYZ( ) );
905 affineMat.setCol3( mat.getCol3().getXYZ( ) );
906 return Matrix4( orthoInverse( affineMat ) );
909 VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat )
912 __m128 r1,r2,r3,tt,tt2;
915 __m128 _L1 = mat.getCol0().get128();
916 __m128 _L2 = mat.getCol1().get128();
917 __m128 _L3 = mat.getCol2().get128();
918 __m128 _L4 = mat.getCol3().get128();
919 // Calculating the minterms for the first line.
921 // _mm_ror_ps is just a macro using _mm_shuffle_ps().
922 tt = _L4; tt2 = _mm_ror_ps(_L3,1);
923 Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3' dot V4
924 Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3' dot V4"
925 Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3' dot V4^
927 r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3" dot V4^ - V3^ dot V4"
928 r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^ dot V4' - V3' dot V4^
929 r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3' dot V4" - V3" dot V4'
932 Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1);
933 Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
934 Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
936 // Calculating the determinant.
937 Det = _mm_mul_ps(sum,_L1);
938 Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
940 // Calculating the minterms of the second line (using previous results).
941 tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1);
942 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
943 tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
945 // Testing the determinant.
946 Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
947 return floatInVec(Det, 0);
950 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
953 ( mCol0 + mat.mCol0 ),
954 ( mCol1 + mat.mCol1 ),
955 ( mCol2 + mat.mCol2 ),
956 ( mCol3 + mat.mCol3 )
960 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
963 ( mCol0 - mat.mCol0 ),
964 ( mCol1 - mat.mCol1 ),
965 ( mCol2 - mat.mCol2 ),
966 ( mCol3 - mat.mCol3 )
970 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
976 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
982 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( ) const
992 VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat )
995 absPerElem( mat.getCol0() ),
996 absPerElem( mat.getCol1() ),
997 absPerElem( mat.getCol2() ),
998 absPerElem( mat.getCol3() )
1002 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( float scalar ) const
1004 return *this * floatInVec(scalar);
1007 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const
1017 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( float scalar )
1019 return *this *= floatInVec(scalar);
1022 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const floatInVec &scalar )
1024 *this = *this * scalar;
1028 VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat )
1030 return floatInVec(scalar) * mat;
1033 VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
1035 return mat * scalar;
1038 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector4 &vec ) const
1042 _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
1043 _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3)))))
1047 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector3 &vec ) const
1051 _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
1052 _mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))))
1056 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Point3 &pnt ) const
1060 _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))),
1061 _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128()))
1065 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
1068 ( *this * mat.mCol0 ),
1069 ( *this * mat.mCol1 ),
1070 ( *this * mat.mCol2 ),
1071 ( *this * mat.mCol3 )
1075 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
1077 *this = *this * mat;
1081 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
1084 ( *this * tfrm.getCol0() ),
1085 ( *this * tfrm.getCol1() ),
1086 ( *this * tfrm.getCol2() ),
1087 ( *this * Point3( tfrm.getCol3() ) )
1091 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
1093 *this = *this * tfrm;
1097 VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
1100 mulPerElem( mat0.getCol0(), mat1.getCol0() ),
1101 mulPerElem( mat0.getCol1(), mat1.getCol1() ),
1102 mulPerElem( mat0.getCol2(), mat1.getCol2() ),
1103 mulPerElem( mat0.getCol3(), mat1.getCol3() )
1107 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::identity( )
1117 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
1119 mCol0.setXYZ( mat3.getCol0() );
1120 mCol1.setXYZ( mat3.getCol1() );
1121 mCol2.setXYZ( mat3.getCol2() );
1125 VECTORMATH_FORCE_INLINE const Matrix3 Matrix4::getUpper3x3( ) const
1134 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
1136 mCol3.setXYZ( translateVec );
1140 VECTORMATH_FORCE_INLINE const Vector3 Matrix4::getTranslation( ) const
1142 return mCol3.getXYZ( );
1145 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( float radians )
1147 return rotationX( floatInVec(radians) );
1150 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( const floatInVec &radians )
1152 __m128 s, c, res1, res2;
1154 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1155 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1156 zero = _mm_setzero_ps();
1157 sincosf4( radians.get128(), &s, &c );
1158 res1 = vec_sel( zero, c, select_y );
1159 res1 = vec_sel( res1, s, select_z );
1160 res2 = vec_sel( zero, negatef4(s), select_y );
1161 res2 = vec_sel( res2, c, select_z );
1170 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( float radians )
1172 return rotationY( floatInVec(radians) );
1175 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( const floatInVec &radians )
1177 __m128 s, c, res0, res2;
1179 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1180 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1181 zero = _mm_setzero_ps();
1182 sincosf4( radians.get128(), &s, &c );
1183 res0 = vec_sel( zero, c, select_x );
1184 res0 = vec_sel( res0, negatef4(s), select_z );
1185 res2 = vec_sel( zero, s, select_x );
1186 res2 = vec_sel( res2, c, select_z );
1195 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( float radians )
1197 return rotationZ( floatInVec(radians) );
1200 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( const floatInVec &radians )
1202 __m128 s, c, res0, res1;
1204 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1205 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1206 zero = _mm_setzero_ps();
1207 sincosf4( radians.get128(), &s, &c );
1208 res0 = vec_sel( zero, c, select_x );
1209 res0 = vec_sel( res0, s, select_y );
1210 res1 = vec_sel( zero, negatef4(s), select_x );
1211 res1 = vec_sel( res1, c, select_y );
1220 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ )
1222 __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1223 angles = Vector4( radiansXYZ, 0.0f ).get128();
1224 sincosf4( angles, &s, &c );
1225 negS = negatef4( s );
1226 Z0 = vec_mergel( c, s );
1227 Z1 = vec_mergel( negS, c );
1228 VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
1229 Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
1230 Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
1231 Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
1232 X0 = vec_splat( s, 0 );
1233 X1 = vec_splat( c, 0 );
1234 tmp = vec_mul( Z0, Y1 );
1236 Vector4( vec_mul( Z0, Y0 ) ),
1237 Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
1238 Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
1243 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
1245 return rotation( floatInVec(radians), unitVec );
1248 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
1250 __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
1251 axis = unitVec.get128();
1252 sincosf4( radians.get128(), &s, &c );
1253 xxxx = vec_splat( axis, 0 );
1254 yyyy = vec_splat( axis, 1 );
1255 zzzz = vec_splat( axis, 2 );
1256 oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
1257 axisS = vec_mul( axis, s );
1258 negAxisS = negatef4( axisS );
1259 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1260 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1261 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1262 //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
1263 tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
1264 tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
1265 //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
1266 tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
1267 //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
1268 tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
1269 tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
1270 tmp0 = vec_sel( tmp0, c, select_x );
1271 tmp1 = vec_sel( tmp1, c, select_y );
1272 tmp2 = vec_sel( tmp2, c, select_z );
1273 VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
1274 axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) );
1275 tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
1276 tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
1277 tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
1279 Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
1280 Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
1281 Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
1286 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const Quat &unitQuat )
1288 return Matrix4( Transform3::rotation( unitQuat ) );
1291 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
1293 __m128 zero = _mm_setzero_ps();
1294 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1295 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1296 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1298 Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ),
1299 Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ),
1300 Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ),
1305 VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
1308 ( mat.getCol0() * scaleVec.getX( ) ),
1309 ( mat.getCol1() * scaleVec.getY( ) ),
1310 ( mat.getCol2() * scaleVec.getZ( ) ),
1315 VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
1318 scale4 = Vector4( scaleVec, 1.0f );
1320 mulPerElem( mat.getCol0(), scale4 ),
1321 mulPerElem( mat.getCol1(), scale4 ),
1322 mulPerElem( mat.getCol2(), scale4 ),
1323 mulPerElem( mat.getCol3(), scale4 )
1327 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::translation( const Vector3 &translateVec )
1333 Vector4( translateVec, 1.0f )
1337 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
1340 Vector3 v3X, v3Y, v3Z;
1341 v3Y = normalize( upVec );
1342 v3Z = normalize( ( eyePos - lookAtPos ) );
1343 v3X = normalize( cross( v3Y, v3Z ) );
1344 v3Y = cross( v3Z, v3X );
1345 m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
1346 return orthoInverse( m4EyeFrame );
1349 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
1352 __m128 zero, col0, col1, col2, col3;
1353 union { __m128 v; float s[4]; } tmp;
1354 f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
1355 rangeInv = 1.0f / ( zNear - zFar );
1356 zero = _mm_setzero_ps();
1358 tmp.s[0] = f / aspect;
1364 tmp.s[2] = ( zNear + zFar ) * rangeInv;
1368 tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
1378 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
1380 /* function implementation based on code from STIDC SDK: */
1381 /* -------------------------------------------------------------- */
1382 /* PLEASE DO NOT MODIFY THIS SECTION */
1383 /* This prolog section is automatically generated. */
1386 /* Sony Computer Entertainment, Inc., */
1387 /* Toshiba Corporation, */
1388 /* International Business Machines Corporation, */
1390 /* S/T/I Confidential Information */
1391 /* -------------------------------------------------------------- */
1393 __m128 diff, sum, inv_diff;
1394 __m128 diagonal, column, near2;
1395 __m128 zero = _mm_setzero_ps();
1396 union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union?
1403 lbf = vec_mergeh( l.v, f.v );
1404 rtn = vec_mergeh( r.v, n.v );
1405 lbf = vec_mergeh( lbf, b.v );
1406 rtn = vec_mergeh( rtn, t.v );
1407 diff = vec_sub( rtn, lbf );
1408 sum = vec_add( rtn, lbf );
1409 inv_diff = recipf4( diff );
1410 near2 = vec_splat( n.v, 0 );
1411 near2 = vec_add( near2, near2 );
1412 diagonal = vec_mul( near2, inv_diff );
1413 column = vec_mul( sum, inv_diff );
1414 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1415 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1416 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1417 VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
1419 Vector4( vec_sel( zero, diagonal, select_x ) ),
1420 Vector4( vec_sel( zero, diagonal, select_y ) ),
1421 Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
1422 Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) )
1426 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
1428 /* function implementation based on code from STIDC SDK: */
1429 /* -------------------------------------------------------------- */
1430 /* PLEASE DO NOT MODIFY THIS SECTION */
1431 /* This prolog section is automatically generated. */
1434 /* Sony Computer Entertainment, Inc., */
1435 /* Toshiba Corporation, */
1436 /* International Business Machines Corporation, */
1438 /* S/T/I Confidential Information */
1439 /* -------------------------------------------------------------- */
1441 __m128 diff, sum, inv_diff, neg_inv_diff;
1442 __m128 diagonal, column;
1443 __m128 zero = _mm_setzero_ps();
1444 union { __m128 v; float s[4]; } l, f, r, n, b, t;
1451 lbf = vec_mergeh( l.v, f.v );
1452 rtn = vec_mergeh( r.v, n.v );
1453 lbf = vec_mergeh( lbf, b.v );
1454 rtn = vec_mergeh( rtn, t.v );
1455 diff = vec_sub( rtn, lbf );
1456 sum = vec_add( rtn, lbf );
1457 inv_diff = recipf4( diff );
1458 neg_inv_diff = negatef4( inv_diff );
1459 diagonal = vec_add( inv_diff, inv_diff );
1460 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1461 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1462 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1463 VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
1464 column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
1466 Vector4( vec_sel( zero, diagonal, select_x ) ),
1467 Vector4( vec_sel( zero, diagonal, select_y ) ),
1468 Vector4( vec_sel( zero, diagonal, select_z ) ),
1469 Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
1473 VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
1476 select( mat0.getCol0(), mat1.getCol0(), select1 ),
1477 select( mat0.getCol1(), mat1.getCol1(), select1 ),
1478 select( mat0.getCol2(), mat1.getCol2(), select1 ),
1479 select( mat0.getCol3(), mat1.getCol3(), select1 )
1483 VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
1486 select( mat0.getCol0(), mat1.getCol0(), select1 ),
1487 select( mat0.getCol1(), mat1.getCol1(), select1 ),
1488 select( mat0.getCol2(), mat1.getCol2(), select1 ),
1489 select( mat0.getCol3(), mat1.getCol3(), select1 )
1493 #ifdef _VECTORMATH_DEBUG
1495 VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat )
1497 print( mat.getRow( 0 ) );
1498 print( mat.getRow( 1 ) );
1499 print( mat.getRow( 2 ) );
1500 print( mat.getRow( 3 ) );
1503 VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name )
1505 printf("%s:\n", name);
1511 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Transform3 & tfrm )
1519 VECTORMATH_FORCE_INLINE Transform3::Transform3( float scalar )
1521 mCol0 = Vector3( scalar );
1522 mCol1 = Vector3( scalar );
1523 mCol2 = Vector3( scalar );
1524 mCol3 = Vector3( scalar );
1527 VECTORMATH_FORCE_INLINE Transform3::Transform3( const floatInVec &scalar )
1529 mCol0 = Vector3( scalar );
1530 mCol1 = Vector3( scalar );
1531 mCol2 = Vector3( scalar );
1532 mCol3 = Vector3( scalar );
1535 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
1543 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
1545 this->setUpper3x3( tfrm );
1546 this->setTranslation( translateVec );
1549 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
1551 this->setUpper3x3( Matrix3( unitQuat ) );
1552 this->setTranslation( translateVec );
1555 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol0( const Vector3 &_col0 )
1561 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol1( const Vector3 &_col1 )
1567 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol2( const Vector3 &_col2 )
1573 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol3( const Vector3 &_col3 )
1579 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol( int col, const Vector3 &vec )
1581 *(&mCol0 + col) = vec;
1585 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setRow( int row, const Vector4 &vec )
1587 mCol0.setElem( row, vec.getElem( 0 ) );
1588 mCol1.setElem( row, vec.getElem( 1 ) );
1589 mCol2.setElem( row, vec.getElem( 2 ) );
1590 mCol3.setElem( row, vec.getElem( 3 ) );
1594 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, float val )
1596 (*this)[col].setElem(row, val);
1600 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, const floatInVec &val )
1603 tmpV3_0 = this->getCol( col );
1604 tmpV3_0.setElem( row, val );
1605 this->setCol( col, tmpV3_0 );
1609 VECTORMATH_FORCE_INLINE const floatInVec Transform3::getElem( int col, int row ) const
1611 return this->getCol( col ).getElem( row );
1614 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol0( ) const
1619 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol1( ) const
1624 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol2( ) const
1629 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol3( ) const
1634 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol( int col ) const
1636 return *(&mCol0 + col);
1639 VECTORMATH_FORCE_INLINE const Vector4 Transform3::getRow( int row ) const
1641 return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
1644 VECTORMATH_FORCE_INLINE Vector3 & Transform3::operator []( int col )
1646 return *(&mCol0 + col);
1649 VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator []( int col ) const
1651 return *(&mCol0 + col);
1654 VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator =( const Transform3 & tfrm )
1663 VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm )
1665 __m128 inv0, inv1, inv2, inv3;
1666 __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
1667 __m128 xxxx, yyyy, zzzz;
1668 tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
1669 tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
1670 tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
1671 inv3 = negatef4( tfrm.getCol3().get128() );
1672 dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
1673 dot = vec_splat( dot, 0 );
1674 invdet = recipf4( dot );
1675 tmp3 = vec_mergeh( tmp0, tmp2 );
1676 tmp4 = vec_mergel( tmp0, tmp2 );
1677 inv0 = vec_mergeh( tmp3, tmp1 );
1678 xxxx = vec_splat( inv3, 0 );
1679 //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
1680 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1681 inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
1682 inv1 = vec_sel(inv1, tmp1, select_y);
1683 //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
1684 inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
1685 inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
1686 yyyy = vec_splat( inv3, 1 );
1687 zzzz = vec_splat( inv3, 2 );
1688 inv3 = vec_mul( inv0, xxxx );
1689 inv3 = vec_madd( inv1, yyyy, inv3 );
1690 inv3 = vec_madd( inv2, zzzz, inv3 );
1691 inv0 = vec_mul( inv0, invdet );
1692 inv1 = vec_mul( inv1, invdet );
1693 inv2 = vec_mul( inv2, invdet );
1694 inv3 = vec_mul( inv3, invdet );
1703 VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm )
1705 __m128 inv0, inv1, inv2, inv3;
1707 __m128 xxxx, yyyy, zzzz;
1708 tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
1709 tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
1710 inv3 = negatef4( tfrm.getCol3().get128() );
1711 inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
1712 xxxx = vec_splat( inv3, 0 );
1713 //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
1714 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1715 inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
1716 inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y);
1717 //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
1718 inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
1719 inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y);
1720 yyyy = vec_splat( inv3, 1 );
1721 zzzz = vec_splat( inv3, 2 );
1722 inv3 = vec_mul( inv0, xxxx );
1723 inv3 = vec_madd( inv1, yyyy, inv3 );
1724 inv3 = vec_madd( inv2, zzzz, inv3 );
1733 VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm )
1736 absPerElem( tfrm.getCol0() ),
1737 absPerElem( tfrm.getCol1() ),
1738 absPerElem( tfrm.getCol2() ),
1739 absPerElem( tfrm.getCol3() )
1743 VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator *( const Vector3 &vec ) const
1746 __m128 xxxx, yyyy, zzzz;
1747 xxxx = vec_splat( vec.get128(), 0 );
1748 yyyy = vec_splat( vec.get128(), 1 );
1749 zzzz = vec_splat( vec.get128(), 2 );
1750 res = vec_mul( mCol0.get128(), xxxx );
1751 res = vec_madd( mCol1.get128(), yyyy, res );
1752 res = vec_madd( mCol2.get128(), zzzz, res );
1753 return Vector3( res );
1756 VECTORMATH_FORCE_INLINE const Point3 Transform3::operator *( const Point3 &pnt ) const
1758 __m128 tmp0, tmp1, res;
1759 __m128 xxxx, yyyy, zzzz;
1760 xxxx = vec_splat( pnt.get128(), 0 );
1761 yyyy = vec_splat( pnt.get128(), 1 );
1762 zzzz = vec_splat( pnt.get128(), 2 );
1763 tmp0 = vec_mul( mCol0.get128(), xxxx );
1764 tmp1 = vec_mul( mCol1.get128(), yyyy );
1765 tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
1766 tmp1 = vec_add( mCol3.get128(), tmp1 );
1767 res = vec_add( tmp0, tmp1 );
1768 return Point3( res );
1771 VECTORMATH_FORCE_INLINE const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
1774 ( *this * tfrm.mCol0 ),
1775 ( *this * tfrm.mCol1 ),
1776 ( *this * tfrm.mCol2 ),
1777 Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
1781 VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator *=( const Transform3 & tfrm )
1783 *this = *this * tfrm;
1787 VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
1790 mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
1791 mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
1792 mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
1793 mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
1797 VECTORMATH_FORCE_INLINE const Transform3 Transform3::identity( )
1807 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
1809 mCol0 = tfrm.getCol0();
1810 mCol1 = tfrm.getCol1();
1811 mCol2 = tfrm.getCol2();
1815 VECTORMATH_FORCE_INLINE const Matrix3 Transform3::getUpper3x3( ) const
1817 return Matrix3( mCol0, mCol1, mCol2 );
1820 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
1822 mCol3 = translateVec;
1826 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getTranslation( ) const
1831 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( float radians )
1833 return rotationX( floatInVec(radians) );
1836 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( const floatInVec &radians )
1838 __m128 s, c, res1, res2;
1840 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1841 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1842 zero = _mm_setzero_ps();
1843 sincosf4( radians.get128(), &s, &c );
1844 res1 = vec_sel( zero, c, select_y );
1845 res1 = vec_sel( res1, s, select_z );
1846 res2 = vec_sel( zero, negatef4(s), select_y );
1847 res2 = vec_sel( res2, c, select_z );
1852 Vector3( _mm_setzero_ps() )
1856 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( float radians )
1858 return rotationY( floatInVec(radians) );
1861 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( const floatInVec &radians )
1863 __m128 s, c, res0, res2;
1865 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1866 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1867 zero = _mm_setzero_ps();
1868 sincosf4( radians.get128(), &s, &c );
1869 res0 = vec_sel( zero, c, select_x );
1870 res0 = vec_sel( res0, negatef4(s), select_z );
1871 res2 = vec_sel( zero, s, select_x );
1872 res2 = vec_sel( res2, c, select_z );
1881 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( float radians )
1883 return rotationZ( floatInVec(radians) );
1886 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( const floatInVec &radians )
1888 __m128 s, c, res0, res1;
1889 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1890 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1891 __m128 zero = _mm_setzero_ps();
1892 sincosf4( radians.get128(), &s, &c );
1893 res0 = vec_sel( zero, c, select_x );
1894 res0 = vec_sel( res0, s, select_y );
1895 res1 = vec_sel( zero, negatef4(s), select_x );
1896 res1 = vec_sel( res1, c, select_y );
1905 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ )
1907 __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1908 angles = Vector4( radiansXYZ, 0.0f ).get128();
1909 sincosf4( angles, &s, &c );
1910 negS = negatef4( s );
1911 Z0 = vec_mergel( c, s );
1912 Z1 = vec_mergel( negS, c );
1913 VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
1914 Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
1915 Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
1916 Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
1917 X0 = vec_splat( s, 0 );
1918 X1 = vec_splat( c, 0 );
1919 tmp = vec_mul( Z0, Y1 );
1921 Vector3( vec_mul( Z0, Y0 ) ),
1922 Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
1923 Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
1928 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
1930 return rotation( floatInVec(radians), unitVec );
1933 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec )
1935 return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
1938 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const Quat &unitQuat )
1940 return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
1943 VECTORMATH_FORCE_INLINE const Transform3 Transform3::scale( const Vector3 &scaleVec )
1945 __m128 zero = _mm_setzero_ps();
1946 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1947 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1948 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1950 Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
1951 Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
1952 Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ),
1957 VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
1960 ( tfrm.getCol0() * scaleVec.getX( ) ),
1961 ( tfrm.getCol1() * scaleVec.getY( ) ),
1962 ( tfrm.getCol2() * scaleVec.getZ( ) ),
1967 VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
1970 mulPerElem( tfrm.getCol0(), scaleVec ),
1971 mulPerElem( tfrm.getCol1(), scaleVec ),
1972 mulPerElem( tfrm.getCol2(), scaleVec ),
1973 mulPerElem( tfrm.getCol3(), scaleVec )
1977 VECTORMATH_FORCE_INLINE const Transform3 Transform3::translation( const Vector3 &translateVec )
1987 VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
1990 select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
1991 select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
1992 select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
1993 select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
1997 VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
2000 select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
2001 select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
2002 select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
2003 select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
2007 #ifdef _VECTORMATH_DEBUG
2009 VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm )
2011 print( tfrm.getRow( 0 ) );
2012 print( tfrm.getRow( 1 ) );
2013 print( tfrm.getRow( 2 ) );
2016 VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name )
2018 printf("%s:\n", name);
2024 VECTORMATH_FORCE_INLINE Quat::Quat( const Matrix3 & tfrm )
2027 __m128 col0, col1, col2;
2028 __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
2029 __m128 zy_xz_yx, yz_zx_xy, sum, diff;
2030 __m128 radicand, invSqrt, scale;
2031 __m128 res0, res1, res2, res3;
2033 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
2034 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
2035 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
2036 VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
2038 col0 = tfrm.getCol0().get128();
2039 col1 = tfrm.getCol1().get128();
2040 col2 = tfrm.getCol2().get128();
2045 /* xx largest diagonal element */
2046 /* yy largest diagonal element */
2047 /* zz largest diagonal element */
2049 /* compute quaternion for each case */
2051 xx_yy = vec_sel( col0, col1, select_y );
2052 //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
2053 //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
2054 //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
2055 xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
2056 xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck
2057 yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
2058 zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
2060 diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
2061 diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
2062 radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );
2063 // invSqrt = rsqrtf4( radicand );
2064 invSqrt = newtonrapson_rsqrt4( radicand );
2068 zy_xz_yx = vec_sel( col0, col1, select_z ); // zy_xz_yx = 00 01 12 03
2069 //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
2070 zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) ); // zy_xz_yx = 12 12 01 00
2071 zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y ); // zy_xz_yx = 12 20 01 00
2072 yz_zx_xy = vec_sel( col0, col1, select_x ); // yz_zx_xy = 10 01 02 03
2073 //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
2074 yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) ); // yz_zx_xy = 10 02 10 10
2075 yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x ); // yz_zx_xy = 21 02 10 10
2077 sum = vec_add( zy_xz_yx, yz_zx_xy );
2078 diff = vec_sub( zy_xz_yx, yz_zx_xy );
2080 scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) );
2082 //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
2083 res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
2084 res0 = vec_sel( res0, vec_splat(diff, 0), select_w ); // TODO: Ck
2085 //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
2086 res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
2087 res1 = vec_sel( res1, vec_splat(diff, 1), select_w ); // TODO: Ck
2088 //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
2089 res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
2090 res2 = vec_sel( res2, vec_splat(diff, 2), select_w ); // TODO: Ck
2092 res0 = vec_sel( res0, radicand, select_x );
2093 res1 = vec_sel( res1, radicand, select_y );
2094 res2 = vec_sel( res2, radicand, select_z );
2095 res3 = vec_sel( res3, radicand, select_w );
2096 res0 = vec_mul( res0, vec_splat( scale, 0 ) );
2097 res1 = vec_mul( res1, vec_splat( scale, 1 ) );
2098 res2 = vec_mul( res2, vec_splat( scale, 2 ) );
2099 res3 = vec_mul( res3, vec_splat( scale, 3 ) );
2101 /* determine case and select answer */
2103 xx = vec_splat( col0, 0 );
2104 yy = vec_splat( col1, 1 );
2105 zz = vec_splat( col2, 2 );
2106 res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
2107 res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
2108 res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) );
2112 VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
2115 ( tfrm0 * tfrm1.getX( ) ),
2116 ( tfrm0 * tfrm1.getY( ) ),
2117 ( tfrm0 * tfrm1.getZ( ) )
2121 VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
2124 ( tfrm0 * tfrm1.getX( ) ),
2125 ( tfrm0 * tfrm1.getY( ) ),
2126 ( tfrm0 * tfrm1.getZ( ) ),
2127 ( tfrm0 * tfrm1.getW( ) )
2131 VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
2133 __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
2134 __m128 xxxx, yyyy, zzzz;
2135 tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
2136 tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
2137 xxxx = vec_splat( vec.get128(), 0 );
2138 mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
2139 //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
2140 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
2141 mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
2142 mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);
2143 //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
2144 mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
2145 mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);
2146 yyyy = vec_splat( vec.get128(), 1 );
2147 res = vec_mul( mcol0, xxxx );
2148 zzzz = vec_splat( vec.get128(), 2 );
2149 res = vec_madd( mcol1, yyyy, res );
2150 res = vec_madd( mcol2, zzzz, res );
2151 return Vector3( res );
2154 VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec )
2156 __m128 neg, res0, res1, res2;
2157 neg = negatef4( vec.get128() );
2158 VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
2159 VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
2160 VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
2161 //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
2162 res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );
2163 res0 = vec_sel(res0, vec_splat(neg, 1), select_z);
2164 //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
2165 res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);
2166 //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
2167 res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );
2168 res2 = vec_sel(res2, vec_splat(neg, 0), select_y);
2169 VM_ATTRIBUTE_ALIGN16 unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};
2170 VM_ATTRIBUTE_ALIGN16 unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
2171 VM_ATTRIBUTE_ALIGN16 unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
2172 res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) );
2173 res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );
2174 res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects?
2182 VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
2184 return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
2188 } // namespace Vectormath