Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h

   1 /*
   2    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   3    All rights reserved.
   4
   5    Redistribution and use in source and binary forms,
   6    with or without modification, are permitted provided that the
   7    following conditions are met:
   8     * Redistributions of source code must retain the above copyright
   9       notice, this list of conditions and the following disclaimer.
  10     * Redistributions in binary form must reproduce the above copyright
  11       notice, this list of conditions and the following disclaimer in the
  12       documentation and/or other materials provided with the distribution.
  13     * Neither the name of the Sony Computer Entertainment Inc nor the names
  14       of its contributors may be used to endorse or promote products derived
  15       from this software without specific prior written permission.
  16
  17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27    POSSIBILITY OF SUCH DAMAGE.
  28 */
  29
  30 #ifndef _VECTORMATH_MAT_AOS_CPP_H
  31 #define _VECTORMATH_MAT_AOS_CPP_H
  32
  33 namespace Vectormath {
  34 namespace Aos {
  35
  36 //-----------------------------------------------------------------------------
  37 // Constants
  38 // for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
     // Each macro below packs four 32-bit word selectors into a vec_uint4 and
     // casts it to the vec_uchar16 pattern type passed to spu_shuffle.
     // As used by the spu_shuffle calls in this file, x/y/z/w pick a word of the
     // first operand and a/b/c/d pick a word of the second operand.
     // NOTE(review): the per-word selectors (_VECTORMATH_SHUF_X ... _VECTORMATH_SHUF_D,
     // _VECTORMATH_SHUF_0) are defined outside this file; '0' presumably yields
     // a zero word - confirm against that definition.
  39
  40 #define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
  41 #define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
  42 #define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
  43 #define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
  44 #define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
  45 #define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
  46 #define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })
  47 #define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
  48 #define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
  49 #define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
  50 #define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
  51 #define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
  52 #define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
  53 #define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
  54 #define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
  55 #define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
  56 #define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
  57 #define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
  58 #define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
  59 #define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
  60 #define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
  61 #define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
     // Single-precision literal for pi/2; not referenced by the code visible in this part of the file.
  62 #define _VECTORMATH_PI_OVER_2 1.570796327f
  63
  64 //-----------------------------------------------------------------------------
  65 // Definitions
  66
  67 inline Matrix3::Matrix3( const Matrix3 & mat )
  68 {
  69     mCol0 = mat.mCol0;
  70     mCol1 = mat.mCol1;
  71     mCol2 = mat.mCol2;
  72 }
  73
  74 inline Matrix3::Matrix3( float scalar )
  75 {
  76     mCol0 = Vector3( scalar );
  77     mCol1 = Vector3( scalar );
  78     mCol2 = Vector3( scalar );
  79 }
  80
  81 inline Matrix3::Matrix3( Quat unitQuat )
  82 {
  83     vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
  84     vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
  85     vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
  86     vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000);
  87     vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0);
  88     xyzw_2 = spu_add( unitQuat.get128(), unitQuat.get128() );
  89     wwww = spu_shuffle( unitQuat.get128(), unitQuat.get128(), shuffle_wwww );
  90     yzxw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_YZXW );
  91     zxyw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_ZXYW );
  92     yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
  93     zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
  94     tmp0 = spu_mul( yzxw_2, wwww );
  95     tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) );
  96     tmp2 = spu_mul( yzxw, xyzw_2 );
  97     tmp0 = spu_madd( zxyw, xyzw_2, tmp0 );
  98     tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 );
  99     tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 );
 100     tmp3 = spu_sel( tmp0, tmp1, select_x );
 101     tmp4 = spu_sel( tmp1, tmp2, select_x );
 102     tmp5 = spu_sel( tmp2, tmp0, select_x );
 103     mCol0 = Vector3( spu_sel( tmp3, tmp2, select_z ) );
 104     mCol1 = Vector3( spu_sel( tmp4, tmp0, select_z ) );
 105     mCol2 = Vector3( spu_sel( tmp5, tmp1, select_z ) );
 106 }
 107
 108 inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
 109 {
 110     mCol0 = _col0;
 111     mCol1 = _col1;
 112     mCol2 = _col2;
 113 }
 114
 115 inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
 116 {
 117     mCol0 = _col0;
 118     return *this;
 119 }
 120
 121 inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
 122 {
 123     mCol1 = _col1;
 124     return *this;
 125 }
 126
 127 inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
 128 {
 129     mCol2 = _col2;
 130     return *this;
 131 }
 132
 133 inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
 134 {
 135     *(&mCol0 + col) = vec;
 136     return *this;
 137 }
 138
 139 inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
 140 {
 141     mCol0.setElem( row, vec.getElem( 0 ) );
 142     mCol1.setElem( row, vec.getElem( 1 ) );
 143     mCol2.setElem( row, vec.getElem( 2 ) );
 144     return *this;
 145 }
 146
 147 inline Matrix3 & Matrix3::setElem( int col, int row, float val )
 148 {
 149     (*this)[col].setElem(row, val);
 150     return *this;
 151 }
 152
 153 inline float Matrix3::getElem( int col, int row ) const
 154 {
 155     return this->getCol( col ).getElem( row );
 156 }
 157
 158 inline const Vector3 Matrix3::getCol0( ) const
 159 {
 160     return mCol0;
 161 }
 162
 163 inline const Vector3 Matrix3::getCol1( ) const
 164 {
 165     return mCol1;
 166 }
 167
 168 inline const Vector3 Matrix3::getCol2( ) const
 169 {
 170     return mCol2;
 171 }
 172
 173 inline const Vector3 Matrix3::getCol( int col ) const
 174 {
 175     return *(&mCol0 + col);
 176 }
 177
 178 inline const Vector3 Matrix3::getRow( int row ) const
 179 {
 180     return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
 181 }
 182
 183 inline Vector3 & Matrix3::operator []( int col )
 184 {
 185     return *(&mCol0 + col);
 186 }
 187
 188 inline const Vector3 Matrix3::operator []( int col ) const
 189 {
 190     return *(&mCol0 + col);
 191 }
 192
 193 inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
 194 {
 195     mCol0 = mat.mCol0;
 196     mCol1 = mat.mCol1;
 197     mCol2 = mat.mCol2;
 198     return *this;
 199 }
 200
 201 inline const Matrix3 transpose( const Matrix3 & mat )
 202 {
 203     vec_float4 tmp0, tmp1, res0, res1, res2;
 204     tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
 205     tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
 206     res0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
 207     res1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
 208     res2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
 209     return Matrix3(
 210         Vector3( res0 ),
 211         Vector3( res1 ),
 212         Vector3( res2 )
 213     );
 214 }
 215
 216 inline const Matrix3 inverse( const Matrix3 & mat )
 217 {
 218     vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
 219     tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
 220     tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
 221     tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
 222     dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
 223     dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
 224     invdet = recipf4( dot );
 225     tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
 226     tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
 227     inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
 228     inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
 229     inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
 230     inv0 = spu_mul( inv0, invdet );
 231     inv1 = spu_mul( inv1, invdet );
 232     inv2 = spu_mul( inv2, invdet );
 233     return Matrix3(
 234         Vector3( inv0 ),
 235         Vector3( inv1 ),
 236         Vector3( inv2 )
 237     );
 238 }
 239
 240 inline float determinant( const Matrix3 & mat )
 241 {
 242     return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
 243 }
 244
 245 inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
 246 {
 247     return Matrix3(
 248         ( mCol0 + mat.mCol0 ),
 249         ( mCol1 + mat.mCol1 ),
 250         ( mCol2 + mat.mCol2 )
 251     );
 252 }
 253
 254 inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
 255 {
 256     return Matrix3(
 257         ( mCol0 - mat.mCol0 ),
 258         ( mCol1 - mat.mCol1 ),
 259         ( mCol2 - mat.mCol2 )
 260     );
 261 }
 262
 263 inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
 264 {
 265     *this = *this + mat;
 266     return *this;
 267 }
 268
 269 inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
 270 {
 271     *this = *this - mat;
 272     return *this;
 273 }
 274
 275 inline const Matrix3 Matrix3::operator -( ) const
 276 {
 277     return Matrix3(
 278         ( -mCol0 ),
 279         ( -mCol1 ),
 280         ( -mCol2 )
 281     );
 282 }
 283
 284 inline const Matrix3 absPerElem( const Matrix3 & mat )
 285 {
 286     return Matrix3(
 287         absPerElem( mat.getCol0() ),
 288         absPerElem( mat.getCol1() ),
 289         absPerElem( mat.getCol2() )
 290     );
 291 }
 292
 293 inline const Matrix3 Matrix3::operator *( float scalar ) const
 294 {
 295     return Matrix3(
 296         ( mCol0 * scalar ),
 297         ( mCol1 * scalar ),
 298         ( mCol2 * scalar )
 299     );
 300 }
 301
 302 inline Matrix3 & Matrix3::operator *=( float scalar )
 303 {
 304     *this = *this * scalar;
 305     return *this;
 306 }
 307
 308 inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
 309 {
 310     return mat * scalar;
 311 }
 312
 313 inline const Vector3 Matrix3::operator *( Vector3 vec ) const
 314 {
 315     vec_float4 res;
 316     vec_float4 xxxx, yyyy, zzzz;
 317     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 318     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 319     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 320     xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
 321     yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
 322     zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
 323     res = spu_mul( mCol0.get128(), xxxx );
 324     res = spu_madd( mCol1.get128(), yyyy, res );
 325     res = spu_madd( mCol2.get128(), zzzz, res );
 326     return Vector3( res );
 327 }
 328
 329 inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
 330 {
 331     return Matrix3(
 332         ( *this * mat.mCol0 ),
 333         ( *this * mat.mCol1 ),
 334         ( *this * mat.mCol2 )
 335     );
 336 }
 337
 338 inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
 339 {
 340     *this = *this * mat;
 341     return *this;
 342 }
 343
 344 inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
 345 {
 346     return Matrix3(
 347         mulPerElem( mat0.getCol0(), mat1.getCol0() ),
 348         mulPerElem( mat0.getCol1(), mat1.getCol1() ),
 349         mulPerElem( mat0.getCol2(), mat1.getCol2() )
 350     );
 351 }
 352
 353 inline const Matrix3 Matrix3::identity( )
 354 {
 355     return Matrix3(
 356         Vector3::xAxis( ),
 357         Vector3::yAxis( ),
 358         Vector3::zAxis( )
 359     );
 360 }
 361
 362 inline const Matrix3 Matrix3::rotationX( float radians )
 363 {
 364     vec_float4 s, c, res1, res2;
 365     vec_uint4 select_y, select_z;
 366     vec_float4 zero;
 367     select_y = (vec_uint4)spu_maskb(0x0f00);
 368     select_z = (vec_uint4)spu_maskb(0x00f0);
 369     zero = spu_splats(0.0f);
 370     sincosf4( spu_splats(radians), &s, &c );
 371     res1 = spu_sel( zero, c, select_y );
 372     res1 = spu_sel( res1, s, select_z );
 373     res2 = spu_sel( zero, negatef4(s), select_y );
 374     res2 = spu_sel( res2, c, select_z );
 375     return Matrix3(
 376         Vector3::xAxis( ),
 377         Vector3( res1 ),
 378         Vector3( res2 )
 379     );
 380 }
 381
 382 inline const Matrix3 Matrix3::rotationY( float radians )
 383 {
 384     vec_float4 s, c, res0, res2;
 385     vec_uint4 select_x, select_z;
 386     vec_float4 zero;
 387     select_x = (vec_uint4)spu_maskb(0xf000);
 388     select_z = (vec_uint4)spu_maskb(0x00f0);
 389     zero = spu_splats(0.0f);
 390     sincosf4( spu_splats(radians), &s, &c );
 391     res0 = spu_sel( zero, c, select_x );
 392     res0 = spu_sel( res0, negatef4(s), select_z );
 393     res2 = spu_sel( zero, s, select_x );
 394     res2 = spu_sel( res2, c, select_z );
 395     return Matrix3(
 396         Vector3( res0 ),
 397         Vector3::yAxis( ),
 398         Vector3( res2 )
 399     );
 400 }
 401
 402 inline const Matrix3 Matrix3::rotationZ( float radians )
 403 {
 404     vec_float4 s, c, res0, res1;
 405     vec_uint4 select_x, select_y;
 406     vec_float4 zero;
 407     select_x = (vec_uint4)spu_maskb(0xf000);
 408     select_y = (vec_uint4)spu_maskb(0x0f00);
 409     zero = spu_splats(0.0f);
 410     sincosf4( spu_splats(radians), &s, &c );
 411     res0 = spu_sel( zero, c, select_x );
 412     res0 = spu_sel( res0, s, select_y );
 413     res1 = spu_sel( zero, negatef4(s), select_x );
 414     res1 = spu_sel( res1, c, select_y );
 415     return Matrix3(
 416         Vector3( res0 ),
 417         Vector3( res1 ),
 418         Vector3::zAxis( )
 419     );
 420 }
 421
 422 inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
 423 {
 424     vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
 425     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 426     angles = radiansXYZ.get128();
 427     angles = spu_insert( 0.0f, angles, 3 );
 428     sincosf4( angles, &s, &c );
 429     negS = negatef4( s );
 430     Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
 431     Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
 432     Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
 433     Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
 434     X0 = spu_shuffle( s, s, shuffle_xxxx );
 435     X1 = spu_shuffle( c, c, shuffle_xxxx );
 436     tmp = spu_mul( Z0, Y1 );
 437     return Matrix3(
 438         Vector3( spu_mul( Z0, Y0 ) ),
 439         Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
 440         Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) )
 441     );
 442 }
 443
 444 inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
 445 {
 446     vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
 447     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 448     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 449     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 450     axis = unitVec.get128();
 451     sincosf4( spu_splats( radians ), &s, &c );
 452     xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
 453     yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
 454     zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
 455     oneMinusC = spu_sub( spu_splats(1.0f), c );
 456     axisS = spu_mul( axis, s );
 457     negAxisS = negatef4( axisS );
 458     tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
 459     tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
 460     tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
 461     tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
 462     tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
 463     tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
 464     return Matrix3(
 465         Vector3( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
 466         Vector3( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
 467         Vector3( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) )
 468     );
 469 }
 470
 471 inline const Matrix3 Matrix3::rotation( Quat unitQuat )
 472 {
 473     return Matrix3( unitQuat );
 474 }
 475
 476 inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
 477 {
 478     vec_float4 zero = spu_splats(0.0f);
 479     return Matrix3(
 480         Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
 481         Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
 482         Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) )
 483     );
 484 }
 485
 486 inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
 487 {
 488     return Matrix3(
 489         ( mat.getCol0() * scaleVec.getX( ) ),
 490         ( mat.getCol1() * scaleVec.getY( ) ),
 491         ( mat.getCol2() * scaleVec.getZ( ) )
 492     );
 493 }
 494
 495 inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
 496 {
 497     return Matrix3(
 498         mulPerElem( mat.getCol0(), scaleVec ),
 499         mulPerElem( mat.getCol1(), scaleVec ),
 500         mulPerElem( mat.getCol2(), scaleVec )
 501     );
 502 }
 503
 504 inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
 505 {
 506     return Matrix3(
 507         select( mat0.getCol0(), mat1.getCol0(), select1 ),
 508         select( mat0.getCol1(), mat1.getCol1(), select1 ),
 509         select( mat0.getCol2(), mat1.getCol2(), select1 )
 510     );
 511 }
 512
 513 #ifdef _VECTORMATH_DEBUG
 514
 515 inline void print( const Matrix3 & mat )
 516 {
 517     print( mat.getRow( 0 ) );
 518     print( mat.getRow( 1 ) );
 519     print( mat.getRow( 2 ) );
 520 }
 521
 522 inline void print( const Matrix3 & mat, const char * name )
 523 {
 524     printf("%s:\n", name);
 525     print( mat );
 526 }
 527
 528 #endif
 529
 530 inline Matrix4::Matrix4( const Matrix4 & mat )
 531 {
 532     mCol0 = mat.mCol0;
 533     mCol1 = mat.mCol1;
 534     mCol2 = mat.mCol2;
 535     mCol3 = mat.mCol3;
 536 }
 537
 538 inline Matrix4::Matrix4( float scalar )
 539 {
 540     mCol0 = Vector4( scalar );
 541     mCol1 = Vector4( scalar );
 542     mCol2 = Vector4( scalar );
 543     mCol3 = Vector4( scalar );
 544 }
 545
 546 inline Matrix4::Matrix4( const Transform3 & mat )
 547 {
 548     mCol0 = Vector4( mat.getCol0(), 0.0f );
 549     mCol1 = Vector4( mat.getCol1(), 0.0f );
 550     mCol2 = Vector4( mat.getCol2(), 0.0f );
 551     mCol3 = Vector4( mat.getCol3(), 1.0f );
 552 }
 553
 554 inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
 555 {
 556     mCol0 = _col0;
 557     mCol1 = _col1;
 558     mCol2 = _col2;
 559     mCol3 = _col3;
 560 }
 561
 562 inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
 563 {
 564     mCol0 = Vector4( mat.getCol0(), 0.0f );
 565     mCol1 = Vector4( mat.getCol1(), 0.0f );
 566     mCol2 = Vector4( mat.getCol2(), 0.0f );
 567     mCol3 = Vector4( translateVec, 1.0f );
 568 }
 569
 570 inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
 571 {
 572     Matrix3 mat;
 573     mat = Matrix3( unitQuat );
 574     mCol0 = Vector4( mat.getCol0(), 0.0f );
 575     mCol1 = Vector4( mat.getCol1(), 0.0f );
 576     mCol2 = Vector4( mat.getCol2(), 0.0f );
 577     mCol3 = Vector4( translateVec, 1.0f );
 578 }
 579
 580 inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
 581 {
 582     mCol0 = _col0;
 583     return *this;
 584 }
 585
 586 inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
 587 {
 588     mCol1 = _col1;
 589     return *this;
 590 }
 591
 592 inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
 593 {
 594     mCol2 = _col2;
 595     return *this;
 596 }
 597
 598 inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
 599 {
 600     mCol3 = _col3;
 601     return *this;
 602 }
 603
 604 inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
 605 {
 606     *(&mCol0 + col) = vec;
 607     return *this;
 608 }
 609
 610 inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
 611 {
 612     mCol0.setElem( row, vec.getElem( 0 ) );
 613     mCol1.setElem( row, vec.getElem( 1 ) );
 614     mCol2.setElem( row, vec.getElem( 2 ) );
 615     mCol3.setElem( row, vec.getElem( 3 ) );
 616     return *this;
 617 }
 618
 619 inline Matrix4 & Matrix4::setElem( int col, int row, float val )
 620 {
 621     (*this)[col].setElem(row, val);
 622     return *this;
 623 }
 624
 625 inline float Matrix4::getElem( int col, int row ) const
 626 {
 627     return this->getCol( col ).getElem( row );
 628 }
 629
 630 inline const Vector4 Matrix4::getCol0( ) const
 631 {
 632     return mCol0;
 633 }
 634
 635 inline const Vector4 Matrix4::getCol1( ) const
 636 {
 637     return mCol1;
 638 }
 639
 640 inline const Vector4 Matrix4::getCol2( ) const
 641 {
 642     return mCol2;
 643 }
 644
 645 inline const Vector4 Matrix4::getCol3( ) const
 646 {
 647     return mCol3;
 648 }
 649
 650 inline const Vector4 Matrix4::getCol( int col ) const
 651 {
 652     return *(&mCol0 + col);
 653 }
 654
 655 inline const Vector4 Matrix4::getRow( int row ) const
 656 {
 657     return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
 658 }
 659
 660 inline Vector4 & Matrix4::operator []( int col )
 661 {
 662     return *(&mCol0 + col);
 663 }
 664
 665 inline const Vector4 Matrix4::operator []( int col ) const
 666 {
 667     return *(&mCol0 + col);
 668 }
 669
 670 inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
 671 {
 672     mCol0 = mat.mCol0;
 673     mCol1 = mat.mCol1;
 674     mCol2 = mat.mCol2;
 675     mCol3 = mat.mCol3;
 676     return *this;
 677 }
 678
 679 inline const Matrix4 transpose( const Matrix4 & mat )
 680 {
 681     vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
 682     tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
 683     tmp1 = spu_shuffle( mat.getCol1().get128(), mat.getCol3().get128(), _VECTORMATH_SHUF_XAYB );
 684     tmp2 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
 685     tmp3 = spu_shuffle( mat.getCol1().get128(), mat.getCol3().get128(), _VECTORMATH_SHUF_ZCWD );
 686     res0 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
 687     res1 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
 688     res2 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
 689     res3 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
 690     return Matrix4(
 691         Vector4( res0 ),
 692         Vector4( res1 ),
 693         Vector4( res2 ),
 694         Vector4( res3 )
 695     );
 696 }
 697
 698 inline const Matrix4 inverse( const Matrix4 & mat )
 699 {
         /* Computes the inverse of a general 4x4 matrix: builds the four cofactor
          * vectors cof0..cof3, sums t0 * cof0 across all lanes to obtain the
          * determinant, and scales the cofactors by recipf4(det).
          *   mat:     matrix to invert; its four columns are read via get128().
          *   returns: Matrix4 whose columns are cof0..cof3 multiplied by 1/det.
          * No test for a zero determinant is made before recipf4(det).
          */
 700     /* function implementation based on code from STIDC SDK:           */
 701     /* --------------------------------------------------------------  */
 702     /* PLEASE DO NOT MODIFY THIS SECTION                               */
 703     /* This prolog section is automatically generated.                 */
 704     /*                                                                 */
 705     /* (C)Copyright                                                    */
 706     /* Sony Computer Entertainment, Inc.,                              */
 707     /* Toshiba Corporation,                                            */
 708     /* International Business Machines Corporation,                    */
 709     /* 2001,2002.                                                      */
 710     /* S/T/I Confidential Information                                  */
 711     /* --------------------------------------------------------------  */
 712     vec_float4 in0, in1, in2, in3;
 713     vec_float4 tmp0, tmp1, tmp2, tmp3;
 714     vec_float4 cof0, cof1, cof2, cof3;
 715     vec_float4 t0, t1, t2, t3;
 716     vec_float4 t01, t02, t03, t12, t23;
 717     vec_float4 t1r, t2r;
 718     vec_float4 t01r, t02r, t03r, t12r, t23r;
 719     vec_float4 t1r3, t1r3r;
 720     vec_float4 det, det1, det2, det3, invdet;
 721     in0 = mat.getCol0().get128();
 722     in1 = mat.getCol1().get128();
 723     in2 = mat.getCol2().get128();
 724     in3 = mat.getCol3().get128();
 725     /* Perform transform of the input matrix of the form:
 726      *    A B C D
 727      *    E F G H
 728      *    I J K L
 729      *    M N O P
 730      *
 731      * The pseudo transpose of the input matrix is trans:
 732      *    A E I M
 733      *    J N B F
 734      *    C G K O
 735      *    L P D H
 736      */
         /* In the lane comments below each line of the first diagram is one input
          * vector: in0 = A B C D, in1 = E F G H, in2 = I J K L, in3 = M N O P.
          * The lines of the second diagram are t0, t1, t2 and t3 respectively.
          */
 737     tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
 738     tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
 739     tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
 740     tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
 741     t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
 742     t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
 743     t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
 744     t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
 745     /* Generate a cofactor matrix. The computed cofactors reside in
 746      * cof0, cof1, cof2, cof3.
 747      */
 748     t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
 749     t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
 750     cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
 751     cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
 752     t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
 753     cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
 754     cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
 755     cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
 756
 757     t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
 758     t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
 759     cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
 760     cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
 761     t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
 762     cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
 763     cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
 764     cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
 765     t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
 766     t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
 767     t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
 768     t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
 769     cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
 770     cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
 771     t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
 772     cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
 773     cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
 774     cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
 775     t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
 776     t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
 777     cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
 778     cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
 779     t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
 780     cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
 781     cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
 782     t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
 783     t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
 784     cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
 785     cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
 786     t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
 787     cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
 788     cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
 789     t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
 790     t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* EO AK MG IC */
 791     cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
 792     cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
 793     t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
 794     cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
 795     cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
 796     /* Compute the determinant of the matrix
 797      *
 798      * det = sum_across(t0 * cof0);
 799      *
 800      * We perform a sum across the entire vector so that
 801      * we don't have to splat the result when multiplying the
 802      * cofactors by the inverse of the determinant.
 803      */
 804     det  = spu_mul(t0, cof0);
 805     det1 = spu_rlqwbyte(det, 4);
 806     det2 = spu_rlqwbyte(det, 8);
 807     det3 = spu_rlqwbyte(det, 12);
 808     det  = spu_add(det, det1);
 809     det2 = spu_add(det2, det3);
 810     det  = spu_add(det, det2);
 811     /* Compute the reciprocal of the determinant.
 812      */
 813     invdet = recipf4(det);
 814     /* Multiply the cofactors by the reciprocal of the determinant.
 815      */
 816     return Matrix4(
 817         Vector4( spu_mul(cof0, invdet) ),
 818         Vector4( spu_mul(cof1, invdet) ),
 819         Vector4( spu_mul(cof2, invdet) ),
 820         Vector4( spu_mul(cof3, invdet) )
 821     );
 822 }
 823
 824 inline const Matrix4 affineInverse( const Matrix4 & mat )
 825 {
 826     Transform3 affineMat;
 827     affineMat.setCol0( mat.getCol0().getXYZ( ) );
 828     affineMat.setCol1( mat.getCol1().getXYZ( ) );
 829     affineMat.setCol2( mat.getCol2().getXYZ( ) );
 830     affineMat.setCol3( mat.getCol3().getXYZ( ) );
 831     return Matrix4( inverse( affineMat ) );
 832 }
 833
 834 inline const Matrix4 orthoInverse( const Matrix4 & mat )
 835 {
 836     Transform3 affineMat;
 837     affineMat.setCol0( mat.getCol0().getXYZ( ) );
 838     affineMat.setCol1( mat.getCol1().getXYZ( ) );
 839     affineMat.setCol2( mat.getCol2().getXYZ( ) );
 840     affineMat.setCol3( mat.getCol3().getXYZ( ) );
 841     return Matrix4( orthoInverse( affineMat ) );
 842 }
 843
 844 inline float determinant( const Matrix4 & mat )
 845 {
 846     /* function implementation based on code from STIDC SDK:           */
 847     /* --------------------------------------------------------------  */
 848     /* PLEASE DO NOT MODIFY THIS SECTION                               */
 849     /* This prolog section is automatically generated.                 */
 850     /*                                                                 */
 851     /* (C)Copyright                                                    */
 852     /* Sony Computer Entertainment, Inc.,                              */
 853     /* Toshiba Corporation,                                            */
 854     /* International Business Machines Corporation,                    */
 855     /* 2001,2002.                                                      */
 856     /* S/T/I Confidential Information                                  */
 857     /* --------------------------------------------------------------  */
 858     vec_float4 in0, in1, in2, in3;
 859     vec_float4 tmp0, tmp1, tmp2, tmp3;
 860     vec_float4 cof0;
 861     vec_float4 t0, t1, t2, t3;
 862     vec_float4 t12, t23;
 863     vec_float4 t1r, t2r;
 864     vec_float4 t12r, t23r;
 865     vec_float4 t1r3, t1r3r;
 866     in0 = mat.getCol0().get128();
 867     in1 = mat.getCol1().get128();
 868     in2 = mat.getCol2().get128();
 869     in3 = mat.getCol3().get128();
 870     /* Perform transform of the input matrix of the form:
 871      *    A B C D
 872      *    E F G H
 873      *    I J K L
 874      *    M N O P
 875      *
 876      * The pseudo transpose of the input matrix is trans:
 877      *    A E I M
 878      *    J N B F
 879      *    C G K O
 880      *    L P D H
 881      */
 882     tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
 883     tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
 884     tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
 885     tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
 886     t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
 887     t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
 888     t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
 889     t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
 890     /* Generate a cofactor matrix. The computed cofactors reside in
 891      * cof0, cof1, cof2, cof3.
 892      */
 893     t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
 894     t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
 895     cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
 896     t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
 897     cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
 898
 899     t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
 900     t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
 901     cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
 902     t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
 903     cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
 904     t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
 905     t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
 906     t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
 907     t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
 908     cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
 909     t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
 910     cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
 911     return spu_extract( _vmathVfDot4(t0,cof0), 0 );
 912 }
 913
 914 inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
 915 {
 916     return Matrix4(
 917         ( mCol0 + mat.mCol0 ),
 918         ( mCol1 + mat.mCol1 ),
 919         ( mCol2 + mat.mCol2 ),
 920         ( mCol3 + mat.mCol3 )
 921     );
 922 }
 923
 924 inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
 925 {
 926     return Matrix4(
 927         ( mCol0 - mat.mCol0 ),
 928         ( mCol1 - mat.mCol1 ),
 929         ( mCol2 - mat.mCol2 ),
 930         ( mCol3 - mat.mCol3 )
 931     );
 932 }
 933
 934 inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
 935 {
 936     *this = *this + mat;
 937     return *this;
 938 }
 939
 940 inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
 941 {
 942     *this = *this - mat;
 943     return *this;
 944 }
 945
 946 inline const Matrix4 Matrix4::operator -( ) const
 947 {
 948     return Matrix4(
 949         ( -mCol0 ),
 950         ( -mCol1 ),
 951         ( -mCol2 ),
 952         ( -mCol3 )
 953     );
 954 }
 955
 956 inline const Matrix4 absPerElem( const Matrix4 & mat )
 957 {
 958     return Matrix4(
 959         absPerElem( mat.getCol0() ),
 960         absPerElem( mat.getCol1() ),
 961         absPerElem( mat.getCol2() ),
 962         absPerElem( mat.getCol3() )
 963     );
 964 }
 965
 966 inline const Matrix4 Matrix4::operator *( float scalar ) const
 967 {
 968     return Matrix4(
 969         ( mCol0 * scalar ),
 970         ( mCol1 * scalar ),
 971         ( mCol2 * scalar ),
 972         ( mCol3 * scalar )
 973     );
 974 }
 975
 976 inline Matrix4 & Matrix4::operator *=( float scalar )
 977 {
 978     *this = *this * scalar;
 979     return *this;
 980 }
 981
 982 inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
 983 {
 984     return mat * scalar;
 985 }
 986
 987 inline const Vector4 Matrix4::operator *( Vector4 vec ) const
 988 {
 989     vec_float4 tmp0, tmp1, res;
 990     vec_float4 xxxx, yyyy, zzzz, wwww;
 991     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 992     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 993     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 994     vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
 995     xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
 996     yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
 997     zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
 998     wwww = spu_shuffle( vec.get128(), vec.get128(), shuffle_wwww );
 999     tmp0 = spu_mul( mCol0.get128(), xxxx );
1000     tmp1 = spu_mul( mCol1.get128(), yyyy );
1001     tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
1002     tmp1 = spu_madd( mCol3.get128(), wwww, tmp1 );
1003     res = spu_add( tmp0, tmp1 );
1004     return Vector4( res );
1005 }
1006
1007 inline const Vector4 Matrix4::operator *( Vector3 vec ) const
1008 {
1009     vec_float4 res;
1010     vec_float4 xxxx, yyyy, zzzz;
1011     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1012     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1013     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1014     xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
1015     yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
1016     zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
1017     res = spu_mul( mCol0.get128(), xxxx );
1018     res = spu_madd( mCol1.get128(), yyyy, res );
1019     res = spu_madd( mCol2.get128(), zzzz, res );
1020     return Vector4( res );
1021 }
1022
1023 inline const Vector4 Matrix4::operator *( Point3 pnt ) const
1024 {
1025     vec_float4 tmp0, tmp1, res;
1026     vec_float4 xxxx, yyyy, zzzz;
1027     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1028     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1029     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1030     xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
1031     yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
1032     zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
1033     tmp0 = spu_mul( mCol0.get128(), xxxx );
1034     tmp1 = spu_mul( mCol1.get128(), yyyy );
1035     tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
1036     tmp1 = spu_add( mCol3.get128(), tmp1 );
1037     res = spu_add( tmp0, tmp1 );
1038     return Vector4( res );
1039 }
1040
1041 inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
1042 {
1043     return Matrix4(
1044         ( *this * mat.mCol0 ),
1045         ( *this * mat.mCol1 ),
1046         ( *this * mat.mCol2 ),
1047         ( *this * mat.mCol3 )
1048     );
1049 }
1050
1051 inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
1052 {
1053     *this = *this * mat;
1054     return *this;
1055 }
1056
1057 inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
1058 {
1059     return Matrix4(
1060         ( *this * tfrm.getCol0() ),
1061         ( *this * tfrm.getCol1() ),
1062         ( *this * tfrm.getCol2() ),
1063         ( *this * Point3( tfrm.getCol3() ) )
1064     );
1065 }
1066
1067 inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
1068 {
1069     *this = *this * tfrm;
1070     return *this;
1071 }
1072
1073 inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
1074 {
1075     return Matrix4(
1076         mulPerElem( mat0.getCol0(), mat1.getCol0() ),
1077         mulPerElem( mat0.getCol1(), mat1.getCol1() ),
1078         mulPerElem( mat0.getCol2(), mat1.getCol2() ),
1079         mulPerElem( mat0.getCol3(), mat1.getCol3() )
1080     );
1081 }
1082
1083 inline const Matrix4 Matrix4::identity( )
1084 {
1085     return Matrix4(
1086         Vector4::xAxis( ),
1087         Vector4::yAxis( ),
1088         Vector4::zAxis( ),
1089         Vector4::wAxis( )
1090     );
1091 }
1092
1093 inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
1094 {
1095     mCol0.setXYZ( mat3.getCol0() );
1096     mCol1.setXYZ( mat3.getCol1() );
1097     mCol2.setXYZ( mat3.getCol2() );
1098     return *this;
1099 }
1100
1101 inline const Matrix3 Matrix4::getUpper3x3( ) const
1102 {
1103     return Matrix3(
1104         mCol0.getXYZ( ),
1105         mCol1.getXYZ( ),
1106         mCol2.getXYZ( )
1107     );
1108 }
1109
1110 inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
1111 {
1112     mCol3.setXYZ( translateVec );
1113     return *this;
1114 }
1115
1116 inline const Vector3 Matrix4::getTranslation( ) const
1117 {
1118     return mCol3.getXYZ( );
1119 }
1120
1121 inline const Matrix4 Matrix4::rotationX( float radians )
1122 {
1123     vec_float4 s, c, res1, res2;
1124     vec_uint4 select_y, select_z;
1125     vec_float4 zero;
1126     select_y = (vec_uint4)spu_maskb(0x0f00);
1127     select_z = (vec_uint4)spu_maskb(0x00f0);
1128     zero = spu_splats(0.0f);
1129     sincosf4( spu_splats(radians), &s, &c );
1130     res1 = spu_sel( zero, c, select_y );
1131     res1 = spu_sel( res1, s, select_z );
1132     res2 = spu_sel( zero, negatef4(s), select_y );
1133     res2 = spu_sel( res2, c, select_z );
1134     return Matrix4(
1135         Vector4::xAxis( ),
1136         Vector4( res1 ),
1137         Vector4( res2 ),
1138         Vector4::wAxis( )
1139     );
1140 }
1141
1142 inline const Matrix4 Matrix4::rotationY( float radians )
1143 {
1144     vec_float4 s, c, res0, res2;
1145     vec_uint4 select_x, select_z;
1146     vec_float4 zero;
1147     select_x = (vec_uint4)spu_maskb(0xf000);
1148     select_z = (vec_uint4)spu_maskb(0x00f0);
1149     zero = spu_splats(0.0f);
1150     sincosf4( spu_splats(radians), &s, &c );
1151     res0 = spu_sel( zero, c, select_x );
1152     res0 = spu_sel( res0, negatef4(s), select_z );
1153     res2 = spu_sel( zero, s, select_x );
1154     res2 = spu_sel( res2, c, select_z );
1155     return Matrix4(
1156         Vector4( res0 ),
1157         Vector4::yAxis( ),
1158         Vector4( res2 ),
1159         Vector4::wAxis( )
1160     );
1161 }
1162
1163 inline const Matrix4 Matrix4::rotationZ( float radians )
1164 {
1165     vec_float4 s, c, res0, res1;
1166     vec_uint4 select_x, select_y;
1167     vec_float4 zero;
1168     select_x = (vec_uint4)spu_maskb(0xf000);
1169     select_y = (vec_uint4)spu_maskb(0x0f00);
1170     zero = spu_splats(0.0f);
1171     sincosf4( spu_splats(radians), &s, &c );
1172     res0 = spu_sel( zero, c, select_x );
1173     res0 = spu_sel( res0, s, select_y );
1174     res1 = spu_sel( zero, negatef4(s), select_x );
1175     res1 = spu_sel( res1, c, select_y );
1176     return Matrix4(
1177         Vector4( res0 ),
1178         Vector4( res1 ),
1179         Vector4::zAxis( ),
1180         Vector4::wAxis( )
1181     );
1182 }
1183
1184 inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
1185 {
1186     vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1187     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1188     angles = radiansXYZ.get128();
1189     angles = spu_insert( 0.0f, angles, 3 );
1190     sincosf4( angles, &s, &c );
1191     negS = negatef4( s );
1192     Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
1193     Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
1194     Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
1195     Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
1196     X0 = spu_shuffle( s, s, shuffle_xxxx );
1197     X1 = spu_shuffle( c, c, shuffle_xxxx );
1198     tmp = spu_mul( Z0, Y1 );
1199     return Matrix4(
1200         Vector4( spu_mul( Z0, Y0 ) ),
1201         Vector4( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
1202         Vector4( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
1203         Vector4::wAxis( )
1204     );
1205 }
1206
1207 inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
1208 {
1209     vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
1210     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1211     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1212     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1213     axis = unitVec.get128();
1214     sincosf4( spu_splats( radians ), &s, &c );
1215     xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
1216     yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
1217     zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
1218     oneMinusC = spu_sub( spu_splats(1.0f), c );
1219     axisS = spu_mul( axis, s );
1220     negAxisS = negatef4( axisS );
1221     tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
1222     tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
1223     tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
1224     tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
1225     tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
1226     tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
1227     zeroW = (vec_float4)spu_maskb(0x000f);
1228     axis = spu_andc( axis, zeroW );
1229     return Matrix4(
1230         Vector4( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
1231         Vector4( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
1232         Vector4( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
1233         Vector4::wAxis( )
1234     );
1235 }
1236
1237 inline const Matrix4 Matrix4::rotation( Quat unitQuat )
1238 {
1239     return Matrix4( Transform3::rotation( unitQuat ) );
1240 }
1241
1242 inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
1243 {
1244     vec_float4 zero = spu_splats(0.0f);
1245     return Matrix4(
1246         Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
1247         Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
1248         Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) ),
1249         Vector4::wAxis( )
1250     );
1251 }
1252
1253 inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
1254 {
1255     return Matrix4(
1256         ( mat.getCol0() * scaleVec.getX( ) ),
1257         ( mat.getCol1() * scaleVec.getY( ) ),
1258         ( mat.getCol2() * scaleVec.getZ( ) ),
1259         mat.getCol3()
1260     );
1261 }
1262
1263 inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
1264 {
1265     Vector4 scale4;
1266     scale4 = Vector4( scaleVec, 1.0f );
1267     return Matrix4(
1268         mulPerElem( mat.getCol0(), scale4 ),
1269         mulPerElem( mat.getCol1(), scale4 ),
1270         mulPerElem( mat.getCol2(), scale4 ),
1271         mulPerElem( mat.getCol3(), scale4 )
1272     );
1273 }
1274
1275 inline const Matrix4 Matrix4::translation( Vector3 translateVec )
1276 {
1277     return Matrix4(
1278         Vector4::xAxis( ),
1279         Vector4::yAxis( ),
1280         Vector4::zAxis( ),
1281         Vector4( translateVec, 1.0f )
1282     );
1283 }
1284
1285 inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
1286 {
1287     Matrix4 m4EyeFrame;
1288     Vector3 v3X, v3Y, v3Z;
1289     v3Y = normalize( upVec );
1290     v3Z = normalize( ( eyePos - lookAtPos ) );
1291     v3X = normalize( cross( v3Y, v3Z ) );
1292     v3Y = cross( v3Z, v3X );
1293     m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
1294     return orthoInverse( m4EyeFrame );
1295 }
1296
1297 inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
1298 {
1299     float f, rangeInv;
1300     vec_float4 zero, col0, col1, col2, col3;
1301     f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
1302     rangeInv = 1.0f / ( zNear - zFar );
1303     zero = spu_splats(0.0f);
1304     col0 = zero;
1305     col1 = zero;
1306     col2 = zero;
1307     col3 = zero;
1308     col0 = spu_insert( f / aspect, col0, 0 );
1309     col1 = spu_insert( f, col1, 1 );
1310     col2 = spu_insert( ( zNear + zFar ) * rangeInv, col2, 2 );
1311     col2 = spu_insert( -1.0f, col2, 3 );
1312     col3 = spu_insert( zNear * zFar * rangeInv * 2.0f, col3, 2 );
1313     return Matrix4(
1314         Vector4( col0 ),
1315         Vector4( col1 ),
1316         Vector4( col2 ),
1317         Vector4( col3 )
1318     );
1319 }
1320
1321 inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
1322 {
1323     /* function implementation based on code from STIDC SDK:           */
1324     /* --------------------------------------------------------------  */
1325     /* PLEASE DO NOT MODIFY THIS SECTION                               */
1326     /* This prolog section is automatically generated.                 */
1327     /*                                                                 */
1328     /* (C)Copyright                                                    */
1329     /* Sony Computer Entertainment, Inc.,                              */
1330     /* Toshiba Corporation,                                            */
1331     /* International Business Machines Corporation,                    */
1332     /* 2001,2002.                                                      */
1333     /* S/T/I Confidential Information                                  */
1334     /* --------------------------------------------------------------  */
1335     vec_float4 lbf, rtn;
1336     vec_float4 diff, sum, inv_diff;
1337     vec_float4 diagonal, column, near2;
1338     vec_float4 zero = spu_splats(0.0f);
1339     lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
1340     rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
1341     lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
1342     rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
1343     diff = spu_sub( rtn, lbf );
1344     sum  = spu_add( rtn, lbf );
1345     inv_diff = recipf4( diff );
1346     near2 = spu_splats( zNear );
1347     near2 = spu_add( near2, near2 );
1348     diagonal = spu_mul( near2, inv_diff );
1349     column = spu_mul( sum, inv_diff );
1350     return Matrix4(
1351         Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
1352         Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
1353         Vector4( spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) ) ),
1354         Vector4( spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) ) )
1355     );
1356 }
1357
1358 inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
1359 {
1360     /* function implementation based on code from STIDC SDK:           */
1361     /* --------------------------------------------------------------  */
1362     /* PLEASE DO NOT MODIFY THIS SECTION                               */
1363     /* This prolog section is automatically generated.                 */
1364     /*                                                                 */
1365     /* (C)Copyright                                                    */
1366     /* Sony Computer Entertainment, Inc.,                              */
1367     /* Toshiba Corporation,                                            */
1368     /* International Business Machines Corporation,                    */
1369     /* 2001,2002.                                                      */
1370     /* S/T/I Confidential Information                                  */
1371     /* --------------------------------------------------------------  */
1372     vec_float4 lbf, rtn;
1373     vec_float4 diff, sum, inv_diff, neg_inv_diff;
1374     vec_float4 diagonal, column;
1375     vec_float4 zero = spu_splats(0.0f);
1376     lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
1377     rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
1378     lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
1379     rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
1380     diff = spu_sub( rtn, lbf );
1381     sum  = spu_add( rtn, lbf );
1382     inv_diff = recipf4( diff );
1383     neg_inv_diff = negatef4( inv_diff );
1384     diagonal = spu_add( inv_diff, inv_diff );
1385     column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
1386     return Matrix4(
1387         Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
1388         Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
1389         Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) ) ),
1390         Vector4( spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ) )
1391     );
1392 }
1393
1394 inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
1395 {
1396     return Matrix4(
1397         select( mat0.getCol0(), mat1.getCol0(), select1 ),
1398         select( mat0.getCol1(), mat1.getCol1(), select1 ),
1399         select( mat0.getCol2(), mat1.getCol2(), select1 ),
1400         select( mat0.getCol3(), mat1.getCol3(), select1 )
1401     );
1402 }
1403
1404 #ifdef _VECTORMATH_DEBUG
1405
1406 inline void print( const Matrix4 & mat )
1407 {
1408     print( mat.getRow( 0 ) );
1409     print( mat.getRow( 1 ) );
1410     print( mat.getRow( 2 ) );
1411     print( mat.getRow( 3 ) );
1412 }
1413
1414 inline void print( const Matrix4 & mat, const char * name )
1415 {
1416     printf("%s:\n", name);
1417     print( mat );
1418 }
1419
1420 #endif
1421
1422 inline Transform3::Transform3( const Transform3 & tfrm )
1423 {
1424     mCol0 = tfrm.mCol0;
1425     mCol1 = tfrm.mCol1;
1426     mCol2 = tfrm.mCol2;
1427     mCol3 = tfrm.mCol3;
1428 }
1429
1430 inline Transform3::Transform3( float scalar )
1431 {
1432     mCol0 = Vector3( scalar );
1433     mCol1 = Vector3( scalar );
1434     mCol2 = Vector3( scalar );
1435     mCol3 = Vector3( scalar );
1436 }
1437
1438 inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
1439 {
1440     mCol0 = _col0;
1441     mCol1 = _col1;
1442     mCol2 = _col2;
1443     mCol3 = _col3;
1444 }
1445
1446 inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
1447 {
1448     this->setUpper3x3( tfrm );
1449     this->setTranslation( translateVec );
1450 }
1451
1452 inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
1453 {
1454     this->setUpper3x3( Matrix3( unitQuat ) );
1455     this->setTranslation( translateVec );
1456 }
1457
1458 inline Transform3 & Transform3::setCol0( Vector3 _col0 )
1459 {
1460     mCol0 = _col0;
1461     return *this;
1462 }
1463
1464 inline Transform3 & Transform3::setCol1( Vector3 _col1 )
1465 {
1466     mCol1 = _col1;
1467     return *this;
1468 }
1469
1470 inline Transform3 & Transform3::setCol2( Vector3 _col2 )
1471 {
1472     mCol2 = _col2;
1473     return *this;
1474 }
1475
1476 inline Transform3 & Transform3::setCol3( Vector3 _col3 )
1477 {
1478     mCol3 = _col3;
1479     return *this;
1480 }
1481
1482 inline Transform3 & Transform3::setCol( int col, Vector3 vec )
1483 {
1484     *(&mCol0 + col) = vec;
1485     return *this;
1486 }
1487
1488 inline Transform3 & Transform3::setRow( int row, Vector4 vec )
1489 {
1490     mCol0.setElem( row, vec.getElem( 0 ) );
1491     mCol1.setElem( row, vec.getElem( 1 ) );
1492     mCol2.setElem( row, vec.getElem( 2 ) );
1493     mCol3.setElem( row, vec.getElem( 3 ) );
1494     return *this;
1495 }
1496
1497 inline Transform3 & Transform3::setElem( int col, int row, float val )
1498 {
1499     (*this)[col].setElem(row, val);
1500     return *this;
1501 }
1502
1503 inline float Transform3::getElem( int col, int row ) const
1504 {
1505     return this->getCol( col ).getElem( row );
1506 }
1507
1508 inline const Vector3 Transform3::getCol0( ) const
1509 {
1510     return mCol0;
1511 }
1512
1513 inline const Vector3 Transform3::getCol1( ) const
1514 {
1515     return mCol1;
1516 }
1517
1518 inline const Vector3 Transform3::getCol2( ) const
1519 {
1520     return mCol2;
1521 }
1522
1523 inline const Vector3 Transform3::getCol3( ) const
1524 {
1525     return mCol3;
1526 }
1527
1528 inline const Vector3 Transform3::getCol( int col ) const
1529 {
1530     return *(&mCol0 + col);
1531 }
1532
1533 inline const Vector4 Transform3::getRow( int row ) const
1534 {
1535     return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
1536 }
1537
1538 inline Vector3 & Transform3::operator []( int col )
1539 {
1540     return *(&mCol0 + col);
1541 }
1542
1543 inline const Vector3 Transform3::operator []( int col ) const
1544 {
1545     return *(&mCol0 + col);
1546 }
1547
1548 inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
1549 {
1550     mCol0 = tfrm.mCol0;
1551     mCol1 = tfrm.mCol1;
1552     mCol2 = tfrm.mCol2;
1553     mCol3 = tfrm.mCol3;
1554     return *this;
1555 }
1556
1557 inline const Transform3 inverse( const Transform3 & tfrm )
1558 {
1559     vec_float4 inv0, inv1, inv2, inv3;
1560     vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
1561     vec_float4 xxxx, yyyy, zzzz;
1562     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1563     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1564     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1565     tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
1566     tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
1567     tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
1568     inv3 = negatef4( tfrm.getCol3().get128() );
1569     dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
1570     dot = spu_shuffle( dot, dot, shuffle_xxxx );
1571     invdet = recipf4( dot );
1572     tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
1573     tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
1574     inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
1575     xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
1576     inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
1577     inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
1578     yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
1579     zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
1580     inv3 = spu_mul( inv0, xxxx );
1581     inv3 = spu_madd( inv1, yyyy, inv3 );
1582     inv3 = spu_madd( inv2, zzzz, inv3 );
1583     inv0 = spu_mul( inv0, invdet );
1584     inv1 = spu_mul( inv1, invdet );
1585     inv2 = spu_mul( inv2, invdet );
1586     inv3 = spu_mul( inv3, invdet );
1587     return Transform3(
1588         Vector3( inv0 ),
1589         Vector3( inv1 ),
1590         Vector3( inv2 ),
1591         Vector3( inv3 )
1592     );
1593 }
1594
1595 inline const Transform3 orthoInverse( const Transform3 & tfrm )
1596 {
1597     vec_float4 inv0, inv1, inv2, inv3;
1598     vec_float4 tmp0, tmp1;
1599     vec_float4 xxxx, yyyy, zzzz;
1600     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1601     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1602     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1603     tmp0 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
1604     tmp1 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
1605     inv3 = negatef4( tfrm.getCol3().get128() );
1606     inv0 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
1607     xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
1608     inv1 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
1609     inv2 = spu_shuffle( tmp1, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
1610     yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
1611     zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
1612     inv3 = spu_mul( inv0, xxxx );
1613     inv3 = spu_madd( inv1, yyyy, inv3 );
1614     inv3 = spu_madd( inv2, zzzz, inv3 );
1615     return Transform3(
1616         Vector3( inv0 ),
1617         Vector3( inv1 ),
1618         Vector3( inv2 ),
1619         Vector3( inv3 )
1620     );
1621 }
1622
1623 inline const Transform3 absPerElem( const Transform3 & tfrm )
1624 {
1625     return Transform3(
1626         absPerElem( tfrm.getCol0() ),
1627         absPerElem( tfrm.getCol1() ),
1628         absPerElem( tfrm.getCol2() ),
1629         absPerElem( tfrm.getCol3() )
1630     );
1631 }
1632
1633 inline const Vector3 Transform3::operator *( Vector3 vec ) const
1634 {
1635     vec_float4 res;
1636     vec_float4 xxxx, yyyy, zzzz;
1637     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1638     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1639     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1640     xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
1641     yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
1642     zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
1643     res = spu_mul( mCol0.get128(), xxxx );
1644     res = spu_madd( mCol1.get128(), yyyy, res );
1645     res = spu_madd( mCol2.get128(), zzzz, res );
1646     return Vector3( res );
1647 }
1648
1649 inline const Point3 Transform3::operator *( Point3 pnt ) const
1650 {
1651     vec_float4 tmp0, tmp1, res;
1652     vec_float4 xxxx, yyyy, zzzz;
1653     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1654     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1655     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1656     xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
1657     yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
1658     zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
1659     tmp0 = spu_mul( mCol0.get128(), xxxx );
1660     tmp1 = spu_mul( mCol1.get128(), yyyy );
1661     tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
1662     tmp1 = spu_add( mCol3.get128(), tmp1 );
1663     res = spu_add( tmp0, tmp1 );
1664     return Point3( res );
1665 }
1666
1667 inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
1668 {
1669     return Transform3(
1670         ( *this * tfrm.mCol0 ),
1671         ( *this * tfrm.mCol1 ),
1672         ( *this * tfrm.mCol2 ),
1673         Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
1674     );
1675 }
1676
1677 inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
1678 {
1679     *this = *this * tfrm;
1680     return *this;
1681 }
1682
1683 inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
1684 {
1685     return Transform3(
1686         mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
1687         mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
1688         mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
1689         mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
1690     );
1691 }
1692
1693 inline const Transform3 Transform3::identity( )
1694 {
1695     return Transform3(
1696         Vector3::xAxis( ),
1697         Vector3::yAxis( ),
1698         Vector3::zAxis( ),
1699         Vector3( 0.0f )
1700     );
1701 }
1702
1703 inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
1704 {
1705     mCol0 = tfrm.getCol0();
1706     mCol1 = tfrm.getCol1();
1707     mCol2 = tfrm.getCol2();
1708     return *this;
1709 }
1710
1711 inline const Matrix3 Transform3::getUpper3x3( ) const
1712 {
1713     return Matrix3( mCol0, mCol1, mCol2 );
1714 }
1715
1716 inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
1717 {
1718     mCol3 = translateVec;
1719     return *this;
1720 }
1721
1722 inline const Vector3 Transform3::getTranslation( ) const
1723 {
1724     return mCol3;
1725 }
1726
1727 inline const Transform3 Transform3::rotationX( float radians )
1728 {
1729     vec_float4 s, c, res1, res2;
1730     vec_uint4 select_y, select_z;
1731     vec_float4 zero;
1732     select_y = (vec_uint4)spu_maskb(0x0f00);
1733     select_z = (vec_uint4)spu_maskb(0x00f0);
1734     zero = spu_splats(0.0f);
1735     sincosf4( spu_splats(radians), &s, &c );
1736     res1 = spu_sel( zero, c, select_y );
1737     res1 = spu_sel( res1, s, select_z );
1738     res2 = spu_sel( zero, negatef4(s), select_y );
1739     res2 = spu_sel( res2, c, select_z );
1740     return Transform3(
1741         Vector3::xAxis( ),
1742         Vector3( res1 ),
1743         Vector3( res2 ),
1744         Vector3( 0.0f )
1745     );
1746 }
1747
1748 inline const Transform3 Transform3::rotationY( float radians )
1749 {
1750     vec_float4 s, c, res0, res2;
1751     vec_uint4 select_x, select_z;
1752     vec_float4 zero;
1753     select_x = (vec_uint4)spu_maskb(0xf000);
1754     select_z = (vec_uint4)spu_maskb(0x00f0);
1755     zero = spu_splats(0.0f);
1756     sincosf4( spu_splats(radians), &s, &c );
1757     res0 = spu_sel( zero, c, select_x );
1758     res0 = spu_sel( res0, negatef4(s), select_z );
1759     res2 = spu_sel( zero, s, select_x );
1760     res2 = spu_sel( res2, c, select_z );
1761     return Transform3(
1762         Vector3( res0 ),
1763         Vector3::yAxis( ),
1764         Vector3( res2 ),
1765         Vector3( 0.0f )
1766     );
1767 }
1768
1769 inline const Transform3 Transform3::rotationZ( float radians )
1770 {
1771     vec_float4 s, c, res0, res1;
1772     vec_uint4 select_x, select_y;
1773     vec_float4 zero;
1774     select_x = (vec_uint4)spu_maskb(0xf000);
1775     select_y = (vec_uint4)spu_maskb(0x0f00);
1776     zero = spu_splats(0.0f);
1777     sincosf4( spu_splats(radians), &s, &c );
1778     res0 = spu_sel( zero, c, select_x );
1779     res0 = spu_sel( res0, s, select_y );
1780     res1 = spu_sel( zero, negatef4(s), select_x );
1781     res1 = spu_sel( res1, c, select_y );
1782     return Transform3(
1783         Vector3( res0 ),
1784         Vector3( res1 ),
1785         Vector3::zAxis( ),
1786         Vector3( 0.0f )
1787     );
1788 }
1789
1790 inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
1791 {
1792     vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1793     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1794     angles = radiansXYZ.get128();
1795     angles = spu_insert( 0.0f, angles, 3 );
1796     sincosf4( angles, &s, &c );
1797     negS = negatef4( s );
1798     Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
1799     Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
1800     Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
1801     Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
1802     X0 = spu_shuffle( s, s, shuffle_xxxx );
1803     X1 = spu_shuffle( c, c, shuffle_xxxx );
1804     tmp = spu_mul( Z0, Y1 );
1805     return Transform3(
1806         Vector3( spu_mul( Z0, Y0 ) ),
1807         Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
1808         Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
1809         Vector3( 0.0f )
1810     );
1811 }
1812
1813 inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
1814 {
1815     return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
1816 }
1817
1818 inline const Transform3 Transform3::rotation( Quat unitQuat )
1819 {
1820     return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
1821 }
1822
1823 inline const Transform3 Transform3::scale( Vector3 scaleVec )
1824 {
1825     vec_float4 zero = spu_splats(0.0f);
1826     return Transform3(
1827         Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
1828         Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
1829         Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) ),
1830         Vector3( 0.0f )
1831     );
1832 }
1833
1834 inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
1835 {
1836     return Transform3(
1837         ( tfrm.getCol0() * scaleVec.getX( ) ),
1838         ( tfrm.getCol1() * scaleVec.getY( ) ),
1839         ( tfrm.getCol2() * scaleVec.getZ( ) ),
1840         tfrm.getCol3()
1841     );
1842 }
1843
1844 inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
1845 {
1846     return Transform3(
1847         mulPerElem( tfrm.getCol0(), scaleVec ),
1848         mulPerElem( tfrm.getCol1(), scaleVec ),
1849         mulPerElem( tfrm.getCol2(), scaleVec ),
1850         mulPerElem( tfrm.getCol3(), scaleVec )
1851     );
1852 }
1853
1854 inline const Transform3 Transform3::translation( Vector3 translateVec )
1855 {
1856     return Transform3(
1857         Vector3::xAxis( ),
1858         Vector3::yAxis( ),
1859         Vector3::zAxis( ),
1860         translateVec
1861     );
1862 }
1863
1864 inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
1865 {
1866     return Transform3(
1867         select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
1868         select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
1869         select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
1870         select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
1871     );
1872 }
1873
1874 #ifdef _VECTORMATH_DEBUG
1875
1876 inline void print( const Transform3 & tfrm )
1877 {
1878     print( tfrm.getRow( 0 ) );
1879     print( tfrm.getRow( 1 ) );
1880     print( tfrm.getRow( 2 ) );
1881 }
1882
1883 inline void print( const Transform3 & tfrm, const char * name )
1884 {
1885     printf("%s:\n", name);
1886     print( tfrm );
1887 }
1888
1889 #endif
1890
1891 inline Quat::Quat( const Matrix3 & tfrm )
1892 {
1893     vec_float4 res;
1894     vec_float4 col0, col1, col2;
1895     vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
1896     vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
1897     vec_float4 radicand, invSqrt, scale;
1898     vec_float4 res0, res1, res2, res3;
1899     vec_float4 xx, yy, zz;
1900     vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
1901     vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
1902     vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
1903     vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
1904     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
1905     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
1906     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
1907     vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
1908
1909     col0 = tfrm.getCol0().get128();
1910     col1 = tfrm.getCol1().get128();
1911     col2 = tfrm.getCol2().get128();
1912
1913     /* four cases: */
1914     /* trace > 0 */
1915     /* else */
1916     /*    xx largest diagonal element */
1917     /*    yy largest diagonal element */
1918     /*    zz largest diagonal element */
1919
1920     /* compute quaternion for each case */
1921
1922     xx_yy = spu_sel( col0, col1, select_y );
1923     xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
1924     yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
1925     zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
1926
1927     diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
1928     diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
1929     radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
1930     invSqrt = rsqrtf4( radicand );
1931
1932     zy_xz_yx = spu_sel( col0, col1, select_z );
1933     zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
1934     yz_zx_xy = spu_sel( col0, col1, select_x );
1935     yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
1936
1937     sum = spu_add( zy_xz_yx, yz_zx_xy );
1938     diff = spu_sub( zy_xz_yx, yz_zx_xy );
1939
1940     scale = spu_mul( invSqrt, spu_splats(0.5f) );
1941     res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
1942     res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
1943     res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
1944     res3 = diff;
1945     res0 = spu_sel( res0, radicand, select_x );
1946     res1 = spu_sel( res1, radicand, select_y );
1947     res2 = spu_sel( res2, radicand, select_z );
1948     res3 = spu_sel( res3, radicand, select_w );
1949     res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
1950     res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
1951     res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
1952     res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
1953
1954     /* determine case and select answer */
1955
1956     xx = spu_shuffle( col0, col0, shuffle_xxxx );
1957     yy = spu_shuffle( col1, col1, shuffle_yyyy );
1958     zz = spu_shuffle( col2, col2, shuffle_zzzz );
1959     res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
1960     res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
1961     res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
1962     mVec128 = res;
1963 }
1964
1965 inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
1966 {
1967     return Matrix3(
1968         ( tfrm0 * tfrm1.getX( ) ),
1969         ( tfrm0 * tfrm1.getY( ) ),
1970         ( tfrm0 * tfrm1.getZ( ) )
1971     );
1972 }
1973
1974 inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
1975 {
1976     return Matrix4(
1977         ( tfrm0 * tfrm1.getX( ) ),
1978         ( tfrm0 * tfrm1.getY( ) ),
1979         ( tfrm0 * tfrm1.getZ( ) ),
1980         ( tfrm0 * tfrm1.getW( ) )
1981     );
1982 }
1983
1984 inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
1985 {
1986     vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
1987     vec_float4 xxxx, yyyy, zzzz;
1988     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
1989     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
1990     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
1991     tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
1992     tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
1993     xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
1994     mcol0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
1995     mcol1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
1996     mcol2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
1997     yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
1998     res = spu_mul( mcol0, xxxx );
1999     zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
2000     res = spu_madd( mcol1, yyyy, res );
2001     res = spu_madd( mcol2, zzzz, res );
2002     return Vector3( res );
2003 }
2004
2005 inline const Matrix3 crossMatrix( Vector3 vec )
2006 {
2007     vec_float4 neg, res0, res1, res2;
2008     neg = negatef4( vec.get128() );
2009     res0 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_0ZB0 );
2010     res1 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_C0X0 );
2011     res2 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_YA00 );
2012     return Matrix3(
2013         Vector3( res0 ),
2014         Vector3( res1 ),
2015         Vector3( res2 )
2016     );
2017 }
2018
2019 inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
2020 {
2021     return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
2022 }
2023
2024 } // namespace Aos
2025 } // namespace Vectormath
2026
2027 #endif