Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h

   1 /*
   2    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   3    All rights reserved.
   4
   5    Redistribution and use in source and binary forms,
   6    with or without modification, are permitted provided that the
   7    following conditions are met:
   8     * Redistributions of source code must retain the above copyright
   9       notice, this list of conditions and the following disclaimer.
  10     * Redistributions in binary form must reproduce the above copyright
  11       notice, this list of conditions and the following disclaimer in the
  12       documentation and/or other materials provided with the distribution.
  13     * Neither the name of the Sony Computer Entertainment Inc nor the names
  14       of its contributors may be used to endorse or promote products derived
  15       from this software without specific prior written permission.
  16
  17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27    POSSIBILITY OF SUCH DAMAGE.
  28 */
  29
  30 #ifndef _VECTORMATH_VEC_AOS_CPP_H
  31 #define _VECTORMATH_VEC_AOS_CPP_H
  32 //-----------------------------------------------------------------------------
  33 // Constants
  34 // for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
  35
  36 #define _VECTORMATH_SHUF_X 0x00010203
  37 #define _VECTORMATH_SHUF_Y 0x04050607
  38 #define _VECTORMATH_SHUF_Z 0x08090a0b
  39 #define _VECTORMATH_SHUF_W 0x0c0d0e0f
  40 #define _VECTORMATH_SHUF_A 0x10111213
  41 #define _VECTORMATH_SHUF_B 0x14151617
  42 #define _VECTORMATH_SHUF_C 0x18191a1b
  43 #define _VECTORMATH_SHUF_D 0x1c1d1e1f
  44 #define _VECTORMATH_SHUF_0 0x80808080
  45 #define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
  46 #define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
  47 #define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
  48 #define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
  49 #define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
  50 #define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
  51 #define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
  52 #define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
  53 #define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
  54 #define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
  55 #define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
  56 #define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
  57 #define _VECTORMATH_SLERP_TOL 0.999f
  58
  59 //-----------------------------------------------------------------------------
  60 // Definitions
  61
  62 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
  63 #define _VECTORMATH_INTERNAL_FUNCTIONS
  64
  65 static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
  66 {
  67     vec_float4 result;
  68     result = spu_mul( vec0, vec1 );
  69     result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
  70     return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
  71 }
  72
  73 static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
  74 {
  75     vec_float4 result;
  76     result = spu_mul( vec0, vec1 );
  77     result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
  78     return spu_add( spu_rlqwbyte( result, 8 ), result );
  79 }
  80
  81 static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
  82 {
  83     vec_float4 tmp0, tmp1, tmp2, tmp3, result;
  84     tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
  85     tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
  86     tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
  87     tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
  88     result = spu_mul( tmp0, tmp1 );
  89     result = spu_nmsub( tmp2, tmp3, result );
  90     return result;
  91 }
  92
  93 static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
  94 {
  95     vec_int4 bexp;
  96     vec_uint4 mant, sign, hfloat;
  97     vec_uint4 notZero, isInf;
  98     const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
  99     const vec_uint4 mergeMant = spu_splats(0x000003ffu);
 100     const vec_uint4 mergeSign = spu_splats(0x00008000u);
 101
 102     sign = spu_rlmask((vec_uint4)v, -16);
 103     mant = spu_rlmask((vec_uint4)v, -13);
 104     bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
 105
 106     notZero = spu_cmpgt(bexp, 112);
 107     isInf = spu_cmpgt(bexp, 142);
 108
 109     bexp = spu_add(bexp, -112);
 110     bexp = spu_sl(bexp, 10);
 111
 112     hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
 113     hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
 114     hfloat = spu_sel(hfloat, hfloatInf, isInf);
 115     hfloat = spu_sel(hfloat, sign, mergeSign);
 116
 117     return hfloat;
 118 }
 119
 120 static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
 121 {
 122     vec_uint4 hfloat_u, hfloat_v;
 123     const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
 124     hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
 125     hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
 126     return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
 127 }
 128
 129 #endif
 130
 131 namespace Vectormath {
 132 namespace Aos {
 133
 134 inline VecIdx::operator float() const
 135 {
 136     return spu_extract( ref, i );
 137 }
 138
 139 inline float VecIdx::operator =( float scalar )
 140 {
 141     ref = spu_insert( scalar, ref, i );
 142     return scalar;
 143 }
 144
 145 inline float VecIdx::operator =( const VecIdx& scalar )
 146 {
 147     return *this = float(scalar);
 148 }
 149
 150 inline float VecIdx::operator *=( float scalar )
 151 {
 152     float tmp = spu_extract( ref, i ) * scalar;
 153     ref = spu_insert( tmp, ref, i );
 154     return tmp;
 155 }
 156
 157 inline float VecIdx::operator /=( float scalar )
 158 {
 159     float tmp = spu_extract( ref, i ) / scalar;
 160     ref = spu_insert( tmp, ref, i );
 161     return tmp;
 162 }
 163
 164 inline float VecIdx::operator +=( float scalar )
 165 {
 166     float tmp = spu_extract( ref, i ) + scalar;
 167     ref = spu_insert( tmp, ref, i );
 168     return tmp;
 169 }
 170
 171 inline float VecIdx::operator -=( float scalar )
 172 {
 173     float tmp = spu_extract( ref, i ) - scalar;
 174     ref = spu_insert( tmp, ref, i );
 175     return tmp;
 176 }
 177
 178 inline Vector3::Vector3( float _x, float _y, float _z )
 179 {
 180     mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
 181 }
 182
 183 inline Vector3::Vector3( Point3 pnt )
 184 {
 185     mVec128 = pnt.get128();
 186 }
 187
 188 inline Vector3::Vector3( float scalar )
 189 {
 190     mVec128 = spu_splats( scalar );
 191 }
 192
 193 inline Vector3::Vector3( vec_float4 vf4 )
 194 {
 195     mVec128 = vf4;
 196 }
 197
 198 inline const Vector3 Vector3::xAxis( )
 199 {
 200     return Vector3( _VECTORMATH_UNIT_1000 );
 201 }
 202
 203 inline const Vector3 Vector3::yAxis( )
 204 {
 205     return Vector3( _VECTORMATH_UNIT_0100 );
 206 }
 207
 208 inline const Vector3 Vector3::zAxis( )
 209 {
 210     return Vector3( _VECTORMATH_UNIT_0010 );
 211 }
 212
 213 inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
 214 {
 215     return ( vec0 + ( ( vec1 - vec0 ) * t ) );
 216 }
 217
 218 inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
 219 {
 220     vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
 221     vec_uint4 selectMask;
 222     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 223     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 224     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 225     cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
 226     cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
 227     selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
 228     angle = acosf4( cosAngle );
 229     tttt = spu_splats(t);
 230     oneMinusT = spu_sub( spu_splats(1.0f), tttt );
 231     angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
 232     angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
 233     angles = spu_mul( angles, angle );
 234     sines = sinf4( angles );
 235     scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
 236     scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
 237     scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
 238     return Vector3( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
 239 }
 240
 241 inline vec_float4 Vector3::get128( ) const
 242 {
 243     return mVec128;
 244 }
 245
 246 inline void storeXYZ( Vector3 vec, vec_float4 * quad )
 247 {
 248     vec_float4 dstVec = *quad;
 249     vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
 250     dstVec = spu_sel(vec.get128(), dstVec, mask);
 251     *quad = dstVec;
 252 }
 253
 254 inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
 255 {
 256     vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
 257     xyzx = threeQuads[0];
 258     yzxy = threeQuads[1];
 259     zxyz = threeQuads[2];
 260     xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
 261     xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
 262     xyz3 = spu_rlqwbyte( zxyz, 4 );
 263     vec0 = Vector3( xyzx );
 264     vec1 = Vector3( xyz1 );
 265     vec2 = Vector3( xyz2 );
 266     vec3 = Vector3( xyz3 );
 267 }
 268
 269 inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
 270 {
 271     vec_float4 xyzx, yzxy, zxyz;
 272     xyzx = spu_shuffle( vec0.get128(), vec1.get128(), _VECTORMATH_SHUF_XYZA );
 273     yzxy = spu_shuffle( vec1.get128(), vec2.get128(), _VECTORMATH_SHUF_YZAB );
 274     zxyz = spu_shuffle( vec2.get128(), vec3.get128(), _VECTORMATH_SHUF_ZABC );
 275     threeQuads[0] = xyzx;
 276     threeQuads[1] = yzxy;
 277     threeQuads[2] = zxyz;
 278 }
 279
 280 inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
 281 {
 282     vec_float4 xyz0[3];
 283     vec_float4 xyz1[3];
 284     storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
 285     storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
 286     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
 287     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
 288     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
 289 }
 290
 291 inline Vector3 & Vector3::operator =( Vector3 vec )
 292 {
 293     mVec128 = vec.mVec128;
 294     return *this;
 295 }
 296
 297 inline Vector3 & Vector3::setX( float _x )
 298 {
 299     mVec128 = spu_insert( _x, mVec128, 0 );
 300     return *this;
 301 }
 302
 303 inline float Vector3::getX( ) const
 304 {
 305     return spu_extract( mVec128, 0 );
 306 }
 307
 308 inline Vector3 & Vector3::setY( float _y )
 309 {
 310     mVec128 = spu_insert( _y, mVec128, 1 );
 311     return *this;
 312 }
 313
 314 inline float Vector3::getY( ) const
 315 {
 316     return spu_extract( mVec128, 1 );
 317 }
 318
 319 inline Vector3 & Vector3::setZ( float _z )
 320 {
 321     mVec128 = spu_insert( _z, mVec128, 2 );
 322     return *this;
 323 }
 324
 325 inline float Vector3::getZ( ) const
 326 {
 327     return spu_extract( mVec128, 2 );
 328 }
 329
 330 inline Vector3 & Vector3::setElem( int idx, float value )
 331 {
 332     mVec128 = spu_insert( value, mVec128, idx );
 333     return *this;
 334 }
 335
 336 inline float Vector3::getElem( int idx ) const
 337 {
 338     return spu_extract( mVec128, idx );
 339 }
 340
 341 inline VecIdx Vector3::operator []( int idx )
 342 {
 343     return VecIdx( mVec128, idx );
 344 }
 345
 346 inline float Vector3::operator []( int idx ) const
 347 {
 348     return spu_extract( mVec128, idx );
 349 }
 350
 351 inline const Vector3 Vector3::operator +( Vector3 vec ) const
 352 {
 353     return Vector3( spu_add( mVec128, vec.mVec128 ) );
 354 }
 355
 356 inline const Vector3 Vector3::operator -( Vector3 vec ) const
 357 {
 358     return Vector3( spu_sub( mVec128, vec.mVec128 ) );
 359 }
 360
 361 inline const Point3 Vector3::operator +( Point3 pnt ) const
 362 {
 363     return Point3( spu_add( mVec128, pnt.get128() ) );
 364 }
 365
 366 inline const Vector3 Vector3::operator *( float scalar ) const
 367 {
 368     return Vector3( spu_mul( mVec128, spu_splats(scalar) ) );
 369 }
 370
 371 inline Vector3 & Vector3::operator +=( Vector3 vec )
 372 {
 373     *this = *this + vec;
 374     return *this;
 375 }
 376
 377 inline Vector3 & Vector3::operator -=( Vector3 vec )
 378 {
 379     *this = *this - vec;
 380     return *this;
 381 }
 382
 383 inline Vector3 & Vector3::operator *=( float scalar )
 384 {
 385     *this = *this * scalar;
 386     return *this;
 387 }
 388
 389 inline const Vector3 Vector3::operator /( float scalar ) const
 390 {
 391     return Vector3( divf4( mVec128, spu_splats(scalar) ) );
 392 }
 393
 394 inline Vector3 & Vector3::operator /=( float scalar )
 395 {
 396     *this = *this / scalar;
 397     return *this;
 398 }
 399
 400 inline const Vector3 Vector3::operator -( ) const
 401 {
 402     return Vector3( negatef4( mVec128 ) );
 403 }
 404
 405 inline const Vector3 operator *( float scalar, Vector3 vec )
 406 {
 407     return vec * scalar;
 408 }
 409
 410 inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
 411 {
 412     return Vector3( spu_mul( vec0.get128(), vec1.get128() ) );
 413 }
 414
 415 inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
 416 {
 417     return Vector3( divf4( vec0.get128(), vec1.get128() ) );
 418 }
 419
 420 inline const Vector3 recipPerElem( Vector3 vec )
 421 {
 422     return Vector3( recipf4( vec.get128() ) );
 423 }
 424
 425 inline const Vector3 sqrtPerElem( Vector3 vec )
 426 {
 427     return Vector3( sqrtf4( vec.get128() ) );
 428 }
 429
 430 inline const Vector3 rsqrtPerElem( Vector3 vec )
 431 {
 432     return Vector3( rsqrtf4( vec.get128() ) );
 433 }
 434
 435 inline const Vector3 absPerElem( Vector3 vec )
 436 {
 437     return Vector3( fabsf4( vec.get128() ) );
 438 }
 439
 440 inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
 441 {
 442     return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
 443 }
 444
 445 inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
 446 {
 447     return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
 448 }
 449
 450 inline float maxElem( Vector3 vec )
 451 {
 452     vec_float4 result;
 453     result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
 454     result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
 455     return spu_extract( result, 0 );
 456 }
 457
 458 inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
 459 {
 460     return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
 461 }
 462
 463 inline float minElem( Vector3 vec )
 464 {
 465     vec_float4 result;
 466     result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
 467     result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
 468     return spu_extract( result, 0 );
 469 }
 470
 471 inline float sum( Vector3 vec )
 472 {
 473     return
 474         spu_extract( vec.get128(), 0 ) +
 475         spu_extract( vec.get128(), 1 ) +
 476         spu_extract( vec.get128(), 2 );
 477 }
 478
 479 inline float dot( Vector3 vec0, Vector3 vec1 )
 480 {
 481     return spu_extract( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
 482 }
 483
 484 inline float lengthSqr( Vector3 vec )
 485 {
 486     return spu_extract( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
 487 }
 488
 489 inline float length( Vector3 vec )
 490 {
 491     return sqrtf( lengthSqr( vec ) );
 492 }
 493
 494 inline const Vector3 normalize( Vector3 vec )
 495 {
 496     vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
 497     dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
 498     return Vector3( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
 499 }
 500
 501 inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
 502 {
 503     return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
 504 }
 505
 506 inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
 507 {
 508     return Vector3( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
 509 }
 510
 511 #ifdef _VECTORMATH_DEBUG
 512
 513 inline void print( Vector3 vec )
 514 {
 515     union { vec_float4 v; float s[4]; } tmp;
 516     tmp.v = vec.get128();
 517     printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
 518 }
 519
 520 inline void print( Vector3 vec, const char * name )
 521 {
 522     union { vec_float4 v; float s[4]; } tmp;
 523     tmp.v = vec.get128();
 524     printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
 525 }
 526
 527 #endif
 528
 529 inline Vector4::Vector4( float _x, float _y, float _z, float _w )
 530 {
 531     mVec128 = (vec_float4){ _x, _y, _z, _w };
 532 }
 533
 534 inline Vector4::Vector4( Vector3 xyz, float _w )
 535 {
 536     mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
 537 }
 538
 539 inline Vector4::Vector4( Vector3 vec )
 540 {
 541     mVec128 = spu_sel( vec.get128(), spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
 542 }
 543
 544 inline Vector4::Vector4( Point3 pnt )
 545 {
 546     mVec128 = spu_sel( pnt.get128(), spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
 547 }
 548
 549 inline Vector4::Vector4( Quat quat )
 550 {
 551     mVec128 = quat.get128();
 552 }
 553
 554 inline Vector4::Vector4( float scalar )
 555 {
 556     mVec128 = spu_splats( scalar );
 557 }
 558
 559 inline Vector4::Vector4( vec_float4 vf4 )
 560 {
 561     mVec128 = vf4;
 562 }
 563
 564 inline const Vector4 Vector4::xAxis( )
 565 {
 566     return Vector4( _VECTORMATH_UNIT_1000 );
 567 }
 568
 569 inline const Vector4 Vector4::yAxis( )
 570 {
 571     return Vector4( _VECTORMATH_UNIT_0100 );
 572 }
 573
 574 inline const Vector4 Vector4::zAxis( )
 575 {
 576     return Vector4( _VECTORMATH_UNIT_0010 );
 577 }
 578
 579 inline const Vector4 Vector4::wAxis( )
 580 {
 581     return Vector4( _VECTORMATH_UNIT_0001 );
 582 }
 583
 584 inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
 585 {
 586     return ( vec0 + ( ( vec1 - vec0 ) * t ) );
 587 }
 588
 589 inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
 590 {
 591     vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
 592     vec_uint4 selectMask;
 593     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 594     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 595     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 596     cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
 597     cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
 598     selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
 599     angle = acosf4( cosAngle );
 600     tttt = spu_splats(t);
 601     oneMinusT = spu_sub( spu_splats(1.0f), tttt );
 602     angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
 603     angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
 604     angles = spu_mul( angles, angle );
 605     sines = sinf4( angles );
 606     scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
 607     scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
 608     scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
 609     return Vector4( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
 610 }
 611
 612 inline vec_float4 Vector4::get128( ) const
 613 {
 614     return mVec128;
 615 }
 616
 617 inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
 618 {
 619     twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
 620     twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
 621 }
 622
 623 inline Vector4 & Vector4::operator =( Vector4 vec )
 624 {
 625     mVec128 = vec.mVec128;
 626     return *this;
 627 }
 628
 629 inline Vector4 & Vector4::setXYZ( Vector3 vec )
 630 {
 631     mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
 632     return *this;
 633 }
 634
 635 inline const Vector3 Vector4::getXYZ( ) const
 636 {
 637     return Vector3( mVec128 );
 638 }
 639
 640 inline Vector4 & Vector4::setX( float _x )
 641 {
 642     mVec128 = spu_insert( _x, mVec128, 0 );
 643     return *this;
 644 }
 645
 646 inline float Vector4::getX( ) const
 647 {
 648     return spu_extract( mVec128, 0 );
 649 }
 650
 651 inline Vector4 & Vector4::setY( float _y )
 652 {
 653     mVec128 = spu_insert( _y, mVec128, 1 );
 654     return *this;
 655 }
 656
 657 inline float Vector4::getY( ) const
 658 {
 659     return spu_extract( mVec128, 1 );
 660 }
 661
 662 inline Vector4 & Vector4::setZ( float _z )
 663 {
 664     mVec128 = spu_insert( _z, mVec128, 2 );
 665     return *this;
 666 }
 667
 668 inline float Vector4::getZ( ) const
 669 {
 670     return spu_extract( mVec128, 2 );
 671 }
 672
 673 inline Vector4 & Vector4::setW( float _w )
 674 {
 675     mVec128 = spu_insert( _w, mVec128, 3 );
 676     return *this;
 677 }
 678
 679 inline float Vector4::getW( ) const
 680 {
 681     return spu_extract( mVec128, 3 );
 682 }
 683
 684 inline Vector4 & Vector4::setElem( int idx, float value )
 685 {
 686     mVec128 = spu_insert( value, mVec128, idx );
 687     return *this;
 688 }
 689
 690 inline float Vector4::getElem( int idx ) const
 691 {
 692     return spu_extract( mVec128, idx );
 693 }
 694
 695 inline VecIdx Vector4::operator []( int idx )
 696 {
 697     return VecIdx( mVec128, idx );
 698 }
 699
 700 inline float Vector4::operator []( int idx ) const
 701 {
 702     return spu_extract( mVec128, idx );
 703 }
 704
 705 inline const Vector4 Vector4::operator +( Vector4 vec ) const
 706 {
 707     return Vector4( spu_add( mVec128, vec.mVec128 ) );
 708 }
 709
 710 inline const Vector4 Vector4::operator -( Vector4 vec ) const
 711 {
 712     return Vector4( spu_sub( mVec128, vec.mVec128 ) );
 713 }
 714
 715 inline const Vector4 Vector4::operator *( float scalar ) const
 716 {
 717     return Vector4( spu_mul( mVec128, spu_splats(scalar) ) );
 718 }
 719
 720 inline Vector4 & Vector4::operator +=( Vector4 vec )
 721 {
 722     *this = *this + vec;
 723     return *this;
 724 }
 725
 726 inline Vector4 & Vector4::operator -=( Vector4 vec )
 727 {
 728     *this = *this - vec;
 729     return *this;
 730 }
 731
 732 inline Vector4 & Vector4::operator *=( float scalar )
 733 {
 734     *this = *this * scalar;
 735     return *this;
 736 }
 737
 738 inline const Vector4 Vector4::operator /( float scalar ) const
 739 {
 740     return Vector4( divf4( mVec128, spu_splats(scalar) ) );
 741 }
 742
 743 inline Vector4 & Vector4::operator /=( float scalar )
 744 {
 745     *this = *this / scalar;
 746     return *this;
 747 }
 748
 749 inline const Vector4 Vector4::operator -( ) const
 750 {
 751     return Vector4( negatef4( mVec128 ) );
 752 }
 753
 754 inline const Vector4 operator *( float scalar, Vector4 vec )
 755 {
 756     return vec * scalar;
 757 }
 758
 759 inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
 760 {
 761     return Vector4( spu_mul( vec0.get128(), vec1.get128() ) );
 762 }
 763
 764 inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
 765 {
 766     return Vector4( divf4( vec0.get128(), vec1.get128() ) );
 767 }
 768
 769 inline const Vector4 recipPerElem( Vector4 vec )
 770 {
 771     return Vector4( recipf4( vec.get128() ) );
 772 }
 773
 774 inline const Vector4 sqrtPerElem( Vector4 vec )
 775 {
 776     return Vector4( sqrtf4( vec.get128() ) );
 777 }
 778
 779 inline const Vector4 rsqrtPerElem( Vector4 vec )
 780 {
 781     return Vector4( rsqrtf4( vec.get128() ) );
 782 }
 783
 784 inline const Vector4 absPerElem( Vector4 vec )
 785 {
 786     return Vector4( fabsf4( vec.get128() ) );
 787 }
 788
 789 inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
 790 {
 791     return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
 792 }
 793
 794 inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
 795 {
 796     return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
 797 }
 798
 799 inline float maxElem( Vector4 vec )
 800 {
 801     vec_float4 result;
 802     result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
 803     result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
 804     result = fmaxf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
 805     return spu_extract( result, 0 );
 806 }
 807
 808 inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
 809 {
 810     return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
 811 }
 812
 813 inline float minElem( Vector4 vec )
 814 {
 815     vec_float4 result;
 816     result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
 817     result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
 818     result = fminf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
 819     return spu_extract( result, 0 );
 820 }
 821
 822 inline float sum( Vector4 vec )
 823 {
 824     return
 825         spu_extract( vec.get128(), 0 ) +
 826         spu_extract( vec.get128(), 1 ) +
 827         spu_extract( vec.get128(), 2 ) +
 828         spu_extract( vec.get128(), 3 );
 829 }
 830
 831 inline float dot( Vector4 vec0, Vector4 vec1 )
 832 {
 833     return spu_extract( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
 834 }
 835
 836 inline float lengthSqr( Vector4 vec )
 837 {
 838     return spu_extract( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
 839 }
 840
 841 inline float length( Vector4 vec )
 842 {
 843     return sqrtf( lengthSqr( vec ) );
 844 }
 845
 846 inline const Vector4 normalize( Vector4 vec )
 847 {
 848     vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
 849     return Vector4( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
 850 }
 851
 852 inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
 853 {
 854     return Vector4( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
 855 }
 856
 857 #ifdef _VECTORMATH_DEBUG
 858
 859 inline void print( Vector4 vec )
 860 {
 861     union { vec_float4 v; float s[4]; } tmp;
 862     tmp.v = vec.get128();
 863     printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
 864 }
 865
 866 inline void print( Vector4 vec, const char * name )
 867 {
 868     union { vec_float4 v; float s[4]; } tmp;
 869     tmp.v = vec.get128();
 870     printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
 871 }
 872
 873 #endif
 874
 875 inline Point3::Point3( float _x, float _y, float _z )
 876 {
 877     mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
 878 }
 879
 880 inline Point3::Point3( Vector3 vec )
 881 {
 882     mVec128 = vec.get128();
 883 }
 884
 885 inline Point3::Point3( float scalar )
 886 {
 887     mVec128 = spu_splats( scalar );
 888 }
 889
 890 inline Point3::Point3( vec_float4 vf4 )
 891 {
 892     mVec128 = vf4;
 893 }
 894
 895 inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
 896 {
 897     return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
 898 }
 899
 900 inline vec_float4 Point3::get128( ) const
 901 {
 902     return mVec128;
 903 }
 904
 905 inline void storeXYZ( Point3 pnt, vec_float4 * quad )
 906 {
 907     vec_float4 dstVec = *quad;
 908     vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
 909     dstVec = spu_sel(pnt.get128(), dstVec, mask);
 910     *quad = dstVec;
 911 }
 912
 913 inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
 914 {
 915     vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
 916     xyzx = threeQuads[0];
 917     yzxy = threeQuads[1];
 918     zxyz = threeQuads[2];
 919     xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
 920     xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
 921     xyz3 = spu_rlqwbyte( zxyz, 4 );
 922     pnt0 = Point3( xyzx );
 923     pnt1 = Point3( xyz1 );
 924     pnt2 = Point3( xyz2 );
 925     pnt3 = Point3( xyz3 );
 926 }
 927
 928 inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
 929 {
 930     vec_float4 xyzx, yzxy, zxyz;
 931     xyzx = spu_shuffle( pnt0.get128(), pnt1.get128(), _VECTORMATH_SHUF_XYZA );
 932     yzxy = spu_shuffle( pnt1.get128(), pnt2.get128(), _VECTORMATH_SHUF_YZAB );
 933     zxyz = spu_shuffle( pnt2.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZABC );
 934     threeQuads[0] = xyzx;
 935     threeQuads[1] = yzxy;
 936     threeQuads[2] = zxyz;
 937 }
 938
 939 inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
 940 {
 941     vec_float4 xyz0[3];
 942     vec_float4 xyz1[3];
 943     storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
 944     storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
 945     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
 946     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
 947     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
 948 }
 949
 950 inline Point3 & Point3::operator =( Point3 pnt )
 951 {
 952     mVec128 = pnt.mVec128;
 953     return *this;
 954 }
 955
 956 inline Point3 & Point3::setX( float _x )
 957 {
 958     mVec128 = spu_insert( _x, mVec128, 0 );
 959     return *this;
 960 }
 961
 962 inline float Point3::getX( ) const
 963 {
 964     return spu_extract( mVec128, 0 );
 965 }
 966
 967 inline Point3 & Point3::setY( float _y )
 968 {
 969     mVec128 = spu_insert( _y, mVec128, 1 );
 970     return *this;
 971 }
 972
 973 inline float Point3::getY( ) const
 974 {
 975     return spu_extract( mVec128, 1 );
 976 }
 977
 978 inline Point3 & Point3::setZ( float _z )
 979 {
 980     mVec128 = spu_insert( _z, mVec128, 2 );
 981     return *this;
 982 }
 983
 984 inline float Point3::getZ( ) const
 985 {
 986     return spu_extract( mVec128, 2 );
 987 }
 988
 989 inline Point3 & Point3::setElem( int idx, float value )
 990 {
 991     mVec128 = spu_insert( value, mVec128, idx );
 992     return *this;
 993 }
 994
 995 inline float Point3::getElem( int idx ) const
 996 {
 997     return spu_extract( mVec128, idx );
 998 }
 999
1000 inline VecIdx Point3::operator []( int idx )
1001 {
1002     return VecIdx( mVec128, idx );
1003 }
1004
1005 inline float Point3::operator []( int idx ) const
1006 {
1007     return spu_extract( mVec128, idx );
1008 }
1009
1010 inline const Vector3 Point3::operator -( Point3 pnt ) const
1011 {
1012     return Vector3( spu_sub( mVec128, pnt.mVec128 ) );
1013 }
1014
1015 inline const Point3 Point3::operator +( Vector3 vec ) const
1016 {
1017     return Point3( spu_add( mVec128, vec.get128() ) );
1018 }
1019
1020 inline const Point3 Point3::operator -( Vector3 vec ) const
1021 {
1022     return Point3( spu_sub( mVec128, vec.get128() ) );
1023 }
1024
1025 inline Point3 & Point3::operator +=( Vector3 vec )
1026 {
1027     *this = *this + vec;
1028     return *this;
1029 }
1030
1031 inline Point3 & Point3::operator -=( Vector3 vec )
1032 {
1033     *this = *this - vec;
1034     return *this;
1035 }
1036
1037 inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
1038 {
1039     return Point3( spu_mul( pnt0.get128(), pnt1.get128() ) );
1040 }
1041
1042 inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
1043 {
1044     return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
1045 }
1046
1047 inline const Point3 recipPerElem( Point3 pnt )
1048 {
1049     return Point3( recipf4( pnt.get128() ) );
1050 }
1051
1052 inline const Point3 sqrtPerElem( Point3 pnt )
1053 {
1054     return Point3( sqrtf4( pnt.get128() ) );
1055 }
1056
1057 inline const Point3 rsqrtPerElem( Point3 pnt )
1058 {
1059     return Point3( rsqrtf4( pnt.get128() ) );
1060 }
1061
1062 inline const Point3 absPerElem( Point3 pnt )
1063 {
1064     return Point3( fabsf4( pnt.get128() ) );
1065 }
1066
1067 inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
1068 {
1069     return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
1070 }
1071
1072 inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
1073 {
1074     return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
1075 }
1076
1077 inline float maxElem( Point3 pnt )
1078 {
1079     vec_float4 result;
1080     result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
1081     result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
1082     return spu_extract( result, 0 );
1083 }
1084
1085 inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
1086 {
1087     return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
1088 }
1089
1090 inline float minElem( Point3 pnt )
1091 {
1092     vec_float4 result;
1093     result = fminf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
1094     result = fminf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
1095     return spu_extract( result, 0 );
1096 }
1097
1098 inline float sum( Point3 pnt )
1099 {
1100     return
1101         spu_extract( pnt.get128(), 0 ) +
1102         spu_extract( pnt.get128(), 1 ) +
1103         spu_extract( pnt.get128(), 2 );
1104 }
1105
1106 inline const Point3 scale( Point3 pnt, float scaleVal )
1107 {
1108     return mulPerElem( pnt, Point3( scaleVal ) );
1109 }
1110
1111 inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
1112 {
1113     return mulPerElem( pnt, Point3( scaleVec ) );
1114 }
1115
1116 inline float projection( Point3 pnt, Vector3 unitVec )
1117 {
1118     return spu_extract( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
1119 }
1120
1121 inline float distSqrFromOrigin( Point3 pnt )
1122 {
1123     return lengthSqr( Vector3( pnt ) );
1124 }
1125
1126 inline float distFromOrigin( Point3 pnt )
1127 {
1128     return length( Vector3( pnt ) );
1129 }
1130
1131 inline float distSqr( Point3 pnt0, Point3 pnt1 )
1132 {
1133     return lengthSqr( ( pnt1 - pnt0 ) );
1134 }
1135
1136 inline float dist( Point3 pnt0, Point3 pnt1 )
1137 {
1138     return length( ( pnt1 - pnt0 ) );
1139 }
1140
1141 inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
1142 {
1143     return Point3( spu_sel( pnt0.get128(), pnt1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
1144 }
1145
1146 #ifdef _VECTORMATH_DEBUG
1147
1148 inline void print( Point3 pnt )
1149 {
1150     union { vec_float4 v; float s[4]; } tmp;
1151     tmp.v = pnt.get128();
1152     printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
1153 }
1154
1155 inline void print( Point3 pnt, const char * name )
1156 {
1157     union { vec_float4 v; float s[4]; } tmp;
1158     tmp.v = pnt.get128();
1159     printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
1160 }
1161
1162 #endif
1163
1164 } // namespace Aos
1165 } // namespace Vectormath
1166
1167 #endif