Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h

   1 /*
   2    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   3    All rights reserved.
   4
   5    Redistribution and use in source and binary forms,
   6    with or without modification, are permitted provided that the
   7    following conditions are met:
   8     * Redistributions of source code must retain the above copyright
   9       notice, this list of conditions and the following disclaimer.
  10     * Redistributions in binary form must reproduce the above copyright
  11       notice, this list of conditions and the following disclaimer in the
  12       documentation and/or other materials provided with the distribution.
  13     * Neither the name of the Sony Computer Entertainment Inc nor the names
  14       of its contributors may be used to endorse or promote products derived
  15       from this software without specific prior written permission.
  16
  17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27    POSSIBILITY OF SUCH DAMAGE.
  28 */
  29
  30 #ifndef _VECTORMATH_VEC_AOS_C_H
  31 #define _VECTORMATH_VEC_AOS_C_H
  32 #ifdef __cplusplus
  33 extern "C" {
  34 #endif /* __cplusplus */
  35
  36 /*-----------------------------------------------------------------------------
  37  * Constants
  38  * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
  39  */
  40 #define _VECTORMATH_SHUF_X 0x00010203
  41 #define _VECTORMATH_SHUF_Y 0x04050607
  42 #define _VECTORMATH_SHUF_Z 0x08090a0b
  43 #define _VECTORMATH_SHUF_W 0x0c0d0e0f
  44 #define _VECTORMATH_SHUF_A 0x10111213
  45 #define _VECTORMATH_SHUF_B 0x14151617
  46 #define _VECTORMATH_SHUF_C 0x18191a1b
  47 #define _VECTORMATH_SHUF_D 0x1c1d1e1f
  48 #define _VECTORMATH_SHUF_0 0x80808080
  49 #define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
  50 #define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
  51 #define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
  52 #define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
  53 #define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
  54 #define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
  55 #define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
  56 #define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
  57 #define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
  58 #define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
  59 #define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
  60 #define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
  61 #define _VECTORMATH_SLERP_TOL 0.999f
  62
  63 /*-----------------------------------------------------------------------------
  64  * Definitions
  65  */
  66 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
  67 #define _VECTORMATH_INTERNAL_FUNCTIONS
  68
  69 static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
  70 {
  71     vec_float4 result;
  72     result = spu_mul( vec0, vec1 );
  73     result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
  74     return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
  75 }
  76
  77 static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
  78 {
  79     vec_float4 result;
  80     result = spu_mul( vec0, vec1 );
  81     result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
  82     return spu_add( spu_rlqwbyte( result, 8 ), result );
  83 }
  84
  85 static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
  86 {
  87     vec_float4 tmp0, tmp1, tmp2, tmp3, result;
  88     tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
  89     tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
  90     tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
  91     tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
  92     result = spu_mul( tmp0, tmp1 );
  93     result = spu_nmsub( tmp2, tmp3, result );
  94     return result;
  95 }
  96
  97 static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
  98 {
  99     vec_int4 bexp;
 100     vec_uint4 mant, sign, hfloat;
 101     vec_uint4 notZero, isInf;
 102     const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
 103     const vec_uint4 mergeMant = spu_splats(0x000003ffu);
 104     const vec_uint4 mergeSign = spu_splats(0x00008000u);
 105
 106     sign = spu_rlmask((vec_uint4)v, -16);
 107     mant = spu_rlmask((vec_uint4)v, -13);
 108     bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
 109
 110     notZero = spu_cmpgt(bexp, 112);
 111     isInf = spu_cmpgt(bexp, 142);
 112
 113     bexp = spu_add(bexp, -112);
 114     bexp = spu_sl(bexp, 10);
 115
 116     hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
 117     hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
 118     hfloat = spu_sel(hfloat, hfloatInf, isInf);
 119     hfloat = spu_sel(hfloat, sign, mergeSign);
 120
 121     return hfloat;
 122 }
 123
 124 static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
 125 {
 126     vec_uint4 hfloat_u, hfloat_v;
 127     const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
 128     hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
 129     hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
 130     return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
 131 }
 132
 133 #endif
 134
 135 static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
 136 {
 137     result->vec128 = vec->vec128;
 138 }
 139
 140 static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
 141 {
 142     result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
 143 }
 144
 145 static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
 146 {
 147     result->vec128 = pnt->vec128;
 148 }
 149
 150 static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
 151 {
 152     result->vec128 = spu_splats( scalar );
 153 }
 154
 155 static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
 156 {
 157     result->vec128 = vf4;
 158 }
 159
 160 static inline void vmathV3MakeXAxis( VmathVector3 *result )
 161 {
 162     result->vec128 = _VECTORMATH_UNIT_1000;
 163 }
 164
 165 static inline void vmathV3MakeYAxis( VmathVector3 *result )
 166 {
 167     result->vec128 = _VECTORMATH_UNIT_0100;
 168 }
 169
 170 static inline void vmathV3MakeZAxis( VmathVector3 *result )
 171 {
 172     result->vec128 = _VECTORMATH_UNIT_0010;
 173 }
 174
 175 static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 176 {
 177     VmathVector3 tmpV3_0, tmpV3_1;
 178     vmathV3Sub( &tmpV3_0, vec1, vec0 );
 179     vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
 180     vmathV3Add( result, vec0, &tmpV3_1 );
 181 }
 182
 183 static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
 184 {
 185     vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
 186     vec_uint4 selectMask;
 187     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 188     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 189     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 190     cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
 191     cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
 192     selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
 193     angle = acosf4( cosAngle );
 194     tttt = spu_splats(t);
 195     oneMinusT = spu_sub( spu_splats(1.0f), tttt );
 196     angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
 197     angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
 198     angles = spu_mul( angles, angle );
 199     sines = sinf4( angles );
 200     scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
 201     scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
 202     scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
 203     result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
 204 }
 205
 206 static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
 207 {
 208     return vec->vec128;
 209 }
 210
 211 static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
 212 {
 213     vec_float4 dstVec = *quad;
 214     vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
 215     dstVec = spu_sel(vec->vec128, dstVec, mask);
 216     *quad = dstVec;
 217 }
 218
 219 static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
 220 {
 221     vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
 222     xyzx = threeQuads[0];
 223     yzxy = threeQuads[1];
 224     zxyz = threeQuads[2];
 225     xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
 226     xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
 227     xyz3 = spu_rlqwbyte( zxyz, 4 );
 228     vec0->vec128 = xyzx;
 229     vec1->vec128 = xyz1;
 230     vec2->vec128 = xyz2;
 231     vec3->vec128 = xyz3;
 232 }
 233
 234 static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
 235 {
 236     vec_float4 xyzx, yzxy, zxyz;
 237     xyzx = spu_shuffle( vec0->vec128, vec1->vec128, _VECTORMATH_SHUF_XYZA );
 238     yzxy = spu_shuffle( vec1->vec128, vec2->vec128, _VECTORMATH_SHUF_YZAB );
 239     zxyz = spu_shuffle( vec2->vec128, vec3->vec128, _VECTORMATH_SHUF_ZABC );
 240     threeQuads[0] = xyzx;
 241     threeQuads[1] = yzxy;
 242     threeQuads[2] = zxyz;
 243 }
 244
 245 static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
 246 {
 247     vec_float4 xyz0[3];
 248     vec_float4 xyz1[3];
 249     vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
 250     vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
 251     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
 252     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
 253     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
 254 }
 255
 256 static inline void vmathV3SetX( VmathVector3 *result, float _x )
 257 {
 258     result->vec128 = spu_insert( _x, result->vec128, 0 );
 259 }
 260
 261 static inline float vmathV3GetX( const VmathVector3 *vec )
 262 {
 263     return spu_extract( vec->vec128, 0 );
 264 }
 265
 266 static inline void vmathV3SetY( VmathVector3 *result, float _y )
 267 {
 268     result->vec128 = spu_insert( _y, result->vec128, 1 );
 269 }
 270
 271 static inline float vmathV3GetY( const VmathVector3 *vec )
 272 {
 273     return spu_extract( vec->vec128, 1 );
 274 }
 275
 276 static inline void vmathV3SetZ( VmathVector3 *result, float _z )
 277 {
 278     result->vec128 = spu_insert( _z, result->vec128, 2 );
 279 }
 280
 281 static inline float vmathV3GetZ( const VmathVector3 *vec )
 282 {
 283     return spu_extract( vec->vec128, 2 );
 284 }
 285
 286 static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
 287 {
 288     result->vec128 = spu_insert( value, result->vec128, idx );
 289 }
 290
 291 static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
 292 {
 293     return spu_extract( vec->vec128, idx );
 294 }
 295
 296 static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 297 {
 298     result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
 299 }
 300
 301 static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 302 {
 303     result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
 304 }
 305
 306 static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
 307 {
 308     result->vec128 = spu_add( vec->vec128, pnt1->vec128 );
 309 }
 310
 311 static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
 312 {
 313     result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
 314 }
 315
 316 static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
 317 {
 318     result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
 319 }
 320
 321 static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
 322 {
 323     result->vec128 = negatef4( vec->vec128 );
 324 }
 325
 326 static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 327 {
 328     result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
 329 }
 330
 331 static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 332 {
 333     result->vec128 = divf4( vec0->vec128, vec1->vec128 );
 334 }
 335
 336 static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
 337 {
 338     result->vec128 = recipf4( vec->vec128 );
 339 }
 340
 341 static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
 342 {
 343     result->vec128 = sqrtf4( vec->vec128 );
 344 }
 345
 346 static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
 347 {
 348     result->vec128 = rsqrtf4( vec->vec128 );
 349 }
 350
 351 static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
 352 {
 353     result->vec128 = fabsf4( vec->vec128 );
 354 }
 355
 356 static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 357 {
 358     result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
 359 }
 360
 361 static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 362 {
 363     result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
 364 }
 365
 366 static inline float vmathV3MaxElem( const VmathVector3 *vec )
 367 {
 368     vec_float4 result;
 369     result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
 370     result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
 371     return spu_extract( result, 0 );
 372 }
 373
 374 static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 375 {
 376     result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
 377 }
 378
 379 static inline float vmathV3MinElem( const VmathVector3 *vec )
 380 {
 381     vec_float4 result;
 382     result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
 383     result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
 384     return spu_extract( result, 0 );
 385 }
 386
 387 static inline float vmathV3Sum( const VmathVector3 *vec )
 388 {
 389     return
 390         spu_extract( vec->vec128, 0 ) +
 391         spu_extract( vec->vec128, 1 ) +
 392         spu_extract( vec->vec128, 2 );
 393 }
 394
 395 static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
 396 {
 397     return spu_extract( _vmathVfDot3( vec0->vec128, vec1->vec128 ), 0 );
 398 }
 399
 400 static inline float vmathV3LengthSqr( const VmathVector3 *vec )
 401 {
 402     return spu_extract( _vmathVfDot3( vec->vec128, vec->vec128 ), 0 );
 403 }
 404
 405 static inline float vmathV3Length( const VmathVector3 *vec )
 406 {
 407     return sqrtf( vmathV3LengthSqr( vec ) );
 408 }
 409
 410 static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
 411 {
 412     vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
 413     dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
 414     result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
 415 }
 416
 417 static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
 418 {
 419     result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
 420 }
 421
 422 static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
 423 {
 424     result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
 425 }
 426
 427 #ifdef _VECTORMATH_DEBUG
 428
 429 static inline void vmathV3Print( const VmathVector3 *vec )
 430 {
 431     union { vec_float4 v; float s[4]; } tmp;
 432     tmp.v = vec->vec128;
 433     printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
 434 }
 435
 436 static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
 437 {
 438     union { vec_float4 v; float s[4]; } tmp;
 439     tmp.v = vec->vec128;
 440     printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
 441 }
 442
 443 #endif
 444
 445 static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
 446 {
 447     result->vec128 = vec->vec128;
 448 }
 449
 450 static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
 451 {
 452     result->vec128 = (vec_float4){ _x, _y, _z, _w };
 453 }
 454
 455 static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
 456 {
 457     result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
 458 }
 459
 460 static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
 461 {
 462     result->vec128 = spu_sel( vec->vec128, spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
 463 }
 464
 465 static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
 466 {
 467     result->vec128 = spu_sel( pnt->vec128, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
 468 }
 469
 470 static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
 471 {
 472     result->vec128 = quat->vec128;
 473 }
 474
 475 static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
 476 {
 477     result->vec128 = spu_splats( scalar );
 478 }
 479
 480 static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
 481 {
 482     result->vec128 = vf4;
 483 }
 484
 485 static inline void vmathV4MakeXAxis( VmathVector4 *result )
 486 {
 487     result->vec128 = _VECTORMATH_UNIT_1000;
 488 }
 489
 490 static inline void vmathV4MakeYAxis( VmathVector4 *result )
 491 {
 492     result->vec128 = _VECTORMATH_UNIT_0100;
 493 }
 494
 495 static inline void vmathV4MakeZAxis( VmathVector4 *result )
 496 {
 497     result->vec128 = _VECTORMATH_UNIT_0010;
 498 }
 499
 500 static inline void vmathV4MakeWAxis( VmathVector4 *result )
 501 {
 502     result->vec128 = _VECTORMATH_UNIT_0001;
 503 }
 504
 505 static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 506 {
 507     VmathVector4 tmpV4_0, tmpV4_1;
 508     vmathV4Sub( &tmpV4_0, vec1, vec0 );
 509     vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
 510     vmathV4Add( result, vec0, &tmpV4_1 );
 511 }
 512
 513 static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
 514 {
 515     vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
 516     vec_uint4 selectMask;
 517     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 518     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 519     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 520     cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
 521     cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
 522     selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
 523     angle = acosf4( cosAngle );
 524     tttt = spu_splats(t);
 525     oneMinusT = spu_sub( spu_splats(1.0f), tttt );
 526     angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
 527     angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
 528     angles = spu_mul( angles, angle );
 529     sines = sinf4( angles );
 530     scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
 531     scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
 532     scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
 533     result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
 534 }
 535
 536 static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
 537 {
 538     return vec->vec128;
 539 }
 540
 541 static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
 542 {
 543     twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
 544     twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
 545 }
 546
 547 static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
 548 {
 549     result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
 550 }
 551
 552 static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
 553 {
 554     result->vec128 = vec->vec128;
 555 }
 556
 557 static inline void vmathV4SetX( VmathVector4 *result, float _x )
 558 {
 559     result->vec128 = spu_insert( _x, result->vec128, 0 );
 560 }
 561
 562 static inline float vmathV4GetX( const VmathVector4 *vec )
 563 {
 564     return spu_extract( vec->vec128, 0 );
 565 }
 566
 567 static inline void vmathV4SetY( VmathVector4 *result, float _y )
 568 {
 569     result->vec128 = spu_insert( _y, result->vec128, 1 );
 570 }
 571
 572 static inline float vmathV4GetY( const VmathVector4 *vec )
 573 {
 574     return spu_extract( vec->vec128, 1 );
 575 }
 576
 577 static inline void vmathV4SetZ( VmathVector4 *result, float _z )
 578 {
 579     result->vec128 = spu_insert( _z, result->vec128, 2 );
 580 }
 581
 582 static inline float vmathV4GetZ( const VmathVector4 *vec )
 583 {
 584     return spu_extract( vec->vec128, 2 );
 585 }
 586
 587 static inline void vmathV4SetW( VmathVector4 *result, float _w )
 588 {
 589     result->vec128 = spu_insert( _w, result->vec128, 3 );
 590 }
 591
 592 static inline float vmathV4GetW( const VmathVector4 *vec )
 593 {
 594     return spu_extract( vec->vec128, 3 );
 595 }
 596
 597 static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
 598 {
 599     result->vec128 = spu_insert( value, result->vec128, idx );
 600 }
 601
 602 static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
 603 {
 604     return spu_extract( vec->vec128, idx );
 605 }
 606
 607 static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 608 {
 609     result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
 610 }
 611
 612 static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 613 {
 614     result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
 615 }
 616
 617 static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
 618 {
 619     result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
 620 }
 621
 622 static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
 623 {
 624     result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
 625 }
 626
 627 static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
 628 {
 629     result->vec128 = negatef4( vec->vec128 );
 630 }
 631
 632 static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 633 {
 634     result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
 635 }
 636
 637 static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 638 {
 639     result->vec128 = divf4( vec0->vec128, vec1->vec128 );
 640 }
 641
 642 static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
 643 {
 644     result->vec128 = recipf4( vec->vec128 );
 645 }
 646
 647 static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
 648 {
 649     result->vec128 = sqrtf4( vec->vec128 );
 650 }
 651
 652 static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
 653 {
 654     result->vec128 = rsqrtf4( vec->vec128 );
 655 }
 656
 657 static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
 658 {
 659     result->vec128 = fabsf4( vec->vec128 );
 660 }
 661
 662 static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 663 {
 664     result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
 665 }
 666
 667 static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 668 {
 669     result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
 670 }
 671
 672 static inline float vmathV4MaxElem( const VmathVector4 *vec )
 673 {
 674     vec_float4 result;
 675     result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
 676     result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
 677     result = fmaxf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
 678     return spu_extract( result, 0 );
 679 }
 680
 681 static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
 682 {
 683     result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
 684 }
 685
 686 static inline float vmathV4MinElem( const VmathVector4 *vec )
 687 {
 688     vec_float4 result;
 689     result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
 690     result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
 691     result = fminf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
 692     return spu_extract( result, 0 );
 693 }
 694
 695 static inline float vmathV4Sum( const VmathVector4 *vec )
 696 {
 697     return
 698         spu_extract( vec->vec128, 0 ) +
 699         spu_extract( vec->vec128, 1 ) +
 700         spu_extract( vec->vec128, 2 ) +
 701         spu_extract( vec->vec128, 3 );
 702 }
 703
 704 static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
 705 {
 706     return spu_extract( _vmathVfDot4( vec0->vec128, vec1->vec128 ), 0 );
 707 }
 708
 709 static inline float vmathV4LengthSqr( const VmathVector4 *vec )
 710 {
 711     return spu_extract( _vmathVfDot4( vec->vec128, vec->vec128 ), 0 );
 712 }
 713
 714 static inline float vmathV4Length( const VmathVector4 *vec )
 715 {
 716     return sqrtf( vmathV4LengthSqr( vec ) );
 717 }
 718
 719 static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
 720 {
 721     vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
 722     result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
 723 }
 724
 725 static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
 726 {
 727     result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
 728 }
 729
 730 #ifdef _VECTORMATH_DEBUG
 731
 732 static inline void vmathV4Print( const VmathVector4 *vec )
 733 {
 734     union { vec_float4 v; float s[4]; } tmp;
 735     tmp.v = vec->vec128;
 736     printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
 737 }
 738
 739 static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
 740 {
 741     union { vec_float4 v; float s[4]; } tmp;
 742     tmp.v = vec->vec128;
 743     printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
 744 }
 745
 746 #endif
 747
 748 static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
 749 {
 750     result->vec128 = pnt->vec128;
 751 }
 752
 753 static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
 754 {
 755     result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
 756 }
 757
 758 static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
 759 {
 760     result->vec128 = vec->vec128;
 761 }
 762
 763 static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
 764 {
 765     result->vec128 = spu_splats( scalar );
 766 }
 767
 768 static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
 769 {
 770     result->vec128 = vf4;
 771 }
 772
 773 static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 774 {
 775     VmathVector3 tmpV3_0, tmpV3_1;
 776     vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
 777     vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
 778     vmathP3AddV3( result, pnt0, &tmpV3_1 );
 779 }
 780
 781 static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
 782 {
 783     return pnt->vec128;
 784 }
 785
 786 static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
 787 {
 788     vec_float4 dstVec = *quad;
 789     vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
 790     dstVec = spu_sel(pnt->vec128, dstVec, mask);
 791     *quad = dstVec;
 792 }
 793
 794 static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
 795 {
 796     vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
 797     xyzx = threeQuads[0];
 798     yzxy = threeQuads[1];
 799     zxyz = threeQuads[2];
 800     xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
 801     xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
 802     xyz3 = spu_rlqwbyte( zxyz, 4 );
 803     pnt0->vec128 = xyzx;
 804     pnt1->vec128 = xyz1;
 805     pnt2->vec128 = xyz2;
 806     pnt3->vec128 = xyz3;
 807 }
 808
 809 static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
 810 {
 811     vec_float4 xyzx, yzxy, zxyz;
 812     xyzx = spu_shuffle( pnt0->vec128, pnt1->vec128, _VECTORMATH_SHUF_XYZA );
 813     yzxy = spu_shuffle( pnt1->vec128, pnt2->vec128, _VECTORMATH_SHUF_YZAB );
 814     zxyz = spu_shuffle( pnt2->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZABC );
 815     threeQuads[0] = xyzx;
 816     threeQuads[1] = yzxy;
 817     threeQuads[2] = zxyz;
 818 }
 819
 820 static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
 821 {
 822     vec_float4 xyz0[3];
 823     vec_float4 xyz1[3];
 824     vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
 825     vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
 826     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
 827     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
 828     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
 829 }
 830
 831 static inline void vmathP3SetX( VmathPoint3 *result, float _x )
 832 {
 833     result->vec128 = spu_insert( _x, result->vec128, 0 );
 834 }
 835
 836 static inline float vmathP3GetX( const VmathPoint3 *pnt )
 837 {
 838     return spu_extract( pnt->vec128, 0 );
 839 }
 840
 841 static inline void vmathP3SetY( VmathPoint3 *result, float _y )
 842 {
 843     result->vec128 = spu_insert( _y, result->vec128, 1 );
 844 }
 845
 846 static inline float vmathP3GetY( const VmathPoint3 *pnt )
 847 {
 848     return spu_extract( pnt->vec128, 1 );
 849 }
 850
 851 static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
 852 {
 853     result->vec128 = spu_insert( _z, result->vec128, 2 );
 854 }
 855
 856 static inline float vmathP3GetZ( const VmathPoint3 *pnt )
 857 {
 858     return spu_extract( pnt->vec128, 2 );
 859 }
 860
 861 static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
 862 {
 863     result->vec128 = spu_insert( value, result->vec128, idx );
 864 }
 865
 866 static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
 867 {
 868     return spu_extract( pnt->vec128, idx );
 869 }
 870
 871 static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 872 {
 873     result->vec128 = spu_sub( pnt0->vec128, pnt1->vec128 );
 874 }
 875
 876 static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
 877 {
 878     result->vec128 = spu_add( pnt->vec128, vec1->vec128 );
 879 }
 880
 881 static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
 882 {
 883     result->vec128 = spu_sub( pnt->vec128, vec1->vec128 );
 884 }
 885
 886 static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 887 {
 888     result->vec128 = spu_mul( pnt0->vec128, pnt1->vec128 );
 889 }
 890
 891 static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 892 {
 893     result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
 894 }
 895
 896 static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
 897 {
 898     result->vec128 = recipf4( pnt->vec128 );
 899 }
 900
 901 static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
 902 {
 903     result->vec128 = sqrtf4( pnt->vec128 );
 904 }
 905
 906 static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
 907 {
 908     result->vec128 = rsqrtf4( pnt->vec128 );
 909 }
 910
 911 static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
 912 {
 913     result->vec128 = fabsf4( pnt->vec128 );
 914 }
 915
 916 static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 917 {
 918     result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
 919 }
 920
 921 static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 922 {
 923     result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
 924 }
 925
 926 static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
 927 {
 928     vec_float4 result;
 929     result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
 930     result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
 931     return spu_extract( result, 0 );
 932 }
 933
 934 static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 935 {
 936     result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
 937 }
 938
 939 static inline float vmathP3MinElem( const VmathPoint3 *pnt )
 940 {
 941     vec_float4 result;
 942     result = fminf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
 943     result = fminf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
 944     return spu_extract( result, 0 );
 945 }
 946
 947 static inline float vmathP3Sum( const VmathPoint3 *pnt )
 948 {
 949     return
 950         spu_extract( pnt->vec128, 0 ) +
 951         spu_extract( pnt->vec128, 1 ) +
 952         spu_extract( pnt->vec128, 2 );
 953 }
 954
 955 static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
 956 {
 957     VmathPoint3 tmpP3_0;
 958     vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
 959     vmathP3MulPerElem( result, pnt, &tmpP3_0 );
 960 }
 961
 962 static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
 963 {
 964     VmathPoint3 tmpP3_0;
 965     vmathP3MakeFromV3( &tmpP3_0, scaleVec );
 966     vmathP3MulPerElem( result, pnt, &tmpP3_0 );
 967 }
 968
 969 static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
 970 {
 971     return spu_extract( _vmathVfDot3( pnt->vec128, unitVec->vec128 ), 0 );
 972 }
 973
 974 static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
 975 {
 976     VmathVector3 tmpV3_0;
 977     vmathV3MakeFromP3( &tmpV3_0, pnt );
 978     return vmathV3LengthSqr( &tmpV3_0 );
 979 }
 980
 981 static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
 982 {
 983     VmathVector3 tmpV3_0;
 984     vmathV3MakeFromP3( &tmpV3_0, pnt );
 985     return vmathV3Length( &tmpV3_0 );
 986 }
 987
 988 static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 989 {
 990     VmathVector3 tmpV3_0;
 991     vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
 992     return vmathV3LengthSqr( &tmpV3_0 );
 993 }
 994
 995 static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
 996 {
 997     VmathVector3 tmpV3_0;
 998     vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
 999     return vmathV3Length( &tmpV3_0 );
1000 }
1001
1002 static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
1003 {
1004     result->vec128 = spu_sel( pnt0->vec128, pnt1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
1005 }
1006
1007 #ifdef _VECTORMATH_DEBUG
1008
1009 static inline void vmathP3Print( const VmathPoint3 *pnt )
1010 {
1011     union { vec_float4 v; float s[4]; } tmp;
1012     tmp.v = pnt->vec128;
1013     printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
1014 }
1015
1016 static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
1017 {
1018     union { vec_float4 v; float s[4]; } tmp;
1019     tmp.v = pnt->vec128;
1020     printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
1021 }
1022
1023 #endif
1024
1025 #ifdef __cplusplus
1026 }
1027 #endif /* __cplusplus */
1028
1029 #endif