Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h

   1 /*
   2    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   3    All rights reserved.
   4
   5    Redistribution and use in source and binary forms,
   6    with or without modification, are permitted provided that the
   7    following conditions are met:
   8     * Redistributions of source code must retain the above copyright
   9       notice, this list of conditions and the following disclaimer.
  10     * Redistributions in binary form must reproduce the above copyright
  11       notice, this list of conditions and the following disclaimer in the
  12       documentation and/or other materials provided with the distribution.
  13     * Neither the name of the Sony Computer Entertainment Inc nor the names
  14       of its contributors may be used to endorse or promote products derived
  15       from this software without specific prior written permission.
  16
  17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27    POSSIBILITY OF SUCH DAMAGE.
  28 */
  29
  30 #ifndef _VECTORMATH_VEC_SOA_C_H
  31 #define _VECTORMATH_VEC_SOA_C_H
  32 #ifdef __cplusplus
  33 extern "C" {
  34 #endif /* __cplusplus */
  35
  36 /*-----------------------------------------------------------------------------
  37  * Constants
  38  * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
  39  */
/* Single-word selectors: each value holds the four consecutive byte indices
 * of one 32-bit word.  X/Y/Z/W cover byte indices 0x00-0x0f and A/B/C/D cover
 * 0x10-0x1f, matching the [x,y,z,w] [a,b,c,d] labelling described above.
 */
#define _VECTORMATH_SHUF_X 0x00010203
#define _VECTORMATH_SHUF_Y 0x04050607
#define _VECTORMATH_SHUF_Z 0x08090a0b
#define _VECTORMATH_SHUF_W 0x0c0d0e0f
#define _VECTORMATH_SHUF_A 0x10111213
#define _VECTORMATH_SHUF_B 0x14151617
#define _VECTORMATH_SHUF_C 0x18191a1b
#define _VECTORMATH_SHUF_D 0x1c1d1e1f
/* 0x80 in every byte; used below wherever a result word is labelled "0".
 * NOTE(review): presumably the shuffle control value that produces zero
 * bytes -- confirm against the SPU intrinsics reference. */
#define _VECTORMATH_SHUF_0 0x80808080
/* Full 16-byte shuffle patterns.  The four letters after _SHUF_ name, in
 * order, the source word placed in each of the four result words. */
#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
/* Dot-product threshold used by the Slerp routines in this file: lanes whose
 * dot product is not below this value use plain linear weights instead of the
 * sine-based ones. */
#define _VECTORMATH_SLERP_TOL 0.999f
  62
  63 /*-----------------------------------------------------------------------------
  64  * Definitions
  65  */
  66 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
  67 #define _VECTORMATH_INTERNAL_FUNCTIONS
  68
  69 #endif
  70
  71 static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
  72 {
  73     result->x = vec->x;
  74     result->y = vec->y;
  75     result->z = vec->z;
  76 }
  77
  78 static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
  79 {
  80     result->x = _x;
  81     result->y = _y;
  82     result->z = _z;
  83 }
  84
  85 static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
  86 {
  87     result->x = pnt->x;
  88     result->y = pnt->y;
  89     result->z = pnt->z;
  90 }
  91
  92 static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
  93 {
  94     result->x = scalar;
  95     result->y = scalar;
  96     result->z = scalar;
  97 }
  98
  99 static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
 100 {
 101     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 102     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 103     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 104     vec_float4 vec128 = vec->vec128;
 105     result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
 106     result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
 107     result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
 108 }
 109
 110 static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
 111 {
 112     vec_float4 tmp0, tmp1, tmp2, tmp3;
 113     tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
 114     tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
 115     tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
 116     tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
 117     result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
 118     result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
 119     result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
 120 }
 121
 122 static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
 123 {
 124     vmathSoaV3MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
 125 }
 126
 127 static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
 128 {
 129     vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
 130 }
 131
 132 static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
 133 {
 134     vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
 135 }
 136
 137 static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 138 {
 139     VmathSoaVector3 tmpV3_0, tmpV3_1;
 140     vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
 141     vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
 142     vmathSoaV3Add( result, vec0, &tmpV3_1 );
 143 }
 144
 145 static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
 146 {
 147     VmathSoaVector3 tmpV3_0, tmpV3_1;
 148     vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
 149     vec_uint4 selectMask;
 150     cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
 151     selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
 152     angle = acosf4( cosAngle );
 153     recipSinAngle = recipf4( sinf4( angle ) );
 154     scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
 155     scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
 156     vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
 157     vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
 158     vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
 159 }
 160
 161 static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
 162 {
 163     vec_float4 tmp0, tmp1;
 164     tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
 165     tmp1 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
 166     vmathV3MakeFrom128( result0, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_XAYB ) );
 167     vmathV3MakeFrom128( result1, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_ZBW0 ) );
 168     vmathV3MakeFrom128( result2, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_XCY0 ) );
 169     vmathV3MakeFrom128( result3, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_ZDW0 ) );
 170 }
 171
 172 static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
 173 {
 174     vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
 175     xyzx = threeQuads[0];
 176     yzxy = threeQuads[1];
 177     zxyz = threeQuads[2];
 178     xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
 179     zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
 180     yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
 181     vmathSoaV3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
 182     vmathSoaV3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
 183     vmathSoaV3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
 184 }
 185
 186 static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
 187 {
 188     vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
 189     xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
 190     zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
 191     yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
 192     xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
 193     yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
 194     zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
 195     threeQuads[0] = xyzx;
 196     threeQuads[1] = yzxy;
 197     threeQuads[2] = zxyz;
 198 }
 199
 200 static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
 201 {
 202     vec_float4 xyz0[3];
 203     vec_float4 xyz1[3];
 204     vmathSoaV3StoreXYZArray( vec0, xyz0 );
 205     vmathSoaV3StoreXYZArray( vec1, xyz1 );
 206     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
 207     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
 208     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
 209 }
 210
 211 static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
 212 {
 213     result->x = _x;
 214 }
 215
 216 static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
 217 {
 218     return vec->x;
 219 }
 220
 221 static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
 222 {
 223     result->y = _y;
 224 }
 225
 226 static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
 227 {
 228     return vec->y;
 229 }
 230
 231 static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
 232 {
 233     result->z = _z;
 234 }
 235
 236 static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
 237 {
 238     return vec->z;
 239 }
 240
 241 static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
 242 {
 243     *(&result->x + idx) = value;
 244 }
 245
 246 static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
 247 {
 248     return *(&vec->x + idx);
 249 }
 250
 251 static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 252 {
 253     result->x = spu_add( vec0->x, vec1->x );
 254     result->y = spu_add( vec0->y, vec1->y );
 255     result->z = spu_add( vec0->z, vec1->z );
 256 }
 257
 258 static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 259 {
 260     result->x = spu_sub( vec0->x, vec1->x );
 261     result->y = spu_sub( vec0->y, vec1->y );
 262     result->z = spu_sub( vec0->z, vec1->z );
 263 }
 264
 265 static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
 266 {
 267     result->x = spu_add( vec->x, pnt1->x );
 268     result->y = spu_add( vec->y, pnt1->y );
 269     result->z = spu_add( vec->z, pnt1->z );
 270 }
 271
 272 static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
 273 {
 274     result->x = spu_mul( vec->x, scalar );
 275     result->y = spu_mul( vec->y, scalar );
 276     result->z = spu_mul( vec->z, scalar );
 277 }
 278
 279 static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
 280 {
 281     result->x = divf4( vec->x, scalar );
 282     result->y = divf4( vec->y, scalar );
 283     result->z = divf4( vec->z, scalar );
 284 }
 285
 286 static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
 287 {
 288     result->x = negatef4( vec->x );
 289     result->y = negatef4( vec->y );
 290     result->z = negatef4( vec->z );
 291 }
 292
 293 static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 294 {
 295     result->x = spu_mul( vec0->x, vec1->x );
 296     result->y = spu_mul( vec0->y, vec1->y );
 297     result->z = spu_mul( vec0->z, vec1->z );
 298 }
 299
 300 static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 301 {
 302     result->x = divf4( vec0->x, vec1->x );
 303     result->y = divf4( vec0->y, vec1->y );
 304     result->z = divf4( vec0->z, vec1->z );
 305 }
 306
 307 static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
 308 {
 309     result->x = recipf4( vec->x );
 310     result->y = recipf4( vec->y );
 311     result->z = recipf4( vec->z );
 312 }
 313
 314 static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
 315 {
 316     result->x = sqrtf4( vec->x );
 317     result->y = sqrtf4( vec->y );
 318     result->z = sqrtf4( vec->z );
 319 }
 320
 321 static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
 322 {
 323     result->x = rsqrtf4( vec->x );
 324     result->y = rsqrtf4( vec->y );
 325     result->z = rsqrtf4( vec->z );
 326 }
 327
 328 static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
 329 {
 330     result->x = fabsf4( vec->x );
 331     result->y = fabsf4( vec->y );
 332     result->z = fabsf4( vec->z );
 333 }
 334
 335 static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 336 {
 337     result->x = copysignf4( vec0->x, vec1->x );
 338     result->y = copysignf4( vec0->y, vec1->y );
 339     result->z = copysignf4( vec0->z, vec1->z );
 340 }
 341
 342 static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 343 {
 344     result->x = fmaxf4( vec0->x, vec1->x );
 345     result->y = fmaxf4( vec0->y, vec1->y );
 346     result->z = fmaxf4( vec0->z, vec1->z );
 347 }
 348
 349 static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
 350 {
 351     vec_float4 result;
 352     result = fmaxf4( vec->x, vec->y );
 353     result = fmaxf4( vec->z, result );
 354     return result;
 355 }
 356
 357 static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 358 {
 359     result->x = fminf4( vec0->x, vec1->x );
 360     result->y = fminf4( vec0->y, vec1->y );
 361     result->z = fminf4( vec0->z, vec1->z );
 362 }
 363
 364 static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
 365 {
 366     vec_float4 result;
 367     result = fminf4( vec->x, vec->y );
 368     result = fminf4( vec->z, result );
 369     return result;
 370 }
 371
 372 static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
 373 {
 374     vec_float4 result;
 375     result = spu_add( vec->x, vec->y );
 376     result = spu_add( result, vec->z );
 377     return result;
 378 }
 379
 380 static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 381 {
 382     vec_float4 result;
 383     result = spu_mul( vec0->x, vec1->x );
 384     result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
 385     result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
 386     return result;
 387 }
 388
 389 static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
 390 {
 391     vec_float4 result;
 392     result = spu_mul( vec->x, vec->x );
 393     result = spu_add( result, spu_mul( vec->y, vec->y ) );
 394     result = spu_add( result, spu_mul( vec->z, vec->z ) );
 395     return result;
 396 }
 397
 398 static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
 399 {
 400     return sqrtf4( vmathSoaV3LengthSqr( vec ) );
 401 }
 402
 403 static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
 404 {
 405     vec_float4 lenSqr, lenInv;
 406     lenSqr = vmathSoaV3LengthSqr( vec );
 407     lenInv = rsqrtf4( lenSqr );
 408     result->x = spu_mul( vec->x, lenInv );
 409     result->y = spu_mul( vec->y, lenInv );
 410     result->z = spu_mul( vec->z, lenInv );
 411 }
 412
 413 static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
 414 {
 415     vec_float4 tmpX, tmpY, tmpZ;
 416     tmpX = spu_sub( spu_mul( vec0->y, vec1->z ), spu_mul( vec0->z, vec1->y ) );
 417     tmpY = spu_sub( spu_mul( vec0->z, vec1->x ), spu_mul( vec0->x, vec1->z ) );
 418     tmpZ = spu_sub( spu_mul( vec0->x, vec1->y ), spu_mul( vec0->y, vec1->x ) );
 419     vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
 420 }
 421
 422 static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
 423 {
 424     result->x = spu_sel( vec0->x, vec1->x, select1 );
 425     result->y = spu_sel( vec0->y, vec1->y, select1 );
 426     result->z = spu_sel( vec0->z, vec1->z, select1 );
 427 }
 428
 429 #ifdef _VECTORMATH_DEBUG
 430
 431 static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
 432 {
 433     VmathVector3 vec0, vec1, vec2, vec3;
 434     vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
 435     printf("slot 0:\n");
 436     vmathV3Print( &vec0 );
 437     printf("slot 1:\n");
 438     vmathV3Print( &vec1 );
 439     printf("slot 2:\n");
 440     vmathV3Print( &vec2 );
 441     printf("slot 3:\n");
 442     vmathV3Print( &vec3 );
 443 }
 444
 445 static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
 446 {
 447     VmathVector3 vec0, vec1, vec2, vec3;
 448     printf( "%s:\n", name );
 449     vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
 450     printf("slot 0:\n");
 451     vmathV3Print( &vec0 );
 452     printf("slot 1:\n");
 453     vmathV3Print( &vec1 );
 454     printf("slot 2:\n");
 455     vmathV3Print( &vec2 );
 456     printf("slot 3:\n");
 457     vmathV3Print( &vec3 );
 458 }
 459
 460 #endif
 461
 462 static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 463 {
 464     result->x = vec->x;
 465     result->y = vec->y;
 466     result->z = vec->z;
 467     result->w = vec->w;
 468 }
 469
 470 static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
 471 {
 472     result->x = _x;
 473     result->y = _y;
 474     result->z = _z;
 475     result->w = _w;
 476 }
 477
 478 static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
 479 {
 480     vmathSoaV4SetXYZ( result, xyz );
 481     vmathSoaV4SetW( result, _w );
 482 }
 483
 484 static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
 485 {
 486     result->x = vec->x;
 487     result->y = vec->y;
 488     result->z = vec->z;
 489     result->w = spu_splats(0.0f);
 490 }
 491
 492 static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
 493 {
 494     result->x = pnt->x;
 495     result->y = pnt->y;
 496     result->z = pnt->z;
 497     result->w = spu_splats(1.0f);
 498 }
 499
 500 static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
 501 {
 502     result->x = quat->x;
 503     result->y = quat->y;
 504     result->z = quat->z;
 505     result->w = quat->w;
 506 }
 507
 508 static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
 509 {
 510     result->x = scalar;
 511     result->y = scalar;
 512     result->z = scalar;
 513     result->w = scalar;
 514 }
 515
 516 static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
 517 {
 518     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 519     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 520     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 521     vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
 522     vec_float4 vec128 = vec->vec128;
 523     result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
 524     result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
 525     result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
 526     result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
 527 }
 528
 529 static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
 530 {
 531     vec_float4 tmp0, tmp1, tmp2, tmp3;
 532     tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
 533     tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
 534     tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
 535     tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
 536     result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
 537     result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
 538     result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
 539     result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
 540 }
 541
 542 static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
 543 {
 544     vmathSoaV4MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
 545 }
 546
 547 static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
 548 {
 549     vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
 550 }
 551
 552 static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
 553 {
 554     vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
 555 }
 556
 557 static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
 558 {
 559     vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
 560 }
 561
 562 static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 563 {
 564     VmathSoaVector4 tmpV4_0, tmpV4_1;
 565     vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
 566     vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
 567     vmathSoaV4Add( result, vec0, &tmpV4_1 );
 568 }
 569
 570 static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
 571 {
 572     VmathSoaVector4 tmpV4_0, tmpV4_1;
 573     vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
 574     vec_uint4 selectMask;
 575     cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
 576     selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
 577     angle = acosf4( cosAngle );
 578     recipSinAngle = recipf4( sinf4( angle ) );
 579     scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
 580     scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
 581     vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
 582     vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
 583     vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
 584 }
 585
 586 static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
 587 {
 588     vec_float4 tmp0, tmp1, tmp2, tmp3;
 589     tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
 590     tmp1 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_XAYB );
 591     tmp2 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
 592     tmp3 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_ZCWD );
 593     vmathV4MakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
 594     vmathV4MakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
 595     vmathV4MakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
 596     vmathV4MakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
 597 }
 598
 599 static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
 600 {
 601     VmathVector4 v0, v1, v2, v3;
 602     vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
 603     twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
 604     twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
 605 }
 606
 607 static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
 608 {
 609     result->x = vec->x;
 610     result->y = vec->y;
 611     result->z = vec->z;
 612 }
 613
 614 static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
 615 {
 616     vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
 617 }
 618
 619 static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
 620 {
 621     result->x = _x;
 622 }
 623
 624 static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
 625 {
 626     return vec->x;
 627 }
 628
 629 static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
 630 {
 631     result->y = _y;
 632 }
 633
 634 static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
 635 {
 636     return vec->y;
 637 }
 638
 639 static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
 640 {
 641     result->z = _z;
 642 }
 643
 644 static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
 645 {
 646     return vec->z;
 647 }
 648
 649 static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
 650 {
 651     result->w = _w;
 652 }
 653
 654 static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
 655 {
 656     return vec->w;
 657 }
 658
 659 static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
 660 {
 661     *(&result->x + idx) = value;
 662 }
 663
 664 static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
 665 {
 666     return *(&vec->x + idx);
 667 }
 668
 669 static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 670 {
 671     result->x = spu_add( vec0->x, vec1->x );
 672     result->y = spu_add( vec0->y, vec1->y );
 673     result->z = spu_add( vec0->z, vec1->z );
 674     result->w = spu_add( vec0->w, vec1->w );
 675 }
 676
 677 static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 678 {
 679     result->x = spu_sub( vec0->x, vec1->x );
 680     result->y = spu_sub( vec0->y, vec1->y );
 681     result->z = spu_sub( vec0->z, vec1->z );
 682     result->w = spu_sub( vec0->w, vec1->w );
 683 }
 684
 685 static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
 686 {
 687     result->x = spu_mul( vec->x, scalar );
 688     result->y = spu_mul( vec->y, scalar );
 689     result->z = spu_mul( vec->z, scalar );
 690     result->w = spu_mul( vec->w, scalar );
 691 }
 692
 693 static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
 694 {
 695     result->x = divf4( vec->x, scalar );
 696     result->y = divf4( vec->y, scalar );
 697     result->z = divf4( vec->z, scalar );
 698     result->w = divf4( vec->w, scalar );
 699 }
 700
 701 static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 702 {
 703     result->x = negatef4( vec->x );
 704     result->y = negatef4( vec->y );
 705     result->z = negatef4( vec->z );
 706     result->w = negatef4( vec->w );
 707 }
 708
 709 static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 710 {
 711     result->x = spu_mul( vec0->x, vec1->x );
 712     result->y = spu_mul( vec0->y, vec1->y );
 713     result->z = spu_mul( vec0->z, vec1->z );
 714     result->w = spu_mul( vec0->w, vec1->w );
 715 }
 716
 717 static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 718 {
 719     result->x = divf4( vec0->x, vec1->x );
 720     result->y = divf4( vec0->y, vec1->y );
 721     result->z = divf4( vec0->z, vec1->z );
 722     result->w = divf4( vec0->w, vec1->w );
 723 }
 724
 725 static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 726 {
 727     result->x = recipf4( vec->x );
 728     result->y = recipf4( vec->y );
 729     result->z = recipf4( vec->z );
 730     result->w = recipf4( vec->w );
 731 }
 732
 733 static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 734 {
 735     result->x = sqrtf4( vec->x );
 736     result->y = sqrtf4( vec->y );
 737     result->z = sqrtf4( vec->z );
 738     result->w = sqrtf4( vec->w );
 739 }
 740
 741 static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 742 {
 743     result->x = rsqrtf4( vec->x );
 744     result->y = rsqrtf4( vec->y );
 745     result->z = rsqrtf4( vec->z );
 746     result->w = rsqrtf4( vec->w );
 747 }
 748
 749 static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 750 {
 751     result->x = fabsf4( vec->x );
 752     result->y = fabsf4( vec->y );
 753     result->z = fabsf4( vec->z );
 754     result->w = fabsf4( vec->w );
 755 }
 756
 757 static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 758 {
 759     result->x = copysignf4( vec0->x, vec1->x );
 760     result->y = copysignf4( vec0->y, vec1->y );
 761     result->z = copysignf4( vec0->z, vec1->z );
 762     result->w = copysignf4( vec0->w, vec1->w );
 763 }
 764
 765 static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 766 {
 767     result->x = fmaxf4( vec0->x, vec1->x );
 768     result->y = fmaxf4( vec0->y, vec1->y );
 769     result->z = fmaxf4( vec0->z, vec1->z );
 770     result->w = fmaxf4( vec0->w, vec1->w );
 771 }
 772
 773 static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
 774 {
 775     vec_float4 result;
 776     result = fmaxf4( vec->x, vec->y );
 777     result = fmaxf4( vec->z, result );
 778     result = fmaxf4( vec->w, result );
 779     return result;
 780 }
 781
 782 static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 783 {
 784     result->x = fminf4( vec0->x, vec1->x );
 785     result->y = fminf4( vec0->y, vec1->y );
 786     result->z = fminf4( vec0->z, vec1->z );
 787     result->w = fminf4( vec0->w, vec1->w );
 788 }
 789
 790 static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
 791 {
 792     vec_float4 result;
 793     result = fminf4( vec->x, vec->y );
 794     result = fminf4( vec->z, result );
 795     result = fminf4( vec->w, result );
 796     return result;
 797 }
 798
 799 static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
 800 {
 801     vec_float4 result;
 802     result = spu_add( vec->x, vec->y );
 803     result = spu_add( result, vec->z );
 804     result = spu_add( result, vec->w );
 805     return result;
 806 }
 807
 808 static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
 809 {
 810     vec_float4 result;
 811     result = spu_mul( vec0->x, vec1->x );
 812     result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
 813     result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
 814     result = spu_add( result, spu_mul( vec0->w, vec1->w ) );
 815     return result;
 816 }
 817
 818 static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
 819 {
 820     vec_float4 result;
 821     result = spu_mul( vec->x, vec->x );
 822     result = spu_add( result, spu_mul( vec->y, vec->y ) );
 823     result = spu_add( result, spu_mul( vec->z, vec->z ) );
 824     result = spu_add( result, spu_mul( vec->w, vec->w ) );
 825     return result;
 826 }
 827
 828 static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
 829 {
 830     return sqrtf4( vmathSoaV4LengthSqr( vec ) );
 831 }
 832
 833 static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
 834 {
 835     vec_float4 lenSqr, lenInv;
 836     lenSqr = vmathSoaV4LengthSqr( vec );
 837     lenInv = rsqrtf4( lenSqr );
 838     result->x = spu_mul( vec->x, lenInv );
 839     result->y = spu_mul( vec->y, lenInv );
 840     result->z = spu_mul( vec->z, lenInv );
 841     result->w = spu_mul( vec->w, lenInv );
 842 }
 843
 844 static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
 845 {
 846     result->x = spu_sel( vec0->x, vec1->x, select1 );
 847     result->y = spu_sel( vec0->y, vec1->y, select1 );
 848     result->z = spu_sel( vec0->z, vec1->z, select1 );
 849     result->w = spu_sel( vec0->w, vec1->w, select1 );
 850 }
 851
 852 #ifdef _VECTORMATH_DEBUG
 853
 854 static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
 855 {
 856     VmathVector4 vec0, vec1, vec2, vec3;
 857     vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
 858     printf("slot 0:\n");
 859     vmathV4Print( &vec0 );
 860     printf("slot 1:\n");
 861     vmathV4Print( &vec1 );
 862     printf("slot 2:\n");
 863     vmathV4Print( &vec2 );
 864     printf("slot 3:\n");
 865     vmathV4Print( &vec3 );
 866 }
 867
 868 static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
 869 {
 870     VmathVector4 vec0, vec1, vec2, vec3;
 871     printf( "%s:\n", name );
 872     vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
 873     printf("slot 0:\n");
 874     vmathV4Print( &vec0 );
 875     printf("slot 1:\n");
 876     vmathV4Print( &vec1 );
 877     printf("slot 2:\n");
 878     vmathV4Print( &vec2 );
 879     printf("slot 3:\n");
 880     vmathV4Print( &vec3 );
 881 }
 882
 883 #endif
 884
 885 static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
 886 {
 887     result->x = pnt->x;
 888     result->y = pnt->y;
 889     result->z = pnt->z;
 890 }
 891
 892 static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
 893 {
 894     result->x = _x;
 895     result->y = _y;
 896     result->z = _z;
 897 }
 898
 899 static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
 900 {
 901     result->x = vec->x;
 902     result->y = vec->y;
 903     result->z = vec->z;
 904 }
 905
 906 static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
 907 {
 908     result->x = scalar;
 909     result->y = scalar;
 910     result->z = scalar;
 911 }
 912
 913 static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
 914 {
 915     vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
 916     vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
 917     vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
 918     vec_float4 vec128 = pnt->vec128;
 919     result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
 920     result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
 921     result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
 922 }
 923
 924 static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
 925 {
 926     vec_float4 tmp0, tmp1, tmp2, tmp3;
 927     tmp0 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_XAYB );
 928     tmp1 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_XAYB );
 929     tmp2 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_ZCWD );
 930     tmp3 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZCWD );
 931     result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
 932     result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
 933     result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
 934 }
 935
 936 static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
 937 {
 938     VmathSoaVector3 tmpV3_0, tmpV3_1;
 939     vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
 940     vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
 941     vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
 942 }
 943
 944 static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
 945 {
 946     vec_float4 tmp0, tmp1;
 947     tmp0 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_XAYB );
 948     tmp1 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_ZCWD );
 949     vmathP3MakeFrom128( result0, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_XAYB ) );
 950     vmathP3MakeFrom128( result1, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_ZBW0 ) );
 951     vmathP3MakeFrom128( result2, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_XCY0 ) );
 952     vmathP3MakeFrom128( result3, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_ZDW0 ) );
 953 }
 954
 955 static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
 956 {
 957     vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
 958     xyzx = threeQuads[0];
 959     yzxy = threeQuads[1];
 960     zxyz = threeQuads[2];
 961     xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
 962     zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
 963     yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
 964     vmathSoaP3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
 965     vmathSoaP3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
 966     vmathSoaP3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
 967 }
 968
 969 static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
 970 {
 971     vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
 972     xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
 973     zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
 974     yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
 975     xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
 976     yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
 977     zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
 978     threeQuads[0] = xyzx;
 979     threeQuads[1] = yzxy;
 980     threeQuads[2] = zxyz;
 981 }
 982
 983 static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
 984 {
 985     vec_float4 xyz0[3];
 986     vec_float4 xyz1[3];
 987     vmathSoaP3StoreXYZArray( pnt0, xyz0 );
 988     vmathSoaP3StoreXYZArray( pnt1, xyz1 );
 989     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
 990     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
 991     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
 992 }
 993
 994 static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
 995 {
 996     result->x = _x;
 997 }
 998
 999 static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
1000 {
1001     return pnt->x;
1002 }
1003
1004 static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
1005 {
1006     result->y = _y;
1007 }
1008
1009 static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
1010 {
1011     return pnt->y;
1012 }
1013
1014 static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
1015 {
1016     result->z = _z;
1017 }
1018
1019 static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
1020 {
1021     return pnt->z;
1022 }
1023
1024 static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
1025 {
1026     *(&result->x + idx) = value;
1027 }
1028
1029 static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
1030 {
1031     return *(&pnt->x + idx);
1032 }
1033
1034 static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1035 {
1036     result->x = spu_sub( pnt0->x, pnt1->x );
1037     result->y = spu_sub( pnt0->y, pnt1->y );
1038     result->z = spu_sub( pnt0->z, pnt1->z );
1039 }
1040
1041 static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
1042 {
1043     result->x = spu_add( pnt->x, vec1->x );
1044     result->y = spu_add( pnt->y, vec1->y );
1045     result->z = spu_add( pnt->z, vec1->z );
1046 }
1047
1048 static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
1049 {
1050     result->x = spu_sub( pnt->x, vec1->x );
1051     result->y = spu_sub( pnt->y, vec1->y );
1052     result->z = spu_sub( pnt->z, vec1->z );
1053 }
1054
1055 static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1056 {
1057     result->x = spu_mul( pnt0->x, pnt1->x );
1058     result->y = spu_mul( pnt0->y, pnt1->y );
1059     result->z = spu_mul( pnt0->z, pnt1->z );
1060 }
1061
1062 static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1063 {
1064     result->x = divf4( pnt0->x, pnt1->x );
1065     result->y = divf4( pnt0->y, pnt1->y );
1066     result->z = divf4( pnt0->z, pnt1->z );
1067 }
1068
1069 static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1070 {
1071     result->x = recipf4( pnt->x );
1072     result->y = recipf4( pnt->y );
1073     result->z = recipf4( pnt->z );
1074 }
1075
1076 static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1077 {
1078     result->x = sqrtf4( pnt->x );
1079     result->y = sqrtf4( pnt->y );
1080     result->z = sqrtf4( pnt->z );
1081 }
1082
1083 static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1084 {
1085     result->x = rsqrtf4( pnt->x );
1086     result->y = rsqrtf4( pnt->y );
1087     result->z = rsqrtf4( pnt->z );
1088 }
1089
1090 static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1091 {
1092     result->x = fabsf4( pnt->x );
1093     result->y = fabsf4( pnt->y );
1094     result->z = fabsf4( pnt->z );
1095 }
1096
1097 static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1098 {
1099     result->x = copysignf4( pnt0->x, pnt1->x );
1100     result->y = copysignf4( pnt0->y, pnt1->y );
1101     result->z = copysignf4( pnt0->z, pnt1->z );
1102 }
1103
1104 static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1105 {
1106     result->x = fmaxf4( pnt0->x, pnt1->x );
1107     result->y = fmaxf4( pnt0->y, pnt1->y );
1108     result->z = fmaxf4( pnt0->z, pnt1->z );
1109 }
1110
1111 static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
1112 {
1113     vec_float4 result;
1114     result = fmaxf4( pnt->x, pnt->y );
1115     result = fmaxf4( pnt->z, result );
1116     return result;
1117 }
1118
1119 static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1120 {
1121     result->x = fminf4( pnt0->x, pnt1->x );
1122     result->y = fminf4( pnt0->y, pnt1->y );
1123     result->z = fminf4( pnt0->z, pnt1->z );
1124 }
1125
1126 static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
1127 {
1128     vec_float4 result;
1129     result = fminf4( pnt->x, pnt->y );
1130     result = fminf4( pnt->z, result );
1131     return result;
1132 }
1133
1134 static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
1135 {
1136     vec_float4 result;
1137     result = spu_add( pnt->x, pnt->y );
1138     result = spu_add( result, pnt->z );
1139     return result;
1140 }
1141
1142 static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
1143 {
1144     VmathSoaPoint3 tmpP3_0;
1145     vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
1146     vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
1147 }
1148
1149 static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
1150 {
1151     VmathSoaPoint3 tmpP3_0;
1152     vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
1153     vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
1154 }
1155
1156 static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
1157 {
1158     vec_float4 result;
1159     result = spu_mul( pnt->x, unitVec->x );
1160     result = spu_add( result, spu_mul( pnt->y, unitVec->y ) );
1161     result = spu_add( result, spu_mul( pnt->z, unitVec->z ) );
1162     return result;
1163 }
1164
1165 static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
1166 {
1167     VmathSoaVector3 tmpV3_0;
1168     vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
1169     return vmathSoaV3LengthSqr( &tmpV3_0 );
1170 }
1171
1172 static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
1173 {
1174     VmathSoaVector3 tmpV3_0;
1175     vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
1176     return vmathSoaV3Length( &tmpV3_0 );
1177 }
1178
1179 static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1180 {
1181     VmathSoaVector3 tmpV3_0;
1182     vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
1183     return vmathSoaV3LengthSqr( &tmpV3_0 );
1184 }
1185
1186 static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1187 {
1188     VmathSoaVector3 tmpV3_0;
1189     vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
1190     return vmathSoaV3Length( &tmpV3_0 );
1191 }
1192
1193 static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
1194 {
1195     result->x = spu_sel( pnt0->x, pnt1->x, select1 );
1196     result->y = spu_sel( pnt0->y, pnt1->y, select1 );
1197     result->z = spu_sel( pnt0->z, pnt1->z, select1 );
1198 }
1199
1200 #ifdef _VECTORMATH_DEBUG
1201
1202 static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
1203 {
1204     VmathPoint3 vec0, vec1, vec2, vec3;
1205     vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
1206     printf("slot 0:\n");
1207     vmathP3Print( &vec0 );
1208     printf("slot 1:\n");
1209     vmathP3Print( &vec1 );
1210     printf("slot 2:\n");
1211     vmathP3Print( &vec2 );
1212     printf("slot 3:\n");
1213     vmathP3Print( &vec3 );
1214 }
1215
1216 static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
1217 {
1218     VmathPoint3 vec0, vec1, vec2, vec3;
1219     printf( "%s:\n", name );
1220     vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
1221     printf("slot 0:\n");
1222     vmathP3Print( &vec0 );
1223     printf("slot 1:\n");
1224     vmathP3Print( &vec1 );
1225     printf("slot 2:\n");
1226     vmathP3Print( &vec2 );
1227     printf("slot 3:\n");
1228     vmathP3Print( &vec3 );
1229 }
1230
1231 #endif
1232
1233 #ifdef __cplusplus
1234 }
1235 #endif /* __cplusplus */
1236
1237 #endif