/*
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/
30 #ifndef _VECTORMATH_VEC_AOS_C_H
31 #define _VECTORMATH_VEC_AOS_C_H
34 #endif /* __cplusplus */
/*-----------------------------------------------------------------------------
 * Constants
 * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
 *
 * Each _VECTORMATH_SHUF_<word> value packs the four byte indices of one
 * 32-bit word of an spu_shuffle pattern: indices 0x00-0x0f address the first
 * operand (x,y,z,w) and 0x10-0x1f the second operand (a,b,c,d).
 * _VECTORMATH_SHUF_0 uses 0x80 in every byte, which the SPU shuffle-bytes
 * encoding maps to a zero byte.
 */
#define _VECTORMATH_SHUF_X 0x00010203
#define _VECTORMATH_SHUF_Y 0x04050607
#define _VECTORMATH_SHUF_Z 0x08090a0b
#define _VECTORMATH_SHUF_W 0x0c0d0e0f
#define _VECTORMATH_SHUF_A 0x10111213
#define _VECTORMATH_SHUF_B 0x14151617
#define _VECTORMATH_SHUF_C 0x18191a1b
#define _VECTORMATH_SHUF_D 0x1c1d1e1f
#define _VECTORMATH_SHUF_0 0x80808080

/* Complete 16-byte shuffle patterns, named after the words they produce.
 * (The second, identical definition of _VECTORMATH_SHUF_XYZA was dropped.) */
#define _VECTORMATH_SHUF_XYZA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A })
#define _VECTORMATH_SHUF_ZXYW ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W })
#define _VECTORMATH_SHUF_YZXW ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W })
#define _VECTORMATH_SHUF_WABC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C })
#define _VECTORMATH_SHUF_ZWAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
#define _VECTORMATH_SHUF_YZAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
#define _VECTORMATH_SHUF_ZABC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C })

/* Unit vectors; the digits give the x,y,z,w lane values. */
#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }

/* Slerp falls back to linear weights when the cosine of the angle between
 * the inputs is not below this value. */
#define _VECTORMATH_SLERP_TOL 0.999f
63 /*-----------------------------------------------------------------------------
66 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
67 #define _VECTORMATH_INTERNAL_FUNCTIONS
69 static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
72 result = spu_mul( vec0, vec1 );
73 result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
74 return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
77 static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
80 result = spu_mul( vec0, vec1 );
81 result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
82 return spu_add( spu_rlqwbyte( result, 8 ), result );
85 static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
87 vec_float4 tmp0, tmp1, tmp2, tmp3, result;
88 tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
89 tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
90 tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
91 tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
92 result = spu_mul( tmp0, tmp1 );
93 result = spu_nmsub( tmp2, tmp3, result );
97 static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
100 vec_uint4 mant, sign, hfloat;
101 vec_uint4 notZero, isInf;
102 const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
103 const vec_uint4 mergeMant = spu_splats(0x000003ffu);
104 const vec_uint4 mergeSign = spu_splats(0x00008000u);
106 sign = spu_rlmask((vec_uint4)v, -16);
107 mant = spu_rlmask((vec_uint4)v, -13);
108 bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
110 notZero = spu_cmpgt(bexp, 112);
111 isInf = spu_cmpgt(bexp, 142);
113 bexp = spu_add(bexp, -112);
114 bexp = spu_sl(bexp, 10);
116 hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
117 hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
118 hfloat = spu_sel(hfloat, hfloatInf, isInf);
119 hfloat = spu_sel(hfloat, sign, mergeSign);
124 static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
126 vec_uint4 hfloat_u, hfloat_v;
127 const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
128 hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
129 hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
130 return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
135 static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
137 result->vec128 = vec->vec128;
140 static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
142 result->vec128 = (vec_float4){ _x, _y, _z, 0.0f };
145 static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
147 result->vec128 = pnt->vec128;
150 static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
152 result->vec128 = spu_splats( scalar );
155 static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
157 result->vec128 = vf4;
160 static inline void vmathV3MakeXAxis( VmathVector3 *result )
162 result->vec128 = _VECTORMATH_UNIT_1000;
165 static inline void vmathV3MakeYAxis( VmathVector3 *result )
167 result->vec128 = _VECTORMATH_UNIT_0100;
170 static inline void vmathV3MakeZAxis( VmathVector3 *result )
172 result->vec128 = _VECTORMATH_UNIT_0010;
175 static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
177 VmathVector3 tmpV3_0, tmpV3_1;
178 vmathV3Sub( &tmpV3_0, vec1, vec0 );
179 vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
180 vmathV3Add( result, vec0, &tmpV3_1 );
183 static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
185 vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
186 vec_uint4 selectMask;
187 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
188 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
189 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
190 cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
191 cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
192 selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
193 angle = acosf4( cosAngle );
194 tttt = spu_splats(t);
195 oneMinusT = spu_sub( spu_splats(1.0f), tttt );
196 angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
197 angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
198 angles = spu_mul( angles, angle );
199 sines = sinf4( angles );
200 scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
201 scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
202 scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
203 result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
206 static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
211 static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
213 vec_float4 dstVec = *quad;
214 vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
215 dstVec = spu_sel(vec->vec128, dstVec, mask);
219 static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
221 vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
222 xyzx = threeQuads[0];
223 yzxy = threeQuads[1];
224 zxyz = threeQuads[2];
225 xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
226 xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
227 xyz3 = spu_rlqwbyte( zxyz, 4 );
234 static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
236 vec_float4 xyzx, yzxy, zxyz;
237 xyzx = spu_shuffle( vec0->vec128, vec1->vec128, _VECTORMATH_SHUF_XYZA );
238 yzxy = spu_shuffle( vec1->vec128, vec2->vec128, _VECTORMATH_SHUF_YZAB );
239 zxyz = spu_shuffle( vec2->vec128, vec3->vec128, _VECTORMATH_SHUF_ZABC );
240 threeQuads[0] = xyzx;
241 threeQuads[1] = yzxy;
242 threeQuads[2] = zxyz;
245 static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
249 vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
250 vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
251 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
252 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
253 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
256 static inline void vmathV3SetX( VmathVector3 *result, float _x )
258 result->vec128 = spu_insert( _x, result->vec128, 0 );
261 static inline float vmathV3GetX( const VmathVector3 *vec )
263 return spu_extract( vec->vec128, 0 );
266 static inline void vmathV3SetY( VmathVector3 *result, float _y )
268 result->vec128 = spu_insert( _y, result->vec128, 1 );
271 static inline float vmathV3GetY( const VmathVector3 *vec )
273 return spu_extract( vec->vec128, 1 );
276 static inline void vmathV3SetZ( VmathVector3 *result, float _z )
278 result->vec128 = spu_insert( _z, result->vec128, 2 );
281 static inline float vmathV3GetZ( const VmathVector3 *vec )
283 return spu_extract( vec->vec128, 2 );
286 static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
288 result->vec128 = spu_insert( value, result->vec128, idx );
291 static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
293 return spu_extract( vec->vec128, idx );
296 static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
298 result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
301 static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
303 result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
306 static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
308 result->vec128 = spu_add( vec->vec128, pnt1->vec128 );
311 static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
313 result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
316 static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
318 result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
321 static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
323 result->vec128 = negatef4( vec->vec128 );
326 static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
328 result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
331 static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
333 result->vec128 = divf4( vec0->vec128, vec1->vec128 );
336 static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
338 result->vec128 = recipf4( vec->vec128 );
341 static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
343 result->vec128 = sqrtf4( vec->vec128 );
346 static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
348 result->vec128 = rsqrtf4( vec->vec128 );
351 static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
353 result->vec128 = fabsf4( vec->vec128 );
356 static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
358 result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
361 static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
363 result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
366 static inline float vmathV3MaxElem( const VmathVector3 *vec )
369 result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
370 result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
371 return spu_extract( result, 0 );
374 static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
376 result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
379 static inline float vmathV3MinElem( const VmathVector3 *vec )
382 result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
383 result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
384 return spu_extract( result, 0 );
387 static inline float vmathV3Sum( const VmathVector3 *vec )
390 spu_extract( vec->vec128, 0 ) +
391 spu_extract( vec->vec128, 1 ) +
392 spu_extract( vec->vec128, 2 );
395 static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
397 return spu_extract( _vmathVfDot3( vec0->vec128, vec1->vec128 ), 0 );
400 static inline float vmathV3LengthSqr( const VmathVector3 *vec )
402 return spu_extract( _vmathVfDot3( vec->vec128, vec->vec128 ), 0 );
405 static inline float vmathV3Length( const VmathVector3 *vec )
407 return sqrtf( vmathV3LengthSqr( vec ) );
410 static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
412 vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
413 dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
414 result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
417 static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
419 result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
422 static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
424 result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
#ifdef _VECTORMATH_DEBUG

/* Prints x, y, z of *vec to stdout as "( x y z )" followed by a newline. */
static inline void vmathV3Print( const VmathVector3 *vec )
{
    /* The union gives scalar access to the lanes of the vector. */
    union { vec_float4 v; float s[4]; } tmp;
    tmp.v = vec->vec128;
    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
}

/* Same as vmathV3Print, but prefixes the output with "name: ". */
static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
{
    union { vec_float4 v; float s[4]; } tmp;
    tmp.v = vec->vec128;
    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
}

#endif /* _VECTORMATH_DEBUG */
445 static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
447 result->vec128 = vec->vec128;
450 static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
452 result->vec128 = (vec_float4){ _x, _y, _z, _w };
455 static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
457 result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
460 static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
462 result->vec128 = spu_sel( vec->vec128, spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
465 static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
467 result->vec128 = spu_sel( pnt->vec128, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
470 static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
472 result->vec128 = quat->vec128;
475 static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
477 result->vec128 = spu_splats( scalar );
480 static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
482 result->vec128 = vf4;
485 static inline void vmathV4MakeXAxis( VmathVector4 *result )
487 result->vec128 = _VECTORMATH_UNIT_1000;
490 static inline void vmathV4MakeYAxis( VmathVector4 *result )
492 result->vec128 = _VECTORMATH_UNIT_0100;
495 static inline void vmathV4MakeZAxis( VmathVector4 *result )
497 result->vec128 = _VECTORMATH_UNIT_0010;
500 static inline void vmathV4MakeWAxis( VmathVector4 *result )
502 result->vec128 = _VECTORMATH_UNIT_0001;
505 static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
507 VmathVector4 tmpV4_0, tmpV4_1;
508 vmathV4Sub( &tmpV4_0, vec1, vec0 );
509 vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
510 vmathV4Add( result, vec0, &tmpV4_1 );
513 static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
515 vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
516 vec_uint4 selectMask;
517 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
518 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
519 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
520 cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
521 cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
522 selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
523 angle = acosf4( cosAngle );
524 tttt = spu_splats(t);
525 oneMinusT = spu_sub( spu_splats(1.0f), tttt );
526 angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
527 angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
528 angles = spu_mul( angles, angle );
529 sines = sinf4( angles );
530 scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
531 scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
532 scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
533 result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
536 static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
541 static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
543 twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
544 twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
547 static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
549 result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
552 static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
554 result->vec128 = vec->vec128;
557 static inline void vmathV4SetX( VmathVector4 *result, float _x )
559 result->vec128 = spu_insert( _x, result->vec128, 0 );
562 static inline float vmathV4GetX( const VmathVector4 *vec )
564 return spu_extract( vec->vec128, 0 );
567 static inline void vmathV4SetY( VmathVector4 *result, float _y )
569 result->vec128 = spu_insert( _y, result->vec128, 1 );
572 static inline float vmathV4GetY( const VmathVector4 *vec )
574 return spu_extract( vec->vec128, 1 );
577 static inline void vmathV4SetZ( VmathVector4 *result, float _z )
579 result->vec128 = spu_insert( _z, result->vec128, 2 );
582 static inline float vmathV4GetZ( const VmathVector4 *vec )
584 return spu_extract( vec->vec128, 2 );
587 static inline void vmathV4SetW( VmathVector4 *result, float _w )
589 result->vec128 = spu_insert( _w, result->vec128, 3 );
592 static inline float vmathV4GetW( const VmathVector4 *vec )
594 return spu_extract( vec->vec128, 3 );
597 static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
599 result->vec128 = spu_insert( value, result->vec128, idx );
602 static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
604 return spu_extract( vec->vec128, idx );
607 static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
609 result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
612 static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
614 result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
617 static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
619 result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
622 static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
624 result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
627 static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
629 result->vec128 = negatef4( vec->vec128 );
632 static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
634 result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
637 static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
639 result->vec128 = divf4( vec0->vec128, vec1->vec128 );
642 static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
644 result->vec128 = recipf4( vec->vec128 );
647 static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
649 result->vec128 = sqrtf4( vec->vec128 );
652 static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
654 result->vec128 = rsqrtf4( vec->vec128 );
657 static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
659 result->vec128 = fabsf4( vec->vec128 );
662 static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
664 result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
667 static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
669 result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
672 static inline float vmathV4MaxElem( const VmathVector4 *vec )
675 result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
676 result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
677 result = fmaxf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
678 return spu_extract( result, 0 );
681 static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
683 result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
686 static inline float vmathV4MinElem( const VmathVector4 *vec )
689 result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
690 result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
691 result = fminf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
692 return spu_extract( result, 0 );
695 static inline float vmathV4Sum( const VmathVector4 *vec )
698 spu_extract( vec->vec128, 0 ) +
699 spu_extract( vec->vec128, 1 ) +
700 spu_extract( vec->vec128, 2 ) +
701 spu_extract( vec->vec128, 3 );
704 static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
706 return spu_extract( _vmathVfDot4( vec0->vec128, vec1->vec128 ), 0 );
709 static inline float vmathV4LengthSqr( const VmathVector4 *vec )
711 return spu_extract( _vmathVfDot4( vec->vec128, vec->vec128 ), 0 );
714 static inline float vmathV4Length( const VmathVector4 *vec )
716 return sqrtf( vmathV4LengthSqr( vec ) );
719 static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
721 vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
722 result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
725 static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
727 result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
#ifdef _VECTORMATH_DEBUG

/* Prints the four lanes of *vec to stdout as "( x y z w )" followed by a
 * newline. */
static inline void vmathV4Print( const VmathVector4 *vec )
{
    /* The union gives scalar access to the lanes of the vector. */
    union { vec_float4 v; float s[4]; } tmp;
    tmp.v = vec->vec128;
    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
}

/* Same as vmathV4Print, but prefixes the output with "name: ". */
static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
{
    union { vec_float4 v; float s[4]; } tmp;
    tmp.v = vec->vec128;
    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
}

#endif /* _VECTORMATH_DEBUG */
748 static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
750 result->vec128 = pnt->vec128;
753 static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
755 result->vec128 = (vec_float4){ _x, _y, _z, 0.0f };
758 static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
760 result->vec128 = vec->vec128;
763 static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
765 result->vec128 = spu_splats( scalar );
768 static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
770 result->vec128 = vf4;
773 static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
775 VmathVector3 tmpV3_0, tmpV3_1;
776 vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
777 vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
778 vmathP3AddV3( result, pnt0, &tmpV3_1 );
781 static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
786 static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
788 vec_float4 dstVec = *quad;
789 vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
790 dstVec = spu_sel(pnt->vec128, dstVec, mask);
794 static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
796 vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
797 xyzx = threeQuads[0];
798 yzxy = threeQuads[1];
799 zxyz = threeQuads[2];
800 xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
801 xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
802 xyz3 = spu_rlqwbyte( zxyz, 4 );
809 static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
811 vec_float4 xyzx, yzxy, zxyz;
812 xyzx = spu_shuffle( pnt0->vec128, pnt1->vec128, _VECTORMATH_SHUF_XYZA );
813 yzxy = spu_shuffle( pnt1->vec128, pnt2->vec128, _VECTORMATH_SHUF_YZAB );
814 zxyz = spu_shuffle( pnt2->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZABC );
815 threeQuads[0] = xyzx;
816 threeQuads[1] = yzxy;
817 threeQuads[2] = zxyz;
820 static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
824 vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
825 vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
826 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
827 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
828 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
831 static inline void vmathP3SetX( VmathPoint3 *result, float _x )
833 result->vec128 = spu_insert( _x, result->vec128, 0 );
836 static inline float vmathP3GetX( const VmathPoint3 *pnt )
838 return spu_extract( pnt->vec128, 0 );
841 static inline void vmathP3SetY( VmathPoint3 *result, float _y )
843 result->vec128 = spu_insert( _y, result->vec128, 1 );
846 static inline float vmathP3GetY( const VmathPoint3 *pnt )
848 return spu_extract( pnt->vec128, 1 );
851 static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
853 result->vec128 = spu_insert( _z, result->vec128, 2 );
856 static inline float vmathP3GetZ( const VmathPoint3 *pnt )
858 return spu_extract( pnt->vec128, 2 );
861 static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
863 result->vec128 = spu_insert( value, result->vec128, idx );
866 static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
868 return spu_extract( pnt->vec128, idx );
871 static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
873 result->vec128 = spu_sub( pnt0->vec128, pnt1->vec128 );
876 static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
878 result->vec128 = spu_add( pnt->vec128, vec1->vec128 );
881 static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
883 result->vec128 = spu_sub( pnt->vec128, vec1->vec128 );
886 static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
888 result->vec128 = spu_mul( pnt0->vec128, pnt1->vec128 );
891 static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
893 result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
896 static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
898 result->vec128 = recipf4( pnt->vec128 );
901 static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
903 result->vec128 = sqrtf4( pnt->vec128 );
906 static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
908 result->vec128 = rsqrtf4( pnt->vec128 );
911 static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
913 result->vec128 = fabsf4( pnt->vec128 );
916 static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
918 result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
921 static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
923 result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
926 static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
929 result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
930 result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
931 return spu_extract( result, 0 );
934 static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
936 result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
939 static inline float vmathP3MinElem( const VmathPoint3 *pnt )
942 result = fminf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
943 result = fminf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
944 return spu_extract( result, 0 );
947 static inline float vmathP3Sum( const VmathPoint3 *pnt )
950 spu_extract( pnt->vec128, 0 ) +
951 spu_extract( pnt->vec128, 1 ) +
952 spu_extract( pnt->vec128, 2 );
955 static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
958 vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
959 vmathP3MulPerElem( result, pnt, &tmpP3_0 );
962 static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
965 vmathP3MakeFromV3( &tmpP3_0, scaleVec );
966 vmathP3MulPerElem( result, pnt, &tmpP3_0 );
969 static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
971 return spu_extract( _vmathVfDot3( pnt->vec128, unitVec->vec128 ), 0 );
974 static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
976 VmathVector3 tmpV3_0;
977 vmathV3MakeFromP3( &tmpV3_0, pnt );
978 return vmathV3LengthSqr( &tmpV3_0 );
981 static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
983 VmathVector3 tmpV3_0;
984 vmathV3MakeFromP3( &tmpV3_0, pnt );
985 return vmathV3Length( &tmpV3_0 );
988 static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
990 VmathVector3 tmpV3_0;
991 vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
992 return vmathV3LengthSqr( &tmpV3_0 );
995 static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
997 VmathVector3 tmpV3_0;
998 vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
999 return vmathV3Length( &tmpV3_0 );
1002 static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
1004 result->vec128 = spu_sel( pnt0->vec128, pnt1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
#ifdef _VECTORMATH_DEBUG

/* Prints x, y, z of *pnt to stdout as "( x y z )" followed by a newline. */
static inline void vmathP3Print( const VmathPoint3 *pnt )
{
    /* The union gives scalar access to the lanes of the vector. */
    union { vec_float4 v; float s[4]; } tmp;
    tmp.v = pnt->vec128;
    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
}

/* Same as vmathP3Print, but prefixes the output with "name: ". */
static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
{
    union { vec_float4 v; float s[4]; } tmp;
    tmp.v = pnt->vec128;
    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
}

#endif /* _VECTORMATH_DEBUG */
/* Closes the C-linkage block for C++ translation units. */
#ifdef __cplusplus
}
#endif /* __cplusplus */