2 Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
5 Redistribution and use in source and binary forms,
6 with or without modification, are permitted provided that the
7 following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the Sony Computer Entertainment Inc nor the names
14 of its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
30 #ifndef _VECTORMATH_VEC_SOA_C_H
31 #define _VECTORMATH_VEC_SOA_C_H
34 #endif /* __cplusplus */
36 /*-----------------------------------------------------------------------------
38 * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
/* Per-word byte selectors used to build spu_shuffle control vectors.  Each
 * constant lists the four source byte indices of one 32-bit word: X/Y/Z/W
 * cover bytes 0x00-0x0f and A/B/C/D cover bytes 0x10-0x1f, following the
 * [x,y,z,w] [a,b,c,d] word labeling. */
40 #define _VECTORMATH_SHUF_X 0x00010203
41 #define _VECTORMATH_SHUF_Y 0x04050607
42 #define _VECTORMATH_SHUF_Z 0x08090a0b
43 #define _VECTORMATH_SHUF_W 0x0c0d0e0f
44 #define _VECTORMATH_SHUF_A 0x10111213
45 #define _VECTORMATH_SHUF_B 0x14151617
46 #define _VECTORMATH_SHUF_C 0x18191a1b
47 #define _VECTORMATH_SHUF_D 0x1c1d1e1f
/* Four selector bytes of 0x80.  NOTE(review): presumably the shuffle pattern
 * that yields a zero word -- confirm against the SPU shuffle-bytes encoding. */
48 #define _VECTORMATH_SHUF_0 0x80808080
/* Complete 16-byte control vectors; each name spells the four words selected,
 * in output order. */
49 #define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
50 #define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
51 #define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
52 #define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
53 #define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
54 #define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
55 #define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
56 #define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
57 #define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
58 #define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
59 #define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
60 #define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
/* Cosine threshold the Slerp routines compare the dot product against when
 * choosing between sine-based and linear interpolation weights. */
61 #define _VECTORMATH_SLERP_TOL 0.999f
63 /*-----------------------------------------------------------------------------
66 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
67 #define _VECTORMATH_INTERNAL_FUNCTIONS
71 static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
78 static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
85 static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
92 static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
99 static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
101 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
102 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
103 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
104 vec_float4 vec128 = vec->vec128;
105 result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
106 result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
107 result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
110 static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
112 vec_float4 tmp0, tmp1, tmp2, tmp3;
113 tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
114 tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
115 tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
116 tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
117 result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
118 result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
119 result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
122 static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
124 vmathSoaV3MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
127 static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
129 vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
132 static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
134 vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
137 static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
139 VmathSoaVector3 tmpV3_0, tmpV3_1;
140 vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
141 vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
142 vmathSoaV3Add( result, vec0, &tmpV3_1 );
145 static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
147 VmathSoaVector3 tmpV3_0, tmpV3_1;
148 vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
149 vec_uint4 selectMask;
150 cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
151 selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
152 angle = acosf4( cosAngle );
153 recipSinAngle = recipf4( sinf4( angle ) );
154 scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
155 scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
156 vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
157 vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
158 vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
161 static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
163 vec_float4 tmp0, tmp1;
164 tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
165 tmp1 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
166 vmathV3MakeFrom128( result0, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_XAYB ) );
167 vmathV3MakeFrom128( result1, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_ZBW0 ) );
168 vmathV3MakeFrom128( result2, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_XCY0 ) );
169 vmathV3MakeFrom128( result3, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_ZDW0 ) );
172 static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
174 vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
175 xyzx = threeQuads[0];
176 yzxy = threeQuads[1];
177 zxyz = threeQuads[2];
178 xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
179 zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
180 yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
181 vmathSoaV3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
182 vmathSoaV3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
183 vmathSoaV3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
186 static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
188 vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
189 xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
190 zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
191 yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
192 xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
193 yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
194 zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
195 threeQuads[0] = xyzx;
196 threeQuads[1] = yzxy;
197 threeQuads[2] = zxyz;
200 static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
204 vmathSoaV3StoreXYZArray( vec0, xyz0 );
205 vmathSoaV3StoreXYZArray( vec1, xyz1 );
206 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
207 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
208 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
211 static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
216 static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
221 static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
226 static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
231 static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
236 static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
241 static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
243 *(&result->x + idx) = value;
246 static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
248 return *(&vec->x + idx);
251 static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
253 result->x = spu_add( vec0->x, vec1->x );
254 result->y = spu_add( vec0->y, vec1->y );
255 result->z = spu_add( vec0->z, vec1->z );
258 static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
260 result->x = spu_sub( vec0->x, vec1->x );
261 result->y = spu_sub( vec0->y, vec1->y );
262 result->z = spu_sub( vec0->z, vec1->z );
265 static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
267 result->x = spu_add( vec->x, pnt1->x );
268 result->y = spu_add( vec->y, pnt1->y );
269 result->z = spu_add( vec->z, pnt1->z );
272 static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
274 result->x = spu_mul( vec->x, scalar );
275 result->y = spu_mul( vec->y, scalar );
276 result->z = spu_mul( vec->z, scalar );
279 static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
281 result->x = divf4( vec->x, scalar );
282 result->y = divf4( vec->y, scalar );
283 result->z = divf4( vec->z, scalar );
286 static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
288 result->x = negatef4( vec->x );
289 result->y = negatef4( vec->y );
290 result->z = negatef4( vec->z );
293 static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
295 result->x = spu_mul( vec0->x, vec1->x );
296 result->y = spu_mul( vec0->y, vec1->y );
297 result->z = spu_mul( vec0->z, vec1->z );
300 static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
302 result->x = divf4( vec0->x, vec1->x );
303 result->y = divf4( vec0->y, vec1->y );
304 result->z = divf4( vec0->z, vec1->z );
307 static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
309 result->x = recipf4( vec->x );
310 result->y = recipf4( vec->y );
311 result->z = recipf4( vec->z );
314 static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
316 result->x = sqrtf4( vec->x );
317 result->y = sqrtf4( vec->y );
318 result->z = sqrtf4( vec->z );
321 static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
323 result->x = rsqrtf4( vec->x );
324 result->y = rsqrtf4( vec->y );
325 result->z = rsqrtf4( vec->z );
328 static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
330 result->x = fabsf4( vec->x );
331 result->y = fabsf4( vec->y );
332 result->z = fabsf4( vec->z );
335 static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
337 result->x = copysignf4( vec0->x, vec1->x );
338 result->y = copysignf4( vec0->y, vec1->y );
339 result->z = copysignf4( vec0->z, vec1->z );
342 static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
344 result->x = fmaxf4( vec0->x, vec1->x );
345 result->y = fmaxf4( vec0->y, vec1->y );
346 result->z = fmaxf4( vec0->z, vec1->z );
349 static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
352 result = fmaxf4( vec->x, vec->y );
353 result = fmaxf4( vec->z, result );
357 static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
359 result->x = fminf4( vec0->x, vec1->x );
360 result->y = fminf4( vec0->y, vec1->y );
361 result->z = fminf4( vec0->z, vec1->z );
364 static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
367 result = fminf4( vec->x, vec->y );
368 result = fminf4( vec->z, result );
372 static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
375 result = spu_add( vec->x, vec->y );
376 result = spu_add( result, vec->z );
380 static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
383 result = spu_mul( vec0->x, vec1->x );
384 result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
385 result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
389 static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
392 result = spu_mul( vec->x, vec->x );
393 result = spu_add( result, spu_mul( vec->y, vec->y ) );
394 result = spu_add( result, spu_mul( vec->z, vec->z ) );
398 static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
400 return sqrtf4( vmathSoaV3LengthSqr( vec ) );
403 static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
405 vec_float4 lenSqr, lenInv;
406 lenSqr = vmathSoaV3LengthSqr( vec );
407 lenInv = rsqrtf4( lenSqr );
408 result->x = spu_mul( vec->x, lenInv );
409 result->y = spu_mul( vec->y, lenInv );
410 result->z = spu_mul( vec->z, lenInv );
413 static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
415 vec_float4 tmpX, tmpY, tmpZ;
416 tmpX = spu_sub( spu_mul( vec0->y, vec1->z ), spu_mul( vec0->z, vec1->y ) );
417 tmpY = spu_sub( spu_mul( vec0->z, vec1->x ), spu_mul( vec0->x, vec1->z ) );
418 tmpZ = spu_sub( spu_mul( vec0->x, vec1->y ), spu_mul( vec0->y, vec1->x ) );
419 vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
422 static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
424 result->x = spu_sel( vec0->x, vec1->x, select1 );
425 result->y = spu_sel( vec0->y, vec1->y, select1 );
426 result->z = spu_sel( vec0->z, vec1->z, select1 );
429 #ifdef _VECTORMATH_DEBUG
431 static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
433 VmathVector3 vec0, vec1, vec2, vec3;
434 vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
436 vmathV3Print( &vec0 );
438 vmathV3Print( &vec1 );
440 vmathV3Print( &vec2 );
442 vmathV3Print( &vec3 );
445 static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
447 VmathVector3 vec0, vec1, vec2, vec3;
448 printf( "%s:\n", name );
449 vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
451 vmathV3Print( &vec0 );
453 vmathV3Print( &vec1 );
455 vmathV3Print( &vec2 );
457 vmathV3Print( &vec3 );
462 static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
470 static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
478 static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
480 vmathSoaV4SetXYZ( result, xyz );
481 vmathSoaV4SetW( result, _w );
484 static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
489 result->w = spu_splats(0.0f);
492 static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
497 result->w = spu_splats(1.0f);
500 static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
508 static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
516 static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
518 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
519 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
520 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
521 vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
522 vec_float4 vec128 = vec->vec128;
523 result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
524 result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
525 result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
526 result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
529 static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
531 vec_float4 tmp0, tmp1, tmp2, tmp3;
532 tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
533 tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
534 tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
535 tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
536 result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
537 result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
538 result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
539 result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
542 static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
544 vmathSoaV4MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
547 static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
549 vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
552 static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
554 vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
557 static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
559 vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
562 static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
564 VmathSoaVector4 tmpV4_0, tmpV4_1;
565 vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
566 vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
567 vmathSoaV4Add( result, vec0, &tmpV4_1 );
570 static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
572 VmathSoaVector4 tmpV4_0, tmpV4_1;
573 vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
574 vec_uint4 selectMask;
575 cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
576 selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
577 angle = acosf4( cosAngle );
578 recipSinAngle = recipf4( sinf4( angle ) );
579 scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
580 scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
581 vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
582 vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
583 vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
586 static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
588 vec_float4 tmp0, tmp1, tmp2, tmp3;
589 tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
590 tmp1 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_XAYB );
591 tmp2 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
592 tmp3 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_ZCWD );
593 vmathV4MakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
594 vmathV4MakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
595 vmathV4MakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
596 vmathV4MakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
599 static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
601 VmathVector4 v0, v1, v2, v3;
602 vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
603 twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
604 twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
607 static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
614 static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
616 vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
619 static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
624 static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
629 static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
634 static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
639 static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
644 static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
649 static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
654 static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
659 static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
661 *(&result->x + idx) = value;
664 static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
666 return *(&vec->x + idx);
669 static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
671 result->x = spu_add( vec0->x, vec1->x );
672 result->y = spu_add( vec0->y, vec1->y );
673 result->z = spu_add( vec0->z, vec1->z );
674 result->w = spu_add( vec0->w, vec1->w );
677 static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
679 result->x = spu_sub( vec0->x, vec1->x );
680 result->y = spu_sub( vec0->y, vec1->y );
681 result->z = spu_sub( vec0->z, vec1->z );
682 result->w = spu_sub( vec0->w, vec1->w );
685 static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
687 result->x = spu_mul( vec->x, scalar );
688 result->y = spu_mul( vec->y, scalar );
689 result->z = spu_mul( vec->z, scalar );
690 result->w = spu_mul( vec->w, scalar );
693 static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
695 result->x = divf4( vec->x, scalar );
696 result->y = divf4( vec->y, scalar );
697 result->z = divf4( vec->z, scalar );
698 result->w = divf4( vec->w, scalar );
701 static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
703 result->x = negatef4( vec->x );
704 result->y = negatef4( vec->y );
705 result->z = negatef4( vec->z );
706 result->w = negatef4( vec->w );
709 static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
711 result->x = spu_mul( vec0->x, vec1->x );
712 result->y = spu_mul( vec0->y, vec1->y );
713 result->z = spu_mul( vec0->z, vec1->z );
714 result->w = spu_mul( vec0->w, vec1->w );
717 static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
719 result->x = divf4( vec0->x, vec1->x );
720 result->y = divf4( vec0->y, vec1->y );
721 result->z = divf4( vec0->z, vec1->z );
722 result->w = divf4( vec0->w, vec1->w );
725 static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
727 result->x = recipf4( vec->x );
728 result->y = recipf4( vec->y );
729 result->z = recipf4( vec->z );
730 result->w = recipf4( vec->w );
733 static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
735 result->x = sqrtf4( vec->x );
736 result->y = sqrtf4( vec->y );
737 result->z = sqrtf4( vec->z );
738 result->w = sqrtf4( vec->w );
741 static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
743 result->x = rsqrtf4( vec->x );
744 result->y = rsqrtf4( vec->y );
745 result->z = rsqrtf4( vec->z );
746 result->w = rsqrtf4( vec->w );
749 static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
751 result->x = fabsf4( vec->x );
752 result->y = fabsf4( vec->y );
753 result->z = fabsf4( vec->z );
754 result->w = fabsf4( vec->w );
757 static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
759 result->x = copysignf4( vec0->x, vec1->x );
760 result->y = copysignf4( vec0->y, vec1->y );
761 result->z = copysignf4( vec0->z, vec1->z );
762 result->w = copysignf4( vec0->w, vec1->w );
765 static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
767 result->x = fmaxf4( vec0->x, vec1->x );
768 result->y = fmaxf4( vec0->y, vec1->y );
769 result->z = fmaxf4( vec0->z, vec1->z );
770 result->w = fmaxf4( vec0->w, vec1->w );
773 static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
776 result = fmaxf4( vec->x, vec->y );
777 result = fmaxf4( vec->z, result );
778 result = fmaxf4( vec->w, result );
782 static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
784 result->x = fminf4( vec0->x, vec1->x );
785 result->y = fminf4( vec0->y, vec1->y );
786 result->z = fminf4( vec0->z, vec1->z );
787 result->w = fminf4( vec0->w, vec1->w );
790 static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
793 result = fminf4( vec->x, vec->y );
794 result = fminf4( vec->z, result );
795 result = fminf4( vec->w, result );
799 static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
802 result = spu_add( vec->x, vec->y );
803 result = spu_add( result, vec->z );
804 result = spu_add( result, vec->w );
808 static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
811 result = spu_mul( vec0->x, vec1->x );
812 result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
813 result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
814 result = spu_add( result, spu_mul( vec0->w, vec1->w ) );
818 static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
821 result = spu_mul( vec->x, vec->x );
822 result = spu_add( result, spu_mul( vec->y, vec->y ) );
823 result = spu_add( result, spu_mul( vec->z, vec->z ) );
824 result = spu_add( result, spu_mul( vec->w, vec->w ) );
828 static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
830 return sqrtf4( vmathSoaV4LengthSqr( vec ) );
833 static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
835 vec_float4 lenSqr, lenInv;
836 lenSqr = vmathSoaV4LengthSqr( vec );
837 lenInv = rsqrtf4( lenSqr );
838 result->x = spu_mul( vec->x, lenInv );
839 result->y = spu_mul( vec->y, lenInv );
840 result->z = spu_mul( vec->z, lenInv );
841 result->w = spu_mul( vec->w, lenInv );
844 static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
846 result->x = spu_sel( vec0->x, vec1->x, select1 );
847 result->y = spu_sel( vec0->y, vec1->y, select1 );
848 result->z = spu_sel( vec0->z, vec1->z, select1 );
849 result->w = spu_sel( vec0->w, vec1->w, select1 );
852 #ifdef _VECTORMATH_DEBUG
854 static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
856 VmathVector4 vec0, vec1, vec2, vec3;
857 vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
859 vmathV4Print( &vec0 );
861 vmathV4Print( &vec1 );
863 vmathV4Print( &vec2 );
865 vmathV4Print( &vec3 );
868 static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
870 VmathVector4 vec0, vec1, vec2, vec3;
871 printf( "%s:\n", name );
872 vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
874 vmathV4Print( &vec0 );
876 vmathV4Print( &vec1 );
878 vmathV4Print( &vec2 );
880 vmathV4Print( &vec3 );
885 static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
892 static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
899 static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
906 static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
913 static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
915 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
916 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
917 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
918 vec_float4 vec128 = pnt->vec128;
919 result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
920 result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
921 result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
924 static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
926 vec_float4 tmp0, tmp1, tmp2, tmp3;
927 tmp0 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_XAYB );
928 tmp1 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_XAYB );
929 tmp2 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_ZCWD );
930 tmp3 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZCWD );
931 result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
932 result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
933 result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
936 static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
938 VmathSoaVector3 tmpV3_0, tmpV3_1;
939 vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
940 vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
941 vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
/*
 * Scatter one SoA point into four AoS points (the reverse of gathering four
 * AoS points into an SoA point).
 *
 * pnt               : SoA point to read.
 * result0..result3  : AoS points written through vmathP3MakeFrom128, one
 *                     per slot of the SoA components.
 *
 * NOTE(review): the shuffle patterns are defined outside this excerpt. Going
 * by their names and the file's [x,y,z,w] [a,b,c,d] labelling, result1..3
 * get a zero in their fourth word (the trailing "0"), whereas result0 uses
 * XAYB and so receives slot 1 of pnt->y in its fourth word - confirm that
 * callers never rely on that word.
 */
944 static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
946 vec_float4 tmp0, tmp1;
/* Interleave x with z: tmp0 covers slots 0 and 1, tmp1 covers slots 2 and 3. */
947 tmp0 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_XAYB );
948 tmp1 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_ZCWD );
/* Insert the matching y word between each x/z pair to form one AoS quadword per slot. */
949 vmathP3MakeFrom128( result0, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_XAYB ) );
950 vmathP3MakeFrom128( result1, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_ZBW0 ) );
951 vmathP3MakeFrom128( result2, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_XCY0 ) );
952 vmathP3MakeFrom128( result3, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_ZDW0 ) );
/*
 * Load an SoA point from three consecutive quadwords of packed x,y,z data.
 *
 * vec        : receives the SoA point (written through the SetX/SetY/SetZ
 *              accessors).
 * threeQuads : array of which elements [0], [1] and [2] are read.
 *
 * The local names describe the expected packing: threeQuads[0] holds
 * x y z x, [1] holds y z x y and [2] holds z x y z, i.e. four xyz triples
 * stored back to back. NOTE(review): the shuffle patterns are defined
 * outside this excerpt - confirm they match their names.
 */
955 static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
957 vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
958 xyzx = threeQuads[0];
959 yzxy = threeQuads[1];
960 zxyz = threeQuads[2];
/* Stage 1: pair the first half of one quadword with the second half of another (XYCD). */
961 xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
962 zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
963 yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
/* Stage 2: pick the four words of each component out of two intermediates. */
964 vmathSoaP3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
965 vmathSoaP3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
966 vmathSoaP3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
/*
 * Store an SoA point as three consecutive quadwords of packed x,y,z data.
 *
 * vec        : SoA point to read.
 * threeQuads : array of which elements [0], [1] and [2] are written.
 *
 * The local names describe the produced packing: threeQuads[0] = x y z x,
 * [1] = y z x y, [2] = z x y z. NOTE(review): the shuffle patterns are
 * defined outside this excerpt - confirm they match their names.
 */
969 static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
971 vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
/* Stage 1: interleave pairs of components. */
972 xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
973 zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
974 yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
/* Stage 2: join the first half of one intermediate with the second half of another (XYCD). */
975 xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
976 yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
977 zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
978 threeQuads[0] = xyzx;
979 threeQuads[1] = yzxy;
980 threeQuads[2] = zxyz;
/*
 * Store two SoA points as three quadwords of half-precision values.
 *
 * pnt0, pnt1 : SoA points to read.
 * threeQuads : array of which elements [0], [1] and [2] are written.
 *
 * Both points are first written out with vmathSoaP3StoreXYZArray, giving
 * six float quadwords; these are then converted two at a time by
 * _vmath2VfToHalfFloats, in order pnt0's three followed by pnt1's three.
 * NOTE(review): the declarations of the xyz0 / xyz1 scratch arrays are not
 * present in this excerpt; they are indexed 0..2 below.
 */
983 static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
987 vmathSoaP3StoreXYZArray( pnt0, xyz0 );
988 vmathSoaP3StoreXYZArray( pnt1, xyz1 );
989 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
/* The middle output straddles the two points: last quadword of pnt0, first of pnt1. */
990 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
991 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
/*
 * Setter for the x component of an SoA point.
 * NOTE(review): only the signature is present in this excerpt; presumably
 * _x is assigned to result->x - confirm against the full file.
 */
994 static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
/*
 * Getter for the x component of an SoA point.
 * NOTE(review): only the signature is present in this excerpt; presumably
 * it returns pnt->x - confirm against the full file.
 */
999 static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
/*
 * Setter for the y component of an SoA point.
 * NOTE(review): only the signature is present in this excerpt; presumably
 * _y is assigned to result->y - confirm against the full file.
 */
1004 static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
/*
 * Getter for the y component of an SoA point.
 * NOTE(review): only the signature is present in this excerpt; presumably
 * it returns pnt->y - confirm against the full file.
 */
1009 static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
/*
 * Setter for the z component of an SoA point.
 * NOTE(review): only the signature is present in this excerpt; presumably
 * _z is assigned to result->z - confirm against the full file.
 */
1014 static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
/*
 * Getter for the z component of an SoA point.
 * NOTE(review): only the signature is present in this excerpt; presumably
 * it returns pnt->z - confirm against the full file.
 */
1019 static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
/*
 * Write one component of an SoA point, selected by index.
 *
 * result : point to modify.
 * idx    : component index, used as an element offset from result->x.
 * value  : quadword to store.
 *
 * idx is not range-checked. NOTE(review): this relies on x, y and z being
 * consecutive vec_float4 members of VmathSoaPoint3 (so 0/1/2 reach x/y/z);
 * the struct definition is outside this excerpt - confirm.
 */
1024 static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
1026 *(&result->x + idx) = value;
/*
 * Read one component of an SoA point, selected by index.
 *
 * pnt : point to read.
 * idx : component index, used as an element offset from pnt->x.
 *
 * Returns the quadword found idx elements past pnt->x. idx is not
 * range-checked. NOTE(review): this relies on x, y and z being consecutive
 * vec_float4 members of VmathSoaPoint3; the struct definition is outside
 * this excerpt - confirm.
 */
1029 static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
1031 return *(&pnt->x + idx);
/*
 * Subtract two SoA points, producing an SoA vector.
 *
 * result : receives pnt0 - pnt1, component by component.
 * pnt0   : minuend.
 * pnt1   : subtrahend.
 */
1034 static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1036 result->x = spu_sub( pnt0->x, pnt1->x );
1037 result->y = spu_sub( pnt0->y, pnt1->y );
1038 result->z = spu_sub( pnt0->z, pnt1->z );
/*
 * Add an SoA vector to an SoA point.
 *
 * result : receives pnt + vec1, component by component.
 * pnt    : point operand.
 * vec1   : vector operand.
 */
1041 static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
1043 result->x = spu_add( pnt->x, vec1->x );
1044 result->y = spu_add( pnt->y, vec1->y );
1045 result->z = spu_add( pnt->z, vec1->z );
/*
 * Subtract an SoA vector from an SoA point.
 *
 * result : receives pnt - vec1, component by component.
 * pnt    : point operand (minuend).
 * vec1   : vector operand (subtrahend).
 */
1048 static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
1050 result->x = spu_sub( pnt->x, vec1->x );
1051 result->y = spu_sub( pnt->y, vec1->y );
1052 result->z = spu_sub( pnt->z, vec1->z );
/*
 * Multiply two SoA points component by component.
 *
 * result : receives pnt0 * pnt1 for each of x, y and z.
 * pnt0   : first factor.
 * pnt1   : second factor.
 */
1055 static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1057 result->x = spu_mul( pnt0->x, pnt1->x );
1058 result->y = spu_mul( pnt0->y, pnt1->y );
1059 result->z = spu_mul( pnt0->z, pnt1->z );
/*
 * Divide two SoA points component by component.
 *
 * result : receives divf4( pnt0, pnt1 ) for each of x, y and z.
 * pnt0   : numerators.
 * pnt1   : denominators.
 *
 * No zero check is made here; the result for a zero denominator is whatever
 * divf4 yields.
 */
1062 static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1064 result->x = divf4( pnt0->x, pnt1->x );
1065 result->y = divf4( pnt0->y, pnt1->y );
1066 result->z = divf4( pnt0->z, pnt1->z );
/*
 * Reciprocal of each component of an SoA point.
 *
 * result : receives recipf4() of pnt's x, y and z.
 * pnt    : input point.
 *
 * No zero check is made here; behaviour for zero inputs is that of recipf4.
 */
1069 static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1071 result->x = recipf4( pnt->x );
1072 result->y = recipf4( pnt->y );
1073 result->z = recipf4( pnt->z );
/*
 * Square root of each component of an SoA point.
 *
 * result : receives sqrtf4() of pnt's x, y and z.
 * pnt    : input point.
 *
 * Inputs are not validated here; behaviour for negative inputs is that of
 * sqrtf4.
 */
1076 static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1078 result->x = sqrtf4( pnt->x );
1079 result->y = sqrtf4( pnt->y );
1080 result->z = sqrtf4( pnt->z );
/*
 * Reciprocal square root of each component of an SoA point.
 *
 * result : receives rsqrtf4() of pnt's x, y and z.
 * pnt    : input point.
 *
 * Inputs are not validated here; behaviour for zero or negative inputs is
 * that of rsqrtf4.
 */
1083 static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1085 result->x = rsqrtf4( pnt->x );
1086 result->y = rsqrtf4( pnt->y );
1087 result->z = rsqrtf4( pnt->z );
/*
 * Absolute value of each component of an SoA point.
 *
 * result : receives fabsf4() of pnt's x, y and z.
 * pnt    : input point.
 */
1090 static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1092 result->x = fabsf4( pnt->x );
1093 result->y = fabsf4( pnt->y );
1094 result->z = fabsf4( pnt->z );
/*
 * Apply copysignf4 to each pair of components of two SoA points.
 *
 * result : receives copysignf4( pnt0, pnt1 ) for each of x, y and z.
 * pnt0   : first argument to copysignf4.
 * pnt1   : second argument to copysignf4.
 *
 * NOTE(review): by the usual copysign convention the magnitude comes from
 * pnt0 and the sign from pnt1; copysignf4 is defined outside this file -
 * confirm.
 */
1097 static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1099 result->x = copysignf4( pnt0->x, pnt1->x );
1100 result->y = copysignf4( pnt0->y, pnt1->y );
1101 result->z = copysignf4( pnt0->z, pnt1->z );
/*
 * Component-wise maximum of two SoA points.
 *
 * result : receives fmaxf4( pnt0, pnt1 ) for each of x, y and z.
 * pnt0   : first operand.
 * pnt1   : second operand.
 */
1104 static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1106 result->x = fmaxf4( pnt0->x, pnt1->x );
1107 result->y = fmaxf4( pnt0->y, pnt1->y );
1108 result->z = fmaxf4( pnt0->z, pnt1->z );
/*
 * Largest of the three components of an SoA point.
 *
 * pnt : input point.
 *
 * Computes fmaxf4( z, fmaxf4( x, y ) ), so each slot of the quadword is
 * reduced independently across x, y and z.
 * NOTE(review): the declaration of `result` and the return statement are
 * not present in this excerpt; presumably `result` is returned - confirm.
 */
1111 static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
1114 result = fmaxf4( pnt->x, pnt->y );
1115 result = fmaxf4( pnt->z, result );
/*
 * Component-wise minimum of two SoA points.
 *
 * result : receives fminf4( pnt0, pnt1 ) for each of x, y and z.
 * pnt0   : first operand.
 * pnt1   : second operand.
 */
1119 static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1121 result->x = fminf4( pnt0->x, pnt1->x );
1122 result->y = fminf4( pnt0->y, pnt1->y );
1123 result->z = fminf4( pnt0->z, pnt1->z );
/*
 * Smallest of the three components of an SoA point.
 *
 * pnt : input point.
 *
 * Computes fminf4( z, fminf4( x, y ) ), so each slot of the quadword is
 * reduced independently across x, y and z.
 * NOTE(review): the declaration of `result` and the return statement are
 * not present in this excerpt; presumably `result` is returned - confirm.
 */
1126 static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
1129 result = fminf4( pnt->x, pnt->y );
1130 result = fminf4( pnt->z, result );
/*
 * Sum of the three components of an SoA point.
 *
 * pnt : input point.
 *
 * Computes ( x + y ) + z, in that order, independently for each slot.
 * NOTE(review): the declaration of `result` and the return statement are
 * not present in this excerpt; presumably `result` is returned - confirm.
 */
1134 static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
1137 result = spu_add( pnt->x, pnt->y );
1138 result = spu_add( result, pnt->z );
/*
 * Scale an SoA point by a single quadword factor.
 *
 * result   : receives the scaled point.
 * pnt      : point to scale.
 * scaleVal : scale factor.
 *
 * Implemented as a per-component multiply of pnt by a temporary point built
 * from scaleVal. NOTE(review): vmathSoaP3MakeFromScalar's body is not
 * present in this excerpt; presumably it copies scaleVal into all three
 * components, making this a uniform scale - confirm.
 */
1142 static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
1144 VmathSoaPoint3 tmpP3_0;
1145 vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
1146 vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
/*
 * Scale an SoA point by a separate factor per component.
 *
 * result   : receives the scaled point.
 * pnt      : point to scale.
 * scaleVec : vector supplying the factors.
 *
 * Implemented as a per-component multiply of pnt by a temporary point built
 * from scaleVec. NOTE(review): vmathSoaP3MakeFromV3's body is not present
 * in this excerpt; presumably it copies x, y and z unchanged - confirm.
 */
1149 static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
1151 VmathSoaPoint3 tmpP3_0;
1152 vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
1153 vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
/*
 * Dot product of a point's coordinates with a vector.
 *
 * pnt     : input point.
 * unitVec : vector to project onto; it is used as given and is not
 *           normalised here.
 *
 * Computes pnt.x*unitVec.x + pnt.y*unitVec.y + pnt.z*unitVec.z,
 * accumulated in that order, independently for each slot.
 * NOTE(review): the declaration of `result` and the return statement are
 * not present in this excerpt; presumably `result` is returned - confirm.
 */
1156 static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
1159 result = spu_mul( pnt->x, unitVec->x );
1160 result = spu_add( result, spu_mul( pnt->y, unitVec->y ) );
1161 result = spu_add( result, spu_mul( pnt->z, unitVec->z ) );
/*
 * Squared distance of an SoA point from the origin.
 *
 * pnt : input point.
 *
 * Converts the point to a vector with vmathSoaV3MakeFromP3 and returns
 * vmathSoaV3LengthSqr of that vector. Both helpers are defined outside this
 * excerpt.
 */
1165 static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
1167 VmathSoaVector3 tmpV3_0;
1168 vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
1169 return vmathSoaV3LengthSqr( &tmpV3_0 );
/*
 * Distance of an SoA point from the origin.
 *
 * pnt : input point.
 *
 * Converts the point to a vector with vmathSoaV3MakeFromP3 and returns
 * vmathSoaV3Length of that vector. Both helpers are defined outside this
 * excerpt.
 */
1172 static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
1174 VmathSoaVector3 tmpV3_0;
1175 vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
1176 return vmathSoaV3Length( &tmpV3_0 );
/*
 * Squared distance between two SoA points.
 *
 * pnt0 : first point.
 * pnt1 : second point.
 *
 * Forms the difference vector pnt1 - pnt0 and returns vmathSoaV3LengthSqr
 * of it (defined outside this excerpt).
 */
1179 static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1181 VmathSoaVector3 tmpV3_0;
1182 vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
1183 return vmathSoaV3LengthSqr( &tmpV3_0 );
/*
 * Distance between two SoA points.
 *
 * pnt0 : first point.
 * pnt1 : second point.
 *
 * Forms the difference vector pnt1 - pnt0 and returns vmathSoaV3Length of
 * it (defined outside this excerpt).
 */
1186 static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1188 VmathSoaVector3 tmpV3_0;
1189 vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
1190 return vmathSoaV3Length( &tmpV3_0 );
/*
 * Bitwise select between two SoA points.
 *
 * result  : receives the selected point.
 * pnt0    : value taken where the mask bit is clear.
 * pnt1    : value taken where the mask bit is set.
 * select1 : bit mask; the same mask is applied to x, y and z.
 *
 * spu_sel works bit by bit, so a slot is taken wholly from one point only
 * when its 32 mask bits are all zeros or all ones.
 */
1193 static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
1195 result->x = spu_sel( pnt0->x, pnt1->x, select1 );
1196 result->y = spu_sel( pnt0->y, pnt1->y, select1 );
1197 result->z = spu_sel( pnt0->z, pnt1->z, select1 );
1200 #ifdef _VECTORMATH_DEBUG
/*
 * Print the four points held in an SoA point, one slot at a time.
 *
 * pnt : SoA point to print.
 *
 * The point is split into four AoS points with vmathSoaP3Get4Aos; each is
 * printed with vmathP3Print after a "slot N:" heading. This definition
 * follows the #ifdef _VECTORMATH_DEBUG guard.
 */
1202 static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
1204 VmathPoint3 vec0, vec1, vec2, vec3;
1205 vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
1206 printf("slot 0:\n");
1207 vmathP3Print( &vec0 );
1208 printf("slot 1:\n");
1209 vmathP3Print( &vec1 );
1210 printf("slot 2:\n");
1211 vmathP3Print( &vec2 );
1212 printf("slot 3:\n");
1213 vmathP3Print( &vec3 );
/*
 * Print a label followed by the four points held in an SoA point.
 *
 * pnt  : SoA point to print.
 * name : label, printed first as "<name>:" on its own line; it is passed to
 *        printf as a %s argument, so it must be a valid C string.
 *
 * After the label, the point is split into four AoS points with
 * vmathSoaP3Get4Aos; each is printed with vmathP3Print after a "slot N:"
 * heading. This definition follows the #ifdef _VECTORMATH_DEBUG guard.
 */
1216 static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
1218 VmathPoint3 vec0, vec1, vec2, vec3;
1219 printf( "%s:\n", name );
1220 vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
1221 printf("slot 0:\n");
1222 vmathP3Print( &vec0 );
1223 printf("slot 1:\n");
1224 vmathP3Print( &vec1 );
1225 printf("slot 2:\n");
1226 vmathP3Print( &vec2 );
1227 printf("slot 3:\n");
1228 vmathP3Print( &vec3 );
1235 #endif /* __cplusplus */