2 Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
5 Redistribution and use in source and binary forms,
6 with or without modification, are permitted provided that the
7 following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the Sony Computer Entertainment Inc nor the names
14 of its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
30 #ifndef _VECTORMATH_VEC_SOA_C_H
31 #define _VECTORMATH_VEC_SOA_C_H
34 #endif /* __cplusplus */
36 /*-----------------------------------------------------------------------------
38 * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
/* Word selectors for vec_perm control vectors. Each constant lists the four
 * byte indices of one 32-bit word: X, Y, Z, W cover bytes 0x00-0x0f (the
 * first vec_perm operand) and A, B, C, D cover bytes 0x10-0x1f (the second
 * operand). */
40 #define _VECTORMATH_PERM_X 0x00010203
41 #define _VECTORMATH_PERM_Y 0x04050607
42 #define _VECTORMATH_PERM_Z 0x08090a0b
43 #define _VECTORMATH_PERM_W 0x0c0d0e0f
44 #define _VECTORMATH_PERM_A 0x10111213
45 #define _VECTORMATH_PERM_B 0x14151617
46 #define _VECTORMATH_PERM_C 0x18191a1b
47 #define _VECTORMATH_PERM_D 0x1c1d1e1f
/* Complete control vectors. The four letters of each suffix name, in order,
 * the source word copied into result words 0..3. */
48 #define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
49 #define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
50 #define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
51 #define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
52 #define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
53 #define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
54 #define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
55 #define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
56 #define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
/* Cosine threshold for the slerp routines: lanes whose cosAngle is below
 * this value use the sine-based weights, the remaining lanes keep the plain
 * (1 - t, t) weights. */
57 #define _VECTORMATH_SLERP_TOL 0.999f
59 /*-----------------------------------------------------------------------------
62 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
63 #define _VECTORMATH_INTERNAL_FUNCTIONS
/* SoA Vector3 construction entry points. Only the signatures appear in this
 * listing (the bodies are not shown), so the notes are limited to what the
 * parameter lists establish. */
/* Copy: source is *vec, destination is *result. */
67 static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
/* Build *result from three vec_float4 components _x, _y, _z. */
74 static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
/* Build *result from an SoA point *pnt. */
81 static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
/* Build *result from one vec_float4 scalar (presumably assigned to x, y and
 * z alike - confirm against the body). */
88 static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
/* Broadcasts one AoS vector into an SoA vector: words 0, 1 and 2 of
 * vec->vec128 are each splatted across all four lanes of result->x,
 * result->y and result->z respectively. */
95 static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
97 vec_float4 vec128 = vec->vec128;
98 result->x = vec_splat( vec128, 0 );
99 result->y = vec_splat( vec128, 1 );
100 result->z = vec_splat( vec128, 2 );
/* Transposes four AoS vectors into one SoA vector using two merge passes.
 * Afterwards lane i of result->x, result->y and result->z holds word 0, 1
 * and 2 of vec<i>->vec128; word 3 of the inputs is not transferred. */
103 static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
105 vec_float4 tmp0, tmp1, tmp2, tmp3;
/* First pass: interleave vectors 0/2 and 1/3 (high halves carry words 0-1,
 * low halves carry words 2-3). */
106 tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
107 tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
108 tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
109 tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
/* Second pass: interleave the partial results so each output holds one
 * component for vectors 0, 1, 2, 3 in lane order. */
110 result->x = vec_mergeh( tmp0, tmp1 );
111 result->y = vec_mergel( tmp0, tmp1 );
112 result->z = vec_mergeh( tmp2, tmp3 );
/* Builds the X axis in every lane by passing x = 1.0, y = 0.0, z = 0.0 to
 * vmathSoaV3MakeFromElems. */
115 static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
117 vmathSoaV3MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Builds the Y axis in every lane by passing x = 0.0, y = 1.0, z = 0.0 to
 * vmathSoaV3MakeFromElems. */
120 static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
122 vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Builds the Z axis in every lane by passing x = 0.0, y = 0.0, z = 1.0 to
 * vmathSoaV3MakeFromElems. */
125 static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
127 vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
/* Linear interpolation, evaluated as result = vec0 + (vec1 - vec0) * t.
 * t supplies one interpolation factor per lane; it is not clamped here. */
130 static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
132 VmathSoaVector3 tmpV3_0, tmpV3_1;
133 vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
134 vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
135 vmathSoaV3Add( result, vec0, &tmpV3_1 );
/* Spherical linear interpolation between unitVec0 and unitVec1, one factor
 * t per lane. The result is unitVec0 * scale0 + unitVec1 * scale1, where the
 * scales are sin((1 - t) * angle) / sin(angle) and sin(t * angle) / sin(angle)
 * for lanes with cosAngle below _VECTORMATH_SLERP_TOL, and plain (1 - t) and
 * t for the other lanes. The inputs are named as unit vectors; nothing here
 * normalizes them or the result. */
138 static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
140 VmathSoaVector3 tmpV3_0, tmpV3_1;
141 vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
142 vec_uint4 selectMask;
143 cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
/* Mask is all ones in lanes where the tolerance exceeds cosAngle. */
144 selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
145 angle = acosf4( cosAngle );
/* Computed for every lane, including lanes where sin(angle) is close to
 * zero; those lanes are discarded by the vec_sel calls below. */
146 recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
/* vec_sel takes the second operand where the mask is set, otherwise the
 * first (linear) weight. */
147 scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
148 scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
149 vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
150 vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
151 vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
/* Splits one SoA vector into four AoS vectors: result<i> receives lane i of
 * vec->x, vec->y, vec->z in words 0, 1, 2. Word 3 of each 128-bit value
 * handed to vmathV3MakeFrom128 is a leftover lane, not a meaningful
 * component. */
154 static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
156 vec_float4 tmp0, tmp1;
/* tmp0 = { x0, z0, x1, z1 }, tmp1 = { x2, z2, x3, z3 } */
157 tmp0 = vec_mergeh( vec->x, vec->z );
158 tmp1 = vec_mergel( vec->x, vec->z );
/* { x0, y0, z0, y1 } */
159 vmathV3MakeFrom128( result0, vec_mergeh( tmp0, vec->y ) );
/* { x1, y1, z1, x0 } */
160 vmathV3MakeFrom128( result1, vec_perm( tmp0, vec->y, _VECTORMATH_PERM_ZBWX ) );
/* { x2, y2, z2, x2 } */
161 vmathV3MakeFrom128( result2, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_XCYX ) );
/* { x3, y3, z3, x2 } */
162 vmathV3MakeFrom128( result3, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_ZDWX ) );
/* Loads four packed xyz triples (twelve consecutive floats held in
 * threeQuads[0..2]) and de-interleaves them into vec->x, vec->y, vec->z,
 * with triple i landing in lane i. */
165 static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
167 vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
/* xyzx = { x0, y0, z0, x1 }, yzxy = { y1, z1, x2, y2 }, zxyz = { z2, x3, y3, z3 } */
168 xyzx = threeQuads[0];
169 yzxy = threeQuads[1];
170 zxyz = threeQuads[2];
/* An 8-byte vec_sld yields the last two words of its first operand followed
 * by the first two words of its second:
 * xyxy = { x2, y2, x0, y0 }, zxzx = { z0, x1, z2, x3 }, yzyz = { y3, z3, y1, z1 } */
171 xyxy = vec_sld( yzxy, xyzx, 8 );
172 zxzx = vec_sld( xyzx, zxyz, 8 );
173 yzyz = vec_sld( zxyz, yzxy, 8 );
/* Each permute gathers one component into lane order 0..3. */
174 vmathSoaV3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
175 vmathSoaV3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
176 vmathSoaV3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
/* Stores the four vectors held in *vec as four packed xyz triples (twelve
 * consecutive floats) in threeQuads[0..2]; lane i becomes triple i. */
179 static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
181 vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
/* xyxy = { x2, y2, x0, y0 }, zxzx = { z0, x1, z2, x3 }, yzyz = { y3, z3, y1, z1 } */
182 xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
183 zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
184 yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
/* An 8-byte vec_sld joins the last two words of the first operand with the
 * first two words of the second:
 * xyzx = { x0, y0, z0, x1 }, yzxy = { y1, z1, x2, y2 }, zxyz = { z2, x3, y3, z3 } */
185 xyzx = vec_sld( xyxy, zxzx, 8 );
186 yzxy = vec_sld( yzyz, xyxy, 8 );
187 zxyz = vec_sld( zxzx, yzyz, 8 );
188 threeQuads[0] = xyzx;
189 threeQuads[1] = yzxy;
190 threeQuads[2] = zxyz;
/* Writes two SoA vectors (eight xyz triples) to threeQuads[0..2] as
 * vec_ushort8 values. Each SoA vector is first laid out as three float quads
 * by vmathSoaV3StoreXYZArray, then consecutive pairs of quads are combined
 * by _vmath2VfToHalfFloats (presumably a float-to-half conversion - confirm
 * against its definition). xyz0 and xyz1 are scratch arrays of at least
 * three quads each; their declarations fall on lines not included in this
 * listing. */
193 static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
197 vmathSoaV3StoreXYZArray( vec0, xyz0 );
198 vmathSoaV3StoreXYZArray( vec1, xyz1 );
199 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
/* The middle output straddles the two inputs: last quad of vec0, first of vec1. */
200 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
201 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
/* Per-component accessors for VmathSoaVector3. Only the signatures appear in
 * this listing: each setter takes the destination and a vec_float4 value,
 * each getter takes a const source and returns a vec_float4. */
204 static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
209 static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
214 static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
219 static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
224 static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
229 static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
/* Writes value to the component selected by idx, addressed as an offset from
 * result->x. This relies on the components being laid out contiguously after
 * x (struct layout is not visible here - confirm). idx is not range-checked. */
234 static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
236 *(&result->x + idx) = value;
/* Returns the component selected by idx, addressed as an offset from vec->x.
 * This relies on the components being laid out contiguously after x (struct
 * layout is not visible here - confirm). idx is not range-checked. */
239 static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
241 return *(&vec->x + idx);
/* Component-wise sum: result = vec0 + vec1 for x, y and z. */
244 static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
246 result->x = vec_add( vec0->x, vec1->x );
247 result->y = vec_add( vec0->y, vec1->y );
248 result->z = vec_add( vec0->z, vec1->z );
/* Component-wise difference: result = vec0 - vec1 for x, y and z. */
251 static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
253 result->x = vec_sub( vec0->x, vec1->x );
254 result->y = vec_sub( vec0->y, vec1->y );
255 result->z = vec_sub( vec0->z, vec1->z );
/* Adds a vector to a point component-wise; the result is written as a
 * VmathSoaPoint3. */
258 static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
260 result->x = vec_add( vec->x, pnt1->x );
261 result->y = vec_add( vec->y, pnt1->y );
262 result->z = vec_add( vec->z, pnt1->z );
/* Multiplies x, y and z by scalar, lane by lane (scalar carries one factor
 * per lane). vec_madd with a zero addend is used as a plain multiply. */
265 static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
267 result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
268 result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
269 result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Divides x, y and z by scalar through divf4 (one divisor per lane). No
 * check for zero divisors is made here. */
272 static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
274 result->x = divf4( vec->x, scalar );
275 result->y = divf4( vec->y, scalar );
276 result->z = divf4( vec->z, scalar );
/* Applies negatef4 to x, y and z (negation of every component). */
279 static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
281 result->x = negatef4( vec->x );
282 result->y = negatef4( vec->y );
283 result->z = negatef4( vec->z );
/* Component-wise product: result = vec0 * vec1 for x, y and z, using
 * vec_madd with a zero addend as the multiply. */
286 static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
288 result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
289 result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
290 result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Component-wise quotient: result = vec0 / vec1 for x, y and z via divf4.
 * No check for zero divisors is made here. */
293 static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
295 result->x = divf4( vec0->x, vec1->x );
296 result->y = divf4( vec0->y, vec1->y );
297 result->z = divf4( vec0->z, vec1->z );
/* Component-wise reciprocal, computed as divf4( 1.0, component ). */
300 static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
302 result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
303 result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
304 result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
/* Applies sqrtf4 to x, y and z (component-wise square root). */
307 static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
309 result->x = sqrtf4( vec->x );
310 result->y = sqrtf4( vec->y );
311 result->z = sqrtf4( vec->z );
/* Component-wise reciprocal square root, computed as a full
 * divf4( 1.0, sqrtf4( component ) ) rather than via an estimate instruction. */
314 static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
316 result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
317 result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
318 result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
/* Applies fabsf4 to x, y and z (component-wise absolute value). */
321 static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
323 result->x = fabsf4( vec->x );
324 result->y = fabsf4( vec->y );
325 result->z = fabsf4( vec->z );
/* Applies copysignf4( vec0 component, vec1 component ) to x, y and z;
 * presumably magnitude from vec0 and sign from vec1, as with C copysignf -
 * confirm against the copysignf4 definition. */
328 static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
330 result->x = copysignf4( vec0->x, vec1->x );
331 result->y = copysignf4( vec0->y, vec1->y );
332 result->z = copysignf4( vec0->z, vec1->z );
/* Component-wise maximum of vec0 and vec1 via fmaxf4, for x, y and z. */
335 static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
337 result->x = fmaxf4( vec0->x, vec1->x );
338 result->y = fmaxf4( vec0->y, vec1->y );
339 result->z = fmaxf4( vec0->z, vec1->z );
/* Reduces across components: folds x, y and z with fmaxf4, giving one
 * maximum per lane. (The declaration of `result` and the return statement
 * fall on lines not included in this listing.) */
342 static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
345 result = fmaxf4( vec->x, vec->y );
346 result = fmaxf4( vec->z, result );
/* Component-wise minimum of vec0 and vec1 via fminf4, for x, y and z. */
350 static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
352 result->x = fminf4( vec0->x, vec1->x );
353 result->y = fminf4( vec0->y, vec1->y );
354 result->z = fminf4( vec0->z, vec1->z );
/* Reduces across components: folds x, y and z with fminf4, giving one
 * minimum per lane. (The declaration of `result` and the return statement
 * fall on lines not included in this listing.) */
357 static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
360 result = fminf4( vec->x, vec->y );
361 result = fminf4( vec->z, result );
/* Sums the components per lane: (x + y) + z. (The declaration of `result`
 * and the return statement fall on lines not included in this listing.) */
365 static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
368 result = vec_add( vec->x, vec->y );
369 result = vec_add( result, vec->z );
/* Per-lane dot product: x0*x1 + y0*y1 + z0*z1. Each product is formed by
 * vec_madd with a zero addend and accumulated with vec_add. (The declaration
 * of `result` and the return statement fall on lines not included in this
 * listing.) */
373 static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
376 result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
377 result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
378 result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
/* Per-lane squared length: x*x + y*y + z*z. (The declaration of `result`
 * and the return statement fall on lines not included in this listing.) */
382 static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
385 result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
386 result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
387 result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
/* Per-lane length: sqrtf4 of vmathSoaV3LengthSqr( vec ). */
391 static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
393 return sqrtf4( vmathSoaV3LengthSqr( vec ) );
/* Scales vec by the reciprocal of its length, per lane. The visible
 * statements contain no guard for a zero-length input. */
396 static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
398 vec_float4 lenSqr, lenInv;
399 lenSqr = vmathSoaV3LengthSqr( vec );
/* lenInv = 1 / sqrt( lenSqr ) */
400 lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
401 result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
402 result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
403 result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Per-lane cross product vec0 x vec1. The components are computed into
 * temporaries before result is written, so result may alias either input. */
406 static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
408 vec_float4 tmpX, tmpY, tmpZ;
/* tmpX = y0*z1 - z0*y1 */
409 tmpX = vec_sub( vec_madd( vec0->y, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->z, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
/* tmpY = z0*x1 - x0*z1 */
410 tmpY = vec_sub( vec_madd( vec0->z, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->x, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
/* tmpZ = x0*y1 - y0*x1 */
411 tmpZ = vec_sub( vec_madd( vec0->x, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->y, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
412 vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
/* Bitwise select between two vectors: where a bit of select1 is set the
 * result takes the bit from vec1, otherwise from vec0. The same mask is
 * applied to x, y and z, so an all-ones lane picks vec1 for that lane. */
415 static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
417 result->x = vec_sel( vec0->x, vec1->x, select1 );
418 result->y = vec_sel( vec0->y, vec1->y, select1 );
419 result->z = vec_sel( vec0->z, vec1->z, select1 );
422 #ifdef _VECTORMATH_DEBUG
/* Debug helper: converts *vec to four AoS vectors and prints each one with
 * vmathV3Print, in lane order. The statements that sit between the
 * vmathV3Print calls are not included in this listing. */
424 static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
426 VmathVector3 vec0, vec1, vec2, vec3;
427 vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
429 vmathV3Print( &vec0 );
431 vmathV3Print( &vec1 );
433 vmathV3Print( &vec2 );
435 vmathV3Print( &vec3 );
/* Debug helper: prints name followed by ":" and a newline, then converts
 * *vec to four AoS vectors and prints each one with vmathV3Print, in lane
 * order. The statements that sit between the vmathV3Print calls are not
 * included in this listing. */
438 static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
440 VmathVector3 vec0, vec1, vec2, vec3;
441 printf( "%s:\n", name );
442 vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
444 vmathV3Print( &vec0 );
446 vmathV3Print( &vec1 );
448 vmathV3Print( &vec2 );
450 vmathV3Print( &vec3 );
/* SoA Vector4 construction entry points; only the signatures appear in this
 * listing. */
/* Copy: source is *vec, destination is *result. */
455 static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
/* Build *result from four vec_float4 components _x, _y, _z, _w. */
463 static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
/* Builds a 4-D vector from a 3-D vector and a separate w: delegates to
 * vmathSoaV4SetXYZ for xyz and vmathSoaV4SetW for _w. */
471 static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
473 vmathSoaV4SetXYZ( result, xyz );
474 vmathSoaV4SetW( result, _w );
/* Builds a 4-D vector from a 3-D vector with w set to 0.0 in every lane.
 * The statements handling x, y and z fall on lines not included in this
 * listing. */
477 static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
482 result->w = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
/* Builds a 4-D vector from a 3-D point with w set to 1.0 in every lane.
 * The statements handling x, y and z fall on lines not included in this
 * listing. */
485 static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
490 result->w = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
/* Further SoA Vector4 constructors; only the signatures appear in this
 * listing. */
/* Build *result from an SoA quaternion *quat. */
493 static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
/* Build *result from one vec_float4 scalar (presumably assigned to all four
 * components - confirm against the body). */
501 static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
/* Broadcasts one AoS vector into an SoA vector: words 0..3 of vec->vec128
 * are each splatted across all four lanes of result->x, y, z and w. */
509 static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
511 vec_float4 vec128 = vec->vec128;
512 result->x = vec_splat( vec128, 0 );
513 result->y = vec_splat( vec128, 1 );
514 result->z = vec_splat( vec128, 2 );
515 result->w = vec_splat( vec128, 3 );
/* Transposes four AoS vectors into one SoA vector (a 4x4 transpose done
 * with two merge passes). Afterwards lane i of result->x, y, z, w holds
 * word 0, 1, 2, 3 of vec<i>->vec128. */
518 static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
520 vec_float4 tmp0, tmp1, tmp2, tmp3;
/* First pass: interleave vectors 0/2 and 1/3. */
521 tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
522 tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
523 tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
524 tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
/* Second pass: each output collects one component in lane order 0..3. */
525 result->x = vec_mergeh( tmp0, tmp1 );
526 result->y = vec_mergel( tmp0, tmp1 );
527 result->z = vec_mergeh( tmp2, tmp3 );
528 result->w = vec_mergel( tmp2, tmp3 );
/* Builds the X axis in every lane by passing (1, 0, 0, 0) to
 * vmathSoaV4MakeFromElems. */
531 static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
533 vmathSoaV4MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Builds the Y axis in every lane by passing (0, 1, 0, 0) to
 * vmathSoaV4MakeFromElems. */
536 static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
538 vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Builds the Z axis in every lane by passing (0, 0, 1, 0) to
 * vmathSoaV4MakeFromElems. */
541 static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
543 vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Builds the W axis in every lane by passing (0, 0, 0, 1) to
 * vmathSoaV4MakeFromElems. */
546 static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
548 vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
/* Linear interpolation, evaluated as result = vec0 + (vec1 - vec0) * t.
 * t supplies one interpolation factor per lane; it is not clamped here. */
551 static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
553 VmathSoaVector4 tmpV4_0, tmpV4_1;
554 vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
555 vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
556 vmathSoaV4Add( result, vec0, &tmpV4_1 );
/* Spherical linear interpolation between unitVec0 and unitVec1, one factor
 * t per lane. The result is unitVec0 * scale0 + unitVec1 * scale1, where the
 * scales are sin((1 - t) * angle) / sin(angle) and sin(t * angle) / sin(angle)
 * for lanes with cosAngle below _VECTORMATH_SLERP_TOL, and plain (1 - t) and
 * t for the other lanes. The inputs are named as unit vectors; nothing here
 * normalizes them or the result. */
559 static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
561 VmathSoaVector4 tmpV4_0, tmpV4_1;
562 vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
563 vec_uint4 selectMask;
564 cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
/* Mask is all ones in lanes where the tolerance exceeds cosAngle. */
565 selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
566 angle = acosf4( cosAngle );
/* Computed for every lane, including lanes where sin(angle) is close to
 * zero; those lanes are discarded by the vec_sel calls below. */
567 recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
/* vec_sel takes the second operand where the mask is set, otherwise the
 * first (linear) weight. */
568 scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
569 scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
570 vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
571 vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
572 vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
/* Splits one SoA vector into four AoS vectors (a 4x4 transpose done with
 * two merge passes): result<i> receives lane i of vec->x, y, z, w in words
 * 0..3. */
575 static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
577 vec_float4 tmp0, tmp1, tmp2, tmp3;
/* tmp0 = { x0, z0, x1, z1 }, tmp1 = { y0, w0, y1, w1 },
 * tmp2 = { x2, z2, x3, z3 }, tmp3 = { y2, w2, y3, w3 } */
578 tmp0 = vec_mergeh( vec->x, vec->z );
579 tmp1 = vec_mergeh( vec->y, vec->w );
580 tmp2 = vec_mergel( vec->x, vec->z );
581 tmp3 = vec_mergel( vec->y, vec->w );
582 vmathV4MakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) );
583 vmathV4MakeFrom128( result1, vec_mergel( tmp0, tmp1 ) );
584 vmathV4MakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) );
585 vmathV4MakeFrom128( result3, vec_mergel( tmp2, tmp3 ) );
/* Writes the four vectors held in *vec to twoQuads[0..1] as vec_ushort8
 * values: the SoA data is converted to four AoS vectors and consecutive
 * pairs are combined by _vmath2VfToHalfFloats (presumably a float-to-half
 * conversion - confirm against its definition). */
588 static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
590 VmathVector4 v0, v1, v2, v3;
591 vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
592 twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
593 twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
/* Sets the xyz part of *result from a 3-D vector *vec. Only the signature
 * appears in this listing (presumably w is left untouched - confirm against
 * the body). */
596 static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
/* Extracts the x, y and z components of *vec into a 3-D vector by passing
 * them to vmathSoaV3MakeFromElems; w is not used. */
603 static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
605 vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
/* Per-component accessors for VmathSoaVector4. Only the signatures appear in
 * this listing: each setter takes the destination and a vec_float4 value,
 * each getter takes a const source and returns a vec_float4. */
608 static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
613 static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
618 static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
623 static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
628 static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
633 static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
638 static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
643 static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
/* Writes value to the component selected by idx, addressed as an offset from
 * result->x. This relies on the components being laid out contiguously after
 * x (struct layout is not visible here - confirm). idx is not range-checked. */
648 static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
650 *(&result->x + idx) = value;
/* Returns the component selected by idx, addressed as an offset from vec->x.
 * This relies on the components being laid out contiguously after x (struct
 * layout is not visible here - confirm). idx is not range-checked. */
653 static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
655 return *(&vec->x + idx);
/* Component-wise sum: result = vec0 + vec1 for x, y, z and w. */
658 static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
660 result->x = vec_add( vec0->x, vec1->x );
661 result->y = vec_add( vec0->y, vec1->y );
662 result->z = vec_add( vec0->z, vec1->z );
663 result->w = vec_add( vec0->w, vec1->w );
/* Component-wise difference: result = vec0 - vec1 for x, y, z and w. */
666 static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
668 result->x = vec_sub( vec0->x, vec1->x );
669 result->y = vec_sub( vec0->y, vec1->y );
670 result->z = vec_sub( vec0->z, vec1->z );
671 result->w = vec_sub( vec0->w, vec1->w );
/* Multiplies x, y, z and w by scalar, lane by lane (scalar carries one
 * factor per lane). vec_madd with a zero addend is used as a plain multiply. */
674 static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
676 result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
677 result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
678 result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
679 result->w = vec_madd( vec->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Divides x, y, z and w by scalar through divf4 (one divisor per lane). No
 * check for zero divisors is made here. */
682 static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
684 result->x = divf4( vec->x, scalar );
685 result->y = divf4( vec->y, scalar );
686 result->z = divf4( vec->z, scalar );
687 result->w = divf4( vec->w, scalar );
/* Applies negatef4 to x, y, z and w (negation of every component). */
690 static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
692 result->x = negatef4( vec->x );
693 result->y = negatef4( vec->y );
694 result->z = negatef4( vec->z );
695 result->w = negatef4( vec->w );
/* Component-wise product: result = vec0 * vec1 for x, y, z and w, using
 * vec_madd with a zero addend as the multiply. */
698 static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
700 result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
701 result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
702 result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
703 result->w = vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Component-wise quotient: result = vec0 / vec1 for x, y, z and w via divf4.
 * No check for zero divisors is made here. */
706 static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
708 result->x = divf4( vec0->x, vec1->x );
709 result->y = divf4( vec0->y, vec1->y );
710 result->z = divf4( vec0->z, vec1->z );
711 result->w = divf4( vec0->w, vec1->w );
/* Component-wise reciprocal, computed as divf4( 1.0, component ). */
714 static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
716 result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
717 result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
718 result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
719 result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->w );
/* Applies sqrtf4 to x, y, z and w (component-wise square root). */
722 static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
724 result->x = sqrtf4( vec->x );
725 result->y = sqrtf4( vec->y );
726 result->z = sqrtf4( vec->z );
727 result->w = sqrtf4( vec->w );
/* Component-wise reciprocal square root, computed as a full
 * divf4( 1.0, sqrtf4( component ) ) rather than via an estimate instruction. */
730 static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
732 result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
733 result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
734 result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
735 result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->w ) );
/* Applies fabsf4 to x, y, z and w (component-wise absolute value). */
738 static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
740 result->x = fabsf4( vec->x );
741 result->y = fabsf4( vec->y );
742 result->z = fabsf4( vec->z );
743 result->w = fabsf4( vec->w );
/* Applies copysignf4( vec0 component, vec1 component ) to x, y, z and w;
 * presumably magnitude from vec0 and sign from vec1, as with C copysignf -
 * confirm against the copysignf4 definition. */
746 static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
748 result->x = copysignf4( vec0->x, vec1->x );
749 result->y = copysignf4( vec0->y, vec1->y );
750 result->z = copysignf4( vec0->z, vec1->z );
751 result->w = copysignf4( vec0->w, vec1->w );
/* Component-wise maximum of vec0 and vec1 via fmaxf4, for x, y, z and w. */
754 static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
756 result->x = fmaxf4( vec0->x, vec1->x );
757 result->y = fmaxf4( vec0->y, vec1->y );
758 result->z = fmaxf4( vec0->z, vec1->z );
759 result->w = fmaxf4( vec0->w, vec1->w );
/* Reduces across components: folds x, y, z and w with fmaxf4, giving one
 * maximum per lane. (The declaration of `result` and the return statement
 * fall on lines not included in this listing.) */
762 static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
765 result = fmaxf4( vec->x, vec->y );
766 result = fmaxf4( vec->z, result );
767 result = fmaxf4( vec->w, result );
/* Component-wise minimum of vec0 and vec1 via fminf4, for x, y, z and w. */
771 static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
773 result->x = fminf4( vec0->x, vec1->x );
774 result->y = fminf4( vec0->y, vec1->y );
775 result->z = fminf4( vec0->z, vec1->z );
776 result->w = fminf4( vec0->w, vec1->w );
/* Reduces across components: folds x, y, z and w with fminf4, giving one
 * minimum per lane. (The declaration of `result` and the return statement
 * fall on lines not included in this listing.) */
779 static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
782 result = fminf4( vec->x, vec->y );
783 result = fminf4( vec->z, result );
784 result = fminf4( vec->w, result );
/* Sums the components per lane: ((x + y) + z) + w. (The declaration of
 * `result` and the return statement fall on lines not included in this
 * listing.) */
788 static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
791 result = vec_add( vec->x, vec->y );
792 result = vec_add( result, vec->z );
793 result = vec_add( result, vec->w );
/* Per-lane dot product: x0*x1 + y0*y1 + z0*z1 + w0*w1. Each product is
 * formed by vec_madd with a zero addend and accumulated with vec_add. (The
 * declaration of `result` and the return statement fall on lines not
 * included in this listing.) */
797 static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
800 result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
801 result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
802 result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
803 result = vec_add( result, vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
/* Per-lane squared length: x*x + y*y + z*z + w*w. (The declaration of
 * `result` and the return statement fall on lines not included in this
 * listing.) */
807 static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
810 result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
811 result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
812 result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
813 result = vec_add( result, vec_madd( vec->w, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
/* Per-lane length: sqrtf4 of vmathSoaV4LengthSqr( vec ). */
817 static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
819 return sqrtf4( vmathSoaV4LengthSqr( vec ) );
/* Scales vec by the reciprocal of its length, per lane. The visible
 * statements contain no guard for a zero-length input. */
822 static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
824 vec_float4 lenSqr, lenInv;
825 lenSqr = vmathSoaV4LengthSqr( vec );
/* lenInv = 1 / sqrt( lenSqr ) */
826 lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
827 result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
828 result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
829 result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
830 result->w = vec_madd( vec->w, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
/* Bitwise select between two vectors: where a bit of select1 is set the
 * result takes the bit from vec1, otherwise from vec0. The same mask is
 * applied to x, y, z and w, so an all-ones lane picks vec1 for that lane. */
833 static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
835 result->x = vec_sel( vec0->x, vec1->x, select1 );
836 result->y = vec_sel( vec0->y, vec1->y, select1 );
837 result->z = vec_sel( vec0->z, vec1->z, select1 );
838 result->w = vec_sel( vec0->w, vec1->w, select1 );
841 #ifdef _VECTORMATH_DEBUG
/* Debug helper: converts *vec to four AoS vectors and prints each one with
 * vmathV4Print, in lane order. The statements that sit between the
 * vmathV4Print calls are not included in this listing. */
843 static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
845 VmathVector4 vec0, vec1, vec2, vec3;
846 vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
848 vmathV4Print( &vec0 );
850 vmathV4Print( &vec1 );
852 vmathV4Print( &vec2 );
854 vmathV4Print( &vec3 );
/* Debug helper: prints name followed by ":" and a newline, then converts
 * *vec to four AoS vectors and prints each one with vmathV4Print, in lane
 * order. The statements that sit between the vmathV4Print calls are not
 * included in this listing. */
857 static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
859 VmathVector4 vec0, vec1, vec2, vec3;
860 printf( "%s:\n", name );
861 vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
863 vmathV4Print( &vec0 );
865 vmathV4Print( &vec1 );
867 vmathV4Print( &vec2 );
869 vmathV4Print( &vec3 );
/* SoA Point3 construction entry points. Only the signatures appear in this
 * listing (the bodies are not shown), so the notes are limited to what the
 * parameter lists establish. */
/* Copy: source is *pnt, destination is *result. */
874 static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
/* Build *result from three vec_float4 components _x, _y, _z. */
881 static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
/* Build *result from an SoA vector *vec. */
888 static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
/* Build *result from one vec_float4 scalar (presumably assigned to x, y and
 * z alike - confirm against the body). */
895 static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
/* Broadcasts one AoS point into an SoA point: words 0, 1 and 2 of
 * pnt->vec128 are each splatted across all four lanes of result->x,
 * result->y and result->z respectively. */
902 static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
904 vec_float4 vec128 = pnt->vec128;
905 result->x = vec_splat( vec128, 0 );
906 result->y = vec_splat( vec128, 1 );
907 result->z = vec_splat( vec128, 2 );
/* Transposes four AoS points into one SoA point using two merge passes.
 * Afterwards lane i of result->x, result->y and result->z holds word 0, 1
 * and 2 of pnt<i>->vec128; word 3 of the inputs is not transferred. */
910 static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
912 vec_float4 tmp0, tmp1, tmp2, tmp3;
/* First pass: interleave points 0/2 and 1/3. */
913 tmp0 = vec_mergeh( pnt0->vec128, pnt2->vec128 );
914 tmp1 = vec_mergeh( pnt1->vec128, pnt3->vec128 );
915 tmp2 = vec_mergel( pnt0->vec128, pnt2->vec128 );
916 tmp3 = vec_mergel( pnt1->vec128, pnt3->vec128 );
/* Second pass: each output collects one component in lane order 0..3. */
917 result->x = vec_mergeh( tmp0, tmp1 );
918 result->y = vec_mergel( tmp0, tmp1 );
919 result->z = vec_mergeh( tmp2, tmp3 );
922 static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
924 VmathSoaVector3 tmpV3_0, tmpV3_1;
925 vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
926 vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
927 vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
930 static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
932 vec_float4 tmp0, tmp1;
933 tmp0 = vec_mergeh( pnt->x, pnt->z );
934 tmp1 = vec_mergel( pnt->x, pnt->z );
935 vmathP3MakeFrom128( result0, vec_mergeh( tmp0, pnt->y ) );
936 vmathP3MakeFrom128( result1, vec_perm( tmp0, pnt->y, _VECTORMATH_PERM_ZBWX ) );
937 vmathP3MakeFrom128( result2, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_XCYX ) );
938 vmathP3MakeFrom128( result3, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_ZDWX ) );
941 static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
943 vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
944 xyzx = threeQuads[0];
945 yzxy = threeQuads[1];
946 zxyz = threeQuads[2];
947 xyxy = vec_sld( yzxy, xyzx, 8 );
948 zxzx = vec_sld( xyzx, zxyz, 8 );
949 yzyz = vec_sld( zxyz, yzxy, 8 );
950 vmathSoaP3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
951 vmathSoaP3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
952 vmathSoaP3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
955 static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
957 vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
958 xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
959 zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
960 yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
961 xyzx = vec_sld( xyxy, zxzx, 8 );
962 yzxy = vec_sld( yzyz, xyxy, 8 );
963 zxyz = vec_sld( zxzx, yzyz, 8 );
964 threeQuads[0] = xyzx;
965 threeQuads[1] = yzxy;
966 threeQuads[2] = zxyz;
969 static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
973 vmathSoaP3StoreXYZArray( pnt0, xyz0 );
974 vmathSoaP3StoreXYZArray( pnt1, xyz1 );
975 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
976 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
977 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
980 static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
985 static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
990 static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
995 static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
1000 static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
1005 static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
1010 static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
1012 *(&result->x + idx) = value;
1015 static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
1017 return *(&pnt->x + idx);
1020 static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1022 result->x = vec_sub( pnt0->x, pnt1->x );
1023 result->y = vec_sub( pnt0->y, pnt1->y );
1024 result->z = vec_sub( pnt0->z, pnt1->z );
1027 static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
1029 result->x = vec_add( pnt->x, vec1->x );
1030 result->y = vec_add( pnt->y, vec1->y );
1031 result->z = vec_add( pnt->z, vec1->z );
1034 static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
1036 result->x = vec_sub( pnt->x, vec1->x );
1037 result->y = vec_sub( pnt->y, vec1->y );
1038 result->z = vec_sub( pnt->z, vec1->z );
1041 static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1043 result->x = vec_madd( pnt0->x, pnt1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
1044 result->y = vec_madd( pnt0->y, pnt1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
1045 result->z = vec_madd( pnt0->z, pnt1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
1048 static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1050 result->x = divf4( pnt0->x, pnt1->x );
1051 result->y = divf4( pnt0->y, pnt1->y );
1052 result->z = divf4( pnt0->z, pnt1->z );
1055 static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1057 result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->x );
1058 result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->y );
1059 result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->z );
1062 static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1064 result->x = sqrtf4( pnt->x );
1065 result->y = sqrtf4( pnt->y );
1066 result->z = sqrtf4( pnt->z );
1069 static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1071 result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->x ) );
1072 result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->y ) );
1073 result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->z ) );
1076 static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
1078 result->x = fabsf4( pnt->x );
1079 result->y = fabsf4( pnt->y );
1080 result->z = fabsf4( pnt->z );
1083 static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1085 result->x = copysignf4( pnt0->x, pnt1->x );
1086 result->y = copysignf4( pnt0->y, pnt1->y );
1087 result->z = copysignf4( pnt0->z, pnt1->z );
1090 static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1092 result->x = fmaxf4( pnt0->x, pnt1->x );
1093 result->y = fmaxf4( pnt0->y, pnt1->y );
1094 result->z = fmaxf4( pnt0->z, pnt1->z );
1097 static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
1100 result = fmaxf4( pnt->x, pnt->y );
1101 result = fmaxf4( pnt->z, result );
1105 static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1107 result->x = fminf4( pnt0->x, pnt1->x );
1108 result->y = fminf4( pnt0->y, pnt1->y );
1109 result->z = fminf4( pnt0->z, pnt1->z );
1112 static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
1115 result = fminf4( pnt->x, pnt->y );
1116 result = fminf4( pnt->z, result );
1120 static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
1123 result = vec_add( pnt->x, pnt->y );
1124 result = vec_add( result, pnt->z );
1128 static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
1130 VmathSoaPoint3 tmpP3_0;
1131 vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
1132 vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
1135 static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
1137 VmathSoaPoint3 tmpP3_0;
1138 vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
1139 vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
1142 static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
1145 result = vec_madd( pnt->x, unitVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
1146 result = vec_add( result, vec_madd( pnt->y, unitVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
1147 result = vec_add( result, vec_madd( pnt->z, unitVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
1151 static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
1153 VmathSoaVector3 tmpV3_0;
1154 vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
1155 return vmathSoaV3LengthSqr( &tmpV3_0 );
1158 static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
1160 VmathSoaVector3 tmpV3_0;
1161 vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
1162 return vmathSoaV3Length( &tmpV3_0 );
1165 static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1167 VmathSoaVector3 tmpV3_0;
1168 vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
1169 return vmathSoaV3LengthSqr( &tmpV3_0 );
1172 static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
1174 VmathSoaVector3 tmpV3_0;
1175 vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
1176 return vmathSoaV3Length( &tmpV3_0 );
1179 static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
1181 result->x = vec_sel( pnt0->x, pnt1->x, select1 );
1182 result->y = vec_sel( pnt0->y, pnt1->y, select1 );
1183 result->z = vec_sel( pnt0->z, pnt1->z, select1 );
1186 #ifdef _VECTORMATH_DEBUG
1188 static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
1190 VmathPoint3 vec0, vec1, vec2, vec3;
1191 vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
1192 printf("slot 0:\n");
1193 vmathP3Print( &vec0 );
1194 printf("slot 1:\n");
1195 vmathP3Print( &vec1 );
1196 printf("slot 2:\n");
1197 vmathP3Print( &vec2 );
1198 printf("slot 3:\n");
1199 vmathP3Print( &vec3 );
1202 static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
1204 VmathPoint3 vec0, vec1, vec2, vec3;
1205 printf( "%s:\n", name );
1206 vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
1207 printf("slot 0:\n");
1208 vmathP3Print( &vec0 );
1209 printf("slot 1:\n");
1210 vmathP3Print( &vec1 );
1211 printf("slot 2:\n");
1212 vmathP3Print( &vec2 );
1213 printf("slot 3:\n");
1214 vmathP3Print( &vec3 );
1221 #endif /* __cplusplus */