2 Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
5 Redistribution and use in source and binary forms,
6 with or without modification, are permitted provided that the
7 following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the Sony Computer Entertainment Inc nor the names
14 of its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
30 #ifndef _VECTORMATH_VEC_AOS_CPP_H
31 #define _VECTORMATH_VEC_AOS_CPP_H
32 //-----------------------------------------------------------------------------
34 // for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
// Per-word byte-index constants used to assemble spu_shuffle control vectors.
// X, Y, Z, W name the four 32-bit words of the first shuffle operand and
// A, B, C, D the four words of the second operand (byte indices 0x10..0x1f).
36 #define _VECTORMATH_SHUF_X 0x00010203
37 #define _VECTORMATH_SHUF_Y 0x04050607
38 #define _VECTORMATH_SHUF_Z 0x08090a0b
39 #define _VECTORMATH_SHUF_W 0x0c0d0e0f
40 #define _VECTORMATH_SHUF_A 0x10111213
41 #define _VECTORMATH_SHUF_B 0x14151617
42 #define _VECTORMATH_SHUF_C 0x18191a1b
43 #define _VECTORMATH_SHUF_D 0x1c1d1e1f
// NOTE(review): presumably the control pattern that makes spu_shuffle emit
// zero bytes for the whole word -- confirm against the SPU shuffle encoding.
44 #define _VECTORMATH_SHUF_0 0x80808080
// Complete shuffle control vectors. Each name spells, in order, the source
// word selected for result words 0..3. _VECTORMATH_SHUF_XYZA is defined once
// here; it must not be redefined further down.
45 #define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
46 #define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
47 #define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
48 #define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
49 #define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
51 #define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
52 #define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
// Unit basis vectors as vec_float4 literals; the digits give the x, y, z, w values.
53 #define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
54 #define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
55 #define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
56 #define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
// Cosine threshold used by slerp(): the sine-based weights are selected only
// where this value is greater than the cosine of the angle between the inputs;
// otherwise slerp falls back to the linear weights (1 - t) and t.
57 #define _VECTORMATH_SLERP_TOL 0.999f
59 //-----------------------------------------------------------------------------
62 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
63 #define _VECTORMATH_INTERNAL_FUNCTIONS
// Three-component dot product helper on raw quadwords.
// Multiplies vec0 and vec1 lane-wise, then uses spu_madd to accumulate the
// products of both operands rotated left by 4 bytes and by 8 bytes
// (spu_rlqwbyte). Callers in this file read lane 0 of the returned vector.
// NOTE(review): the braces and the declaration of `result` are not visible in
// this listing.
65 static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
68 result = spu_mul( vec0, vec1 );
69 result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
70 return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
// Four-component dot product helper on raw quadwords.
// Forms the lane-wise product, adds the product of the operands rotated by
// 4 bytes, then adds that partial sum to itself rotated by 8 bytes.
// Vector4::normalize feeds the result straight into rsqrtf4 without a splat,
// so presumably every lane ends up holding the full sum -- TODO confirm.
// NOTE(review): the declaration of `result` is not visible in this listing.
73 static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
76 result = spu_mul( vec0, vec1 );
77 result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
78 return spu_add( spu_rlqwbyte( result, 8 ), result );
// Cross product helper on raw quadwords.
// Builds YZXW / ZXYW permutations of both inputs, multiplies the first pair
// and then applies spu_nmsub with the second pair, i.e. presumably
// (vec0.yzx * vec1.zxy) - (vec0.zxy * vec1.yzx) -- confirm spu_nmsub operand
// order. Both permutations keep word W in place, so the w lane is the
// difference of two identical products.
// NOTE(review): the return statement is not visible in this listing.
81 static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
83 vec_float4 tmp0, tmp1, tmp2, tmp3, result;
84 tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
85 tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
86 tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
87 tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
88 result = spu_mul( tmp0, tmp1 );
89 result = spu_nmsub( tmp2, tmp3, result );
// Converts four 32-bit floats to 16-bit half-float bit patterns, one per
// 32-bit word of the result (the value ends up in the low 16 bits).
// NOTE(review): the declaration of `bexp` and the return statement are not
// visible in this listing; `bexp` is used with vec_int4 operations below.
93 static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
96 vec_uint4 mant, sign, hfloat;
97 vec_uint4 notZero, isInf;
// Half-float bit fields: exponent-all-ones (infinity), 10-bit mantissa mask,
// and sign bit.
98 const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
99 const vec_uint4 mergeMant = spu_splats(0x000003ffu);
100 const vec_uint4 mergeSign = spu_splats(0x00008000u);
// Move the float sign (bit 31) and the mantissa bits into half-float position
// (negative spu_rlmask counts are presumably right shifts), and isolate the
// 8-bit biased exponent.
102 sign = spu_rlmask((vec_uint4)v, -16);
103 mant = spu_rlmask((vec_uint4)v, -13);
104 bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
// Biased exponents <= 112 produce zero; exponents > 142 produce the infinity
// pattern. That second class includes exponent 255, so NaN inputs also come
// out as infinity.
106 notZero = spu_cmpgt(bexp, 112);
107 isInf = spu_cmpgt(bexp, 142);
// Re-bias the exponent by 112 and shift it into bits 10 and up.
109 bexp = spu_add(bexp, -112);
110 bexp = spu_sl(bexp, 10);
// Merge mantissa bits into the exponent, then apply the zero / infinity
// overrides, and finally insert the sign bit.
112 hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
113 hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
114 hfloat = spu_sel(hfloat, hfloatInf, isInf);
115 hfloat = spu_sel(hfloat, sign, mergeSign);
// Converts two float quadwords to eight packed half floats.
// Each input goes through _vmathVfToHalfFloatsUnpacked; the `pack` shuffle
// then gathers bytes 2-3 of every 32-bit word of u's result followed by the
// same bytes of v's result (indices 18..31 address the second operand).
120 static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
122 vec_uint4 hfloat_u, hfloat_v;
123 const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
124 hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
125 hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
126 return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
131 namespace Vectormath {
// Reads the referenced element: extracts lane `i` from the quadword `ref`.
134 inline VecIdx::operator float() const
136 return spu_extract( ref, i );
// Writes `scalar` into lane `i` of the referenced quadword.
// NOTE(review): the return statement is not visible in this listing.
139 inline float VecIdx::operator =( float scalar )
141 ref = spu_insert( scalar, ref, i );
// Assigns from another element proxy: converts `scalar` to float and forwards
// to the float assignment operator, returning its result.
145 inline float VecIdx::operator =( const VecIdx& scalar )
147 return *this = float(scalar);
// Multiplies the referenced element by `scalar` in place (extract, multiply,
// re-insert into lane `i`).
// NOTE(review): the return statement is not visible in this listing.
150 inline float VecIdx::operator *=( float scalar )
152 float tmp = spu_extract( ref, i ) * scalar;
153 ref = spu_insert( tmp, ref, i );
// Divides the referenced element by `scalar` in place using scalar float
// division, then re-inserts it into lane `i`.
// NOTE(review): the return statement is not visible in this listing.
157 inline float VecIdx::operator /=( float scalar )
159 float tmp = spu_extract( ref, i ) / scalar;
160 ref = spu_insert( tmp, ref, i );
// Adds `scalar` to the referenced element in place (extract, add, re-insert
// into lane `i`).
// NOTE(review): the return statement is not visible in this listing.
164 inline float VecIdx::operator +=( float scalar )
166 float tmp = spu_extract( ref, i ) + scalar;
167 ref = spu_insert( tmp, ref, i );
// Subtracts `scalar` from the referenced element in place (extract, subtract,
// re-insert into lane `i`).
// NOTE(review): the return statement is not visible in this listing.
171 inline float VecIdx::operator -=( float scalar )
173 float tmp = spu_extract( ref, i ) - scalar;
174 ref = spu_insert( tmp, ref, i );
// Constructs from three scalars; the fourth lane of mVec128 is set to 0.0f.
178 inline Vector3::Vector3( float _x, float _y, float _z )
180 mVec128 = (vec_float4){ _x, _y, _z, 0.0f };
// Constructs from a Point3 by copying its whole quadword (all four lanes).
183 inline Vector3::Vector3( Point3 pnt )
185 mVec128 = pnt.get128();
// Constructs with every lane of mVec128 (including the fourth) set to `scalar`.
188 inline Vector3::Vector3( float scalar )
190 mVec128 = spu_splats( scalar );
// Constructs from a raw quadword.
// NOTE(review): the body is not visible in this listing; presumably it stores
// vf4 in mVec128 -- TODO confirm.
193 inline Vector3::Vector3( vec_float4 vf4 )
// Returns the unit vector (1, 0, 0), built from _VECTORMATH_UNIT_1000.
198 inline const Vector3 Vector3::xAxis( )
200 return Vector3( _VECTORMATH_UNIT_1000 );
// Returns the unit vector (0, 1, 0), built from _VECTORMATH_UNIT_0100.
203 inline const Vector3 Vector3::yAxis( )
205 return Vector3( _VECTORMATH_UNIT_0100 );
// Returns the unit vector (0, 0, 1), built from _VECTORMATH_UNIT_0010.
208 inline const Vector3 Vector3::zAxis( )
210 return Vector3( _VECTORMATH_UNIT_0010 );
// Linear interpolation: vec0 + (vec1 - vec0) * t. t is not clamped, so values
// outside [0, 1] extrapolate.
213 inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
215 return ( vec0 + ( ( vec1 - vec0 ) * t ) );
// Spherical linear interpolation between two vectors the caller supplies as
// unit length (the parameter names say so; nothing here normalizes them).
// Returns unitVec0 * scale0 + unitVec1 * scale1. The weights are
// sin((1-t)*angle)/sin(angle) and sin(t*angle)/sin(angle) where
// _VECTORMATH_SLERP_TOL is greater than the cosine of the angle, and the
// linear weights (1-t) and t otherwise.
218 inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
220 vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
221 vec_uint4 selectMask;
// Shuffle controls that replicate word x, y or z into all four lanes.
222 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
223 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
224 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
// Cosine of the angle between the inputs, splatted from lane x.
225 cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
226 cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
// Mask is set where the tolerance exceeds the cosine (sine weights apply).
227 selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
228 angle = acosf4( cosAngle );
229 tttt = spu_splats(t);
230 oneMinusT = spu_sub( spu_splats(1.0f), tttt );
// Build per-lane factors [1, 1-t, t, 1] (the two maskb patterns presumably
// address lanes y and z, matching the yyyy/zzzz reads below), then scale by
// the angle so a single sinf4 yields sin(angle), sin((1-t)angle), sin(t*angle).
231 angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
232 angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
233 angles = spu_mul( angles, angle );
234 sines = sinf4( angles );
// Divide every lane by lane x (sin(angle)).
235 scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
// Choose between sine-based and linear weights per selectMask.
236 scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
237 scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
238 return Vector3( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
// Accessor for the underlying quadword.
// NOTE(review): the body is not visible in this listing; presumably it returns
// mVec128 -- TODO confirm.
241 inline vec_float4 Vector3::get128( ) const
// Merges vec's x, y, z into the quadword at *quad while keeping the bytes of
// the existing quadword selected by spu_maskb(0x000f) (presumably the fourth
// word -- the same mask is used elsewhere in this file to fill the w lane).
// NOTE(review): the write-back of dstVec to *quad is not visible in this
// listing.
246 inline void storeXYZ( Vector3 vec, vec_float4 * quad )
248 vec_float4 dstVec = *quad;
249 vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
250 dstVec = spu_sel(vec.get128(), dstVec, mask);
// Unpacks four 3-vectors from three consecutive quadwords holding twelve
// tightly packed floats (x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3, as the local
// names indicate). threeQuads must point at three readable quadwords.
254 inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
256 vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
257 xyzx = threeQuads[0];
258 yzxy = threeQuads[1];
259 zxyz = threeQuads[2];
// Vector 1 = word W of quad 0 + words A,B,C of quad 1; vector 2 = words Z,W of
// quad 1 + words A,B of quad 2; vector 3 = quad 2 rotated left by one word.
260 xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
261 xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
262 xyz3 = spu_rlqwbyte( zxyz, 4 );
// The fourth lane of each output holds leftover neighbouring data, not zero.
263 vec0 = Vector3( xyzx );
264 vec1 = Vector3( xyz1 );
265 vec2 = Vector3( xyz2 );
266 vec3 = Vector3( xyz3 );
// Packs the x, y, z of four vectors into three consecutive quadwords (twelve
// floats, no padding). threeQuads must point at three writable quadwords.
269 inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
271 vec_float4 xyzx, yzxy, zxyz;
// quad 0 = vec0.xyz + vec1.x; quad 1 = vec1.yz + vec2.xy; quad 2 = vec2.z + vec3.xyz.
272 xyzx = spu_shuffle( vec0.get128(), vec1.get128(), _VECTORMATH_SHUF_XYZA );
273 yzxy = spu_shuffle( vec1.get128(), vec2.get128(), _VECTORMATH_SHUF_YZAB );
274 zxyz = spu_shuffle( vec2.get128(), vec3.get128(), _VECTORMATH_SHUF_ZABC );
275 threeQuads[0] = xyzx;
276 threeQuads[1] = yzxy;
277 threeQuads[2] = zxyz;
// Stores eight 3-vectors as 24 packed half floats in three vec_ushort8 quads.
// The vectors are first packed to floats with storeXYZArray (four per call),
// then consecutive pairs of float quads are converted with
// _vmath2VfToHalfFloats.
// NOTE(review): the declarations of xyz0 and xyz1 are not visible in this
// listing; they are indexed 0..2, so presumably vec_float4[3] each.
280 inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
284 storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
285 storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
286 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
287 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
288 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
// Copy assignment: copies the whole quadword from `vec`.
// NOTE(review): the return statement is not visible in this listing.
291 inline Vector3 & Vector3::operator =( Vector3 vec )
293 mVec128 = vec.mVec128;
// Sets the x element (lane 0) to _x.
// NOTE(review): the return statement is not visible in this listing.
297 inline Vector3 & Vector3::setX( float _x )
299 mVec128 = spu_insert( _x, mVec128, 0 );
// Returns the x element (lane 0).
303 inline float Vector3::getX( ) const
305 return spu_extract( mVec128, 0 );
// Sets the y element (lane 1) to _y.
// NOTE(review): the return statement is not visible in this listing.
308 inline Vector3 & Vector3::setY( float _y )
310 mVec128 = spu_insert( _y, mVec128, 1 );
// Returns the y element (lane 1).
314 inline float Vector3::getY( ) const
316 return spu_extract( mVec128, 1 );
// Sets the z element (lane 2) to _z.
// NOTE(review): the return statement is not visible in this listing.
319 inline Vector3 & Vector3::setZ( float _z )
321 mVec128 = spu_insert( _z, mVec128, 2 );
// Returns the z element (lane 2).
325 inline float Vector3::getZ( ) const
327 return spu_extract( mVec128, 2 );
// Sets the element at lane `idx` to `value`. idx is passed to spu_insert
// unchecked, so nothing here restricts it to 0..2.
// NOTE(review): the return statement is not visible in this listing.
330 inline Vector3 & Vector3::setElem( int idx, float value )
332 mVec128 = spu_insert( value, mVec128, idx );
// Returns the element at lane `idx`; idx is passed to spu_extract unchecked.
336 inline float Vector3::getElem( int idx ) const
338 return spu_extract( mVec128, idx );
// Mutable subscript: returns a VecIdx proxy built from mVec128 and idx, through
// which the element can be read or assigned.
341 inline VecIdx Vector3::operator []( int idx )
343 return VecIdx( mVec128, idx );
// Const subscript: returns the element at lane `idx` by value.
346 inline float Vector3::operator []( int idx ) const
348 return spu_extract( mVec128, idx );
// Lane-wise sum of this vector and `vec` (all four lanes are added).
351 inline const Vector3 Vector3::operator +( Vector3 vec ) const
353 return Vector3( spu_add( mVec128, vec.mVec128 ) );
// Lane-wise difference of this vector and `vec` (all four lanes).
356 inline const Vector3 Vector3::operator -( Vector3 vec ) const
358 return Vector3( spu_sub( mVec128, vec.mVec128 ) );
// Vector + point: lane-wise sum returned as a Point3.
361 inline const Point3 Vector3::operator +( Point3 pnt ) const
363 return Point3( spu_add( mVec128, pnt.get128() ) );
// Multiplies every lane by `scalar` (the scalar is splatted to a quadword).
366 inline const Vector3 Vector3::operator *( float scalar ) const
368 return Vector3( spu_mul( mVec128, spu_splats(scalar) ) );
// In-place vector addition.
// NOTE(review): the body is not visible in this listing; presumably
// `*this = *this + vec` followed by returning *this -- TODO confirm.
371 inline Vector3 & Vector3::operator +=( Vector3 vec )
// In-place vector subtraction.
// NOTE(review): the body is not visible in this listing; presumably
// `*this = *this - vec` followed by returning *this -- TODO confirm.
377 inline Vector3 & Vector3::operator -=( Vector3 vec )
// In-place scalar multiplication, implemented via operator*(float).
// NOTE(review): the return statement is not visible in this listing.
383 inline Vector3 & Vector3::operator *=( float scalar )
385 *this = *this * scalar;
// Divides every lane by `scalar` using divf4 with the scalar splatted.
389 inline const Vector3 Vector3::operator /( float scalar ) const
391 return Vector3( divf4( mVec128, spu_splats(scalar) ) );
// In-place scalar division, implemented via operator/(float).
// NOTE(review): the return statement is not visible in this listing.
394 inline Vector3 & Vector3::operator /=( float scalar )
396 *this = *this / scalar;
// Unary negation: returns negatef4 applied to the whole quadword.
400 inline const Vector3 Vector3::operator -( ) const
402 return Vector3( negatef4( mVec128 ) );
// Scalar-on-the-left multiplication for Vector3.
// NOTE(review): the body is not visible in this listing; presumably it returns
// vec * scalar -- TODO confirm.
405 inline const Vector3 operator *( float scalar, Vector3 vec )
// Lane-wise product of two vectors.
410 inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
412 return Vector3( spu_mul( vec0.get128(), vec1.get128() ) );
// Lane-wise quotient vec0 / vec1, computed with divf4 over all four lanes.
415 inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
417 return Vector3( divf4( vec0.get128(), vec1.get128() ) );
// Lane-wise reciprocal, computed with recipf4.
420 inline const Vector3 recipPerElem( Vector3 vec )
422 return Vector3( recipf4( vec.get128() ) );
// Lane-wise square root, computed with sqrtf4.
425 inline const Vector3 sqrtPerElem( Vector3 vec )
427 return Vector3( sqrtf4( vec.get128() ) );
// Lane-wise reciprocal square root, computed with rsqrtf4.
430 inline const Vector3 rsqrtPerElem( Vector3 vec )
432 return Vector3( rsqrtf4( vec.get128() ) );
// Lane-wise absolute value, computed with fabsf4.
435 inline const Vector3 absPerElem( Vector3 vec )
437 return Vector3( fabsf4( vec.get128() ) );
// Lane-wise copysignf4( vec0, vec1 ): presumably the magnitude of vec0 with the
// sign of vec1, following the usual copysign argument order -- TODO confirm.
440 inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
442 return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
// Lane-wise maximum of two vectors, computed with fmaxf4.
445 inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
447 return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
// Returns the largest of the x, y, z elements.
// y and then z are each promoted into lane 0 and max-combined with the running
// result; only lane 0 of `result` is read at the end.
// NOTE(review): the declaration of `result` is not visible in this listing.
450 inline float maxElem( Vector3 vec )
453 result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
454 result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
455 return spu_extract( result, 0 );
// Lane-wise minimum of two vectors, computed with fminf4.
458 inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
460 return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
// Returns the smallest of the x, y, z elements.
// y and then z are each promoted into lane 0 and min-combined with the running
// result; only lane 0 of `result` is read at the end.
// NOTE(review): the declaration of `result` is not visible in this listing.
463 inline float minElem( Vector3 vec )
466 result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
467 result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
468 return spu_extract( result, 0 );
// Sum of the x, y and z elements, added as scalars in that order.
// NOTE(review): the statement keyword preceding this expression (presumably
// `return`) is not visible in this listing.
471 inline float sum( Vector3 vec )
474 spu_extract( vec.get128(), 0 ) +
475 spu_extract( vec.get128(), 1 ) +
476 spu_extract( vec.get128(), 2 );
// Dot product of two 3-vectors: lane 0 of _vmathVfDot3.
479 inline float dot( Vector3 vec0, Vector3 vec1 )
481 return spu_extract( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
// Squared length: the 3-component dot product of vec with itself.
484 inline float lengthSqr( Vector3 vec )
486 return spu_extract( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
// Length: scalar sqrtf of lengthSqr( vec ).
489 inline float length( Vector3 vec )
491 return sqrtf( lengthSqr( vec ) );
// Returns vec scaled by the reciprocal square root of its squared length.
// There is no zero-length guard here: the result for a zero vector is whatever
// rsqrtf4(0) times 0 yields.
494 inline const Vector3 normalize( Vector3 vec )
496 vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
// Replicate lane x of the dot product into every lane before the rsqrt.
497 dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
498 return Vector3( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
// Cross product of two 3-vectors, delegated to _vmathVfCross.
501 inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
503 return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
// Branch-free choice between two vectors: the mask (unsigned int)-(select1 > 0)
// is all ones when select1 is true and zero otherwise, so spu_sel presumably
// yields vec1 when select1 is true and vec0 when false.
506 inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
508 return Vector3( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
// Debug-only helpers, compiled when _VECTORMATH_DEBUG is defined.
511 #ifdef _VECTORMATH_DEBUG
// Prints x, y, z of a Vector3 to stdout as "( x y z )" followed by a newline.
// The quadword is read back as floats through a union.
513 inline void print( Vector3 vec )
515 union { vec_float4 v; float s[4]; } tmp;
516 tmp.v = vec.get128();
517 printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
// Prints a Vector3 to stdout as "name: ( x y z )" followed by a newline.
// `name` is passed to printf's %s, so it must be a valid C string.
520 inline void print( Vector3 vec, const char * name )
522 union { vec_float4 v; float s[4]; } tmp;
523 tmp.v = vec.get128();
524 printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
// Constructs from four scalars in x, y, z, w order.
529 inline Vector4::Vector4( float _x, float _y, float _z, float _w )
531 mVec128 = (vec_float4){ _x, _y, _z, _w };
// Constructs from a Vector3 plus w: the XYZA shuffle takes words x, y, z from
// xyz and the first word of the promoted _w.
534 inline Vector4::Vector4( Vector3 xyz, float _w )
536 mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
// Constructs from a Vector3, replacing the bytes selected by spu_maskb(0x000f)
// (presumably the w word) with 0.0f.
539 inline Vector4::Vector4( Vector3 vec )
541 mVec128 = spu_sel( vec.get128(), spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
// Constructs from a Point3, replacing the bytes selected by spu_maskb(0x000f)
// (presumably the w word) with 1.0f.
544 inline Vector4::Vector4( Point3 pnt )
546 mVec128 = spu_sel( pnt.get128(), spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
// Constructs from a Quat by copying its quadword unchanged.
549 inline Vector4::Vector4( Quat quat )
551 mVec128 = quat.get128();
// Constructs with all four lanes set to `scalar`.
554 inline Vector4::Vector4( float scalar )
556 mVec128 = spu_splats( scalar );
// Constructs from a raw quadword.
// NOTE(review): the body is not visible in this listing; presumably it stores
// vf4 in mVec128 -- TODO confirm.
559 inline Vector4::Vector4( vec_float4 vf4 )
// Returns the unit vector (1, 0, 0, 0).
564 inline const Vector4 Vector4::xAxis( )
566 return Vector4( _VECTORMATH_UNIT_1000 );
// Returns the unit vector (0, 1, 0, 0).
569 inline const Vector4 Vector4::yAxis( )
571 return Vector4( _VECTORMATH_UNIT_0100 );
// Returns the unit vector (0, 0, 1, 0).
574 inline const Vector4 Vector4::zAxis( )
576 return Vector4( _VECTORMATH_UNIT_0010 );
// Returns the unit vector (0, 0, 0, 1).
579 inline const Vector4 Vector4::wAxis( )
581 return Vector4( _VECTORMATH_UNIT_0001 );
// Linear interpolation: vec0 + (vec1 - vec0) * t. t is not clamped, so values
// outside [0, 1] extrapolate.
584 inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
586 return ( vec0 + ( ( vec1 - vec0 ) * t ) );
// Spherical linear interpolation between two 4-vectors the caller supplies as
// unit length (nothing here normalizes them). Same scheme as the Vector3
// overload but using the 4-component dot product: the weights are
// sin((1-t)*angle)/sin(angle) and sin(t*angle)/sin(angle) where
// _VECTORMATH_SLERP_TOL is greater than the cosine of the angle, and the
// linear weights (1-t) and t otherwise.
589 inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
591 vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
592 vec_uint4 selectMask;
// Shuffle controls that replicate word x, y or z into all four lanes.
593 vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
594 vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
595 vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
// Cosine of the angle between the inputs, splatted from lane x.
596 cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
597 cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
// Mask is set where the tolerance exceeds the cosine (sine weights apply).
598 selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
599 angle = acosf4( cosAngle );
600 tttt = spu_splats(t);
601 oneMinusT = spu_sub( spu_splats(1.0f), tttt );
// Build per-lane factors [1, 1-t, t, 1] (the two maskb patterns presumably
// address lanes y and z, matching the yyyy/zzzz reads below), then scale by
// the angle so a single sinf4 yields all three sines.
602 angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
603 angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
604 angles = spu_mul( angles, angle );
605 sines = sinf4( angles );
// Divide every lane by lane x (sin(angle)).
606 scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
// Choose between sine-based and linear weights per selectMask.
607 scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
608 scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
609 return Vector4( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
// Accessor for the underlying quadword.
// NOTE(review): the body is not visible in this listing; presumably it returns
// mVec128 -- TODO confirm.
612 inline vec_float4 Vector4::get128( ) const
// Stores four 4-vectors as 16 half floats in two vec_ushort8 quads: vec0 and
// vec1 go to twoQuads[0], vec2 and vec3 to twoQuads[1]. twoQuads must point at
// two writable quadwords.
617 inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
619 twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
620 twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
// Copy assignment: copies the whole quadword from `vec`.
// NOTE(review): the return statement is not visible in this listing.
623 inline Vector4 & Vector4::operator =( Vector4 vec )
625 mVec128 = vec.mVec128;
// Replaces x, y, z with those of `vec` while keeping the bytes of the current
// value selected by spu_maskb(0x000f) (presumably the w word).
// NOTE(review): the return statement is not visible in this listing.
629 inline Vector4 & Vector4::setXYZ( Vector3 vec )
631 mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
// Returns a Vector3 built from the full quadword; the w lane is carried over
// unchanged rather than cleared.
635 inline const Vector3 Vector4::getXYZ( ) const
637 return Vector3( mVec128 );
// Sets the x element (lane 0) to _x.
// NOTE(review): the return statement is not visible in this listing.
640 inline Vector4 & Vector4::setX( float _x )
642 mVec128 = spu_insert( _x, mVec128, 0 );
// Returns the x element (lane 0).
646 inline float Vector4::getX( ) const
648 return spu_extract( mVec128, 0 );
// Sets the y element (lane 1) to _y.
// NOTE(review): the return statement is not visible in this listing.
651 inline Vector4 & Vector4::setY( float _y )
653 mVec128 = spu_insert( _y, mVec128, 1 );
// Returns the y element (lane 1).
657 inline float Vector4::getY( ) const
659 return spu_extract( mVec128, 1 );
// Sets the z element (lane 2) to _z.
// NOTE(review): the return statement is not visible in this listing.
662 inline Vector4 & Vector4::setZ( float _z )
664 mVec128 = spu_insert( _z, mVec128, 2 );
// Returns the z element (lane 2).
668 inline float Vector4::getZ( ) const
670 return spu_extract( mVec128, 2 );
// Sets the w element (lane 3) to _w.
// NOTE(review): the return statement is not visible in this listing.
673 inline Vector4 & Vector4::setW( float _w )
675 mVec128 = spu_insert( _w, mVec128, 3 );
// Returns the w element (lane 3).
679 inline float Vector4::getW( ) const
681 return spu_extract( mVec128, 3 );
// Sets the element at lane `idx` to `value`; idx is passed to spu_insert
// unchecked.
// NOTE(review): the return statement is not visible in this listing.
684 inline Vector4 & Vector4::setElem( int idx, float value )
686 mVec128 = spu_insert( value, mVec128, idx );
// Returns the element at lane `idx`; idx is passed to spu_extract unchecked.
690 inline float Vector4::getElem( int idx ) const
692 return spu_extract( mVec128, idx );
// Mutable subscript: returns a VecIdx proxy built from mVec128 and idx.
695 inline VecIdx Vector4::operator []( int idx )
697 return VecIdx( mVec128, idx );
// Const subscript: returns the element at lane `idx` by value.
700 inline float Vector4::operator []( int idx ) const
702 return spu_extract( mVec128, idx );
// Lane-wise sum of this vector and `vec`.
705 inline const Vector4 Vector4::operator +( Vector4 vec ) const
707 return Vector4( spu_add( mVec128, vec.mVec128 ) );
// Lane-wise difference of this vector and `vec`.
710 inline const Vector4 Vector4::operator -( Vector4 vec ) const
712 return Vector4( spu_sub( mVec128, vec.mVec128 ) );
// Multiplies every lane by `scalar` (the scalar is splatted to a quadword).
715 inline const Vector4 Vector4::operator *( float scalar ) const
717 return Vector4( spu_mul( mVec128, spu_splats(scalar) ) );
// In-place vector addition.
// NOTE(review): the body is not visible in this listing; presumably
// `*this = *this + vec` followed by returning *this -- TODO confirm.
720 inline Vector4 & Vector4::operator +=( Vector4 vec )
// In-place vector subtraction.
// NOTE(review): the body is not visible in this listing; presumably
// `*this = *this - vec` followed by returning *this -- TODO confirm.
726 inline Vector4 & Vector4::operator -=( Vector4 vec )
// In-place scalar multiplication, implemented via operator*(float).
// NOTE(review): the return statement is not visible in this listing.
732 inline Vector4 & Vector4::operator *=( float scalar )
734 *this = *this * scalar;
// Divides every lane by `scalar` using divf4 with the scalar splatted.
738 inline const Vector4 Vector4::operator /( float scalar ) const
740 return Vector4( divf4( mVec128, spu_splats(scalar) ) );
// In-place scalar division, implemented via operator/(float).
// NOTE(review): the return statement is not visible in this listing.
743 inline Vector4 & Vector4::operator /=( float scalar )
745 *this = *this / scalar;
// Unary negation: returns negatef4 applied to the whole quadword.
749 inline const Vector4 Vector4::operator -( ) const
751 return Vector4( negatef4( mVec128 ) );
// Scalar-on-the-left multiplication for Vector4.
// NOTE(review): the body is not visible in this listing; presumably it returns
// vec * scalar -- TODO confirm.
754 inline const Vector4 operator *( float scalar, Vector4 vec )
// Lane-wise product of two 4-vectors.
759 inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
761 return Vector4( spu_mul( vec0.get128(), vec1.get128() ) );
// Lane-wise quotient vec0 / vec1, computed with divf4.
764 inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
766 return Vector4( divf4( vec0.get128(), vec1.get128() ) );
// Lane-wise reciprocal, computed with recipf4.
769 inline const Vector4 recipPerElem( Vector4 vec )
771 return Vector4( recipf4( vec.get128() ) );
// Lane-wise square root, computed with sqrtf4.
774 inline const Vector4 sqrtPerElem( Vector4 vec )
776 return Vector4( sqrtf4( vec.get128() ) );
// Lane-wise reciprocal square root, computed with rsqrtf4.
779 inline const Vector4 rsqrtPerElem( Vector4 vec )
781 return Vector4( rsqrtf4( vec.get128() ) );
// Lane-wise absolute value, computed with fabsf4.
784 inline const Vector4 absPerElem( Vector4 vec )
786 return Vector4( fabsf4( vec.get128() ) );
// Lane-wise copysignf4( vec0, vec1 ): presumably the magnitude of vec0 with the
// sign of vec1, following the usual copysign argument order -- TODO confirm.
789 inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
791 return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
// Lane-wise maximum of two 4-vectors, computed with fmaxf4.
794 inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
796 return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
// Returns the largest of the x, y, z, w elements.
// y, z and w are each promoted into lane 0 and max-combined with the running
// result; only lane 0 of `result` is read at the end.
// NOTE(review): the declaration of `result` is not visible in this listing.
799 inline float maxElem( Vector4 vec )
802 result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
803 result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
804 result = fmaxf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
805 return spu_extract( result, 0 );
// Lane-wise minimum of two 4-vectors, computed with fminf4.
808 inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
810 return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
// Returns the smallest of the x, y, z, w elements.
// y, z and w are each promoted into lane 0 and min-combined with the running
// result; only lane 0 of `result` is read at the end.
// NOTE(review): the declaration of `result` is not visible in this listing.
813 inline float minElem( Vector4 vec )
816 result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
817 result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
818 result = fminf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
819 return spu_extract( result, 0 );
// Sum of the x, y, z and w elements, added as scalars in that order.
// NOTE(review): the statement keyword preceding this expression (presumably
// `return`) is not visible in this listing.
822 inline float sum( Vector4 vec )
825 spu_extract( vec.get128(), 0 ) +
826 spu_extract( vec.get128(), 1 ) +
827 spu_extract( vec.get128(), 2 ) +
828 spu_extract( vec.get128(), 3 );
// Dot product of two 4-vectors: lane 0 of _vmathVfDot4.
831 inline float dot( Vector4 vec0, Vector4 vec1 )
833 return spu_extract( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
// Squared length: the 4-component dot product of vec with itself.
836 inline float lengthSqr( Vector4 vec )
838 return spu_extract( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
// Length: scalar sqrtf of lengthSqr( vec ).
841 inline float length( Vector4 vec )
843 return sqrtf( lengthSqr( vec ) );
// Returns vec scaled by the reciprocal square root of its squared length.
// Unlike the Vector3 overload, the dot product is not splatted before rsqrtf4,
// so this relies on _vmathVfDot4 leaving the sum in every lane (presumably it
// does -- TODO confirm). There is no zero-length guard.
846 inline const Vector4 normalize( Vector4 vec )
848 vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
849 return Vector4( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
// Branch-free choice between two vectors: the mask (unsigned int)-(select1 > 0)
// is all ones when select1 is true and zero otherwise, so spu_sel presumably
// yields vec1 when select1 is true and vec0 when false.
852 inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
854 return Vector4( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
// Debug-only helpers, compiled when _VECTORMATH_DEBUG is defined.
857 #ifdef _VECTORMATH_DEBUG
// Prints a Vector4 to stdout as "( x y z w )" followed by a newline. The
// quadword is read back as floats through a union.
859 inline void print( Vector4 vec )
861 union { vec_float4 v; float s[4]; } tmp;
862 tmp.v = vec.get128();
863 printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
// Prints a Vector4 to stdout as "name: ( x y z w )" followed by a newline.
// `name` is passed to printf's %s, so it must be a valid C string.
866 inline void print( Vector4 vec, const char * name )
868 union { vec_float4 v; float s[4]; } tmp;
869 tmp.v = vec.get128();
870 printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
// Constructs from three scalars; the fourth lane of mVec128 is set to 0.0f.
875 inline Point3::Point3( float _x, float _y, float _z )
877 mVec128 = (vec_float4){ _x, _y, _z, 0.0f };
// Constructs from a Vector3 by copying its whole quadword (all four lanes).
880 inline Point3::Point3( Vector3 vec )
882 mVec128 = vec.get128();
// Constructs with every lane of mVec128 (including the fourth) set to `scalar`.
885 inline Point3::Point3( float scalar )
887 mVec128 = spu_splats( scalar );
// Constructs from a raw quadword.
// NOTE(review): the body is not visible in this listing; presumably it stores
// vf4 in mVec128 -- TODO confirm.
890 inline Point3::Point3( vec_float4 vf4 )
// Linear interpolation between two points: pnt0 + (pnt1 - pnt0) * t. t is not
// clamped, so values outside [0, 1] extrapolate.
895 inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
897 return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
// Accessor for the underlying quadword.
// NOTE(review): the body is not visible in this listing; presumably it returns
// mVec128 -- TODO confirm.
900 inline vec_float4 Point3::get128( ) const
// Merges pnt's x, y, z into the quadword at *quad while keeping the bytes of
// the existing quadword selected by spu_maskb(0x000f) (presumably the fourth
// word).
// NOTE(review): the write-back of dstVec to *quad is not visible in this
// listing.
905 inline void storeXYZ( Point3 pnt, vec_float4 * quad )
907 vec_float4 dstVec = *quad;
908 vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
909 dstVec = spu_sel(pnt.get128(), dstVec, mask);
// Unpacks four points from three consecutive quadwords holding twelve tightly
// packed floats (x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3, as the local names
// indicate). threeQuads must point at three readable quadwords.
913 inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
915 vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
916 xyzx = threeQuads[0];
917 yzxy = threeQuads[1];
918 zxyz = threeQuads[2];
// Point 1 = word W of quad 0 + words A,B,C of quad 1; point 2 = words Z,W of
// quad 1 + words A,B of quad 2; point 3 = quad 2 rotated left by one word.
919 xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
920 xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
921 xyz3 = spu_rlqwbyte( zxyz, 4 );
// The fourth lane of each output holds leftover neighbouring data, not zero.
922 pnt0 = Point3( xyzx );
923 pnt1 = Point3( xyz1 );
924 pnt2 = Point3( xyz2 );
925 pnt3 = Point3( xyz3 );
// Packs the x, y, z of four points into three consecutive quadwords (twelve
// floats, no padding). threeQuads must point at three writable quadwords.
928 inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
930 vec_float4 xyzx, yzxy, zxyz;
// quad 0 = pnt0.xyz + pnt1.x; quad 1 = pnt1.yz + pnt2.xy; quad 2 = pnt2.z + pnt3.xyz.
931 xyzx = spu_shuffle( pnt0.get128(), pnt1.get128(), _VECTORMATH_SHUF_XYZA );
932 yzxy = spu_shuffle( pnt1.get128(), pnt2.get128(), _VECTORMATH_SHUF_YZAB );
933 zxyz = spu_shuffle( pnt2.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZABC );
934 threeQuads[0] = xyzx;
935 threeQuads[1] = yzxy;
936 threeQuads[2] = zxyz;
// Stores eight points as 24 packed half floats in three vec_ushort8 quads.
// The points are first packed to floats with storeXYZArray (four per call),
// then consecutive pairs of float quads are converted with
// _vmath2VfToHalfFloats.
// NOTE(review): the declarations of xyz0 and xyz1 are not visible in this
// listing; they are indexed 0..2, so presumably vec_float4[3] each.
939 inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
943 storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
944 storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
945 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
946 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
947 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
// Copy assignment: copies the whole quadword from `pnt`.
// NOTE(review): the return statement is not visible in this listing.
950 inline Point3 & Point3::operator =( Point3 pnt )
952 mVec128 = pnt.mVec128;
// Sets the x element (lane 0) to _x.
// NOTE(review): the return statement is not visible in this listing.
956 inline Point3 & Point3::setX( float _x )
958 mVec128 = spu_insert( _x, mVec128, 0 );
// Returns the x element (lane 0).
962 inline float Point3::getX( ) const
964 return spu_extract( mVec128, 0 );
// Sets the y element (lane 1) to _y.
// NOTE(review): the return statement is not visible in this listing.
967 inline Point3 & Point3::setY( float _y )
969 mVec128 = spu_insert( _y, mVec128, 1 );
// Returns the y element (lane 1).
973 inline float Point3::getY( ) const
975 return spu_extract( mVec128, 1 );
// Sets the z element (lane 2) to _z.
// NOTE(review): the return statement is not visible in this listing.
978 inline Point3 & Point3::setZ( float _z )
980 mVec128 = spu_insert( _z, mVec128, 2 );
// Returns the z element (lane 2).
984 inline float Point3::getZ( ) const
986 return spu_extract( mVec128, 2 );
// Sets the element at lane `idx` to `value`; idx is passed to spu_insert
// unchecked, so nothing here restricts it to 0..2.
// NOTE(review): the return statement is not visible in this listing.
989 inline Point3 & Point3::setElem( int idx, float value )
991 mVec128 = spu_insert( value, mVec128, idx );
// Returns the element at lane `idx`; idx is passed to spu_extract unchecked.
995 inline float Point3::getElem( int idx ) const
997 return spu_extract( mVec128, idx );
// Mutable subscript: returns a VecIdx proxy built from mVec128 and idx.
1000 inline VecIdx Point3::operator []( int idx )
1002 return VecIdx( mVec128, idx );
// Const subscript: returns the element at lane `idx` by value.
1005 inline float Point3::operator []( int idx ) const
1007 return spu_extract( mVec128, idx );
// Point - point: the lane-wise difference, returned as a Vector3.
1010 inline const Vector3 Point3::operator -( Point3 pnt ) const
1012 return Vector3( spu_sub( mVec128, pnt.mVec128 ) );
// Point + vector: the lane-wise sum, returned as a Point3.
1015 inline const Point3 Point3::operator +( Vector3 vec ) const
1017 return Point3( spu_add( mVec128, vec.get128() ) );
// Point - vector: the lane-wise difference, returned as a Point3.
1020 inline const Point3 Point3::operator -( Vector3 vec ) const
1022 return Point3( spu_sub( mVec128, vec.get128() ) );
// In-place translation by `vec`, implemented via operator+(Vector3).
// NOTE(review): the return statement is not visible in this listing.
1025 inline Point3 & Point3::operator +=( Vector3 vec )
1027 *this = *this + vec;
// In-place translation by -vec, implemented via operator-(Vector3).
// NOTE(review): the return statement is not visible in this listing.
1031 inline Point3 & Point3::operator -=( Vector3 vec )
1033 *this = *this - vec;
// Lane-wise product of two points.
1037 inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
1039 return Point3( spu_mul( pnt0.get128(), pnt1.get128() ) );
// Lane-wise quotient pnt0 / pnt1, computed with divf4 over all four lanes.
1042 inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
1044 return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
// Lane-wise reciprocal, computed with recipf4.
1047 inline const Point3 recipPerElem( Point3 pnt )
1049 return Point3( recipf4( pnt.get128() ) );
// Lane-wise square root, computed with sqrtf4.
1052 inline const Point3 sqrtPerElem( Point3 pnt )
1054 return Point3( sqrtf4( pnt.get128() ) );
// Lane-wise reciprocal square root, computed with rsqrtf4.
1057 inline const Point3 rsqrtPerElem( Point3 pnt )
1059 return Point3( rsqrtf4( pnt.get128() ) );
// Lane-wise absolute value, computed with fabsf4.
1062 inline const Point3 absPerElem( Point3 pnt )
1064 return Point3( fabsf4( pnt.get128() ) );
// Lane-wise copysignf4( pnt0, pnt1 ): presumably the magnitude of pnt0 with the
// sign of pnt1, following the usual copysign argument order -- TODO confirm.
1067 inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
1069 return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
// Lane-wise maximum of two points, computed with fmaxf4.
1072 inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
1074 return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
// Returns the largest of the x, y, z elements.
// y and then z are each promoted into lane 0 and max-combined with the running
// result; only lane 0 of `result` is read at the end.
// NOTE(review): the declaration of `result` is not visible in this listing.
1077 inline float maxElem( Point3 pnt )
1080 result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
1081 result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
1082 return spu_extract( result, 0 );
// Lane-wise minimum of two points, computed with fminf4.
1085 inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
1087 return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
// Returns the smallest of the x, y, z elements.
// y and then z are each promoted into lane 0 and min-combined with the running
// result; only lane 0 of `result` is read at the end.
// NOTE(review): the declaration of `result` is not visible in this listing.
1090 inline float minElem( Point3 pnt )
1093 result = fminf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
1094 result = fminf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
1095 return spu_extract( result, 0 );
// Sum of the x, y and z elements, added as scalars in that order.
// NOTE(review): the statement keyword preceding this expression (presumably
// `return`) is not visible in this listing.
1098 inline float sum( Point3 pnt )
1101 spu_extract( pnt.get128(), 0 ) +
1102 spu_extract( pnt.get128(), 1 ) +
1103 spu_extract( pnt.get128(), 2 );
// Uniform scale: multiplies each element of pnt by scaleVal, via mulPerElem
// with a splatted Point3.
1106 inline const Point3 scale( Point3 pnt, float scaleVal )
1108 return mulPerElem( pnt, Point3( scaleVal ) );
// Non-uniform scale: multiplies pnt element-wise by scaleVec, which is first
// converted to a Point3.
1111 inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
1113 return mulPerElem( pnt, Point3( scaleVec ) );
// Returns the 3-component dot product of pnt and unitVec. The parameter name
// indicates the caller is expected to pass a unit-length vector; nothing here
// normalizes it.
1116 inline float projection( Point3 pnt, Vector3 unitVec )
1118 return spu_extract( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
// Squared distance from the origin: lengthSqr of the point viewed as a Vector3.
1121 inline float distSqrFromOrigin( Point3 pnt )
1123 return lengthSqr( Vector3( pnt ) );
// Distance from the origin: length of the point viewed as a Vector3.
1126 inline float distFromOrigin( Point3 pnt )
1128 return length( Vector3( pnt ) );
// Squared distance between two points: lengthSqr of (pnt1 - pnt0).
1131 inline float distSqr( Point3 pnt0, Point3 pnt1 )
1133 return lengthSqr( ( pnt1 - pnt0 ) );
// Distance between two points: length of (pnt1 - pnt0).
1136 inline float dist( Point3 pnt0, Point3 pnt1 )
1138 return length( ( pnt1 - pnt0 ) );
// Branch-free choice between two points: the mask (unsigned int)-(select1 > 0)
// is all ones when select1 is true and zero otherwise, so spu_sel presumably
// yields pnt1 when select1 is true and pnt0 when false.
1141 inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
1143 return Point3( spu_sel( pnt0.get128(), pnt1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
// Debug-only helpers, compiled when _VECTORMATH_DEBUG is defined.
1146 #ifdef _VECTORMATH_DEBUG
// Prints x, y, z of a Point3 to stdout as "( x y z )" followed by a newline.
// The quadword is read back as floats through a union.
1148 inline void print( Point3 pnt )
1150 union { vec_float4 v; float s[4]; } tmp;
1151 tmp.v = pnt.get128();
1152 printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
// Prints a Point3 to stdout as "name: ( x y z )" followed by a newline.
// `name` is passed to printf's %s, so it must be a valid C string.
1155 inline void print( Point3 pnt, const char * name )
1157 union { vec_float4 v; float s[4]; } tmp;
1158 tmp.v = pnt.get128();
1159 printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
1165 } // namespace Vectormath