2 Copyright (C) 2006, 2010 Sony Computer Entertainment Inc.
5 Redistribution and use in source and binary forms,
6 with or without modification, are permitted provided that the
7 following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the Sony Computer Entertainment Inc nor the names
14 of its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
30 #ifndef _VECTORMATH_VEC_AOS_CPP_H
31 #define _VECTORMATH_VEC_AOS_CPP_H
33 //-----------------------------------------------------------------------------
// Permute selectors. Each 32-bit constant packs the four byte indices of one word:
// X..W cover bytes 0x00-0x0f and A..D cover bytes 0x10-0x1f, matching the
// [x,y,z,w] [a,b,c,d] labeling noted on the next line.
35 // for permutes words are labeled [x,y,z,w] [a,b,c,d]
37 #define _VECTORMATH_PERM_X 0x00010203
38 #define _VECTORMATH_PERM_Y 0x04050607
39 #define _VECTORMATH_PERM_Z 0x08090a0b
40 #define _VECTORMATH_PERM_W 0x0c0d0e0f
41 #define _VECTORMATH_PERM_A 0x10111213
42 #define _VECTORMATH_PERM_B 0x14151617
43 #define _VECTORMATH_PERM_C 0x18191a1b
44 #define _VECTORMATH_PERM_D 0x1c1d1e1f
// Composite 16-byte permute patterns; the macro suffix names the source word of each
// result lane. vec_uchar16 / vec_uint4 are declared outside this excerpt.
45 #define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
46 #define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
47 #define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
48 #define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
49 #define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
50 #define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
51 #define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
// Lane masks: all bits set in exactly one of the four 32-bit lanes.
52 #define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
53 #define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
54 #define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
55 #define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
// Unit basis vectors; _mm_setr_ps takes its arguments in x, y, z, w lane order.
56 #define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
57 #define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
58 #define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
59 #define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }
// Threshold the slerp routines compare the cosine of the angle against when
// choosing between their two sets of interpolation weights.
60 #define _VECTORMATH_SLERP_TOL 0.999f
61 //_VECTORMATH_SLERP_TOLF
63 //-----------------------------------------------------------------------------
// Guard so the internal helper functions that follow are only defined once.
66 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
67 #define _VECTORMATH_INTERNAL_FUNCTIONS
// Shuffle wrapper that takes lane selectors in x, y, z, w order; _MM_SHUFFLE wants
// them highest lane first, so the arguments are passed through in reverse.
69 #define _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps(a, b, _MM_SHUFFLE(immw, immz, immy, immx))
70 static __forceinline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
72 __m128 result = _mm_mul_ps( vec0, vec1);
73 return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) );
76 static __forceinline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
78 __m128 result = _mm_mul_ps(vec0, vec1);
79 return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)),
80 _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)),
81 _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3)))));
// Cross product of the xyz lanes of vec0 and vec1, built from rotated copies.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `result` is returned — confirm against the full file.
84 static __forceinline __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
86 __m128 tmp0, tmp1, tmp2, tmp3, result;
// tmp0 = vec0 as (y, z, x, w); tmp1 = vec1 as (z, x, y, w)
87 tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) );
88 tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) );
// tmp2 = vec0 as (z, x, y, w); tmp3 = vec1 as (y, z, x, w)
89 tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) );
90 tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) );
91 result = vec_mul( tmp0, tmp1 );
// presumably result - tmp2*tmp3 (AltiVec nmsub convention); vec_nmsub is defined elsewhere — confirm
92 result = vec_nmsub( tmp2, tmp3, result );
// Converts four floats to half-float bit patterns, one per 32-bit lane.
// NOTE(review): several lines of this function (inner lines 97-99 and 120-123) are
// missing from this excerpt, including the declaration of bexp and whatever separates
// the conversion code from the final `return _mm_setzero_ps();`. Presumably a
// preprocessor conditional disables one of the two paths — confirm in the full file.
96 static __forceinline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
100 vec_uint4 mant, sign, hfloat;
101 vec_uint4 notZero, isInf;
// Half-float field constants: exponent-all-ones (infinity), 10-bit mantissa mask, sign bit.
102 const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
103 const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
104 const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
// Shift the float's sign down by 16 and its mantissa down by 13; take the 8-bit exponent field.
106 sign = vec_sr((vec_uint4)v, (vec_uint4)16);
107 mant = vec_sr((vec_uint4)v, (vec_uint4)13);
108 bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
// Exponents of 112 or less are flushed to zero below; above 142 are forced to infinity.
110 notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
111 isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
// NOTE(review): _mm_add_ps is a float add, but bexp is treated as integer exponent bits
// everywhere else in this function — looks like it should be an integer add; confirm.
113 bexp = _mm_add_ps(bexp, (vec_int4)-112);
114 bexp = vec_sl(bexp, (vec_uint4)10);
// Merge mantissa into the exponent word, then apply the zero / infinity / sign selections.
116 hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
117 hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
118 hfloat = vec_sel(hfloat, hfloatInf, isInf);
119 hfloat = vec_sel(hfloat, sign, mergeSign);
124 return _mm_setzero_ps();
// Converts two float vectors to eight packed half floats: each input goes through
// _vmathVfToHalfFloatsUnpacked and the `pack` permute then gathers bytes 2-3 of every
// 32-bit lane from both results.
// NOTE(review): lines between the two return statements are missing from this excerpt;
// presumably a preprocessor conditional selects one of them — confirm.
128 static __forceinline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
131 vec_uint4 hfloat_u, hfloat_v;
132 const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
133 hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
134 hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
135 return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
138 return _mm_setzero_si128();
// Copies one lane (index `slot`) from src into dst.
// NOTE(review): the declarations of `d` and `s` and the return statement are not visible
// in this excerpt; presumably both are float-array views of dst/src and d is returned.
143 static __forceinline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
149 d.f[slot] = s.f[slot];
// Writes `scalar` into lane `slot` of `vec` by viewing the vector's storage as float[4].
// `vec` must be an lvalue. The expansion and every argument are parenthesized so the
// macro also behaves as a single expression inside larger expressions.
#define _vmathVfSetElement(vec, scalar, slot) (((float *)&(vec))[(slot)] = (scalar))
155 static __forceinline __m128 _vmathVfSplatScalar(float scalar)
157 return _mm_set1_ps(scalar);
162 namespace Vectormath {
// Read access to the lane a VecIdx refers to (members `ref` and `i`). Which conversion
// is compiled depends on _VECTORMATH_NO_SCALAR_CAST.
// NOTE(review): the #else/#endif lines and the braces are not visible in this excerpt.
166 #ifdef _VECTORMATH_NO_SCALAR_CAST
167 __forceinline VecIdx::operator floatInVec() const
169 return floatInVec(ref, i);
172 __forceinline float VecIdx::getAsFloat() const
174 __forceinline VecIdx::operator float() const
// Views the referenced vector as a float array and reads element i.
177 return ((float *)&ref)[i];
// Stores a plain float into lane i of the referenced vector.
// (The return statement is not visible in this excerpt.)
180 __forceinline float VecIdx::operator =( float scalar )
182 _vmathVfSetElement(ref, scalar, i);
// Stores lane i of the floatInVec's register into lane i of the referenced vector.
// (The return statement is not visible in this excerpt.)
186 __forceinline floatInVec VecIdx::operator =( const floatInVec &scalar )
188 ref = _vmathVfInsert(ref, scalar.get128(), i);
// Assignment from another VecIdx: wraps its lane in a floatInVec and reuses the overload above.
192 __forceinline floatInVec VecIdx::operator =( const VecIdx& scalar )
194 return *this = floatInVec(scalar.ref, scalar.i);
// Compound multiply/divide on the referenced lane. The float overloads wrap the scalar
// in a floatInVec and forward to the floatInVec overloads.
197 __forceinline floatInVec VecIdx::operator *=( float scalar )
199 return *this *= floatInVec(scalar);
// Reads the lane, multiplies, and writes it back through operator=.
202 __forceinline floatInVec VecIdx::operator *=( const floatInVec &scalar )
204 return *this = floatInVec(ref, i) * scalar;
207 __forceinline floatInVec VecIdx::operator /=( float scalar )
209 return *this /= floatInVec(scalar);
212 inline floatInVec VecIdx::operator /=( const floatInVec &scalar )
214 return *this = floatInVec(ref, i) / scalar;
// Compound add/subtract on the referenced lane. The float overloads wrap the scalar in
// a floatInVec; the floatInVec overloads read the lane, combine, and assign back.
217 __forceinline floatInVec VecIdx::operator +=( float scalar )
219 return *this += floatInVec(scalar);
222 __forceinline floatInVec VecIdx::operator +=( const floatInVec &scalar )
224 return *this = floatInVec(ref, i) + scalar;
227 __forceinline floatInVec VecIdx::operator -=( float scalar )
229 return *this -= floatInVec(scalar);
232 __forceinline floatInVec VecIdx::operator -=( const floatInVec &scalar )
234 return *this = floatInVec(ref, i) - scalar;
// Copy constructor: transfers the 128-bit register through get128/set128.
237 __forceinline Vector3::Vector3(const Vector3& vec)
239 set128(vec.get128());
// NOTE(review): the body of set128 is not visible in this excerpt.
242 __forceinline void Vector3::set128(vec_float4 vec)
// Builds from three floats; the fourth lane is set to 0.0f.
248 __forceinline Vector3::Vector3( float _x, float _y, float _z )
250 mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
// Builds from three floatInVec values by interleaving their low lanes:
// unpacklo(x, z) gives (x, z, ..); unpacklo of that with y gives (x, y, z, ..).
253 __forceinline Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
255 __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
256 mVec128 = _mm_unpacklo_ps( xz, _y.get128() );
// Takes over a Point3's register unchanged.
259 __forceinline Vector3::Vector3( const Point3 &pnt )
261 mVec128 = pnt.get128();
// Scalar constructors: adopt the register of a floatInVec (presumably the scalar in
// every lane — confirm in floatInVec).
264 __forceinline Vector3::Vector3( float scalar )
266 mVec128 = floatInVec(scalar).get128();
269 __forceinline Vector3::Vector3( const floatInVec &scalar )
271 mVec128 = scalar.get128();
// NOTE(review): body not visible in this excerpt.
274 __forceinline Vector3::Vector3( __m128 vf4 )
// Unit axis factories built from the _VECTORMATH_UNIT_* lane constants.
279 __forceinline const Vector3 Vector3::xAxis( )
281 return Vector3( _VECTORMATH_UNIT_1000 );
284 __forceinline const Vector3 Vector3::yAxis( )
286 return Vector3( _VECTORMATH_UNIT_0100 );
289 __forceinline const Vector3 Vector3::zAxis( )
291 return Vector3( _VECTORMATH_UNIT_0010 );
// Linear interpolation; the float overload forwards to the floatInVec overload.
294 __forceinline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
296 return lerp( floatInVec(t), vec0, vec1 );
// vec0 + (vec1 - vec0) * t; t is not clamped here.
299 __forceinline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
301 return ( vec0 + ( ( vec1 - vec0 ) * t ) );
// Float convenience overload of slerp; forwards to the floatInVec overload.
304 __forceinline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
306 return slerp( floatInVec(t), unitVec0, unitVec1 );
// Spherical interpolation between two vectors the parameter names describe as unit length.
// NOTE(review): the statement that assigns tttt (inner line 315) is missing from this
// excerpt; presumably tttt = t.get128() — confirm against the full file.
309 __forceinline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
311 __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
312 cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
// Mask is set where cosAngle is below the tolerance.
313 __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
314 angle = acosf4( cosAngle );
316 oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
317 angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
318 angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t
319 angles = _mm_mul_ps( angles, angle );
320 sines = sinf4( angles );
// Divide every sine by sin(angle), which sits in lane 0.
321 scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
// Choose between the linear weights (1-t, t) and the sine ratios according to the mask
// (presumably vec_sel takes its second argument where the mask is set — confirm).
322 scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
323 scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
// presumably unitVec0*scale0 + unitVec1*scale1 (vec_madd is defined elsewhere)
324 return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
// NOTE(review): body of get128 is not visible in this excerpt.
327 __forceinline __m128 Vector3::get128( ) const
// Loads a Vector3 from a float pointer.
// NOTE(review): only the USE_SSE2_LDDQU branch is fully visible; the alternative branch
// (inner lines 336-341) that fills `fl` is missing from this excerpt.
332 __forceinline void loadXYZ(Vector3& vec, const float* fptr)
334 #ifdef USE_SSE2_LDDQU
// Unaligned 128-bit load starting at fptr, i.e. it reads four floats, not three.
335 vec = Vector3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
342 vec = Vector3( fl.m128);
343 #endif //USE_SSE2_LDDQU
// Merges vec's x, y, z with the w lane already stored at *quad.
// NOTE(review): the statement that writes dstVec back to *quad is not visible in this
// excerpt; presumably `*quad = dstVec;` follows.
347 __forceinline void storeXYZ( const Vector3 &vec, __m128 * quad )
349 __m128 dstVec = *quad;
// Mask with only the w lane set, so that lane comes from the existing quad contents
// (assuming vec_sel takes its second argument where mask bits are set).
350 __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
351 dstVec = vec_sel(vec.get128(), dstVec, sw);
// Writes x, y, z to three consecutive floats; nothing beyond fptr[2] is touched.
355 __forceinline void storeXYZ(const Vector3& vec, float* fptr)
357 fptr[0] = vec.getX();
358 fptr[1] = vec.getY();
359 fptr[2] = vec.getZ();
363 __forceinline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
365 const float *quads = (float *)threeQuads;
366 vec0 = Vector3( _mm_load_ps(quads) );
367 vec1 = Vector3( _mm_loadu_ps(quads + 3) );
368 vec2 = Vector3( _mm_loadu_ps(quads + 6) );
369 vec3 = Vector3( _mm_loadu_ps(quads + 9) );
372 __forceinline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
374 __m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
375 __m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
376 __declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
377 __declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
378 threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw );
379 threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
380 threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
// Stores eight Vector3s as packed half floats in three output quadwords: the vectors are
// first packed four at a time with storeXYZArray, then each pair of float quadwords is
// converted with _vmath2VfToHalfFloats.
// NOTE(review): the declarations of xyz0 and xyz1 (inner lines 384-388) are not visible
// in this excerpt; presumably each is an array of three __m128.
383 __forceinline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
389 storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
390 storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
391 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
392 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
393 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
// Assignment and x/y/z accessors. Lanes 0, 1, 2 hold x, y, z.
// NOTE(review): the trailing return statement of operator= and of every setter is not
// visible in this excerpt; presumably each ends with `return *this;`.
397 __forceinline Vector3 & Vector3::operator =( const Vector3 &vec )
399 mVec128 = vec.mVec128;
// float setters write the lane through the _vmathVfSetElement macro; floatInVec setters
// copy the matching lane of the argument's register with _vmathVfInsert.
403 __forceinline Vector3 & Vector3::setX( float _x )
405 _vmathVfSetElement(mVec128, _x, 0);
409 __forceinline Vector3 & Vector3::setX( const floatInVec &_x )
411 mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
// Getters wrap the register and lane index in a floatInVec.
415 __forceinline const floatInVec Vector3::getX( ) const
417 return floatInVec( mVec128, 0 );
420 __forceinline Vector3 & Vector3::setY( float _y )
422 _vmathVfSetElement(mVec128, _y, 1);
426 __forceinline Vector3 & Vector3::setY( const floatInVec &_y )
428 mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
432 __forceinline const floatInVec Vector3::getY( ) const
434 return floatInVec( mVec128, 1 );
437 __forceinline Vector3 & Vector3::setZ( float _z )
439 _vmathVfSetElement(mVec128, _z, 2);
443 __forceinline Vector3 & Vector3::setZ( const floatInVec &_z )
445 mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
449 __forceinline const floatInVec Vector3::getZ( ) const
451 return floatInVec( mVec128, 2 );
// Indexed element access. idx is used directly as a lane index; no range check is
// performed in the visible code.
// NOTE(review): the setters' return statements are not visible in this excerpt.
454 __forceinline Vector3 & Vector3::setElem( int idx, float value )
456 _vmathVfSetElement(mVec128, value, idx);
460 __forceinline Vector3 & Vector3::setElem( int idx, const floatInVec &value )
462 mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
466 __forceinline const floatInVec Vector3::getElem( int idx ) const
468 return floatInVec( mVec128, idx );
// Non-const subscript returns a VecIdx proxy bound to this vector's register and idx.
471 __forceinline VecIdx Vector3::operator []( int idx )
473 return VecIdx( mVec128, idx );
// Const subscript returns the lane by value.
476 __forceinline const floatInVec Vector3::operator []( int idx ) const
478 return floatInVec( mVec128, idx );
// Arithmetic operators. Binary forms operate on all four lanes of the register; the
// float overloads wrap the scalar in a floatInVec and forward.
481 __forceinline const Vector3 Vector3::operator +( const Vector3 &vec ) const
483 return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) );
486 __forceinline const Vector3 Vector3::operator -( const Vector3 &vec ) const
488 return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) );
// Vector plus point yields a Point3.
491 __forceinline const Point3 Vector3::operator +( const Point3 &pnt ) const
493 return Point3( _mm_add_ps( mVec128, pnt.get128() ) );
496 __forceinline const Vector3 Vector3::operator *( float scalar ) const
498 return *this * floatInVec(scalar);
// Lane-wise multiply by the floatInVec's register.
501 __forceinline const Vector3 Vector3::operator *( const floatInVec &scalar ) const
503 return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) );
// NOTE(review): bodies of operator+= and operator-= are not visible in this excerpt.
506 __forceinline Vector3 & Vector3::operator +=( const Vector3 &vec )
512 __forceinline Vector3 & Vector3::operator -=( const Vector3 &vec )
// Compound forms are written in terms of the binary operators
// (their return statements are not visible in this excerpt).
518 __forceinline Vector3 & Vector3::operator *=( float scalar )
520 *this = *this * scalar;
524 __forceinline Vector3 & Vector3::operator *=( const floatInVec &scalar )
526 *this = *this * scalar;
530 __forceinline const Vector3 Vector3::operator /( float scalar ) const
532 return *this / floatInVec(scalar);
// Full-precision lane-wise divide (_mm_div_ps).
535 __forceinline const Vector3 Vector3::operator /( const floatInVec &scalar ) const
537 return Vector3( _mm_div_ps( mVec128, scalar.get128() ) );
540 __forceinline Vector3 & Vector3::operator /=( float scalar )
542 *this = *this / scalar;
546 __forceinline Vector3 & Vector3::operator /=( const floatInVec &scalar )
548 *this = *this / scalar;
// Negation by flipping the sign bit of every lane (XOR with 0x80000000); the commented
// line shows the earlier 0 - x formulation.
552 __forceinline const Vector3 Vector3::operator -( ) const
554 //return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
556 __declspec(align(16)) static const int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
557 __m128 NEG_MASK = SSEFloat(*(const vec_float4*)array).vf;
558 return Vector3(_mm_xor_ps(get128(),NEG_MASK));
// scalar * vector: the float overload forwards to the floatInVec overload.
561 __forceinline const Vector3 operator *( float scalar, const Vector3 &vec )
563 return floatInVec(scalar) * vec;
// NOTE(review): body not visible in this excerpt.
566 __forceinline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
// Per-element operations and horizontal reductions for Vector3.
571 __forceinline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
573 return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
576 __forceinline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
578 return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) );
// Uses the hardware reciprocal estimate (_mm_rcp_ps), which is an approximation rather
// than a full-precision 1/x.
581 __forceinline const Vector3 recipPerElem( const Vector3 &vec )
583 return Vector3( _mm_rcp_ps( vec.get128() ) );
// fabsf4 is defined outside this excerpt.
586 __forceinline const Vector3 absPerElem( const Vector3 &vec )
588 return Vector3( fabsf4( vec.get128() ) );
// Magnitude bits (mask 0x7fffffff) come from vec0, the remaining sign bit from vec1.
// presumably toM128 broadcasts the constant to every lane — confirm.
591 __forceinline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
593 __m128 vmask = toM128(0x7fffffff);
594 return Vector3( _mm_or_ps(
595 _mm_and_ps ( vmask, vec0.get128() ), // Value
596 _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs
599 __forceinline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
601 return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) );
// Reductions below combine lanes 0, 1 and 2 only; lane 3 is ignored.
604 __forceinline const floatInVec maxElem( const Vector3 &vec )
606 return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
609 __forceinline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
611 return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) );
614 __forceinline const floatInVec minElem( const Vector3 &vec )
616 return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
619 __forceinline const floatInVec sum( const Vector3 &vec )
621 return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
// Geometric functions for Vector3, all built on the three-lane dot product helper.
624 __forceinline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
626 return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
629 __forceinline const floatInVec lengthSqr( const Vector3 &vec )
631 return floatInVec( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
634 __forceinline const floatInVec length( const Vector3 &vec )
636 return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
// Scales by the hardware reciprocal-square-root estimate (_mm_rsqrt_ps); no special
// handling of a zero-length input is visible.
640 __forceinline const Vector3 normalizeApprox( const Vector3 &vec )
642 return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
// Same as above but with newtonrapson_rsqrt4 (defined elsewhere; presumably a refined
// reciprocal square root — confirm).
645 __forceinline const Vector3 normalize( const Vector3 &vec )
647 return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
650 __forceinline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
652 return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
// bool overload: wraps the flag in a boolInVec and forwards to a boolInVec overload.
655 __forceinline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
657 return select( vec0, vec1, boolInVec(select1) );
// NOTE(review): this boolInVec overload takes and returns Vector4 although it sits in
// the Vector3 section; no Vector3 boolInVec overload is visible in this excerpt, so the
// call above depends on one defined elsewhere — confirm.
661 __forceinline const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1)
663 return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128()));
666 #ifdef _VECTORMATH_DEBUG
668 __forceinline void print( const Vector3 &vec )
670 union { __m128 v; float s[4]; } tmp;
671 tmp.v = vec.get128();
672 printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
675 __forceinline void print( const Vector3 &vec, const char * name )
677 union { __m128 v; float s[4]; } tmp;
678 tmp.v = vec.get128();
679 printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
// Vector4 constructors.
684 __forceinline Vector4::Vector4( float _x, float _y, float _z, float _w )
686 mVec128 = _mm_setr_ps(_x, _y, _z, _w);
// Interleaves the low lanes: unpacklo(x, z) = (x, z, ..), unpacklo(y, w) = (y, w, ..),
// and unpacklo of those two gives (x, y, z, w).
689 __forceinline Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
691 mVec128 = _mm_unpacklo_ps(
692 _mm_unpacklo_ps( _x.get128(), _z.get128() ),
693 _mm_unpacklo_ps( _y.get128(), _w.get128() ) );
// xyz from a Vector3, with lane 3 overwritten by _w.
696 __forceinline Vector4::Vector4( const Vector3 &xyz, float _w )
698 mVec128 = xyz.get128();
699 _vmathVfSetElement(mVec128, _w, 3);
702 __forceinline Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w )
704 mVec128 = xyz.get128();
705 mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
// From a Vector3: lane 3 is set to 0.
708 __forceinline Vector4::Vector4( const Vector3 &vec )
710 mVec128 = vec.get128();
711 mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3);
// From a Point3: lane 3 is set to 1.
714 __forceinline Vector4::Vector4( const Point3 &pnt )
716 mVec128 = pnt.get128();
717 mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3);
// From a Quat: the register is taken over unchanged.
720 __forceinline Vector4::Vector4( const Quat &quat )
722 mVec128 = quat.get128();
// Scalar constructors adopt the register of a floatInVec (presumably the scalar in
// every lane — confirm in floatInVec).
725 __forceinline Vector4::Vector4( float scalar )
727 mVec128 = floatInVec(scalar).get128();
730 __forceinline Vector4::Vector4( const floatInVec &scalar )
732 mVec128 = scalar.get128();
// NOTE(review): body not visible in this excerpt.
735 __forceinline Vector4::Vector4( __m128 vf4 )
// Unit axis factories built from the _VECTORMATH_UNIT_* lane constants.
740 __forceinline const Vector4 Vector4::xAxis( )
742 return Vector4( _VECTORMATH_UNIT_1000 );
745 __forceinline const Vector4 Vector4::yAxis( )
747 return Vector4( _VECTORMATH_UNIT_0100 );
750 __forceinline const Vector4 Vector4::zAxis( )
752 return Vector4( _VECTORMATH_UNIT_0010 );
755 __forceinline const Vector4 Vector4::wAxis( )
757 return Vector4( _VECTORMATH_UNIT_0001 );
// Linear interpolation; the float overload forwards to the floatInVec overload.
760 __forceinline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 )
762 return lerp( floatInVec(t), vec0, vec1 );
// vec0 + (vec1 - vec0) * t; t is not clamped here.
765 __forceinline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 )
767 return ( vec0 + ( ( vec1 - vec0 ) * t ) );
// Float convenience overload of slerp; forwards to the floatInVec overload.
770 __forceinline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
772 return slerp( floatInVec(t), unitVec0, unitVec1 );
// Spherical interpolation between two vectors the parameter names describe as unit
// length, using the four-lane dot product.
// NOTE(review): the statement that assigns tttt (inner line 781) is missing from this
// excerpt; presumably tttt = t.get128() — confirm against the full file.
775 __forceinline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
777 __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
778 cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
// Mask is set where cosAngle is below the tolerance.
779 __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
780 angle = acosf4( cosAngle );
782 oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
783 angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
784 angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t
785 angles = _mm_mul_ps( angles, angle );
786 sines = sinf4( angles );
// Divide every sine by sin(angle), which sits in lane 0.
787 scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
// Choose between the linear weights (1-t, t) and the sine ratios according to the mask
// (presumably vec_sel takes its second argument where the mask is set — confirm).
788 scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
789 scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
// presumably unitVec0*scale0 + unitVec1*scale1 (vec_madd is defined elsewhere)
790 return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
// NOTE(review): body of get128 is not visible in this excerpt.
793 __forceinline __m128 Vector4::get128( ) const
// Converts four Vector4s to half floats, two vectors per output quadword.
798 __forceinline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
800 twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
801 twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
// Assignment copies the register.
// (Return statements of operator= and setXYZ are not visible in this excerpt.)
804 __forceinline Vector4 & Vector4::operator =( const Vector4 &vec )
806 mVec128 = vec.mVec128;
// Replaces x, y, z with those of vec; the mask has only the w lane set so that lane is
// kept from the current value (assuming vec_sel takes its second argument where mask
// bits are set).
810 __forceinline Vector4 & Vector4::setXYZ( const Vector3 &vec )
812 __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};
813 mVec128 = vec_sel( vec.get128(), mVec128, sw );
// Hands the whole register to Vector3; lane 3 is passed along unchanged.
817 __forceinline const Vector3 Vector4::getXYZ( ) const
819 return Vector3( mVec128 );
// Element accessors. Lanes 0..3 hold x, y, z, w. float setters write the lane through
// _vmathVfSetElement; floatInVec setters copy the matching lane with _vmathVfInsert;
// getters wrap the register and lane index in a floatInVec.
// NOTE(review): the setters' return statements are not visible in this excerpt.
822 __forceinline Vector4 & Vector4::setX( float _x )
824 _vmathVfSetElement(mVec128, _x, 0);
828 __forceinline Vector4 & Vector4::setX( const floatInVec &_x )
830 mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
834 __forceinline const floatInVec Vector4::getX( ) const
836 return floatInVec( mVec128, 0 );
839 __forceinline Vector4 & Vector4::setY( float _y )
841 _vmathVfSetElement(mVec128, _y, 1);
845 __forceinline Vector4 & Vector4::setY( const floatInVec &_y )
847 mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
851 __forceinline const floatInVec Vector4::getY( ) const
853 return floatInVec( mVec128, 1 );
856 __forceinline Vector4 & Vector4::setZ( float _z )
858 _vmathVfSetElement(mVec128, _z, 2);
862 __forceinline Vector4 & Vector4::setZ( const floatInVec &_z )
864 mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
868 __forceinline const floatInVec Vector4::getZ( ) const
870 return floatInVec( mVec128, 2 );
873 __forceinline Vector4 & Vector4::setW( float _w )
875 _vmathVfSetElement(mVec128, _w, 3);
879 __forceinline Vector4 & Vector4::setW( const floatInVec &_w )
881 mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
885 __forceinline const floatInVec Vector4::getW( ) const
887 return floatInVec( mVec128, 3 );
// Indexed access: idx is used directly as the lane index, with no range check in the
// visible code.
890 __forceinline Vector4 & Vector4::setElem( int idx, float value )
892 _vmathVfSetElement(mVec128, value, idx);
896 __forceinline Vector4 & Vector4::setElem( int idx, const floatInVec &value )
898 mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
902 __forceinline const floatInVec Vector4::getElem( int idx ) const
904 return floatInVec( mVec128, idx );
// Non-const subscript returns a VecIdx proxy; const subscript returns the lane by value.
907 __forceinline VecIdx Vector4::operator []( int idx )
909 return VecIdx( mVec128, idx );
912 __forceinline const floatInVec Vector4::operator []( int idx ) const
914 return floatInVec( mVec128, idx );
// Arithmetic operators; the float overloads wrap the scalar in a floatInVec and forward.
917 __forceinline const Vector4 Vector4::operator +( const Vector4 &vec ) const
919 return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) );
922 __forceinline const Vector4 Vector4::operator -( const Vector4 &vec ) const
924 return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) );
927 __forceinline const Vector4 Vector4::operator *( float scalar ) const
929 return *this * floatInVec(scalar);
// Lane-wise multiply by the floatInVec's register.
932 __forceinline const Vector4 Vector4::operator *( const floatInVec &scalar ) const
934 return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) );
// NOTE(review): bodies of operator+= and operator-= are not visible in this excerpt.
937 __forceinline Vector4 & Vector4::operator +=( const Vector4 &vec )
943 __forceinline Vector4 & Vector4::operator -=( const Vector4 &vec )
// Compound forms are written in terms of the binary operators
// (their return statements are not visible in this excerpt).
949 __forceinline Vector4 & Vector4::operator *=( float scalar )
951 *this = *this * scalar;
955 __forceinline Vector4 & Vector4::operator *=( const floatInVec &scalar )
957 *this = *this * scalar;
961 __forceinline const Vector4 Vector4::operator /( float scalar ) const
963 return *this / floatInVec(scalar);
// Full-precision lane-wise divide (_mm_div_ps).
966 __forceinline const Vector4 Vector4::operator /( const floatInVec &scalar ) const
968 return Vector4( _mm_div_ps( mVec128, scalar.get128() ) );
971 __forceinline Vector4 & Vector4::operator /=( float scalar )
973 *this = *this / scalar;
977 __forceinline Vector4 & Vector4::operator /=( const floatInVec &scalar )
979 *this = *this / scalar;
983 __forceinline const Vector4 Vector4::operator -( ) const
985 return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
// scalar * vector: the float overload forwards to the floatInVec overload.
988 __forceinline const Vector4 operator *( float scalar, const Vector4 &vec )
990 return floatInVec(scalar) * vec;
// NOTE(review): body not visible in this excerpt.
993 __forceinline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec )
// Per-element operations and horizontal reductions for Vector4.
998 __forceinline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1000 return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
1003 __forceinline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1005 return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) );
// Uses the hardware reciprocal estimate (_mm_rcp_ps), which is an approximation rather
// than a full-precision 1/x.
1008 __forceinline const Vector4 recipPerElem( const Vector4 &vec )
1010 return Vector4( _mm_rcp_ps( vec.get128() ) );
// fabsf4 is defined outside this excerpt.
1013 __forceinline const Vector4 absPerElem( const Vector4 &vec )
1015 return Vector4( fabsf4( vec.get128() ) );
// Magnitude bits (mask 0x7fffffff) come from vec0, the remaining sign bit from vec1.
// presumably toM128 broadcasts the constant to every lane — confirm.
1018 __forceinline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1020 __m128 vmask = toM128(0x7fffffff);
1021 return Vector4( _mm_or_ps(
1022 _mm_and_ps ( vmask, vec0.get128() ), // Value
1023 _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs
1026 __forceinline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1028 return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) );
// Reductions below combine all four lanes pairwise: (0 op 1) op (2 op 3).
1031 __forceinline const floatInVec maxElem( const Vector4 &vec )
1033 return floatInVec( _mm_max_ps(
1034 _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
1035 _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
1038 __forceinline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1040 return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) );
1043 __forceinline const floatInVec minElem( const Vector4 &vec )
1045 return floatInVec( _mm_min_ps(
1046 _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
1047 _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
1050 __forceinline const floatInVec sum( const Vector4 &vec )
1052 return floatInVec( _mm_add_ps(
1053 _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
1054 _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
// Geometric functions for Vector4, all built on the four-lane dot product helper.
1057 __forceinline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 )
1059 return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
1062 __forceinline const floatInVec lengthSqr( const Vector4 &vec )
1064 return floatInVec( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
1067 __forceinline const floatInVec length( const Vector4 &vec )
1069 return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
// Scales by the hardware reciprocal-square-root estimate (_mm_rsqrt_ps); no special
// handling of a zero-length input is visible.
1072 __forceinline const Vector4 normalizeApprox( const Vector4 &vec )
1074 return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
// Same as above but with newtonrapson_rsqrt4 (defined elsewhere; presumably a refined
// reciprocal square root — confirm).
1077 __forceinline const Vector4 normalize( const Vector4 &vec )
1079 return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
// bool overload: wraps the flag in a boolInVec and forwards to the boolInVec overload.
1082 __forceinline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
1084 return select( vec0, vec1, boolInVec(select1) );
1088 #ifdef _VECTORMATH_DEBUG
1090 __forceinline void print( const Vector4 &vec )
1092 union { __m128 v; float s[4]; } tmp;
1093 tmp.v = vec.get128();
1094 printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
1097 __forceinline void print( const Vector4 &vec, const char * name )
1099 union { __m128 v; float s[4]; } tmp;
1100 tmp.v = vec.get128();
1101 printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
// Point3 constructors.
// Builds from three floats; the fourth lane is set to 0.0f.
1106 __forceinline Point3::Point3( float _x, float _y, float _z )
1108 mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
// Interleaves the low lanes: unpacklo(x, z) = (x, z, ..), then unpacklo with y gives (x, y, z, ..).
1111 __forceinline Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
1113 mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() );
// Takes over a Vector3's register unchanged.
1116 __forceinline Point3::Point3( const Vector3 &vec )
1118 mVec128 = vec.get128();
// Scalar constructors adopt the register of a floatInVec (presumably the scalar in
// every lane — confirm in floatInVec).
1121 __forceinline Point3::Point3( float scalar )
1123 mVec128 = floatInVec(scalar).get128();
1126 __forceinline Point3::Point3( const floatInVec &scalar )
1128 mVec128 = scalar.get128();
// NOTE(review): body not visible in this excerpt.
1131 __forceinline Point3::Point3( __m128 vf4 )
// Linear interpolation between points; the float overload forwards to the floatInVec one.
1136 __forceinline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 )
1138 return lerp( floatInVec(t), pnt0, pnt1 );
// pnt0 + (pnt1 - pnt0) * t; t is not clamped here.
1141 __forceinline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 )
1143 return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
// NOTE(review): body of get128 is not visible in this excerpt.
1146 __forceinline __m128 Point3::get128( ) const
// Merges pnt's x, y, z with the w lane already stored at *quad.
// NOTE(review): the statement that writes dstVec back to *quad is not visible in this
// excerpt; presumably `*quad = dstVec;` follows.
1151 __forceinline void storeXYZ( const Point3 &pnt, __m128 * quad )
1153 __m128 dstVec = *quad;
// Mask with only the w lane set, so that lane comes from the existing quad contents
// (assuming vec_sel takes its second argument where mask bits are set).
1154 __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
1155 dstVec = vec_sel(pnt.get128(), dstVec, sw);
1159 __forceinline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads )
1161 const float *quads = (float *)threeQuads;
1162 pnt0 = Point3( _mm_load_ps(quads) );
1163 pnt1 = Point3( _mm_loadu_ps(quads + 3) );
1164 pnt2 = Point3( _mm_loadu_ps(quads + 6) );
1165 pnt3 = Point3( _mm_loadu_ps(quads + 9) );
1168 __forceinline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads )
1170 __m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
1171 __m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
1172 __declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
1173 __declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
1174 threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw );
1175 threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
1176 threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
// Stores eight points as half-precision floats into three vec_ushort8 quadwords.
// The visible statements pack the points four at a time with storeXYZArray and
// then convert pairs of the resulting quadwords with _vmath2VfToHalfFloats.
// NOTE(review): several lines at the start of this body (including the
// declarations of xyz0 and xyz1) are not visible in this excerpt, so it cannot be
// determined from here whether these statements are active or conditionally
// compiled out -- confirm against the full file.
1179 __forceinline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
// Pack points 0..3 and 4..7 into xyz0 and xyz1.
1184 storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
1185 storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
// Convert the six packed quadwords two at a time.
1186 threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
1187 threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
1188 threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
// Copy assignment: copies the 128-bit value of pnt into this point.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1194 __forceinline Point3 & Point3::operator =( const Point3 &pnt )
1196 mVec128 = pnt.mVec128;
// Sets element 0 (x) of this point from a scalar float via _vmathVfSetElement.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1200 __forceinline Point3 & Point3::setX( float _x )
1202 _vmathVfSetElement(mVec128, _x, 0);
// Sets element 0 (x) of this point from a floatInVec via _vmathVfInsert.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1206 __forceinline Point3 & Point3::setX( const floatInVec &_x )
1208 mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
1212 __forceinline const floatInVec Point3::getX( ) const
1214 return floatInVec( mVec128, 0 );
// Sets element 1 (y) of this point from a scalar float via _vmathVfSetElement.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1217 __forceinline Point3 & Point3::setY( float _y )
1219 _vmathVfSetElement(mVec128, _y, 1);
// Sets element 1 (y) of this point from a floatInVec via _vmathVfInsert.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1223 __forceinline Point3 & Point3::setY( const floatInVec &_y )
1225 mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
1229 __forceinline const floatInVec Point3::getY( ) const
1231 return floatInVec( mVec128, 1 );
// Sets element 2 (z) of this point from a scalar float via _vmathVfSetElement.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1234 __forceinline Point3 & Point3::setZ( float _z )
1236 _vmathVfSetElement(mVec128, _z, 2);
// Sets element 2 (z) of this point from a floatInVec via _vmathVfInsert.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1240 __forceinline Point3 & Point3::setZ( const floatInVec &_z )
1242 mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
1246 __forceinline const floatInVec Point3::getZ( ) const
1248 return floatInVec( mVec128, 2 );
// Sets the element at index idx from a scalar float via _vmathVfSetElement.
// No range check on idx is performed here.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1251 __forceinline Point3 & Point3::setElem( int idx, float value )
1253 _vmathVfSetElement(mVec128, value, idx);
// Sets the element at index idx from a floatInVec via _vmathVfInsert.
// No range check on idx is performed here.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1257 __forceinline Point3 & Point3::setElem( int idx, const floatInVec &value )
1259 mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
1263 __forceinline const floatInVec Point3::getElem( int idx ) const
1265 return floatInVec( mVec128, idx );
1268 __forceinline VecIdx Point3::operator []( int idx )
1270 return VecIdx( mVec128, idx );
1273 __forceinline const floatInVec Point3::operator []( int idx ) const
1275 return floatInVec( mVec128, idx );
1278 __forceinline const Vector3 Point3::operator -( const Point3 &pnt ) const
1280 return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) );
1283 __forceinline const Point3 Point3::operator +( const Vector3 &vec ) const
1285 return Point3( _mm_add_ps( mVec128, vec.get128() ) );
1288 __forceinline const Point3 Point3::operator -( const Vector3 &vec ) const
1290 return Point3( _mm_sub_ps( mVec128, vec.get128() ) );
// In-place Point + Vector: assigns (*this + vec) back to this point.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1293 __forceinline Point3 & Point3::operator +=( const Vector3 &vec )
1295 *this = *this + vec;
// In-place Point - Vector: assigns (*this - vec) back to this point.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `return *this;` -- confirm against the full file.
1299 __forceinline Point3 & Point3::operator -=( const Vector3 &vec )
1301 *this = *this - vec;
1305 __forceinline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1307 return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) );
1310 __forceinline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1312 return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) );
1315 __forceinline const Point3 recipPerElem( const Point3 &pnt )
1317 return Point3( _mm_rcp_ps( pnt.get128() ) );
1320 __forceinline const Point3 absPerElem( const Point3 &pnt )
1322 return Point3( fabsf4( pnt.get128() ) );
1325 __forceinline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1327 __m128 vmask = toM128(0x7fffffff);
1328 return Point3( _mm_or_ps(
1329 _mm_and_ps ( vmask, pnt0.get128() ), // Value
1330 _mm_andnot_ps( vmask, pnt1.get128() ) ) ); // Signs
1333 __forceinline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1335 return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) );
1338 __forceinline const floatInVec maxElem( const Point3 &pnt )
1340 return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
1343 __forceinline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1345 return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) );
1348 __forceinline const floatInVec minElem( const Point3 &pnt )
1350 return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
1353 __forceinline const floatInVec sum( const Point3 &pnt )
1355 return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
1358 __forceinline const Point3 scale( const Point3 &pnt, float scaleVal )
1360 return scale( pnt, floatInVec( scaleVal ) );
1363 __forceinline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal )
1365 return mulPerElem( pnt, Point3( scaleVal ) );
1368 __forceinline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec )
1370 return mulPerElem( pnt, Point3( scaleVec ) );
1373 __forceinline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec )
1375 return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
1378 __forceinline const floatInVec distSqrFromOrigin( const Point3 &pnt )
1380 return lengthSqr( Vector3( pnt ) );
1383 __forceinline const floatInVec distFromOrigin( const Point3 &pnt )
1385 return length( Vector3( pnt ) );
1388 __forceinline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 )
1390 return lengthSqr( ( pnt1 - pnt0 ) );
1393 __forceinline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 )
1395 return length( ( pnt1 - pnt0 ) );
1398 __forceinline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 )
1400 return select( pnt0, pnt1, boolInVec(select1) );
1403 __forceinline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 )
1405 return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
1410 #ifdef _VECTORMATH_DEBUG
1412 __forceinline void print( const Point3 &pnt )
1414 union { __m128 v; float s[4]; } tmp;
1415 tmp.v = pnt.get128();
1416 printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
1419 __forceinline void print( const Point3 &pnt, const char * name )
1421 union { __m128 v; float s[4]; } tmp;
1422 tmp.v = pnt.get128();
1423 printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
1429 } // namespace Vectormath