2 Copyright (c) 2012 Advanced Micro Devices, Inc.
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
14 //Originally written by Takahiro Harada
// CHECK_ALIGNMENT(a): as currently defined, expands to the bare expression
// statement `a;` and performs no check.
// The commented-out variant below asserted (via CLASSERT) that the low four
// bits of the address of `a` are zero, i.e. that `a` sits on a 16-byte boundary.
17 //#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0);
18 #define CHECK_ALIGNMENT(a) a;
// Builds a float4 from four scalars; `w` defaults to 0.f when omitted.
// The visible statement stores x, y, z, w into the matching members of `v`.
// NOTE(review): the declaration and return of `v` are not visible in this excerpt.
22 float4 make_float4(float x, float y, float z, float w = 0.f)
25 v.x = x; v.y = y; v.z = z; v.w = w;
// Broadcast overload: returns a float4 whose four components all equal `x`
// by forwarding to the four-scalar make_float4 (w is passed explicitly, so
// the 0.f default is not used here).
30 float4 make_float4(float x)
32 return make_float4(x,x,x,x);
// Converts an int4 to a float4 by casting each of x.s[0]..x.s[3] to float
// and forwarding them, in order, to the four-scalar make_float4.
36 float4 make_float4(const int4& x)
38 return make_float4((float)x.s[0], (float)x.s[1], (float)x.s[2], (float)x.s[3]);
// Builds a float2 from two scalars: x goes to v.s[0], y goes to v.s[1].
// NOTE(review): the declaration and return of `v` are not visible in this excerpt.
42 float2 make_float2(float x, float y)
45 v.s[0] = x; v.s[1] = y;
// Broadcast overload: returns a float2 with both components equal to `x`
// by forwarding to the two-scalar make_float2.
50 float2 make_float2(float x)
52 return make_float2(x,x);
// Converts an int2 to a float2 by casting x.s[0] and x.s[1] to float and
// forwarding them to the two-scalar make_float2.
56 float2 make_float2(const int2& x)
58 return make_float2((float)x.s[0], (float)x.s[1]);
// Builds an int4 from four integers; `w` defaults to 0 when omitted.
// The visible statement stores x, y, z, w into v.s[0]..v.s[3] respectively.
// NOTE(review): the declaration and return of `v` are not visible in this excerpt.
62 int4 make_int4(int x, int y, int z, int w = 0)
65 v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
// Returns an int4 with all four components set to `x` via the four-integer
// make_int4.
// NOTE(review): the signature enclosing this statement is not visible in this
// excerpt; presumably a single-int broadcast overload of make_int4 — confirm.
72 return make_int4(x,x,x,x);
// Converts a float4 to an int4 by casting each of x.x, x.y, x.z, x.w to int.
// The float-to-int cast discards the fractional part (truncates toward zero),
// so this is not a rounding conversion.
76 int4 make_int4(const float4& x)
78 return make_int4((int)x.x, (int)x.y, (int)x.z, (int)x.w);
// Builds an int2 with ans.x = a and ans.y = b.
// NOTE(review): the return of `ans` is not visible in this excerpt.
82 int2 make_int2(int a, int b)
84 int2 ans; ans.x = a; ans.y = b;
// Unary negation: returns a float4 with every component of `a` negated,
// including w.
89 float4 operator-(const float4& a)
91 return make_float4(-a.x, -a.y, -a.z, -a.w);
// Component-wise product: out.s[i] = a.s[i] * b.s[i] for i = 0..3
// (this is not a dot or cross product).
// NOTE(review): the declaration and return of `out` are not visible in this excerpt.
95 float4 operator*(const float4& a, const float4& b)
// Asserts that the low four bits of a's address are zero (16-byte boundary).
// Only `a` is checked here; `b` is not.
// NOTE(review): if u32 is a 32-bit integer, converting &a to it narrows the
// pointer on 64-bit builds — confirm the definition of u32.
97 CLASSERT((u32(&a) & 0xf) == 0);
100 out.s[0] = a.s[0]*b.s[0];
101 out.s[1] = a.s[1]*b.s[1];
102 out.s[2] = a.s[2]*b.s[2];
103 out.s[3] = a.s[3]*b.s[3];
// Scalar-times-vector: returns a float4 with every component of `b`
// (including w) multiplied by `a`.
108 float4 operator*(float a, const float4& b)
110 return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]);
// Vector-times-scalar: same result expression as the (float, float4) overload,
// with the operand order swapped. Every component of `b`, including w, is
// multiplied by `a`.
// NOTE(review): lines between the signature and the return are not visible in
// this excerpt.
114 float4 operator*(const float4& b, float a)
118 return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]);
// Compound multiply taking a mutable float4 `a` and a read-only float4 `b`.
// Returns void, so the expression cannot be chained.
// NOTE(review): the body is not visible in this excerpt; presumably
// component-wise like operator*(float4, float4) — confirm.
122 void operator*=(float4& a, const float4& b)
// Compound multiply taking a mutable float4 `a` and a scalar `b`.
// Returns void, so the expression cannot be chained.
// NOTE(review): the body is not visible in this excerpt; presumably scales
// every component of `a` by `b` — confirm.
133 void operator*=(float4& a, float b)
// Component-wise quotient: out.s[i] = a.s[i] / b.s[i] for i = 0..3.
// No guard against zero components in `b` appears in the visible lines.
// NOTE(review): the declaration and return of `out` (and any assertion between
// the signature and the first assignment) are not visible in this excerpt.
145 float4 operator/(const float4& a, const float4& b)
150 out.s[0] = a.s[0]/b.s[0];
151 out.s[1] = a.s[1]/b.s[1];
152 out.s[2] = a.s[2]/b.s[2];
153 out.s[3] = a.s[3]/b.s[3];
// Vector-divided-by-scalar: returns a float4 with every component of `b`
// (including w) divided by `a`. Note the parameter naming: the vector is `b`
// and the scalar divisor is `a`.
// No guard against a == 0 appears in the visible lines.
158 float4 operator/(const float4& b, float a)
162 return make_float4(b.s[0]/a, b.s[1]/a, b.s[2]/a, b.s[3]/a);
// Compound divide taking a mutable float4 `a` and a read-only float4 `b`.
// Returns void, so the expression cannot be chained.
// NOTE(review): the body is not visible in this excerpt; presumably
// component-wise like operator/(float4, float4) — confirm.
166 void operator/=(float4& a, const float4& b)
// Compound divide taking a mutable float4 `a` and a scalar `b`; returns void.
// NOTE(review): only the alignment assertion is visible in this excerpt; the
// statements that perform the division are not shown.
175 void operator/=(float4& a, float b)
// Asserts that the low four bits of a's address are zero (16-byte boundary).
// NOTE(review): if u32 is a 32-bit integer, converting &a to it narrows the
// pointer on 64-bit builds — confirm the definition of u32.
177 CLASSERT((u32(&a) & 0xf) == 0);
// Component-wise sum: out.s[i] = a.s[i] + b.s[i] for i = 0..3.
// NOTE(review): the declaration and return of `out` (and any assertion between
// the signature and the first assignment) are not visible in this excerpt.
187 float4 operator+(const float4& a, const float4& b)
192 out.s[0] = a.s[0]+b.s[0];
193 out.s[1] = a.s[1]+b.s[1];
194 out.s[2] = a.s[2]+b.s[2];
195 out.s[3] = a.s[3]+b.s[3];
// Vector-plus-scalar overload returning a new float4.
// NOTE(review): the body is not visible in this excerpt; presumably adds `b`
// to every component of `a` — confirm (in particular whether w is affected).
200 float4 operator+(const float4& a, float b)
// Component-wise difference: out.s[i] = a.s[i] - b.s[i] for i = 0..3.
// NOTE(review): the declaration and return of `out` (and any assertion between
// the signature and the first assignment) are not visible in this excerpt.
213 float4 operator-(const float4& a, const float4& b)
218 out.s[0] = a.s[0]-b.s[0];
219 out.s[1] = a.s[1]-b.s[1];
220 out.s[2] = a.s[2]-b.s[2];
221 out.s[3] = a.s[3]-b.s[3];
// Vector-minus-scalar overload returning a new float4.
// NOTE(review): the body is not visible in this excerpt; presumably subtracts
// `b` from every component of `a` — confirm (in particular whether w is affected).
226 float4 operator-(const float4& a, float b)
// Compound add taking a mutable float4 `a` and a read-only float4 `b`.
// Returns void, so the expression cannot be chained.
// NOTE(review): the body is not visible in this excerpt; presumably
// component-wise like operator+(float4, float4) — confirm.
239 void operator+=(float4& a, const float4& b)
// Compound add taking a mutable float4 `a` and a scalar `b`; returns void.
// NOTE(review): the body is not visible in this excerpt; presumably adds `b`
// to every component of `a` — confirm.
250 void operator+=(float4& a, float b)
// Compound subtract taking a mutable float4 `a` and a read-only float4 `b`.
// Returns void, so the expression cannot be chained.
// NOTE(review): the body is not visible in this excerpt; presumably
// component-wise like operator-(float4, float4) — confirm.
261 void operator-=(float4& a, const float4& b)
// Compound subtract taking a mutable float4 `a` and a scalar `b`; returns void.
// NOTE(review): the body is not visible in this excerpt; presumably subtracts
// `b` from every component of `a` — confirm.
272 void operator-=(float4& a, float b)
// 3D cross product a x b computed from components 0..2 only; a.s[3] and
// b.s[3] are not read in the visible arguments:
//   x = a1*b2 - a2*b1,  y = a2*b0 - a0*b2,  z = a0*b1 - a1*b0
// NOTE(review): the make_float4 call is cut off after the third argument in
// this excerpt, so the value given to w is not visible — confirm.
287 float4 cross3(const float4& a, const float4& b)
289 return make_float4(a.s[1]*b.s[2]-a.s[2]*b.s[1],
290 a.s[2]*b.s[0]-a.s[0]*b.s[2],
291 a.s[0]*b.s[1]-a.s[1]*b.s[0],
// Three-component dot product of two float4 values: sums the products of the
// x, y and z members. The w members are ignored.
296 float dot3F4(const float4& a, const float4& b)
298 return a.x*b.x+a.y*b.y+a.z*b.z;
// Euclidean length of the xyz part of `a`: the square root of dot3F4(a, a).
// The w component does not contribute.
302 float length3(const float4& a)
304 return sqrtf(dot3F4(a,a));
// Four-component dot product: sums the products of the x, y, z and w members.
308 float dot4(const float4& a, const float4& b)
310 return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
// Evaluates point.xyz . eqn.xyz + eqn.w, i.e. treats `point` as if its w were 1.
// point.w is never read.
// NOTE(review): presumably `eqn` is a plane equation (normal in xyz, offset in
// w) and the result is the signed plane distance when the normal is unit
// length — confirm against callers.
315 float dot3w1(const float4& point, const float4& eqn)
317 return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w;
// Scales `a` by the reciprocal of its xyz length (sqrt of dot3F4(a, a)).
// The scaling is `float * float4`, so w is multiplied by the same factor as
// x, y and z even though w does not contribute to the length.
// No zero-length guard is visible: if a.xyz is all zero, `1.f/length` is a
// division by zero.
321 float4 normalize3(const float4& a)
323 float length = sqrtf(dot3F4(a, a));
// Parsed as (1.f/length) * a: one reciprocal, then a scalar-vector multiply.
324 return 1.f/length * a;
// Scales `a` by the reciprocal of its four-component length (sqrt of dot4(a, a)).
// No zero-length guard is visible: if all four components are zero,
// `1.f/length` is a division by zero.
328 float4 normalize4(const float4& a)
330 float length = sqrtf(dot4(a, a));
// Parsed as (1.f/length) * a: one reciprocal, then a scalar-vector multiply.
331 return 1.f/length * a;
// Builds a four-component equation from three points: xyz is the normalized
// cross product of `ab` and `ac`, and w is set to -dot3F4(eqn, a), so that
// eqn.xyz . a.xyz + eqn.w == 0 for the first point.
// NOTE(review): the definitions of `ab` and `ac`, the declaration of `eqn` and
// the return are not visible in this excerpt; presumably ab = b - a and
// ac = c - a — confirm.
// Inherits normalize3's behaviour for a zero-length cross product (e.g.
// collinear inputs): no guard is visible here.
335 float4 createEquation(const float4& a, const float4& b, const float4& c)
340 eqn = normalize3( cross3(ab, ac) );
// Overwrites whatever w normalize3 produced with the plane offset.
341 eqn.w = -dot3F4(eqn,a);
// Generic two-argument max2 over a type T, taking both operands by const
// reference and returning a T by value.
// NOTE(review): the template header and body are not visible in this excerpt;
// presumably returns the larger of `a` and `b` — confirm, including which
// operand is returned on ties.
348 T max2(const T& a, const T& b)
// Generic two-argument min2 over a type T, taking both operands by const
// reference and returning a T by value.
// NOTE(review): the template header and body are not visible in this excerpt;
// presumably returns the smaller of `a` and `b` — confirm, including which
// operand is returned on ties.
355 T min2(const T& a, const T& b)
// float4 overload of max2: applies the scalar max2 independently to each of
// the x, y, z and w members and packs the four results into a new float4.
362 float4 max2(const float4& a, const float4& b)
364 return make_float4( max2(a.x,b.x), max2(a.y,b.y), max2(a.z,b.z), max2(a.w,b.w) );
// float4 overload of min2: applies the scalar min2 independently to each of
// the x, y, z and w members and packs the four results into a new float4.
369 float4 min2(const float4& a, const float4& b)
371 return make_float4( min2(a.x,b.x), min2(a.y,b.y), min2(a.z,b.z), min2(a.w,b.w) );