2 * Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
6 * This source code is subject to NVIDIA ownership rights under U.S. and
7 * international Copyright laws. Users and possessors of this source code
8 * are hereby granted a nonexclusive, royalty-free license to use this code
9 * in individual and commercial software.
11 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
12 * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
13 * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
14 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
15 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
16 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
17 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
18 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
19 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
20 * OR PERFORMANCE OF THIS SOURCE CODE.
22 * U.S. Government End Users. This source code is a "commercial item" as
23 * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
24 * "commercial computer software" and "commercial computer software
25 * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
26 * and is provided to the U.S. Government only as a commercial end item.
27 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
28 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
29 * source code with only those rights set forth herein.
31 * Any use of this source code in individual and commercial software must
32 * include, in the user documentation and internal comments to the code,
33 * the above Disclaimer and U.S. Government End Users Notice.
37 This file implements common mathematical operations on vector types
38 (float3, float4 etc.) since these are not provided as standard by CUDA.
40 The syntax is modelled on the Cg standard library.
46 #include "cuda_runtime.h"
48 ////////////////////////////////////////////////////////////////////////////////
// Shorthand aliases for unsigned int and unsigned short.
// uint is the scalar type used by the uint3 helpers further down in this file.
49 typedef unsigned int uint;
50 typedef unsigned short ushort;
// Scalar float helper declared without __host__/__device__ qualifiers.
// NOTE(review): only the signature is visible in this listing; presumably
// returns the smaller of a and b — confirm against the complete file.
55 inline float fminf(float a, float b)
// Scalar float helper declared without __host__/__device__ qualifiers.
// NOTE(review): only the signature is visible in this listing; presumably
// returns the larger of a and b — confirm against the complete file.
60 inline float fmaxf(float a, float b)
// Scalar int helper declared without __host__/__device__ qualifiers.
// NOTE(review): only the signature is visible in this listing; presumably
// returns the larger of a and b — confirm against the complete file.
65 inline int max(int a, int b)
// Scalar int helper declared without __host__/__device__ qualifiers.
// NOTE(review): only the signature is visible in this listing; presumably
// returns the smaller of a and b — confirm against the complete file.
70 inline int min(int a, int b)
77 ////////////////////////////////////////////////////////////////////////////////
// Scalar interpolation helper taking two endpoints a, b and a parameter t.
// NOTE(review): only the signature is visible in this listing; the exact
// interpolation formula must be confirmed against the complete file.
80 inline __device__ __host__ float lerp(float a, float b, float t)
// Restrict f to the range [a, b]: cap it at b first, then raise it to at least a.
inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float capped = fminf(f, b);
    return fmaxf(a, capped);
}
92 ////////////////////////////////////////////////////////////////////////////////
// Unary minus: returns a new int2 with both components negated.
// The operand is taken by const reference so that temporaries and const
// values (for example -(a + b)) can be negated; the operand is not modified.
inline __host__ __device__ int2 operator-(const int2 &a)
{
    return make_int2(-a.x, -a.y);
}
// Component-wise sum of two int2 values.
inline __host__ __device__ int2 operator+(int2 a, int2 b)
{
    const int sx = a.x + b.x;
    const int sy = a.y + b.y;
    return make_int2(sx, sy);
}
// In-place component-wise addition: adds b into a.
inline __host__ __device__ void operator+=(int2 &a, int2 b)
{
    a.x = a.x + b.x;
    a.y = a.y + b.y;
}
// Component-wise difference a - b of two int2 values.
inline __host__ __device__ int2 operator-(int2 a, int2 b)
{
    const int dx = a.x - b.x;
    const int dy = a.y - b.y;
    return make_int2(dx, dy);
}
// In-place component-wise subtraction: subtracts b from a.
inline __host__ __device__ void operator-=(int2 &a, int2 b)
{
    a.x = a.x - b.x;
    a.y = a.y - b.y;
}
// Component-wise product of two int2 values.
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
    const int px = a.x * b.x;
    const int py = a.y * b.y;
    return make_int2(px, py);
}
// Scales both components of a by the integer s (vector * scalar).
inline __host__ __device__ int2 operator*(int2 a, int s)
{
    int2 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    return scaled;
}
// Scales both components of a by the integer s (scalar * vector).
inline __host__ __device__ int2 operator*(int s, int2 a)
{
    int2 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    return scaled;
}
// Compound multiplication of an int2 (taken by reference) by an integer s.
// NOTE(review): only the signature is visible in this listing; presumably
// scales a.x and a.y by s in place — confirm against the complete file.
133 inline __host__ __device__ void operator*=(int2 &a, int s)
139 ////////////////////////////////////////////////////////////////////////////////
141 // additional constructors
// Builds a float2 with both components set to s.
inline __host__ __device__ float2 make_float2(float s)
{
    float2 splat;
    splat.x = s;
    splat.y = s;
    return splat;
}
// Converts an int2 to a float2, component by component.
inline __host__ __device__ float2 make_float2(int2 a)
{
    const float fx = static_cast<float>(a.x);
    const float fy = static_cast<float>(a.y);
    return make_float2(fx, fy);
}
// Unary minus: returns a new float2 with both components negated.
// The operand is taken by const reference so that temporaries and const
// values (for example -(a + b)) can be negated; the operand is not modified.
inline __host__ __device__ float2 operator-(const float2 &a)
{
    return make_float2(-a.x, -a.y);
}
// Component-wise sum of two float2 values.
inline __host__ __device__ float2 operator+(float2 a, float2 b)
{
    const float sx = a.x + b.x;
    const float sy = a.y + b.y;
    return make_float2(sx, sy);
}
// In-place component-wise addition: adds b into a.
inline __host__ __device__ void operator+=(float2 &a, float2 b)
{
    a.x = a.x + b.x;
    a.y = a.y + b.y;
}
// Component-wise difference a - b of two float2 values.
inline __host__ __device__ float2 operator-(float2 a, float2 b)
{
    const float dx = a.x - b.x;
    const float dy = a.y - b.y;
    return make_float2(dx, dy);
}
// In-place component-wise subtraction: subtracts b from a.
inline __host__ __device__ void operator-=(float2 &a, float2 b)
{
    a.x = a.x - b.x;
    a.y = a.y - b.y;
}
// Component-wise product of two float2 values.
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
    const float px = a.x * b.x;
    const float py = a.y * b.y;
    return make_float2(px, py);
}
// Scales both components of a by s (vector * scalar).
inline __host__ __device__ float2 operator*(float2 a, float s)
{
    float2 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    return scaled;
}
// Scales both components of a by s (scalar * vector).
inline __host__ __device__ float2 operator*(float s, float2 a)
{
    float2 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    return scaled;
}
// Compound multiplication of a float2 (taken by reference) by a scalar s.
// NOTE(review): only the signature is visible in this listing; presumably
// scales a.x and a.y by s in place — confirm against the complete file.
190 inline __host__ __device__ void operator*=(float2 &a, float s)
// Component-wise quotient a / b of two float2 values.
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
    const float qx = a.x / b.x;
    const float qy = a.y / b.y;
    return make_float2(qx, qy);
}
// Vector divided by scalar.
// The visible statement computes the reciprocal of s once; the statement that
// applies it to a is not shown in this listing (presumably a * inv — confirm).
200 inline __host__ __device__ float2 operator/(float2 a, float s)
202 float inv = 1.0f / s;
// Scalar divided by vector: divides s by each component of a, returning
// (s / a.x, s / a.y). The scalar is the numerator, so it must not be
// inverted and multiplied into a (that would compute a / s instead).
inline __host__ __device__ float2 operator/(float s, float2 a)
{
    return make_float2(s / a.x, s / a.y);
}
// Compound division of a float2 (taken by reference) by a scalar s.
// The visible statement computes the reciprocal of s once; the statement that
// applies it to a is not shown in this listing (presumably a *= inv — confirm).
210 inline __host__ __device__ void operator/=(float2 &a, float s)
212 float inv = 1.0f / s;
// float2 interpolation helper taking two endpoints a, b and a scalar parameter t.
// NOTE(review): only the signature is visible in this listing; the exact
// interpolation formula must be confirmed against the complete file.
217 inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
// Clamps each component of v with the scalar clamp, using the same bounds a and b.
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
    float2 bounded;
    bounded.x = clamp(v.x, a, b);
    bounded.y = clamp(v.y, a, b);
    return bounded;
}
// Clamps each component of v with the scalar clamp, using the matching components of a and b as bounds.
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
    float2 bounded;
    bounded.x = clamp(v.x, a.x, b.x);
    bounded.y = clamp(v.y, a.y, b.y);
    return bounded;
}
// Dot product of two float2 values: sum of the component-wise products.
inline __host__ __device__ float dot(float2 a, float2 b)
{
    const float sum = a.x * b.x + a.y * b.y;
    return sum;
}
// Euclidean length of v: square root of dot(v, v).
inline __host__ __device__ float length(float2 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}
// Normalization helper for float2.
// The visible statement computes 1 / sqrt(dot(v, v)), i.e. the reciprocal of
// the length of v; the statement that applies it is not shown in this listing
// (presumably v * invLen — confirm).
246 inline __host__ __device__ float2 normalize(float2 v)
248 float invLen = 1.0f / sqrtf(dot(v, v));
// Component-wise floor of v.
// Calls floorf explicitly so each component is rounded in single precision,
// independent of which floor overloads happen to be visible at this point.
inline __host__ __device__ float2 floor(const float2 v)
{
    return make_float2(floorf(v.x), floorf(v.y));
}
// Reflects the vector i about n by computing i - 2 * n * dot(n, i).
inline __host__ __device__ float2 reflect(float2 i, float2 n)
{
    const float2 offset = 2.0f * n * dot(n,i);
    return i - offset;
}
265 ////////////////////////////////////////////////////////////////////////////////
267 // additional constructors
// Builds a float3 with all three components set to s.
inline __host__ __device__ float3 make_float3(float s)
{
    float3 splat;
    splat.x = s;
    splat.y = s;
    splat.z = s;
    return splat;
}
// Extends a float2 to a float3; the z component is set to zero.
inline __host__ __device__ float3 make_float3(float2 a)
{
    float3 widened;
    widened.x = a.x;
    widened.y = a.y;
    widened.z = 0.0f;
    return widened;
}
// Extends a float2 to a float3, using s as the z component.
inline __host__ __device__ float3 make_float3(float2 a, float s)
{
    float3 widened;
    widened.x = a.x;
    widened.y = a.y;
    widened.z = s;
    return widened;
}
// Truncates a float4 to a float3 by keeping x, y, z; the w component is dropped.
inline __host__ __device__ float3 make_float3(float4 a)
{
    float3 xyz;
    xyz.x = a.x;
    xyz.y = a.y;
    xyz.z = a.z;
    return xyz;
}
// Converts an int3 to a float3, component by component.
inline __host__ __device__ float3 make_float3(int3 a)
{
    const float fx = static_cast<float>(a.x);
    const float fy = static_cast<float>(a.y);
    const float fz = static_cast<float>(a.z);
    return make_float3(fx, fy, fz);
}
// Unary minus: returns a new float3 with all components negated.
// The operand is taken by const reference so that temporaries and const
// values (for example -(a + b)) can be negated; the operand is not modified.
inline __host__ __device__ float3 operator-(const float3 &a)
{
    return make_float3(-a.x, -a.y, -a.z);
}
// Component-wise minimum of two float3 values, using the scalar fminf per component.
static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
{
    float3 lo;
    lo.x = fminf(a.x, b.x);
    lo.y = fminf(a.y, b.y);
    lo.z = fminf(a.z, b.z);
    return lo;
}
// Component-wise maximum of two float3 values, using the scalar fmaxf per component.
static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
    float3 hi;
    hi.x = fmaxf(a.x, b.x);
    hi.y = fmaxf(a.y, b.y);
    hi.z = fmaxf(a.z, b.z);
    return hi;
}
// Component-wise sum of two float3 values.
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
    const float sx = a.x + b.x;
    const float sy = a.y + b.y;
    const float sz = a.z + b.z;
    return make_float3(sx, sy, sz);
}
// Adds the scalar b to every component of a.
inline __host__ __device__ float3 operator+(float3 a, float b)
{
    float3 shifted;
    shifted.x = a.x + b;
    shifted.y = a.y + b;
    shifted.z = a.z + b;
    return shifted;
}
// In-place component-wise addition: adds b into a.
inline __host__ __device__ void operator+=(float3 &a, float3 b)
{
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
}
// Component-wise difference a - b of two float3 values.
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
    const float dx = a.x - b.x;
    const float dy = a.y - b.y;
    const float dz = a.z - b.z;
    return make_float3(dx, dy, dz);
}
// Subtracts the scalar b from every component of a.
inline __host__ __device__ float3 operator-(float3 a, float b)
{
    float3 shifted;
    shifted.x = a.x - b;
    shifted.y = a.y - b;
    shifted.z = a.z - b;
    return shifted;
}
// In-place component-wise subtraction: subtracts b from a.
inline __host__ __device__ void operator-=(float3 &a, float3 b)
{
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
}
// Component-wise product of two float3 values.
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
    const float px = a.x * b.x;
    const float py = a.y * b.y;
    const float pz = a.z * b.z;
    return make_float3(px, py, pz);
}
// Scales every component of a by s (vector * scalar).
inline __host__ __device__ float3 operator*(float3 a, float s)
{
    float3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
// Scales every component of a by s (scalar * vector).
inline __host__ __device__ float3 operator*(float s, float3 a)
{
    float3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
// In-place scaling: multiplies every component of a by s.
inline __host__ __device__ void operator*=(float3 &a, float s)
{
    a.x = a.x * s;
    a.y = a.y * s;
    a.z = a.z * s;
}
// Component-wise quotient a / b of two float3 values.
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
    const float qx = a.x / b.x;
    const float qy = a.y / b.y;
    const float qz = a.z / b.z;
    return make_float3(qx, qy, qz);
}
// Vector divided by scalar.
// The visible statement computes the reciprocal of s once; the statement that
// applies it to a is not shown in this listing (presumably a * inv — confirm).
358 inline __host__ __device__ float3 operator/(float3 a, float s)
360 float inv = 1.0f / s;
// Scalar divided by vector: divides s by each component of a, returning
// (s / a.x, s / a.y, s / a.z). The scalar is the numerator, so it must not be
// inverted and multiplied into a (that would compute a / s instead).
inline __host__ __device__ float3 operator/(float s, float3 a)
{
    return make_float3(s / a.x, s / a.y, s / a.z);
}
// Compound division of a float3 (taken by reference) by a scalar s.
// The visible statement computes the reciprocal of s once; the statement that
// applies it to a is not shown in this listing (presumably a *= inv — confirm).
368 inline __host__ __device__ void operator/=(float3 &a, float s)
370 float inv = 1.0f / s;
// float3 interpolation helper taking two endpoints a, b and a scalar parameter t.
// NOTE(review): only the signature is visible in this listing; the exact
// interpolation formula must be confirmed against the complete file.
375 inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
// Clamps each component of v with the scalar clamp, using the same bounds a and b.
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    float3 bounded;
    bounded.x = clamp(v.x, a, b);
    bounded.y = clamp(v.y, a, b);
    bounded.z = clamp(v.z, a, b);
    return bounded;
}
// Clamps each component of v with the scalar clamp, using the matching components of a and b as bounds.
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    float3 bounded;
    bounded.x = clamp(v.x, a.x, b.x);
    bounded.y = clamp(v.y, a.y, b.y);
    bounded.z = clamp(v.z, a.z, b.z);
    return bounded;
}
// Dot product of two float3 values: sum of the component-wise products.
inline __host__ __device__ float dot(float3 a, float3 b)
{
    const float sum = a.x * b.x + a.y * b.y + a.z * b.z;
    return sum;
}
// Cross product a x b of two float3 values.
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    const float cx = a.y*b.z - a.z*b.y;
    const float cy = a.z*b.x - a.x*b.z;
    const float cz = a.x*b.y - a.y*b.x;
    return make_float3(cx, cy, cz);
}
// Euclidean length of v: square root of dot(v, v).
inline __host__ __device__ float length(float3 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}
// Normalization helper for float3.
// The visible statement computes 1 / sqrt(dot(v, v)), i.e. the reciprocal of
// the length of v; the statement that applies it is not shown in this listing
// (presumably v * invLen — confirm).
410 inline __host__ __device__ float3 normalize(float3 v)
412 float invLen = 1.0f / sqrtf(dot(v, v));
// Component-wise floor of v.
// Calls floorf explicitly so each component is rounded in single precision,
// independent of which floor overloads happen to be visible at this point.
inline __host__ __device__ float3 floor(const float3 v)
{
    return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
}
// Reflects the vector i about n by computing i - 2 * n * dot(n, i).
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
    const float3 offset = 2.0f * n * dot(n,i);
    return i - offset;
}
429 ////////////////////////////////////////////////////////////////////////////////
431 // additional constructors
// Builds a float4 with all four components set to s.
inline __host__ __device__ float4 make_float4(float s)
{
    float4 splat;
    splat.x = s;
    splat.y = s;
    splat.z = s;
    splat.w = s;
    return splat;
}
// Extends a float3 to a float4; the w component is set to zero.
inline __host__ __device__ float4 make_float4(float3 a)
{
    float4 widened;
    widened.x = a.x;
    widened.y = a.y;
    widened.z = a.z;
    widened.w = 0.0f;
    return widened;
}
// Extends a float3 to a float4, using w as the fourth component.
inline __host__ __device__ float4 make_float4(float3 a, float w)
{
    float4 widened;
    widened.x = a.x;
    widened.y = a.y;
    widened.z = a.z;
    widened.w = w;
    return widened;
}
// Converts an int4 to a float4, component by component.
inline __host__ __device__ float4 make_float4(int4 a)
{
    const float fx = static_cast<float>(a.x);
    const float fy = static_cast<float>(a.y);
    const float fz = static_cast<float>(a.z);
    const float fw = static_cast<float>(a.w);
    return make_float4(fx, fy, fz, fw);
}
// Unary minus: returns a new float4 with all components negated.
// The operand is taken by const reference so that temporaries and const
// values (for example -(a + b)) can be negated; the operand is not modified.
inline __host__ __device__ float4 operator-(const float4 &a)
{
    return make_float4(-a.x, -a.y, -a.z, -a.w);
}
// Component-wise minimum of two float4 values, using the scalar fminf per component.
static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
{
    float4 lo;
    lo.x = fminf(a.x, b.x);
    lo.y = fminf(a.y, b.y);
    lo.z = fminf(a.z, b.z);
    lo.w = fminf(a.w, b.w);
    return lo;
}
// Component-wise maximum of two float4 values, using the scalar fmaxf per component.
static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
    float4 hi;
    hi.x = fmaxf(a.x, b.x);
    hi.y = fmaxf(a.y, b.y);
    hi.z = fmaxf(a.z, b.z);
    hi.w = fmaxf(a.w, b.w);
    return hi;
}
// Component-wise sum of two float4 values.
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
    const float sx = a.x + b.x;
    const float sy = a.y + b.y;
    const float sz = a.z + b.z;
    const float sw = a.w + b.w;
    return make_float4(sx, sy, sz, sw);
}
// In-place component-wise addition: adds b into a.
inline __host__ __device__ void operator+=(float4 &a, float4 b)
{
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
    a.w = a.w + b.w;
}
// Component-wise difference a - b of two float4 values.
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
    const float dx = a.x - b.x;
    const float dy = a.y - b.y;
    const float dz = a.z - b.z;
    const float dw = a.w - b.w;
    return make_float4(dx, dy, dz, dw);
}
// In-place component-wise subtraction: subtracts b from a.
inline __host__ __device__ void operator-=(float4 &a, float4 b)
{
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
    a.w = a.w - b.w;
}
// Scales every component of a by s (vector * scalar).
inline __host__ __device__ float4 operator*(float4 a, float s)
{
    float4 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    scaled.w = a.w * s;
    return scaled;
}
// Scales every component of a by s (scalar * vector).
inline __host__ __device__ float4 operator*(float s, float4 a)
{
    float4 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    scaled.w = a.w * s;
    return scaled;
}
// In-place scaling: multiplies every component of a by s.
inline __host__ __device__ void operator*=(float4 &a, float s)
{
    a.x = a.x * s;
    a.y = a.y * s;
    a.z = a.z * s;
    a.w = a.w * s;
}
// Component-wise quotient a / b of two float4 values.
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
    const float qx = a.x / b.x;
    const float qy = a.y / b.y;
    const float qz = a.z / b.z;
    const float qw = a.w / b.w;
    return make_float4(qx, qy, qz, qw);
}
// Vector divided by scalar.
// The visible statement computes the reciprocal of s once; the statement that
// applies it to a is not shown in this listing (presumably a * inv — confirm).
506 inline __host__ __device__ float4 operator/(float4 a, float s)
508 float inv = 1.0f / s;
// Scalar divided by vector: divides s by each component of a, returning
// (s / a.x, s / a.y, s / a.z, s / a.w). The scalar is the numerator, so it
// must not be inverted and multiplied into a (that would compute a / s instead).
inline __host__ __device__ float4 operator/(float s, float4 a)
{
    return make_float4(s / a.x, s / a.y, s / a.z, s / a.w);
}
// Compound division of a float4 (taken by reference) by a scalar s.
// The visible statement computes the reciprocal of s once; the statement that
// applies it to a is not shown in this listing (presumably a *= inv — confirm).
516 inline __host__ __device__ void operator/=(float4 &a, float s)
518 float inv = 1.0f / s;
// float4 interpolation helper taking two endpoints a, b and a scalar parameter t.
// NOTE(review): only the signature is visible in this listing; the exact
// interpolation formula must be confirmed against the complete file.
523 inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
// Clamps each component of v with the scalar clamp, using the same bounds a and b.
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    float4 bounded;
    bounded.x = clamp(v.x, a, b);
    bounded.y = clamp(v.y, a, b);
    bounded.z = clamp(v.z, a, b);
    bounded.w = clamp(v.w, a, b);
    return bounded;
}
// Clamps each component of v with the scalar clamp, using the matching components of a and b as bounds.
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    float4 bounded;
    bounded.x = clamp(v.x, a.x, b.x);
    bounded.y = clamp(v.y, a.y, b.y);
    bounded.z = clamp(v.z, a.z, b.z);
    bounded.w = clamp(v.w, a.w, b.w);
    return bounded;
}
// Dot product of two float4 values: sum of the component-wise products.
inline __host__ __device__ float dot(float4 a, float4 b)
{
    const float sum = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    return sum;
}
// Euclidean length of r: square root of dot(r, r).
inline __host__ __device__ float length(float4 r)
{
    const float lenSq = dot(r, r);
    return sqrtf(lenSq);
}
// Normalization helper for float4.
// The visible statement computes 1 / sqrt(dot(v, v)), i.e. the reciprocal of
// the length of v; the statement that applies it is not shown in this listing
// (presumably v * invLen — confirm).
552 inline __host__ __device__ float4 normalize(float4 v)
554 float invLen = 1.0f / sqrtf(dot(v, v));
// Component-wise floor of v.
// Calls floorf explicitly so each component is rounded in single precision,
// independent of which floor overloads happen to be visible at this point.
inline __host__ __device__ float4 floor(const float4 v)
{
    return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
}
565 ////////////////////////////////////////////////////////////////////////////////
567 // additional constructors
// Builds an int3 with all three components set to s.
inline __host__ __device__ int3 make_int3(int s)
{
    int3 splat;
    splat.x = s;
    splat.y = s;
    splat.z = s;
    return splat;
}
// Converts a float3 to an int3 using the built-in float-to-int conversion per component.
inline __host__ __device__ int3 make_int3(float3 a)
{
    const int ix = static_cast<int>(a.x);
    const int iy = static_cast<int>(a.y);
    const int iz = static_cast<int>(a.z);
    return make_int3(ix, iy, iz);
}
// Unary minus: returns a new int3 with all components negated.
// The operand is taken by const reference so that temporaries and const
// values (for example -(a + b)) can be negated; the operand is not modified.
inline __host__ __device__ int3 operator-(const int3 &a)
{
    return make_int3(-a.x, -a.y, -a.z);
}
// Component-wise minimum of two int3 values, using the scalar min per component.
inline __host__ __device__ int3 min(int3 a, int3 b)
{
    int3 lo;
    lo.x = min(a.x, b.x);
    lo.y = min(a.y, b.y);
    lo.z = min(a.z, b.z);
    return lo;
}
// Component-wise maximum of two int3 values, using the scalar max per component.
inline __host__ __device__ int3 max(int3 a, int3 b)
{
    int3 hi;
    hi.x = max(a.x, b.x);
    hi.y = max(a.y, b.y);
    hi.z = max(a.z, b.z);
    return hi;
}
// Component-wise sum of two int3 values.
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
    const int sx = a.x + b.x;
    const int sy = a.y + b.y;
    const int sz = a.z + b.z;
    return make_int3(sx, sy, sz);
}
// In-place component-wise addition: adds b into a.
inline __host__ __device__ void operator+=(int3 &a, int3 b)
{
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
}
// Component-wise difference a - b of two int3 values.
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
    const int dx = a.x - b.x;
    const int dy = a.y - b.y;
    const int dz = a.z - b.z;
    return make_int3(dx, dy, dz);
}
// In-place component-wise subtraction: subtracts b from a.
inline __host__ __device__ void operator-=(int3 &a, int3 b)
{
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
}
// Component-wise product of two int3 values.
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
    const int px = a.x * b.x;
    const int py = a.y * b.y;
    const int pz = a.z * b.z;
    return make_int3(px, py, pz);
}
// Scales every component of a by the integer s (vector * scalar).
inline __host__ __device__ int3 operator*(int3 a, int s)
{
    int3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
// Scales every component of a by the integer s (scalar * vector).
inline __host__ __device__ int3 operator*(int s, int3 a)
{
    int3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
// In-place scaling: multiplies every component of a by s.
inline __host__ __device__ void operator*=(int3 &a, int s)
{
    a.x = a.x * s;
    a.y = a.y * s;
    a.z = a.z * s;
}
// Component-wise integer quotient a / b of two int3 values.
inline __host__ __device__ int3 operator/(int3 a, int3 b)
{
    const int qx = a.x / b.x;
    const int qy = a.y / b.y;
    const int qz = a.z / b.z;
    return make_int3(qx, qy, qz);
}
// Divides every component of a by the integer s (vector / scalar).
inline __host__ __device__ int3 operator/(int3 a, int s)
{
    int3 quotient;
    quotient.x = a.x / s;
    quotient.y = a.y / s;
    quotient.z = a.z / s;
    return quotient;
}
// Scalar divided by vector: divides s by each component of a, returning
// (s / a.x, s / a.y, s / a.z). The scalar is the numerator here; dividing the
// components of a by s would compute a / s, which is a different operator.
inline __host__ __device__ int3 operator/(int s, int3 a)
{
    return make_int3(s / a.x, s / a.y, s / a.z);
}
// In-place division: divides every component of a by s.
inline __host__ __device__ void operator/=(int3 &a, int s)
{
    a.x = a.x / s;
    a.y = a.y / s;
    a.z = a.z / s;
}
// Restrict f to the range [a, b]: cap it at b first, then raise it to at least a.
inline __device__ __host__ int clamp(int f, int a, int b)
{
    const int capped = min(f, b);
    return max(a, capped);
}
// Clamps each component of v with the scalar clamp, using the same bounds a and b.
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
    int3 bounded;
    bounded.x = clamp(v.x, a, b);
    bounded.y = clamp(v.y, a, b);
    bounded.z = clamp(v.z, a, b);
    return bounded;
}
// Clamps each component of v with the scalar clamp, using the matching components of a and b as bounds.
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
    int3 bounded;
    bounded.x = clamp(v.x, a.x, b.x);
    bounded.y = clamp(v.y, a.y, b.y);
    bounded.z = clamp(v.z, a.z, b.z);
    return bounded;
}
670 ////////////////////////////////////////////////////////////////////////////////
672 // additional constructors
// Builds a uint3 with all three components set to s.
inline __host__ __device__ uint3 make_uint3(uint s)
{
    uint3 splat;
    splat.x = s;
    splat.y = s;
    splat.z = s;
    return splat;
}
// Converts a float3 to a uint3 using the built-in float-to-unsigned conversion per component.
inline __host__ __device__ uint3 make_uint3(float3 a)
{
    const uint ux = static_cast<uint>(a.x);
    const uint uy = static_cast<uint>(a.y);
    const uint uz = static_cast<uint>(a.z);
    return make_uint3(ux, uy, uz);
}
// Component-wise minimum of two uint3 values, using the scalar min per component.
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
    uint3 lo;
    lo.x = min(a.x, b.x);
    lo.y = min(a.y, b.y);
    lo.z = min(a.z, b.z);
    return lo;
}
// Component-wise maximum of two uint3 values, using the scalar max per component.
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
    uint3 hi;
    hi.x = max(a.x, b.x);
    hi.y = max(a.y, b.y);
    hi.z = max(a.z, b.z);
    return hi;
}
// Component-wise sum of two uint3 values.
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
    const uint sx = a.x + b.x;
    const uint sy = a.y + b.y;
    const uint sz = a.z + b.z;
    return make_uint3(sx, sy, sz);
}
// In-place component-wise addition: adds b into a.
inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
{
    a.x = a.x + b.x;
    a.y = a.y + b.y;
    a.z = a.z + b.z;
}
// Component-wise difference a - b of two uint3 values (unsigned arithmetic).
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
    const uint dx = a.x - b.x;
    const uint dy = a.y - b.y;
    const uint dz = a.z - b.z;
    return make_uint3(dx, dy, dz);
}
// In-place component-wise subtraction: subtracts b from a.
inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
{
    a.x = a.x - b.x;
    a.y = a.y - b.y;
    a.z = a.z - b.z;
}
// Component-wise product of two uint3 values.
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
    const uint px = a.x * b.x;
    const uint py = a.y * b.y;
    const uint pz = a.z * b.z;
    return make_uint3(px, py, pz);
}
// Scales every component of a by the unsigned integer s (vector * scalar).
inline __host__ __device__ uint3 operator*(uint3 a, uint s)
{
    uint3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
// Scales every component of a by the unsigned integer s (scalar * vector).
inline __host__ __device__ uint3 operator*(uint s, uint3 a)
{
    uint3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
// In-place scaling: multiplies every component of a by s.
inline __host__ __device__ void operator*=(uint3 &a, uint s)
{
    a.x = a.x * s;
    a.y = a.y * s;
    a.z = a.z * s;
}
// Component-wise unsigned quotient a / b of two uint3 values.
inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
{
    const uint qx = a.x / b.x;
    const uint qy = a.y / b.y;
    const uint qz = a.z / b.z;
    return make_uint3(qx, qy, qz);
}
// Divides every component of a by the unsigned integer s (vector / scalar).
inline __host__ __device__ uint3 operator/(uint3 a, uint s)
{
    uint3 quotient;
    quotient.x = a.x / s;
    quotient.y = a.y / s;
    quotient.z = a.z / s;
    return quotient;
}
// Scalar divided by vector: divides s by each component of a, returning
// (s / a.x, s / a.y, s / a.z). The scalar is the numerator here; dividing the
// components of a by s would compute a / s, which is a different operator.
inline __host__ __device__ uint3 operator/(uint s, uint3 a)
{
    return make_uint3(s / a.x, s / a.y, s / a.z);
}
// In-place division: divides every component of a by s.
inline __host__ __device__ void operator/=(uint3 &a, uint s)
{
    a.x = a.x / s;
    a.y = a.y / s;
    a.z = a.z / s;
}
// Restrict f to the range [a, b]: cap it at b first, then raise it to at least a.
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
    const uint capped = min(f, b);
    return max(a, capped);
}
// Clamps each component of v with the scalar clamp, using the same bounds a and b.
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
    uint3 bounded;
    bounded.x = clamp(v.x, a, b);
    bounded.y = clamp(v.y, a, b);
    bounded.z = clamp(v.z, a, b);
    return bounded;
}
// Clamps each component of v with the scalar clamp, using the matching components of a and b as bounds.
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
    uint3 bounded;
    bounded.x = clamp(v.x, a.x, b.x);
    bounded.y = clamp(v.y, a.y, b.y);
    bounded.z = clamp(v.z, a.z, b.z);
    return bounded;
}