Extras/CUDA/cutil_math.h

   1     /*
   2  * Copyright 1993-2007 NVIDIA Corporation.  All rights reserved.
   3  *
   4  * NOTICE TO USER:
   5  *
   6  * This source code is subject to NVIDIA ownership rights under U.S. and
   7  * international Copyright laws.  Users and possessors of this source code
   8  * are hereby granted a nonexclusive, royalty-free license to use this code
   9  * in individual and commercial software.
  10  *
  11  * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
  12  * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
  13  * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
  14  * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
  15  * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
  16  * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
  17  * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
  18  * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  19  * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
  20  * OR PERFORMANCE OF THIS SOURCE CODE.
  21  *
  22  * U.S. Government End Users.   This source code is a "commercial item" as
  23  * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
  24  * "commercial computer  software"  and "commercial computer software
  25  * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
  26  * and is provided to the U.S. Government only as a commercial end item.
  27  * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
  28  * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
  29  * source code with only those rights set forth herein.
  30  *
  31  * Any use of this source code in individual and commercial software must
  32  * include, in the user documentation and internal comments to the code,
  33  * the above Disclaimer and U.S. Government End Users Notice.
  34  */
  35
  36 /*
  37     This file implements common mathematical operations on vector types
  38     (float3, float4 etc.) since these are not provided as standard by CUDA.
  39
  40     The syntax is modelled on the Cg standard library.
  41 */
  42
  43 #ifndef CUTIL_MATH_H
  44 #define CUTIL_MATH_H
  45
  46 #include "cuda_runtime.h"
  47
  48 ////////////////////////////////////////////////////////////////////////////////
  49 typedef unsigned int uint; // shorthand for unsigned int; appears in the uint3 helper signatures in this header
  50 typedef unsigned short ushort; // shorthand for unsigned short
  51
  52 #ifndef __CUDACC__
  53 #include <math.h>
  54
  55 inline float fminf(float a, float b)
  56 {
  57   return a < b ? a : b;
  58 }
  59
  60 inline float fmaxf(float a, float b)
  61 {
  62   return a < b ? a : b;
  63 }
  64
  65 inline int max(int a, int b)
  66 {
  67   return a > b ? a : b;
  68 }
  69
  70 inline int min(int a, int b)
  71 {
  72   return a < b ? a : b;
  73 }
  74 #endif
  75
  76 // float functions
  77 ////////////////////////////////////////////////////////////////////////////////
  78
  79 // lerp
  80 inline __device__ __host__ float lerp(float a, float b, float t)
  81 {
  82     return a + t*(b-a);
  83 }
  84
  85 // clamp
  86 inline __device__ __host__ float clamp(float f, float a, float b)
  87 {
  88     return fmaxf(a, fminf(f, b));
  89 }
  90
  91 // int2 functions
  92 ////////////////////////////////////////////////////////////////////////////////
  93
  94 // negate
  95 inline __host__ __device__ int2 operator-(int2 &a)
  96 {
  97     return make_int2(-a.x, -a.y);
  98 }
  99
 100 // addition
 101 inline __host__ __device__ int2 operator+(int2 a, int2 b)
 102 {
 103     return make_int2(a.x + b.x, a.y + b.y);
 104 }
 105 inline __host__ __device__ void operator+=(int2 &a, int2 b)
 106 {
 107     a.x += b.x; a.y += b.y;
 108 }
 109
 110 // subtract
 111 inline __host__ __device__ int2 operator-(int2 a, int2 b)
 112 {
 113     return make_int2(a.x - b.x, a.y - b.y);
 114 }
 115 inline __host__ __device__ void operator-=(int2 &a, int2 b)
 116 {
 117     a.x -= b.x; a.y -= b.y;
 118 }
 119
 120 // multiply
 121 inline __host__ __device__ int2 operator*(int2 a, int2 b)
 122 {
 123     return make_int2(a.x * b.x, a.y * b.y);
 124 }
 125 inline __host__ __device__ int2 operator*(int2 a, int s)
 126 {
 127     return make_int2(a.x * s, a.y * s);
 128 }
 129 inline __host__ __device__ int2 operator*(int s, int2 a)
 130 {
 131     return make_int2(a.x * s, a.y * s);
 132 }
 133 inline __host__ __device__ void operator*=(int2 &a, int s)
 134 {
 135     a.x *= s; a.y *= s;
 136 }
 137
 138 // float2 functions
 139 ////////////////////////////////////////////////////////////////////////////////
 140
 141 // additional constructors
 142 inline __host__ __device__ float2 make_float2(float s)
 143 {
 144     return make_float2(s, s);
 145 }
 146 inline __host__ __device__ float2 make_float2(int2 a)
 147 {
 148     return make_float2(float(a.x), float(a.y));
 149 }
 150
 151 // negate
 152 inline __host__ __device__ float2 operator-(float2 &a)
 153 {
 154     return make_float2(-a.x, -a.y);
 155 }
 156
 157 // addition
 158 inline __host__ __device__ float2 operator+(float2 a, float2 b)
 159 {
 160     return make_float2(a.x + b.x, a.y + b.y);
 161 }
 162 inline __host__ __device__ void operator+=(float2 &a, float2 b)
 163 {
 164     a.x += b.x; a.y += b.y;
 165 }
 166
 167 // subtract
 168 inline __host__ __device__ float2 operator-(float2 a, float2 b)
 169 {
 170     return make_float2(a.x - b.x, a.y - b.y);
 171 }
 172 inline __host__ __device__ void operator-=(float2 &a, float2 b)
 173 {
 174     a.x -= b.x; a.y -= b.y;
 175 }
 176
 177 // multiply
 178 inline __host__ __device__ float2 operator*(float2 a, float2 b)
 179 {
 180     return make_float2(a.x * b.x, a.y * b.y);
 181 }
 182 inline __host__ __device__ float2 operator*(float2 a, float s)
 183 {
 184     return make_float2(a.x * s, a.y * s);
 185 }
 186 inline __host__ __device__ float2 operator*(float s, float2 a)
 187 {
 188     return make_float2(a.x * s, a.y * s);
 189 }
 190 inline __host__ __device__ void operator*=(float2 &a, float s)
 191 {
 192     a.x *= s; a.y *= s;
 193 }
 194
 195 // divide
 196 inline __host__ __device__ float2 operator/(float2 a, float2 b)
 197 {
 198     return make_float2(a.x / b.x, a.y / b.y);
 199 }
 200 inline __host__ __device__ float2 operator/(float2 a, float s)
 201 {
 202     float inv = 1.0f / s;
 203     return a * inv;
 204 }
 205 inline __host__ __device__ float2 operator/(float s, float2 a)
 206 {
 207     float inv = 1.0f / s;
 208     return a * inv;
 209 }
 210 inline __host__ __device__ void operator/=(float2 &a, float s)
 211 {
 212     float inv = 1.0f / s;
 213     a *= inv;
 214 }
 215
 216 // lerp
 217 inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
 218 {
 219     return a + t*(b-a);
 220 }
 221
 222 // clamp
 223 inline __device__ __host__ float2 clamp(float2 v, float a, float b)
 224 {
 225     return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
 226 }
 227
 228 inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
 229 {
 230     return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
 231 }
 232
 233 // dot product
 234 inline __host__ __device__ float dot(float2 a, float2 b)
 235 {
 236     return a.x * b.x + a.y * b.y;
 237 }
 238
 239 // length
 240 inline __host__ __device__ float length(float2 v)
 241 {
 242     return sqrtf(dot(v, v));
 243 }
 244
 245 // normalize
 246 inline __host__ __device__ float2 normalize(float2 v)
 247 {
 248     float invLen = 1.0f / sqrtf(dot(v, v));
 249     return v * invLen;
 250 }
 251
 252 // floor
 253 inline __host__ __device__ float2 floor(const float2 v)
 254 {
 255     return make_float2(floor(v.x), floor(v.y));
 256 }
 257
 258 // reflect
 259 inline __host__ __device__ float2 reflect(float2 i, float2 n)
 260 {
 261         return i - 2.0f * n * dot(n,i);
 262 }
 263
 264 // float3 functions
 265 ////////////////////////////////////////////////////////////////////////////////
 266
 267 // additional constructors
 268 inline __host__ __device__ float3 make_float3(float s)
 269 {
 270     return make_float3(s, s, s);
 271 }
 272 inline __host__ __device__ float3 make_float3(float2 a)
 273 {
 274     return make_float3(a.x, a.y, 0.0f);
 275 }
 276 inline __host__ __device__ float3 make_float3(float2 a, float s)
 277 {
 278     return make_float3(a.x, a.y, s);
 279 }
 280 inline __host__ __device__ float3 make_float3(float4 a)
 281 {
 282     return make_float3(a.x, a.y, a.z);  // discards w
 283 }
 284 inline __host__ __device__ float3 make_float3(int3 a)
 285 {
 286     return make_float3(float(a.x), float(a.y), float(a.z));
 287 }
 288
 289 // negate
 290 inline __host__ __device__ float3 operator-(float3 &a)
 291 {
 292     return make_float3(-a.x, -a.y, -a.z);
 293 }
 294
 295 // min
 296 static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
 297 {
 298         return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
 299 }
 300
 301 // max
 302 static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
 303 {
 304         return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
 305 }
 306
 307 // addition
 308 inline __host__ __device__ float3 operator+(float3 a, float3 b)
 309 {
 310     return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
 311 }
 312 inline __host__ __device__ float3 operator+(float3 a, float b)
 313 {
 314     return make_float3(a.x + b, a.y + b, a.z + b);
 315 }
 316 inline __host__ __device__ void operator+=(float3 &a, float3 b)
 317 {
 318     a.x += b.x; a.y += b.y; a.z += b.z;
 319 }
 320
 321 // subtract
 322 inline __host__ __device__ float3 operator-(float3 a, float3 b)
 323 {
 324     return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
 325 }
 326 inline __host__ __device__ float3 operator-(float3 a, float b)
 327 {
 328     return make_float3(a.x - b, a.y - b, a.z - b);
 329 }
 330 inline __host__ __device__ void operator-=(float3 &a, float3 b)
 331 {
 332     a.x -= b.x; a.y -= b.y; a.z -= b.z;
 333 }
 334
 335 // multiply
 336 inline __host__ __device__ float3 operator*(float3 a, float3 b)
 337 {
 338     return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
 339 }
 340 inline __host__ __device__ float3 operator*(float3 a, float s)
 341 {
 342     return make_float3(a.x * s, a.y * s, a.z * s);
 343 }
 344 inline __host__ __device__ float3 operator*(float s, float3 a)
 345 {
 346     return make_float3(a.x * s, a.y * s, a.z * s);
 347 }
 348 inline __host__ __device__ void operator*=(float3 &a, float s)
 349 {
 350     a.x *= s; a.y *= s; a.z *= s;
 351 }
 352
 353 // divide
 354 inline __host__ __device__ float3 operator/(float3 a, float3 b)
 355 {
 356     return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
 357 }
 358 inline __host__ __device__ float3 operator/(float3 a, float s)
 359 {
 360     float inv = 1.0f / s;
 361     return a * inv;
 362 }
 363 inline __host__ __device__ float3 operator/(float s, float3 a)
 364 {
 365     float inv = 1.0f / s;
 366     return a * inv;
 367 }
 368 inline __host__ __device__ void operator/=(float3 &a, float s)
 369 {
 370     float inv = 1.0f / s;
 371     a *= inv;
 372 }
 373
 374 // lerp
 375 inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
 376 {
 377     return a + t*(b-a);
 378 }
 379
 380 // clamp
 381 inline __device__ __host__ float3 clamp(float3 v, float a, float b)
 382 {
 383     return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
 384 }
 385
 386 inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
 387 {
 388     return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
 389 }
 390
 391 // dot product
 392 inline __host__ __device__ float dot(float3 a, float3 b)
 393 {
 394     return a.x * b.x + a.y * b.y + a.z * b.z;
 395 }
 396
 397 // cross product
 398 inline __host__ __device__ float3 cross(float3 a, float3 b)
 399 {
 400     return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
 401 }
 402
 403 // length
 404 inline __host__ __device__ float length(float3 v)
 405 {
 406     return sqrtf(dot(v, v));
 407 }
 408
 409 // normalize
 410 inline __host__ __device__ float3 normalize(float3 v)
 411 {
 412     float invLen = 1.0f / sqrtf(dot(v, v));
 413     return v * invLen;
 414 }
 415
 416 // floor
 417 inline __host__ __device__ float3 floor(const float3 v)
 418 {
 419     return make_float3(floor(v.x), floor(v.y), floor(v.z));
 420 }
 421
 422 // reflect
 423 inline __host__ __device__ float3 reflect(float3 i, float3 n)
 424 {
 425         return i - 2.0f * n * dot(n,i);
 426 }
 427
 428 // float4 functions
 429 ////////////////////////////////////////////////////////////////////////////////
 430
 431 // additional constructors
 432 inline __host__ __device__ float4 make_float4(float s)
 433 {
 434     return make_float4(s, s, s, s);
 435 }
 436 inline __host__ __device__ float4 make_float4(float3 a)
 437 {
 438     return make_float4(a.x, a.y, a.z, 0.0f);
 439 }
 440 inline __host__ __device__ float4 make_float4(float3 a, float w)
 441 {
 442     return make_float4(a.x, a.y, a.z, w);
 443 }
 444 inline __host__ __device__ float4 make_float4(int4 a)
 445 {
 446     return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
 447 }
 448
 449 // negate
 450 inline __host__ __device__ float4 operator-(float4 &a)
 451 {
 452     return make_float4(-a.x, -a.y, -a.z, -a.w);
 453 }
 454
 455 // min
 456 static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
 457 {
 458         return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
 459 }
 460
 461 // max
 462 static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
 463 {
 464         return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
 465 }
 466
 467 // addition
 468 inline __host__ __device__ float4 operator+(float4 a, float4 b)
 469 {
 470     return make_float4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
 471 }
 472 inline __host__ __device__ void operator+=(float4 &a, float4 b)
 473 {
 474     a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
 475 }
 476
 477 // subtract
 478 inline __host__ __device__ float4 operator-(float4 a, float4 b)
 479 {
 480     return make_float4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
 481 }
 482 inline __host__ __device__ void operator-=(float4 &a, float4 b)
 483 {
 484     a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
 485 }
 486
 487 // multiply
 488 inline __host__ __device__ float4 operator*(float4 a, float s)
 489 {
 490     return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
 491 }
 492 inline __host__ __device__ float4 operator*(float s, float4 a)
 493 {
 494     return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
 495 }
 496 inline __host__ __device__ void operator*=(float4 &a, float s)
 497 {
 498     a.x *= s; a.y *= s; a.z *= s; a.w *= s;
 499 }
 500
 501 // divide
 502 inline __host__ __device__ float4 operator/(float4 a, float4 b)
 503 {
 504     return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
 505 }
 506 inline __host__ __device__ float4 operator/(float4 a, float s)
 507 {
 508     float inv = 1.0f / s;
 509     return a * inv;
 510 }
 511 inline __host__ __device__ float4 operator/(float s, float4 a)
 512 {
 513     float inv = 1.0f / s;
 514     return a * inv;
 515 }
 516 inline __host__ __device__ void operator/=(float4 &a, float s)
 517 {
 518     float inv = 1.0f / s;
 519     a *= inv;
 520 }
 521
 522 // lerp
 523 inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
 524 {
 525     return a + t*(b-a);
 526 }
 527
 528 // clamp
 529 inline __device__ __host__ float4 clamp(float4 v, float a, float b)
 530 {
 531     return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
 532 }
 533
 534 inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
 535 {
 536     return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
 537 }
 538
 539 // dot product
 540 inline __host__ __device__ float dot(float4 a, float4 b)
 541 {
 542     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
 543 }
 544
 545 // length
 546 inline __host__ __device__ float length(float4 r)
 547 {
 548     return sqrtf(dot(r, r));
 549 }
 550
 551 // normalize
 552 inline __host__ __device__ float4 normalize(float4 v)
 553 {
 554     float invLen = 1.0f / sqrtf(dot(v, v));
 555     return v * invLen;
 556 }
 557
 558 // floor
 559 inline __host__ __device__ float4 floor(const float4 v)
 560 {
 561     return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
 562 }
 563
 564 // int3 functions
 565 ////////////////////////////////////////////////////////////////////////////////
 566
 567 // additional constructors
 568 inline __host__ __device__ int3 make_int3(int s)
 569 {
 570     return make_int3(s, s, s);
 571 }
 572 inline __host__ __device__ int3 make_int3(float3 a)
 573 {
 574     return make_int3(int(a.x), int(a.y), int(a.z));
 575 }
 576
 577 // negate
 578 inline __host__ __device__ int3 operator-(int3 &a)
 579 {
 580     return make_int3(-a.x, -a.y, -a.z);
 581 }
 582
 583 // min
 584 inline __host__ __device__ int3 min(int3 a, int3 b)
 585 {
 586     return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
 587 }
 588
 589 // max
 590 inline __host__ __device__ int3 max(int3 a, int3 b)
 591 {
 592     return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
 593 }
 594
 595 // addition
 596 inline __host__ __device__ int3 operator+(int3 a, int3 b)
 597 {
 598     return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
 599 }
 600 inline __host__ __device__ void operator+=(int3 &a, int3 b)
 601 {
 602     a.x += b.x; a.y += b.y; a.z += b.z;
 603 }
 604
 605 // subtract
 606 inline __host__ __device__ int3 operator-(int3 a, int3 b)
 607 {
 608     return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
 609 }
 610
 611 inline __host__ __device__ void operator-=(int3 &a, int3 b)
 612 {
 613     a.x -= b.x; a.y -= b.y; a.z -= b.z;
 614 }
 615
 616 // multiply
 617 inline __host__ __device__ int3 operator*(int3 a, int3 b)
 618 {
 619     return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
 620 }
 621 inline __host__ __device__ int3 operator*(int3 a, int s)
 622 {
 623     return make_int3(a.x * s, a.y * s, a.z * s);
 624 }
 625 inline __host__ __device__ int3 operator*(int s, int3 a)
 626 {
 627     return make_int3(a.x * s, a.y * s, a.z * s);
 628 }
 629 inline __host__ __device__ void operator*=(int3 &a, int s)
 630 {
 631     a.x *= s; a.y *= s; a.z *= s;
 632 }
 633
 634 // divide
 635 inline __host__ __device__ int3 operator/(int3 a, int3 b)
 636 {
 637     return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
 638 }
 639 inline __host__ __device__ int3 operator/(int3 a, int s)
 640 {
 641     return make_int3(a.x / s, a.y / s, a.z / s);
 642 }
 643 inline __host__ __device__ int3 operator/(int s, int3 a)
 644 {
 645     return make_int3(a.x / s, a.y / s, a.z / s);
 646 }
 647 inline __host__ __device__ void operator/=(int3 &a, int s)
 648 {
 649     a.x /= s; a.y /= s; a.z /= s;
 650 }
 651
 652 // clamp
 653 inline __device__ __host__ int clamp(int f, int a, int b)
 654 {
 655     return max(a, min(f, b));
 656 }
 657
 658 inline __device__ __host__ int3 clamp(int3 v, int a, int b)
 659 {
 660     return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
 661 }
 662
 663 inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
 664 {
 665     return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
 666 }
 667
 668
 669 // uint3 functions
 670 ////////////////////////////////////////////////////////////////////////////////
 671
 672 // additional constructors
 673 inline __host__ __device__ uint3 make_uint3(uint s)
 674 {
 675     return make_uint3(s, s, s);
 676 }
 677 inline __host__ __device__ uint3 make_uint3(float3 a)
 678 {
 679     return make_uint3(uint(a.x), uint(a.y), uint(a.z));
 680 }
 681
 682 // min
 683 inline __host__ __device__ uint3 min(uint3 a, uint3 b)
 684 {
 685     return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
 686 }
 687
 688 // max
 689 inline __host__ __device__ uint3 max(uint3 a, uint3 b)
 690 {
 691     return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
 692 }
 693
 694 // addition
 695 inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
 696 {
 697     return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
 698 }
 699 inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
 700 {
 701     a.x += b.x; a.y += b.y; a.z += b.z;
 702 }
 703
 704 // subtract
 705 inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
 706 {
 707     return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
 708 }
 709
 710 inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
 711 {
 712     a.x -= b.x; a.y -= b.y; a.z -= b.z;
 713 }
 714
 715 // multiply
 716 inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
 717 {
 718     return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
 719 }
 720 inline __host__ __device__ uint3 operator*(uint3 a, uint s)
 721 {
 722     return make_uint3(a.x * s, a.y * s, a.z * s);
 723 }
 724 inline __host__ __device__ uint3 operator*(uint s, uint3 a)
 725 {
 726     return make_uint3(a.x * s, a.y * s, a.z * s);
 727 }
 728 inline __host__ __device__ void operator*=(uint3 &a, uint s)
 729 {
 730     a.x *= s; a.y *= s; a.z *= s;
 731 }
 732
 733 // divide
 734 inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
 735 {
 736     return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
 737 }
 738 inline __host__ __device__ uint3 operator/(uint3 a, uint s)
 739 {
 740     return make_uint3(a.x / s, a.y / s, a.z / s);
 741 }
 742 inline __host__ __device__ uint3 operator/(uint s, uint3 a)
 743 {
 744     return make_uint3(a.x / s, a.y / s, a.z / s);
 745 }
 746 inline __host__ __device__ void operator/=(uint3 &a, uint s)
 747 {
 748     a.x /= s; a.y /= s; a.z /= s;
 749 }
 750
 751 // clamp
 752 inline __device__ __host__ uint clamp(uint f, uint a, uint b)
 753 {
 754     return max(a, min(f, b));
 755 }
 756
 757 inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
 758 {
 759     return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
 760 }
 761
 762 inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
 763 {
 764     return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
 765 }
 766
 767 #endif