modules/core/src/opencl/arithm.cl

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // @Authors
  19 //    Jia Haipeng, jiahaipeng95@gmail.com
  20 //
  21 //
  22 // Redistribution and use in source and binary forms, with or without modification,
  23 // are permitted provided that the following conditions are met:
  24 //
  25 //   * Redistribution's of source code must retain the above copyright notice,
  26 //     this list of conditions and the following disclaimer.
  27 //
  28 //   * Redistribution's in binary form must reproduce the above copyright notice,
  29 //     this list of conditions and the following disclaimer in the documentation
  30 //     and/or other materials provided with the distribution.
  31 //
  32 //   * The name of the copyright holders may not be used to endorse or promote products
  33 //     derived from this software without specific prior written permission.
  34 //
  35 // This software is provided by the copyright holders and contributors as is and
  36 // any express or implied warranties, including, but not limited to, the implied
  37 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  38 // In no event shall the copyright holders or contributors be liable for any direct,
  39 // indirect, incidental, special, exemplary, or consequential damages
  40 // (including, but not limited to, procurement of substitute goods or services;
  41 // loss of use, data, or profits; or business interruption) however caused
  42 // and on any theory of liability, whether in contract, strict liability,
  43 // or tort (including negligence or otherwise) arising in any way out of
  44 // the use of this software, even if advised of the possibility of such damage.
  45 //
  46 //M*/
  47
  48 /*
  49   Usage:
  50      After compiling this program, the user gets a single kernel called KF.
  51      The following flags should be passed:
  52      1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP"
  53      2) the actual operation performed, one of "-D OP_..."; see the list of operations below.
  54      2a) "-D dstDepth=<destination depth> [-D cn=<num channels>]"
  55          is enough for some operations, like min/max/and/or/xor
  56      2b) "-D srcDepth1=<source1 depth> -D srcDepth2=<source2 depth> -D dstDepth=<destination depth>
  57           -D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
  58 */
  59
  60 #ifdef DOUBLE_SUPPORT
     // Only when the build defines DOUBLE_SUPPORT: enable whichever fp64 extension
     // macro the compiler exposes. cl_amd_fp64 is checked first, cl_khr_fp64 second;
     // if neither macro is defined no pragma is emitted.
  61 #ifdef cl_amd_fp64
  62 #pragma OPENCL EXTENSION cl_amd_fp64:enable
  63 #elif defined cl_khr_fp64
  64 #pragma OPENCL EXTENSION cl_khr_fp64:enable
  65 #endif
  66 #endif
  67
  68 #if depth <= 5
     // CV_PI is the float constant M_PI_F when depth <= 5 and the double constant
     // M_PI otherwise. An undefined depth evaluates as 0 here, i.e. the float branch.
     // NOTE(review): depth is presumably the element depth code passed via -D -- confirm.
  69 #define CV_PI M_PI_F
  70 #else
  71 #define CV_PI M_PI
  72 #endif
  73
  74 #ifndef cn
     // Channel count: defaults to 1 when the build options do not define cn.
  75 #define cn 1
  76 #endif
  77
  78 #if cn == 1
     // Single-channel build: whatever the build options said, the per-channel
     // (_C1) type names are forced to alias the element type names.
  79 #undef srcT1_C1
  80 #undef srcT2_C1
  81 #undef dstT_C1
  82 #define srcT1_C1 srcT1
  83 #define srcT2_C1 srcT2
  84 #define dstT_C1 dstT
  85 #endif
  86
  87 #if cn != 3
     // storedst(val) / storedst2(val) write one result element at byte offset
     // dst_index / dst_index2 from dstptr / dstptr2 (names resolved at the point
     // of expansion). For cn != 3 this is a plain store through a dstT pointer;
     // for cn == 3 the element is written with vstore3 through a pointer to the
     // channel type dstT_C1.
  88     #define storedst(val) *(__global dstT *)(dstptr + dst_index) = val
  89     #define storedst2(val) *(__global dstT *)(dstptr2 + dst_index2) = val
  90 #else
  91     #define storedst(val) vstore3(val, 0, (__global dstT_C1 *)(dstptr + dst_index))
  92     #define storedst2(val) vstore3(val, 0, (__global dstT_C1 *)(dstptr2 + dst_index2))
  93 #endif
  94
  95 #define noconvert
     // noconvert expands to nothing, so "noconvert(expr)" collapses to "(expr)";
     // it is used below as the identity conversion.
  96
  97 #ifndef workT
     // No separate work type was supplied: every type that the build options
     // left undefined falls back to the destination type, and source elements
     // are loaded without any conversion.
  98
  99     #ifndef srcT1
 100     #define srcT1 dstT
 101     #endif
 102
 103     #ifndef srcT1_C1
 104     #define srcT1_C1 dstT_C1
 105     #endif
 106
 107     #ifndef srcT2
 108     #define srcT2 dstT
 109     #endif
 110
 111     #ifndef srcT2_C1
 112     #define srcT2_C1 dstT_C1
 113     #endif
 114
 115     #define workT dstT
         // srcelem1 / srcelem2 read one element at byte offset src1_index /
         // src2_index from srcptr1 / srcptr2; 3-channel data goes through vload3.
 116     #if cn != 3
 117         #define srcelem1 *(__global srcT1 *)(srcptr1 + src1_index)
 118         #define srcelem2 *(__global srcT2 *)(srcptr2 + src2_index)
 119     #else
 120         #define srcelem1 vload3(0, (__global srcT1_C1 *)(srcptr1 + src1_index))
 121         #define srcelem2 vload3(0, (__global srcT2_C1 *)(srcptr2 + src2_index))
 122     #endif
         // Results are stored unconverted unless the build options give convertToDT.
 123     #ifndef convertToDT
 124     #define convertToDT noconvert
 125     #endif
 126
 127 #else
     // A work type was supplied: the same loads as above, but each loaded value
     // is wrapped in convertToWT1 / convertToWT2. convertToWT1 (and convertToDT)
     // are not defined in this branch, so the build options must provide them.
 128
         // The second operand uses the first operand's conversion by default.
 129     #ifndef convertToWT2
 130     #define convertToWT2 convertToWT1
 131     #endif
 132     #if cn != 3
 133         #define srcelem1 convertToWT1(*(__global srcT1 *)(srcptr1 + src1_index))
 134         #define srcelem2 convertToWT2(*(__global srcT2 *)(srcptr2 + src2_index))
 135     #else
 136         #define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1 *)(srcptr1 + src1_index)))
 137         #define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1 *)(srcptr2 + src2_index)))
 138     #endif
 139
 140 #endif
 141
     // workST is the type of the scalar operand srcelem2_ added to the kernel
     // signature by some EXTRA_PARAMS definitions; it defaults to workT.
 142 #ifndef workST
 143 #define workST workT
 144 #endif
 145
 146 #define EXTRA_PARAMS
 147 #define EXTRA_INDEX
     // EXTRA_PARAMS is appended to every KF kernel's parameter list and EXTRA_INDEX
     // is expanded inside the BINARY_OP kernel body; both are empty by default and
     // are overridden by the operations that need more arguments.
     //
     // Exactly one OP_* flag selects PROCESS_ELEM, the statement(s) executed per
     // element. srcelem1 / srcelem2 load the operands and storedst writes the result.
 148
 149 #if defined OP_ADD
     // dst = src1 + src2
 150 #define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2))
 151
 152 #elif defined OP_SUB
     // dst = src1 - src2
 153 #define PROCESS_ELEM storedst(convertToDT(srcelem1 - srcelem2))
 154
 155 #elif defined OP_RSUB
     // Reversed subtraction: dst = src2 - src1
 156 #define PROCESS_ELEM storedst(convertToDT(srcelem2 - srcelem1))
 157
 158 #elif defined OP_ABSDIFF
     // dst = |src1 - src2|; the difference is formed in workT and negated when below zero.
 159 #define PROCESS_ELEM \
 160     workT v = srcelem1 - srcelem2; \
 161     storedst(convertToDT(v >= (workT)(0) ? v : -v))
 162
 163 #elif defined OP_AND
     // Bitwise operations store the raw result; convertToDT is not applied.
 164 #define PROCESS_ELEM storedst(srcelem1 & srcelem2)
 165
 166 #elif defined OP_OR
 167 #define PROCESS_ELEM storedst(srcelem1 | srcelem2)
 168
 169 #elif defined OP_XOR
 170 #define PROCESS_ELEM storedst(srcelem1 ^ srcelem2)
 171
 172 #elif defined OP_NOT
     // Bitwise complement of the first operand only; srcelem2 is not used.
 173 #define PROCESS_ELEM storedst(~srcelem1)
 174
 175 #elif defined OP_MIN
     // Element-wise minimum / maximum via the min / max built-ins, stored unconverted.
 176 #define PROCESS_ELEM storedst(min(srcelem1, srcelem2))
 177
 178 #elif defined OP_MAX
 179 #define PROCESS_ELEM storedst(max(srcelem1, srcelem2))
 180
 181 #elif defined OP_MUL
     // dst = src1 * src2
 182 #define PROCESS_ELEM storedst(convertToDT(srcelem1 * srcelem2))
 183
 184 #elif defined OP_MUL_SCALE
     // dst = src1 * scale * src2. The kernel gains a "scale" argument; in UNARY_OP
     // mode it also gains the scalar srcelem2_, which replaces the second-image load.
 185 #undef EXTRA_PARAMS
 186 #ifdef UNARY_OP
 187 #define EXTRA_PARAMS , workST srcelem2_, scaleT scale
 188 #undef srcelem2
 189 #define srcelem2 srcelem2_
 190 #else
 191 #define EXTRA_PARAMS , scaleT scale
 192 #endif
 193 #define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2))
 194
 195 #elif defined OP_DIV
     // dst = src1 / src2, with 0 stored wherever the divisor equals 0.
 196 #define PROCESS_ELEM \
 197         workT e2 = srcelem2, zero = (workT)(0); \
 198         storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero))
 199
 200 #elif defined OP_DIV_SCALE
     // dst = src1 * scale / src2, with 0 stored wherever src2 equals 0.
     // Extra kernel arguments follow the same scheme as OP_MUL_SCALE.
 201 #undef EXTRA_PARAMS
 202 #ifdef UNARY_OP
 203 #define EXTRA_PARAMS , workST srcelem2_, scaleT scale
 204 #undef srcelem2
 205 #define srcelem2 srcelem2_
 206 #else
 207 #define EXTRA_PARAMS , scaleT scale
 208 #endif
 209 #define PROCESS_ELEM \
 210         workT e2 = srcelem2, zero = (workT)(0); \
 211         storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)))
 212
 213 #elif defined OP_RDIV_SCALE
     // Reversed scaled division: dst = src2 * scale / src1, with 0 stored
     // wherever src1 equals 0.
 214 #undef EXTRA_PARAMS
 215 #ifdef UNARY_OP
 216 #define EXTRA_PARAMS , workST srcelem2_, scaleT scale
 217 #undef srcelem2
 218 #define srcelem2 srcelem2_
 219 #else
 220 #define EXTRA_PARAMS , scaleT scale
 221 #endif
 222 #define PROCESS_ELEM \
 223         workT e1 = srcelem1, zero = (workT)(0); \
 224         storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)))
 225
 226 #elif defined OP_RECIP_SCALE
     // dst = scale / src1, with 0 stored wherever src1 equals 0. srcelem2 is not
     // used; the BINARY_OP kernel skips computing src2_index for this operation.
 227 #undef EXTRA_PARAMS
 228 #define EXTRA_PARAMS , scaleT scale
 229 #define PROCESS_ELEM \
 230         workT e1 = srcelem1, zero = (workT)(0); \
 231         storedst(convertToDT(e1 != zero ? scale / e1 : zero))
 232
 233 #elif defined OP_ADDW
     // Weighted sum: dst = src1 * alpha + (src2 * beta + gamma), with the three
     // weights passed as extra kernel arguments of type scaleT.
     // wdepth <= 4 selects the integer mad24 built-in, anything else selects mad.
     // NOTE(review): wdepth is presumably the work-type depth code from the build
     // options (<= 4 meaning an integer type) -- confirm against the host code.
 234 #undef EXTRA_PARAMS
 235 #define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
 236 #if wdepth <= 4
 237 #define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, alpha, mad24(srcelem2, beta, gamma))))
 238 #else
 239 #define PROCESS_ELEM storedst(convertToDT(mad(srcelem1, alpha, mad(srcelem2, beta, gamma))))
 240 #endif
 241
 242 #elif defined OP_MAG
     // Magnitude of the (src1, src2) pair via the hypot built-in; stored unconverted.
 243 #define PROCESS_ELEM storedst(hypot(srcelem1, srcelem2))
 244
 245 #elif defined OP_ABS_NOSAT
     // Absolute value computed in the destination type after conversion; the
     // negation is a plain unary minus, with no saturation step.
 246 #define PROCESS_ELEM \
 247     dstT v = convertToDT(srcelem1); \
 248     storedst(v >= 0 ? v : -v)
 249
 250 #elif defined OP_PHASE_RADIANS
     // Phase angle of the vector (src1, src2) in radians. atan2 yields a value in
     // [-pi, pi]; negative results are shifted by a full turn so the stored angle
     // is non-negative.
     // The full turn is written as 2 * CV_PI rather than a float-suffixed literal:
     // CV_PI follows the element depth, so double builds are no longer limited to
     // float precision, while float builds get the very same value as before
     // (doubling M_PI_F is exact).
 251 #define PROCESS_ELEM \
 252         workT tmp = atan2(srcelem2, srcelem1); \
 253         if(tmp < 0) tmp += 2 * CV_PI; \
 254         storedst(tmp)
 255
 256 #elif defined OP_PHASE_DEGREES
     // Phase angle of the vector (src1, src2) in degrees, shifted by 360 when negative.
     // The radians-to-degrees factor is selected by depth exactly like CV_PI: the
     // original float literal for depth <= 5 (bit-identical results), a
     // double-precision literal otherwise so that double data keeps full precision.
     #if depth <= 5
     #define CV_RAD2DEG 57.29577951308232286465f
     #else
     #define CV_RAD2DEG 57.29577951308232286465
     #endif
 257     #define PROCESS_ELEM \
 258     workT tmp = atan2(srcelem2, srcelem1)*CV_RAD2DEG; \
 259     if(tmp < 0) tmp += 360; \
 260     storedst(tmp)
 261
 262 #elif defined OP_EXP
     // dst = exp(src1), stored unconverted.
 263 #define PROCESS_ELEM storedst(exp(srcelem1))
 264
 265 #elif defined OP_POW
     // dst = pow(src1, src2), stored unconverted.
 266 #define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
 267
 268 #elif defined OP_POWN
     // dst = pown(src1, src2). workT is replaced by int here; since workST defaults
     // to workT, the scalar operand srcelem2_ added in unary mode is then an int
     // unless the build options define workST themselves.
 269 #undef workT
 270 #define workT int
 271 #define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
 272
 273 #elif defined OP_SQRT
     // dst = sqrt(src1), stored unconverted.
 274 #define PROCESS_ELEM storedst(sqrt(srcelem1))
 275
 276 #elif defined OP_LOG
     // Logarithm of the absolute value: log(v) for v > 0, log(-v) otherwise
     // (so v == 0 is passed to log as -v).
 277 #define PROCESS_ELEM \
 278     dstT v = (dstT)(srcelem1);\
 279     storedst(v > (dstT)(0) ? log(v) : log(-v))
 280
 281 #elif defined OP_CMP
     // Element-wise comparison: stores 255 where "src1 CMP_OPERATOR src2" holds
     // and 0 otherwise. CMP_OPERATOR is not defined in this file and must come
     // from the build options.
     // Both operands are read as srcT1 and convertToWT1 is made a no-op so the
     // loaded values are compared as they are. srcT2 may already be defined (by
     // the defaults earlier in this file or by the build options), and
     // convertToWT1 may come from the build options, so each is #undef'ed first
     // to avoid an incompatible macro redefinition.
     #undef srcT2
 282 #define srcT2 srcT1
     #undef convertToWT1
 283 #define convertToWT1
 284 #define PROCESS_ELEM storedst((dstT)(srcelem1 CMP_OPERATOR srcelem2 ? (dstT)(255) : (dstT)(0)))
 285
 286 #elif defined OP_CONVERT_SCALE_ABS
     // dst = |src1 * alpha + beta|, with alpha and beta passed as extra kernel
     // arguments of type workT1 and cast to workT before use.
     // wdepth <= 4 selects the integer mad24 built-in, anything else selects mad.
 287 #undef EXTRA_PARAMS
 288 #define EXTRA_PARAMS , workT1 alpha, workT1 beta
 289 #if wdepth <= 4
 290 #define PROCESS_ELEM \
 291     workT value = mad24(srcelem1, (workT)(alpha), (workT)(beta)); \
 292     storedst(convertToDT(value >= 0 ? value : -value))
 293 #else
 294 #define PROCESS_ELEM \
 295     workT value = mad(srcelem1, (workT)(alpha), (workT)(beta)); \
 296     storedst(convertToDT(value >= 0 ? value : -value))
 297 #endif
 298
 299 #elif defined OP_SCALE_ADD
     // dst = src1 * alpha + src2, with alpha passed as an extra kernel argument;
     // the mad24 / mad choice follows wdepth as above.
 300 #undef EXTRA_PARAMS
 301 #define EXTRA_PARAMS , workT1 alpha
 302 #if wdepth <= 4
 303 #define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, (workT)(alpha), srcelem2)))
 304 #else
 305 #define PROCESS_ELEM storedst(convertToDT(mad(srcelem1, (workT)(alpha), srcelem2)))
 306 #endif
 307
 308 #elif defined OP_CTP_AD || defined OP_CTP_AR
     // Cartesian to polar: src1 is read as x and src2 as y; the magnitude goes to
     // the first destination (storedst) and the angle to the second (storedst2).
     // OP_CTP_AD additionally multiplies the angle by 180 / CV_PI (degrees);
     // OP_CTP_AR leaves it as computed (radians).
     // CV_EPSILON is added to the denominators below and follows depth like CV_PI.
 309 #if depth <= 5
 310 #define CV_EPSILON FLT_EPSILON
 311 #else
 312 #define CV_EPSILON DBL_EPSILON
 313 #endif
 314 #ifdef OP_CTP_AD
 315 #define TO_DEGREE cartToPolar *= (180 / CV_PI);
 316 #elif defined OP_CTP_AR
 317 #define TO_DEGREE
 318 #endif
     // The angle is not obtained from atan2 but from the rational expression
     // x*y / (x2 + 0.28*y2) (or its mirrored form when |y| > |x|) plus an offset
     // chosen from the signs of x and y -- presumably a fast arctangent
     // approximation; TODO confirm the expected accuracy.
     // The locals x and y declared here shadow the kernel's integer x / y, which
     // works because PROCESS_ELEM is expanded inside a nested block of the kernel.
 319 #define PROCESS_ELEM \
 320     dstT x = srcelem1, y = srcelem2; \
 321     dstT x2 = x * x, y2 = y * y; \
 322     dstT magnitude = sqrt(x2 + y2); \
 323     dstT tmp = y >= 0 ? 0 : CV_PI * 2; \
 324     tmp = x < 0 ? CV_PI : tmp; \
 325     dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
 326     dstT cartToPolar = y2 <= x2 ? x * y / mad((dstT)(0.28f), y2, x2 + CV_EPSILON) + tmp : (tmp1 - x * y / mad((dstT)(0.28f), x2, y2 + CV_EPSILON)); \
 327     TO_DEGREE \
 328     storedst(magnitude); \
 329     storedst2(cartToPolar)
 330
 331 #elif defined OP_PTC_AD || defined OP_PTC_AR
     // Polar to Cartesian: src1 is read as the magnitude (named x below) and src2
     // as the angle (named y). cos(angle) * magnitude goes to the first
     // destination and sin(angle) * magnitude to the second.
     // OP_PTC_AD first scales the angle by CV_PI / 180 (degrees to radians).
 332 #ifdef OP_PTC_AD
 333 #define FROM_DEGREE \
 334     dstT ascale = CV_PI/180.0f; \
 335     dstT alpha = y * ascale
 336 #else
 337 #define FROM_DEGREE \
 338     dstT alpha = y
 339 #endif
 340 #define PROCESS_ELEM \
 341     dstT x = srcelem1, y = srcelem2; \
 342     FROM_DEGREE; \
 343     storedst(cos(alpha) * x); \
 344     storedst2(sin(alpha) * x)
 345
 346 #elif defined OP_PATCH_NANS
     // Overwrites an element with the extra kernel argument val when its bit
     // pattern, with the top bit masked off, is greater than 0x7f800000; other
     // elements are left untouched (nothing is stored for them).
     // NOTE(review): this matches the IEEE-754 single-precision NaN encodings, so
     // the build options presumably map the element types to int -- confirm.
 347 #undef EXTRA_PARAMS
 348 #define EXTRA_PARAMS , int val
 349 #define PROCESS_ELEM \
 350     if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
 351         storedst(val)
 352
 353 #else
     // None of the OP_* flags handled above was defined: fail the build.
 354 #error "unknown op type"
 355 #endif
 356
 357 #if defined OP_CTP_AD || defined OP_CTP_AR || defined OP_PTC_AD || defined OP_PTC_AR
         // The polar conversions write two outputs: add the second destination
         // (pointer, row step, offset) to the kernel signature and let EXTRA_INDEX
         // compute dst_index2, the byte offset used by storedst2.
 358     #undef EXTRA_PARAMS
 359     #define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
 360     #undef EXTRA_INDEX
 361     #define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
 362 #endif
 363
 364 #if defined UNARY_OP || defined MASK_UNARY_OP
     // Unary kernels have no second source image. For the two-operand operations
     // listed below the second operand becomes the scalar kernel argument
     // srcelem2_ of type workST, and srcelem2 is redirected to it.
     // NOTE(review): the OP_SAT_* names in the list have no branch in the
     // operation chain of this file -- possibly leftovers; confirm.
 365
 366 #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
 367     defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
 368     defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
 369     defined OP_MUL || defined OP_DIV || defined OP_POWN
 370     #undef EXTRA_PARAMS
 371     #define EXTRA_PARAMS , workST srcelem2_
 372     #undef srcelem2
 373     #define srcelem2 srcelem2_
 374 #endif
 375
 376 #if cn == 3
     // For 3-channel data the operand is rebuilt as a workT value from the
     // .x/.y/.z components of srcelem2_.
     // NOTE(review): workST is presumably a wider (4-component) vector type in
     // this case -- confirm against the host code.
 377 #undef srcelem2
 378 #define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
 379 #endif
 380
 381 #endif
 382
 383 #if defined BINARY_OP
 384
     // BINARY_OP kernel: applies PROCESS_ELEM to one element per work-item, reading
     // two source images and writing one destination image.
     //
     //   srcptr1/srcstep1/srcoffset1  first source: base pointer, row step and
     //                                start offset (all in bytes, the pointer is uchar)
     //   srcptr2/srcstep2/srcoffset2  second source, same layout
     //   dstptr/dststep/dstoffset     destination, same layout
     //   rows, cols                   work-items outside this range do nothing
     //   EXTRA_PARAMS                 operation-specific extra arguments (may be empty)
     //
     // Work-item (x, y) comes from global ids 0 and 1. The variable names x, y,
     // src1_index, src2_index and dst_index are referenced by the srcelem*/storedst*
     // macros and must not be renamed. PROCESS_ELEM has to stay inside the nested
     // block: some operations declare their own locals named x and y.
 385 __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
 386                  __global const uchar * srcptr2, int srcstep2, int srcoffset2,
 387                  __global uchar * dstptr, int dststep, int dstoffset,
 388                  int rows, int cols EXTRA_PARAMS )
 389 {
 390     int x = get_global_id(0);
 391     int y = get_global_id(1);
 392
 393     if (x < cols && y < rows)
 394     {
             // Byte offset of element (x, y): y * step + x * sizeof(channel) * cn + offset.
 395         int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
             // OP_RECIP_SCALE and OP_NOT never read the second source.
 396 #if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
 397         int src2_index = mad24(y, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
 398 #endif
 399         int dst_index  = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
             // Empty unless the operation needs a second destination index (dst_index2).
 400         EXTRA_INDEX;
 401
 402         PROCESS_ELEM;
 403     }
 404 }
 405
 406 #elif defined MASK_BINARY_OP
 407
     // MASK_BINARY_OP kernel: same as the binary kernel, but an element is processed
     // only where the mask is non-zero; masked-out destination elements are not written.
     //
     //   srcptr1/..., srcptr2/...     the two sources: base pointer, row step, offset (bytes)
     //   mask/maskstep/maskoffset     mask with one byte per element (indexed by x, not x * cn)
     //   dstptr/dststep/dstoffset     destination
     //   rows, cols                   work-items outside this range do nothing
     //   EXTRA_PARAMS                 operation-specific extra arguments (may be empty)
     //
     // EXTRA_INDEX is not expanded here, so operations whose PROCESS_ELEM uses
     // dst_index2 cannot be built in this mode. Local variable names are referenced
     // by the srcelem*/storedst macros and must not be renamed.
 408 __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
 409                  __global const uchar * srcptr2, int srcstep2, int srcoffset2,
 410                  __global const uchar * mask, int maskstep, int maskoffset,
 411                  __global uchar * dstptr, int dststep, int dstoffset,
 412                  int rows, int cols EXTRA_PARAMS )
 413 {
 414     int x = get_global_id(0);
 415     int y = get_global_id(1);
 416
 417     if (x < cols && y < rows)
 418     {
 419         int mask_index = mad24(y, maskstep, x + maskoffset);
 420         if( mask[mask_index] )
 421         {
                 // Byte offset of element (x, y): y * step + x * sizeof(channel) * cn + offset.
 422             int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
 423             int src2_index = mad24(y, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
 424             int dst_index  = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
 425
 426             PROCESS_ELEM;
 427         }
 428     }
 429 }
 430
 431 #elif defined UNARY_OP
 432
     // UNARY_OP kernel: applies PROCESS_ELEM to one element per work-item with a
     // single source image. For two-operand operations the second operand is the
     // scalar srcelem2_ supplied through EXTRA_PARAMS.
     //
     //   srcptr1/srcstep1/srcoffset1  source: base pointer, row step, offset (bytes)
     //   dstptr/dststep/dstoffset     destination, same layout
     //   rows, cols                   work-items outside this range do nothing
     //   EXTRA_PARAMS                 operation-specific extra arguments (may be empty)
     //
     // No src2_index is computed and EXTRA_INDEX is not expanded here. Local
     // variable names are referenced by the srcelem1/storedst macros and must not
     // be renamed.
 433 __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
 434                  __global uchar * dstptr, int dststep, int dstoffset,
 435                  int rows, int cols EXTRA_PARAMS )
 436 {
 437     int x = get_global_id(0);
 438     int y = get_global_id(1);
 439
 440     if (x < cols && y < rows)
 441     {
             // Byte offset of element (x, y): y * step + x * sizeof(channel) * cn + offset.
 442         int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
 443         int dst_index  = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
 444
 445         PROCESS_ELEM;
 446     }
 447 }
 448
 449 #elif defined MASK_UNARY_OP
 450
     // MASK_UNARY_OP kernel: the unary kernel restricted to elements whose mask byte
     // is non-zero; masked-out destination elements are not written.
     //
     //   srcptr1/srcstep1/srcoffset1  source: base pointer, row step, offset (bytes)
     //   mask/maskstep/maskoffset     mask with one byte per element (indexed by x, not x * cn)
     //   dstptr/dststep/dstoffset     destination
     //   rows, cols                   work-items outside this range do nothing
     //   EXTRA_PARAMS                 operation-specific extra arguments (may be empty)
     //
     // Local variable names are referenced by the srcelem1/storedst macros and must
     // not be renamed.
 451 __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
 452                  __global const uchar * mask, int maskstep, int maskoffset,
 453                  __global uchar * dstptr, int dststep, int dstoffset,
 454                  int rows, int cols EXTRA_PARAMS )
 455 {
 456     int x = get_global_id(0);
 457     int y = get_global_id(1);
 458
 459     if (x < cols && y < rows)
 460     {
 461         int mask_index = mad24(y, maskstep, x + maskoffset);
 462         if( mask[mask_index] )
 463         {
                 // Byte offset of element (x, y): y * step + x * sizeof(channel) * cn + offset.
 464             int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
 465             int dst_index  = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
 466
 467             PROCESS_ELEM;
 468         }
 469     }
 470 }
 471
 472 #else
 473
 474 #error "Unknown operation type"
 475
 476 #endif