modules/ocl/include/opencv2/ocl.hpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
   6 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
   7 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
   8 // Third party copyrights are property of their respective owners.
   9
  10 #ifndef __OPENCV_OCL_HPP__
  11 #define __OPENCV_OCL_HPP__
  12
  13 #include <memory>
  14 #include <vector>
  15
  16 #include "opencv2/core.hpp"
  17 #include "opencv2/imgproc.hpp"
  18 #include "opencv2/objdetect.hpp"
  19 #include "opencv2/ml.hpp"
  20
  21 namespace cv
  22 {
  23     namespace ocl
  24     {
  25         enum DeviceType
  26         {
  27             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  28             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  29             CVCL_DEVICE_TYPE_GPU         = (1 << 2),
  30             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  31             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
  32             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
  33         };
  34
  35         enum DevMemRW
  36         {
  37             DEVICE_MEM_R_W = 0,
  38             DEVICE_MEM_R_ONLY,
  39             DEVICE_MEM_W_ONLY
  40         };
  41
  42         enum DevMemType
  43         {
  44             DEVICE_MEM_DEFAULT = 0,
  45             DEVICE_MEM_AHP,         //alloc host pointer
  46             DEVICE_MEM_UHP,         //use host pointer
  47             DEVICE_MEM_CHP,         //copy host pointer
  48             DEVICE_MEM_PM           //persistent memory
  49         };
  50
  51         // these classes contain OpenCL runtime information
  52
  53         struct PlatformInfo;
  54
  55         struct DeviceInfo
  56         {
  57         public:
  58             int _id; // reserved, don't use it
  59
  60             DeviceType deviceType;
  61             std::string deviceProfile;
  62             std::string deviceVersion;
  63             std::string deviceName;
  64             std::string deviceVendor;
  65             int deviceVendorId;
  66             std::string deviceDriverVersion;
  67             std::string deviceExtensions;
  68
  69             size_t maxWorkGroupSize;
  70             std::vector<size_t> maxWorkItemSizes;
  71             int maxComputeUnits;
  72             size_t localMemorySize;
  73             size_t maxMemAllocSize;
  74
  75             int deviceVersionMajor;
  76             int deviceVersionMinor;
  77
  78             bool haveDoubleSupport;
  79             bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
  80             bool isIntelDevice;
  81
  82             std::string compilationExtraOptions;
  83
  84             const PlatformInfo* platform;
  85
  86             DeviceInfo();
  87         };
  88
        // Record describing a single OpenCL platform together with the
        // devices listed under it.
        struct PlatformInfo
        {
            int _id; // reserved, don't use it

            // Identification strings reported for the platform.
            std::string platformProfile;
            std::string platformVersion;
            std::string platformName;
            std::string platformVendor;
            // NOTE(review): the field name is misspelled ("Extensons"); it is a
            // public member, so renaming it would break existing users.
            std::string platformExtensons;

            // Numeric platform version, split into major and minor parts.
            int platformVersionMajor;
            int platformVersionMinor;

            // Devices belonging to this platform (pointers to const records).
            std::vector<const DeviceInfo*> devices;

            PlatformInfo();
        };
 106
        //////////////////////////////// Initialization & Info ////////////////////////
        // List of pointers to const platform descriptions.
        typedef std::vector<const PlatformInfo*> PlatformsInfo;

        // Fills "platforms" with the available OpenCL platforms.
        // NOTE(review): the meaning of the int return value is not visible in
        // this header (presumably the number of entries) -- confirm.
        CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);

        // List of pointers to const device descriptions.
        typedef std::vector<const DeviceInfo*> DevicesInfo;

        // Fills "devices" with the OpenCL devices matching "deviceType"
        // (a DeviceType value; GPU devices by default). "platform" defaults to NULL.
        // NOTE(review): presumably NULL means "search every platform" and the
        // return value is the number of devices found -- confirm.
        CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
                const PlatformInfo* platform = NULL);

        // selects the device to be used
        CV_EXPORTS void setDevice(const DeviceInfo* info);
 119
 120         enum FEATURE_TYPE
 121         {
 122             FEATURE_CL_DOUBLE = 1,
 123             FEATURE_CL_UNIFIED_MEM,
 124             FEATURE_CL_VER_1_2,
 125             FEATURE_CL_INTEL_DEVICE
 126         };
 127
 128         // Represents OpenCL context, interface
 129         class CV_EXPORTS Context
 130         {
 131         protected:
 132             Context() { }
 133             ~Context() { }
 134         public:
 135             static Context *getContext();
 136
 137             bool supportsFeature(FEATURE_TYPE featureType) const;
 138             const DeviceInfo& getDeviceInfo() const;
 139
 140             const void* getOpenCLContextPtr() const;
 141             const void* getOpenCLCommandQueuePtr() const;
 142             const void* getOpenCLDeviceIDPtr() const;
 143         };
 144
 145         inline const void *getClContextPtr()
 146         {
 147             return Context::getContext()->getOpenCLContextPtr();
 148         }
 149
 150         inline const void *getClCommandQueuePtr()
 151         {
 152             return Context::getContext()->getOpenCLCommandQueuePtr();
 153         }
 154
        // Free-function counterpart of Context::supportsFeature().
        // NOTE(review): presumably forwards to the current context -- confirm.
        CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType);

        // NOTE(review): presumably blocks until queued OpenCL work has completed
        // (cf. clFinish) -- confirm against the implementation.
        CV_EXPORTS void finish();
 158
 159         enum BINARY_CACHE_MODE
 160         {
 161             CACHE_NONE    = 0,        // do not cache OpenCL binary
 162             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode
 163             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode
 164             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // cache opencl binary
 165         };
        //! Enables or disables caching of OpenCL program binaries on the local disk.
        // After a program (*.cl files in opencl/ folder) is built at runtime, the
        // compiled OpenCL program can be cached automatically as a "path/*.clb"
        // binary file, which is reused the next time the OpenCV executable starts.
        //
        // This feature is enabled by default.
        // "mode" takes BINARY_CACHE_MODE values (CACHE_RELEASE by default);
        // "path" is the cache directory ("./" by default).
        CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");

        //! sets the directory where cached binaries are saved
        CV_EXPORTS void setBinaryPath(const char *path);
 176
 177         struct ProgramSource
 178         {
 179             const char* name;
 180             const char* programStr;
 181             const char* programHash;
 182
 183             // Cache in memory by name (should be unique). Caching on disk disabled.
 184             inline ProgramSource(const char* _name, const char* _programStr)
 185                 : name(_name), programStr(_programStr), programHash(NULL)
 186             {
 187             }
 188
 189             // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
 190             inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
 191                 : name(_name), programStr(_programStr), programHash(_programHash)
 192             {
 193             }
 194         };
 195
        //! Calls an OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
        //! Deprecated, will be replaced
        // NOTE(review): the sentence above mentions a "cleanUp" argument, but this
        // declaration has no such parameter -- the comment looks stale; confirm.
        // "args" holds (size, pointer) pairs, one per kernel argument.
        CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
                const cv::ocl::ProgramSource& source, String kernelName,
                size_t globalThreads[3], size_t localThreads[3],
                std::vector< std::pair<size_t, const void *> > &args,
                int channels, int depth, const char *build_options);
 203
        class CV_EXPORTS oclMatExpr;
        //////////////////////////////// oclMat ////////////////////////////////
        // Matrix whose data is an OpenCL memory object. The interface mirrors
        // cv::Mat; uploads to and downloads from the device are blocking.
        class CV_EXPORTS oclMat
        {
        public:
            //! default constructor
            oclMat();
            //! constructs an oclMat of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
            oclMat(int rows, int cols, int type);
            oclMat(Size size, int type);
            //! constructs an oclMat and fills it with the specified value s.
            oclMat(int rows, int cols, int type, const Scalar &s);
            oclMat(Size size, int type, const Scalar &s);
            //! copy constructor
            oclMat(const oclMat &m);

            //! constructor for oclMat headers pointing to user-allocated data
            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);

            //! creates a matrix header for a part of the bigger matrix
            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
            oclMat(const oclMat &m, const Rect &roi);

            //! builds an oclMat from a Mat. Performs a blocking upload to the device.
            explicit oclMat (const Mat &m);

            //! destructor - calls release()
            ~oclMat();

            //! assignment operators
            oclMat &operator = (const oclMat &m);
            //! assignment operator. Performs a blocking upload to the device.
            oclMat &operator = (const Mat &m);
            oclMat &operator = (const oclMatExpr& expr);

            //! performs a blocking upload of data to the oclMat.
            void upload(const cv::Mat &m);


            //! downloads data from device to host memory. Blocking calls.
            operator Mat() const;
            void download(cv::Mat &m) const;

            //! convert to _InputArray
            operator _InputArray();

            //! convert to _OutputArray
            operator _OutputArray();

            //! returns a new oclMat header for the specified row
            oclMat row(int y) const;
            //! returns a new oclMat header for the specified column
            oclMat col(int x) const;
            //! ... for the specified row span
            oclMat rowRange(int startrow, int endrow) const;
            oclMat rowRange(const Range &r) const;
            //! ... for the specified column span
            oclMat colRange(int startcol, int endcol) const;
            oclMat colRange(const Range &r) const;

            //! returns a deep copy of the oclMat, i.e. the data is copied
            oclMat clone() const;

            //! copies those oclMat elements to "m" that are marked with non-zero mask elements.
            // It calls m.create(this->size(), this->type()).
            // It supports any data type
            void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;

            //! converts the oclMat to another datatype with optional scaling. See cvConvertScale.
            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;

            void assignTo( oclMat &m, int type = -1 ) const;

            //! sets every oclMat element to s
            oclMat& operator = (const Scalar &s);
            //! sets some of the oclMat elements to s, according to the mask
            oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
            //! creates an alternative oclMat header for the same data, with a different
            // number of channels and/or a different number of rows. See cvReshape.
            oclMat reshape(int cn, int rows = 0) const;

            //! allocates new oclMat data unless the oclMat already has the specified size and type.
            // previous data is unreferenced if needed.
            void create(int rows, int cols, int type);
            void create(Size size, int type);

            //! allocates a new oclMat with the specified device memory type.
            void createEx(int rows, int cols, int type,
                          DevMemRW rw_type, DevMemType mem_type);
            void createEx(Size size, int type, DevMemRW rw_type,
                          DevMemType mem_type);

            //! decreases the reference counter;
            // deallocates the data when the reference counter reaches 0.
            void release();

            //! swaps with another oclMat
            void swap(oclMat &mat);

            //! locates the oclMat header within a parent oclMat. See below
            void locateROI( Size &wholeSize, Point &ofs ) const;
            //! moves/resizes the current oclMat ROI inside the parent oclMat.
            oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
            //! extracts a rectangular sub-matrix
            // (this is a generalized form of row, rowRange etc.)
            oclMat operator()( Range rowRange, Range colRange ) const;
            oclMat operator()( const Rect &roi ) const;

            //! compound assignment operators taking another oclMat
            oclMat& operator+=( const oclMat& m );
            oclMat& operator-=( const oclMat& m );
            oclMat& operator*=( const oclMat& m );
            oclMat& operator/=( const oclMat& m );

            //! returns true if the oclMat data is continuous
            // (i.e. when there are no gaps between successive rows).
            // similar to CV_IS_MAT_CONT(cvMat->type)
            bool isContinuous() const;
            //! returns element size in bytes,
            // similar to CV_ELEM_SIZE(cvMat->type)
            size_t elemSize() const;
            //! returns the size of element channel in bytes.
            size_t elemSize1() const;
            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
            int type() const;
            //! returns the element type as stored on the device, i.e. 8UC3 returns 8UC4
            //! because in ocl a 3-channel element actually uses 4-channel space
            int ocltype() const;
            //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
            int depth() const;
            //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
            int channels() const;
            //! returns the number of channels as stored on the device: 4 for a 3-channel element,
            //! because a 3-channel element actually uses 4-channel space
            int oclchannels() const;
            //! returns step/elemSize1()
            size_t step1() const;
            //! returns oclMat size:
            // width == number of columns, height == number of rows
            Size size() const;
            //! returns true if oclMat data is NULL
            bool empty() const;

            //! matrix transposition
            oclMat t() const;

            /*! includes several bit-fields:
              - the magic signature
              - continuity flag
              - depth
              - number of channels
              */
            int flags;
            //! the number of rows and columns
            int rows, cols;
            //! a distance between successive rows in bytes; includes the gap if any
            size_t step;
            //! pointer to the data (OCL memory object)
            uchar *data;

            //! pointer to the reference counter;
            // when the oclMat points to user-allocated data, the pointer is NULL
            int *refcount;

            //! helper fields used in locateROI and adjustROI
            // datastart and dataend are not used in the current version
            uchar *datastart;
            uchar *dataend;

            //! OpenCL context associated with the oclMat object.
            Context *clCxt; // TODO clCtx
            // offset used to handle ROI, in bytes
            int offset;
            // rows and columns of the whole matrix; datastart and dataend are no longer used
            int wholerows;
            int wholecols;
        };
 381
        // Convert InputArray/OutputArray to oclMat references.
        // NOTE(review): presumably only valid when the array actually wraps an
        // oclMat -- confirm against the implementation.
        CV_EXPORTS oclMat& getOclMatRef(InputArray src);
        CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 385
        ///////////////////// mat split and merge /////////////////////////////////
        //! Composes a multi-channel array from several single-channel arrays
        // Supports all types
        // The first overload takes "n" source matrices starting at "src".
        CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
        CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);

        //! Divides a multi-channel array into several single-channel arrays
        // Supports all types
        // NOTE(review): the pointer overload carries no element count; presumably
        // "dst" must point to at least src.channels() matrices -- confirm.
        CV_EXPORTS void split(const oclMat &src, oclMat *dst);
        CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
 396
 397         ////////////////////////////// Arithmetics ///////////////////////////////////
 398
        //! computes the weighted sum of two matrices (dst = src1 * alpha + src2 * beta + gama)
        // supports all data types
        CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);

        //! adds one matrix to another (dst = src1 + src2)
        // supports all data types
        CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
        //! adds scalar to a matrix (dst = src1 + s)
        // supports all data types
        CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());

        //! subtracts one matrix from another (dst = src1 - src2)
        // supports all data types
        CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
        //! subtracts scalar from a matrix (dst = src1 - s)
        // supports all data types
        CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());

        //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
        // supports all data types
        CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
        //! multiplies a matrix by a number (dst = scalar * src)
        // supports all data types
        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);

        //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
        // supports all data types
        CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
        //! computes element-wise quotient of a number and an array (dst = scale / src1)
        // supports all data types
        CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
 430
        //! computes element-wise minimum of the two arrays (dst = min(src1, src2))
        // supports all data types
        CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst);

        //! computes element-wise maximum of the two arrays (dst = max(src1, src2))
        // supports all data types
        CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst);

        //! compares elements of two arrays (dst = src1 <cmpop> src2)
        // supports all data types
        CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);

        //! transposes the matrix
        // supports all data types
        CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);

        //! computes element-wise absolute values of an array (dst = abs(src))
        // supports all data types
        CV_EXPORTS void abs(const oclMat &src, oclMat &dst);

        //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
        // supports all data types
        CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
        //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
        // supports all data types
        CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
 457
        //! computes mean value and standard deviation of the array elements
        // (this declaration has no mask parameter, so no element selection is possible)
        // supports all data types
        CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);

        //! computes norm of array
        // supports NORM_INF, NORM_L1, NORM_L2
        // supports all data types
        CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);

        //! computes norm of the difference between two arrays
        // supports NORM_INF, NORM_L1, NORM_L2
        // supports all data types
        CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);

        //! reverses the order of the rows, columns or both in a matrix
        // supports all types
        CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);

        //! computes sum of array elements
        // supports all types
        // NOTE(review): absSum and sqrSum presumably sum absolute values and
        // squares respectively -- confirm against the implementation.
        CV_EXPORTS Scalar sum(const oclMat &m);
        CV_EXPORTS Scalar absSum(const oclMat &m);
        CV_EXPORTS Scalar sqrSum(const oclMat &m);

        //! finds global minimum and maximum array elements and returns their values
        // supports all C1 types
        CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());

        //! finds global minimum and maximum array elements and returns their values with locations
        // supports all C1 types
        CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
                                  const oclMat &mask = oclMat());

        //! counts non-zero array elements
        // supports all types
        CV_EXPORTS int countNonZero(const oclMat &src);
 494
        //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
        // the destination array has the same depth as lut and the same number of channels as the source
        // supports 8UC1 and 8UC4 only
        CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);

        //! only 8UC1 and 256 bins are supported now
        CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
        //! only 8UC1 and 256 bins are supported now
        CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);

        //! only 8UC1 is supported now
        CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));

        //! bilateralFilter
        // supports 8UC1 8UC4
        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);

        //! Applies an adaptive bilateral filter to the input image
        //  Unlike the usual bilateral filter, which uses a fixed value for sigmaColor,
        //  the adaptive version calculates the local variance in the ksize neighborhood
        //  and uses it as sigmaColor for the value filtering. However, the local standard deviation is
        //  clamped to maxSigmaColor.
        //  supports 8UC1, 8UC3
        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor=20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
 519
        //! computes exponent of each matrix element (dst = e**src)
        // supports only CV_32FC1, CV_64FC1 type
        CV_EXPORTS void exp(const oclMat &src, oclMat &dst);

        //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
        // supports only CV_32FC1, CV_64FC1 type
        CV_EXPORTS void log(const oclMat &src, oclMat &dst);

        //! computes square root of each matrix element
        // supports only CV_32FC1, CV_64FC1 type
        CV_EXPORTS void sqrt(const oclMat &src, oclMat &dst);

        //! computes magnitude of each (x(i), y(i)) vector
        // supports only CV_32F, CV_64F type
        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);

        //! computes angle (angle(i)) of each (x(i), y(i)) vector
        // supports only CV_32F, CV_64F type
        // the angle is produced in degrees when angleInDegrees is true
        CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);

        //! raises every element of the input array to the power p
        // supports only CV_32F, CV_64F type
        CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);

        //! converts Cartesian coordinates to polar
        // supports only CV_32F CV_64F type
        CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);

        //! converts polar coordinates to Cartesian
        // supports only CV_32F CV_64F type
        CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 551
        //! performs per-element bit-wise inversion
        // supports all types
        CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);

        //! calculates per-element bit-wise disjunction of two arrays (or of an array and a scalar)
        // supports all types
        CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
        CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());

        //! calculates per-element bit-wise conjunction of two arrays (or of an array and a scalar)
        // supports all types
        CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
        CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());

        //! calculates per-element bit-wise "exclusive or" of two arrays (or of an array and a scalar)
        // supports all types
        CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());

        //! Logical operators
        // These return a new oclMat by value.
        CV_EXPORTS oclMat operator ~ (const oclMat &);
        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);


        //! Mathematics operators
        // These return an oclMatExpr, which can be assigned to an oclMat through
        // oclMat::operator=(const oclMatExpr&).
        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 583
        // Scratch sizes and buffers that can be handed to the convolve()
        // overload taking a ConvolveBuf.
        // NOTE(review): presumably lets repeated calls reuse the same device
        // buffers -- confirm against the implementation.
        struct CV_EXPORTS ConvolveBuf
        {
            Size result_size;
            Size block_size;
            Size user_block_size;
            Size dft_size;

            // spectrum buffers
            oclMat image_spect, templ_spect, result_spect;
            // block and result buffers
            oclMat image_block, templ_block, result_data;

            // sets up the buffer for the given image and template sizes
            void create(Size image_size, Size templ_size);
            static Size estimateBlockSize(Size result_size, Size templ_size);
        };
 597
        //! computes convolution of two images, may use discrete Fourier transform
        // supports only CV_32FC1 type
        // The second overload takes a caller-provided ConvolveBuf and has no default for ccorr.
        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);

        //! Performs a per-element multiplication of two Fourier spectrums.
        //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
        //! supports only CV_32FC2 type
        CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);

        //! converts an image from one color space to another, selected by "code"
        // NOTE(review): dcn is presumably the destination channel count, with 0
        // meaning "derive it from src and code" as in cv::cvtColor -- confirm.
        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0);

        //! initializes a scaled identity matrix
        CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));

        //! fills the output array with repeated copies of the input array
        CV_EXPORTS void repeat(const oclMat & src, int ny, int nx, oclMat & dst);
 615
 616         //////////////////////////////// Filter Engine ////////////////////////////////
 617
 618         /*!
 619           The Base Class for 1D or Row-wise Filters
 620
 621           This is the base class for linear or non-linear filters that process 1D data.
 622           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 623           */
 624         class CV_EXPORTS BaseRowFilter_GPU
 625         {
 626         public:
 627             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 628             virtual ~BaseRowFilter_GPU() {}
 629             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 630             int ksize, anchor, bordertype;
 631         };
 632
 633         /*!
 634           The Base Class for Column-wise Filters
 635
 636           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 637           Such filters are used for the "vertical" filtering parts in separable filters.
 638           */
 639         class CV_EXPORTS BaseColumnFilter_GPU
 640         {
 641         public:
 642             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 643             virtual ~BaseColumnFilter_GPU() {}
 644             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 645             int ksize, anchor, bordertype;
 646         };
 647
 648         /*!
 649           The Base Class for Non-Separable 2D Filters.
 650
 651           This is the base class for linear or non-linear 2D filters.
 652           */
 653         class CV_EXPORTS BaseFilter_GPU
 654         {
 655         public:
 656             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
 657                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
 658             virtual ~BaseFilter_GPU() {}
 659             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 660             Size ksize;
 661             Point anchor;
 662             int borderType;
 663         };
 664
 665         /*!
 666           The Base Class for Filter Engine.
 667
 668           The class can be used to apply an arbitrary filtering operation to an image.
 669           It contains all the necessary intermediate buffers.
               The type is abstract (apply() is pure virtual); instances are handed out as
               Ptr<FilterEngine_GPU> by the create*_GPU factory functions declared in this header.
 670           */
 671         class CV_EXPORTS FilterEngine_GPU
 672         {
 673         public:
 674             virtual ~FilterEngine_GPU() {}
 675
                 //! applies the filter to src and stores the result in dst
                 // roi defaults to Rect(0, 0, -1, -1).
                 // NOTE(review): that default presumably means "process the whole image" - confirm in the implementation.
 676             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
 677         };
 678
 679         //! returns the non-separable filter engine built from the specified 2D filter
             // filter2D is the BaseFilter_GPU primitive the engine is created with.
 680         CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
 681
 682         //! returns the primitive row filter with the specified kernel
             // rowKernel is the filter kernel; anchor defaults to -1 and bordertype to BORDER_DEFAULT.
             // NOTE(review): srcType / bufType are presumably the input type and the type of the intermediate
             // buffer the row pass writes to, and anchor = -1 presumably selects the kernel center - confirm.
 683         CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
 684                 int anchor = -1, int bordertype = BORDER_DEFAULT);
 685
 686         //! returns the primitive column filter with the specified kernel
             // columnKernel is the filter kernel; anchor defaults to -1, bordertype to BORDER_DEFAULT and delta to 0.0.
             // NOTE(review): bufType / dstType are presumably the type of the intermediate buffer the column pass
             // reads and the final output type; delta is presumably an offset added to the result - confirm.
 687         CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
 688                 int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 689
 690         //! returns the separable linear filter engine
             // rowKernel and columnKernel are the two 1D kernels of the separable filter.
             // Defaults: anchor = Point(-1, -1), delta = 0.0, bordertype = BORDER_DEFAULT.
             // NOTE(review): Point(-1, -1) presumably means "anchor at the kernel center" - confirm.
 691         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
 692                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 693
 694         //! returns the separable filter engine with the specified filters
             // rowFilter and columnFilter are the row-wise and column-wise filter primitives the engine is built from.
 695         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
 696                 const Ptr<BaseColumnFilter_GPU> &columnFilter);
 697
 698         //! returns the Gaussian filter engine
             // Defaults: sigma2 = 0, bordertype = BORDER_DEFAULT.
             // NOTE(review): sigma1 / sigma2 are presumably the standard deviations in x and y, with sigma2 = 0
             // meaning "same as sigma1" as in cv::GaussianBlur - confirm.
 699         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 700
 701         //! returns filter engine for the generalized Sobel operator
             // borderType defaults to BORDER_DEFAULT.
             // NOTE(review): dx / dy are presumably the derivative orders in x and y and ksize the aperture size - confirm.
 702         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 703
 704         //! applies Laplacian operator to the image
 705         // supports only ksize = 1 and ksize = 3
             // Defaults: ksize = 1, scale = 1, delta = 0, borderType = BORDER_DEFAULT.
 706         CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1,
 707                 double delta=0, int borderType=BORDER_DEFAULT);
 708
 709         //! returns 2D box filter
 710         // dst type must be the same as source type
             // Defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT.
 711         CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
 712                 const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 713
 714         //! returns box filter engine
             // Defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT.
 715         CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
 716                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 717
 718         //! returns 2D filter with the specified kernel
 719         // restriction: dst type must be the same as source type
             // Defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT.
 720         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
 721                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 722
 723         //! returns the non-separable linear filter engine
 724         // restriction: dst type must be the same as source type
             // Defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT.
 725         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
 726                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 727
 728         //! smooths the image using the normalized box filter
             // Defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT.
             // NOTE(review): ddepth is presumably the desired depth of dst, with -1 meaning "same as src" - confirm.
 729         CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
 730                                   Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 731
 732         //! returns 2D morphological filter
 733         //! only MORPH_ERODE and MORPH_DILATE are supported
 734         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 735         // kernel must be of type CV_8UC1 with a single row and cols == ksize.width * ksize.height
             // anchor defaults to Point(-1, -1).
 736         CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
 737                 Point anchor = Point(-1, -1));
 738
 739         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
             // Defaults: anchor = Point(-1, -1), iterations = 1.
 740         CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
 741                 const Point &anchor = Point(-1, -1), int iterations = 1);
 742
 743         //! a synonym for normalized box filter
 744         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 745                                 int borderType = BORDER_CONSTANT)
 746         {
 747             boxFilter(src, dst, -1, ksize, anchor, borderType);
 748         }
 749
 750         //! applies non-separable 2D linear filter to the image
             // kernel is a host-side Mat. Defaults: anchor = Point(-1, -1), delta = 0.0, borderType = BORDER_DEFAULT.
 751         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
 752                                  Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT);
 753
 754         //! applies separable 2D linear filter to the image
             // kernelX and kernelY are host-side Mats. Defaults: anchor = Point(-1, -1), delta = 0.0, bordertype = BORDER_DEFAULT.
 755         CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
 756                                     Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 757
 758         //! applies generalized Sobel operator to the image
 759         // dst.type must equal src.type
 760         // supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 761         // supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // Defaults: ksize = 3, scale = 1, delta = 0.0, bordertype = BORDER_DEFAULT.
 762         CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 763
 764         //! applies the vertical or horizontal Scharr operator to the image
 765         // dst.type must equal src.type
 766         // supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 767         // supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // Defaults: scale = 1, delta = 0.0, bordertype = BORDER_DEFAULT.
 768         CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 769
 770         //! smooths the image using Gaussian filter.
 771         // dst.type must equal src.type
 772         // supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 773         // supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // Defaults: sigma2 = 0, bordertype = BORDER_DEFAULT.
 774         CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 775
 776         //! erodes the image (applies the local minimum operator)
 777         // supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
             // kernel is a host-side Mat. Defaults: anchor = Point(-1, -1), iterations = 1,
             // borderType = BORDER_CONSTANT, borderValue = morphologyDefaultBorderValue().
 778         CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 779
 780                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 781
 782
 783         //! dilates the image (applies the local maximum operator)
 784         // supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
             // kernel is a host-side Mat. Defaults: anchor = Point(-1, -1), iterations = 1,
             // borderType = BORDER_CONSTANT, borderValue = morphologyDefaultBorderValue().
 785         CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 786
 787                                 int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 788
 789
 790         //! applies an advanced morphological operation to the image
             // op selects the operation; kernel is a host-side Mat. Defaults: anchor = Point(-1, -1), iterations = 1,
             // borderType = BORDER_CONSTANT, borderValue = morphologyDefaultBorderValue().
             // NOTE(review): the accepted values of op are not visible here (presumably the cv::MORPH_* constants) - confirm.
 791         CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 792
 793                                       int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 794
 795
 796         ////////////////////////////// Image processing //////////////////////////////
 797         //! Does mean shift filtering on GPU.
             // criteria defaults to TermCriteria(MAX_ITER + EPS, 5, 1).
             // NOTE(review): sp and sr are presumably the spatial and color window radii - confirm.
 798         CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
 799                                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 800
 801         //! Does mean shift procedure on GPU.
             // Produces two outputs, dstr and dstsp; criteria defaults to TermCriteria(MAX_ITER + EPS, 5, 1).
             // NOTE(review): dstr / dstsp presumably receive the filtered colors and the converged positions - confirm.
 802         CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
 803                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 804
 805         //! Does mean shift segmentation with elimination of small regions.
             // The result is written to a host-side Mat (dst); criteria defaults to TermCriteria(MAX_ITER + EPS, 5, 1).
             // NOTE(review): minsize is presumably the minimum region size that is kept - confirm.
 806         CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
 807                                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 808
 809         //! applies fixed threshold to the image.
 810         // supports CV_8UC1 and CV_32FC1 data types
 811         // supported threshold types: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
             // Note that type defaults to THRESH_TRUNC.
             // NOTE(review): the meaning of the returned double is not visible here (presumably the threshold used) - confirm.
 812         CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
 813
 814         //! resizes the image
 815         // supported interpolation modes: INTER_NEAREST, INTER_LINEAR
 816         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // Defaults: fx = 0, fy = 0, interpolation = INTER_LINEAR.
             // NOTE(review): presumably either dsize or the scale factors fx / fy determine the output size, as in cv::resize - confirm.
 817         CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 818
 819         //! Applies a generic geometrical transformation to an image.
 820
 821         // supported interpolation modes: INTER_NEAREST, INTER_LINEAR
 822         // map1 supports CV_16SC2 and CV_32FC2 types
 823         // src supports CV_8UC1, CV_8UC2 and CV_8UC4
             // interpolation and bordertype have no defaults; value defaults to Scalar().
 824         CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
 825
 826         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 827         // supports CV_8UC1, CV_8UC4, CV_32SC1 types
             // top / bottom / left / right give the border on each side; value defaults to Scalar().
             // NOTE(review): "boardtype" is presumably the border type (the parameter name looks like a misspelling of "bordertype") - confirm.
 828         CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
 829
 830         //! smooths the image using a median filter
 831         // src must be a 1- or 4-channel image of depth CV_8U or CV_32F; m should be 3 or 5
             // NOTE(review): m is presumably the aperture (window) size of the filter - confirm.
 832         CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 833
 834         //! warps the image using affine transformation
 835         // supported interpolation modes: INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 836         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // M is the host-side transformation matrix, dsize the output size; flags defaults to INTER_LINEAR.
 837         CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 838
 839         //! warps the image using perspective transformation
 840         // supported interpolation modes: INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 841         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // M is the host-side transformation matrix, dsize the output size; flags defaults to INTER_LINEAR.
 842         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 843
 844         //! computes the integral image and integral for the squared image
 845         // sum supports CV_32S and CV_32F; sqsum supports CV_32F and CV_64F
 846         // supports only CV_8UC1 source type
             // The second overload computes only sum. sdepth defaults to -1 in both overloads.
 847         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 );
 848         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 );
             // Corner detectors. bordertype defaults to cv::BORDER_DEFAULT in all four declarations.
             // NOTE(review): presumably the OpenCL counterparts of cv::cornerHarris / cv::cornerMinEigenVal; the *_dxdy
             // variants additionally take Dx and Dy matrices (presumably receiving the image derivatives) - confirm.
 849         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 850         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 851             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 852         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 853         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 854             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 855
 856
 857         /////////////////////////////////// ML ///////////////////////////////////////////
 858
 859         //! computes the closest center for each line of src and labels the line with that center's index
 860         // supports CV_32FC1/CV_32FC2/CV_32FC4 data types
 861         // supports NORM_L1 and NORM_L2 distType (NOTE(review): the declared default is NORM_L2SQR - confirm it is accepted)
 862         // if indices is provided, only the indexed rows will be calculated and their results are in the same
 863         // order of indices (NOTE(review): this signature has no `indices` parameter - the remark looks stale)
             // dists and labels are host-side output Mats.
 864         CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR);
 865
 866         //! Does k-means procedure on GPU
 867         // supports CV_32FC1/CV_32FC2/CV_32FC4 data types
             // K is the cluster count; bestLabels and centers are oclMat outputs.
             // NOTE(review): "attemps" (sic) is presumably the number of attempts; the meaning of the returned
             // double is not visible here (presumably the compactness, as in cv::kmeans) - confirm.
 868         CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
 869                                      TermCriteria criteria, int attemps, int flags, oclMat &centers);
 870
 871
 872         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 873         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 874         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             //! Cascade classifier derived from cv::CascadeClassifier that adds a detectMultiScale() taking an oclMat image.
 875         class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
 876         {
 877         public:
                 //! runs multi-scale detection on image and writes the detected rectangles to faces
                 // Defaults: scaleFactor = 1.1, minNeighbors = 3, flags = 0, minSize = Size(), maxSize = Size().
                 // NOTE(review): image is taken by non-const reference; whether it is modified is not visible here.
 878             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 879                 double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 880                 Size minSize = Size(), Size maxSize = Size());
 881         };
 882
 883         /////////////////////////////// Pyramid /////////////////////////////////////
             // NOTE(review): undocumented in the original; presumably smooths the source image and then
             // downsamples it (the counterpart of pyrUp) - confirm.
 884         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
 885
 886         //! upsamples the source image (src) and then smooths it, writing the result to dst
 887         CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
 888
 889         //! performs linear blending of two images
 890         //! to avoid accuracy errors the sum of weights shouldn't be very close to zero
 891         // supports only CV_8UC1 source type
             // img1 / img2 are blended using weights1 / weights2; the outcome is written to result.
 892         CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
 893
 894         //! computes vertical sum of src into sum; supports only CV_32FC1 images
 895         CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 896
 897         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
             //! Buffer set accepted by the matchTemplate() overload that takes a MatchTemplateBuf.
             // NOTE(review): the fields are presumably scratch storage reused between calls; how each one is
             // used is not visible in this header - confirm in the implementation.
 898         struct CV_EXPORTS MatchTemplateBuf
 899         {
 900             Size user_block_size;
 901             oclMat imagef, templf;
 902             std::vector<oclMat> images;
 903             std::vector<oclMat> image_sums;
 904             std::vector<oclMat> image_sqsums;
 905         };
 906
 907         //! computes the proximity map for the raster template and the image where the template is searched for
 908         // 8UC1 and 8UC4 inputs: TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED
 909         // 32FC1 and 32FC4 inputs: TM_SQDIFF, TM_CCORR only
             // templ is searched for in image; the map is written to result; method is one of the TM_* constants above.
 910         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
 911
 912         //! computes the proximity map for the raster template and the image where the template is searched for
 913         // 8UC1 and 8UC4 inputs: TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED
 914         // 32FC1 and 32FC4 inputs: TM_SQDIFF, TM_CCORR only
             // Same as the four-argument overload, but takes a caller-provided MatchTemplateBuf.
 915         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 916
 917
 918
 919         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
 920         struct CV_EXPORTS CannyBuf;
 921
 922         //! computes edges of the input image using the Canny operator
 923         // supports CV_8UC1 only
             // Four overloads: the input is either an image or a pair of matrices dx / dy, each with or without
             // a caller-provided CannyBuf. Defaults: apperture_size = 3 (image overloads), L2gradient = false.
             // NOTE(review): dx / dy are presumably precomputed image derivatives - confirm.
 924         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 925         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 926         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 927         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 928
 929         struct CV_EXPORTS CannyBuf
 930         {
 931             CannyBuf() : counter(1, 1, CV_32S) { }
 932             ~CannyBuf()
 933             {
 934                 release();
 935             }
 936             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S)
 937             {
 938                 create(image_size, apperture_size);
 939             }
 940             CannyBuf(const oclMat &dx_, const oclMat &dy_);
 941             void create(const Size &image_size, int apperture_size = 3);
 942             void release();
 943
 944             oclMat dx, dy;
 945             oclMat dx_buf, dy_buf;
 946             oclMat magBuf, mapBuf;
 947             oclMat trackBuf1, trackBuf2;
 948             oclMat counter;
 949             Ptr<FilterEngine_GPU> filterDX, filterDY;
 950         };
 951
 952         ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
 953         //! buffer set accepted by the HoughCircles() overload that takes a HoughCirclesBuf
             // Holds four oclMat members plus a CannyBuf.
             // NOTE(review): unlike the neighbouring structs this one is not marked CV_EXPORTS - confirm this is intentional.
 954         struct HoughCirclesBuf
 955         {
 956             oclMat edges;
 957             oclMat accum;
 958             oclMat srcPoints;
 959             oclMat centers;
 960             CannyBuf cannyBuf;
 961         };
 962
             //! HoughCircles: writes the circles found in src to the oclMat `circles`
             // The second overload additionally takes a caller-provided HoughCirclesBuf. maxCircles defaults to 4096.
             // HoughCirclesDownload takes d_circles (an oclMat) and fills the host-side h_circles.
             // NOTE(review): the exact meaning of method, dp, minDist and the thresholds is not visible here
             // (presumably as in cv::HoughCircles) - confirm.
 963         CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
 964         CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
 965         CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
 966
 967
 968         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
 969         //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
 970         //! Param dft_size is the size of DFT transform.
 971         //!
 972         //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
 973         // supported src types: CV_32FC1, CV_32FC2
 974         // supported flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
 975         // dft_size is the size of the original input, which is used for the transformation from complex to real.
 976         // dft_size must be powers of 2, 3 and 5
 977         // real-to-complex dft requires at least v1.8 of clAmdFft
 978         // real-to-complex dft output is not the same as that of the cpu version
 979         // real-to-complex and complex-to-real transforms do not support DFT_ROWS
             // Defaults: dft_size = Size(), flags = 0.
 980         CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
 981
 982         //! implements generalized matrix product algorithm GEMM from BLAS
 983         // The functionality requires the clAmdBlas library
 984         // supports only type CV_32FC1
 985         // flag GEMM_3_T is not supported
             // flags defaults to 0.
             // NOTE(review): presumably computes dst = alpha * src1 * src2 + beta * src3 as cv::gemm does - confirm.
 986         CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
 987                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
 988
 989         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 990
             //! HOG descriptor and object detector operating on oclMat images.
             // The public part exposes the descriptor settings and the detection entry points; the protected
             // part declares helper routines and the buffers kept between processing steps.
 991         struct CV_EXPORTS HOGDescriptor
 992
 993         {
 994
                 // default value of the constructor's win_sigma argument
 995             enum { DEFAULT_WIN_SIGMA = -1 };
 996
                 // default value of the constructor's nlevels argument
 997             enum { DEFAULT_NLEVELS = 64 };
 998
                 // descriptor layouts selectable through getDescriptors()'s descr_format argument
 999             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
1000
1001
1002
                 // Constructor. Defaults: 64x128 window, 16x16 block, 8x8 block stride, 8x8 cell, 9 bins,
                 // win_sigma = DEFAULT_WIN_SIGMA, threshold_L2hys = 0.2, gamma_correction = true, nlevels = DEFAULT_NLEVELS.
1003             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
1004
1005                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
1006
1007                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
1008
1009                           double threshold_L2hys = 0.2, bool gamma_correction = true,
1010
1011                           int nlevels = DEFAULT_NLEVELS);
1012
1013
1014
                 // size queries for the descriptor and for a single block histogram
1015             size_t getDescriptorSize() const;
1016
1017             size_t getBlockHistogramSize() const;
1018
1019
1020
                 // installs the SVM detector coefficients used by detect() / detectMultiScale()
                 // NOTE(review): presumably fills the protected `detector` / `free_coef` members - confirm.
1021             void setSVMDetector(const std::vector<float> &detector);
1022
1023
1024
                 // ready-made people detector coefficient vectors (suitable for setSVMDetector)
1025             static std::vector<float> getDefaultPeopleDetector();
1026
1027             static std::vector<float> getPeopleDetector48x96();
1028
1029             static std::vector<float> getPeopleDetector64x128();
1030
1031
1032
                 // single-scale detection; results are written to found_locations as points
                 // Defaults: hit_threshold = 0, win_stride = Size(), padding = Size().
1033             void detect(const oclMat &img, std::vector<Point> &found_locations,
1034
1035                         double hit_threshold = 0, Size win_stride = Size(),
1036
1037                         Size padding = Size());
1038
1039
1040
                 // multi-scale detection; results are written to found_locations as rectangles
                 // Defaults: hit_threshold = 0, win_stride = Size(), padding = Size(), scale0 = 1.05, group_threshold = 2.
1041             void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
1042
1043                                   double hit_threshold = 0, Size win_stride = Size(),
1044
1045                                   Size padding = Size(), double scale0 = 1.05,
1046
1047                                   int group_threshold = 2);
1048
1049
1050
                 // computes the descriptors of img into `descriptors`; descr_format defaults to DESCR_FORMAT_COL_BY_COL
1051             void getDescriptors(const oclMat &img, Size win_stride,
1052
1053                                 oclMat &descriptors,
1054
1055                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
1056
1057
1058
                 // Descriptor settings.
                 // NOTE(review): presumably initialized from the constructor arguments of the same names - confirm.
1059             Size win_size;
1060
1061             Size block_size;
1062
1063             Size block_stride;
1064
1065             Size cell_size;
1066
1067             int nbins;
1068
1069             double win_sigma;
1070
1071             double threshold_L2hys;
1072
1073             bool gamma_correction;
1074
1075             int nlevels;
1076
1077
1078
1079         protected:
1080
1081             // initialize buffers; only needs to be done once in case of multiscale detection
1082
1083             void init_buffer(const oclMat &img, Size win_stride);
1084
1085
1086
                 // internal processing steps: block histograms and image gradients (grad / qangle outputs)
1087             void computeBlockHistograms(const oclMat &img);
1088
1089             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1090
1091
1092
1093             double getWinSigma() const;
1094
1095             bool checkDetectorSize() const;
1096
1097
1098
                 // NOTE(review): presumably the number of parts of size part_size that fit into size when
                 // stepping by stride (scalar and per-dimension Size versions) - confirm.
1099             static int numPartsWithin(int size, int part_size, int stride);
1100
1101             static Size numPartsWithin(Size size, Size part_size, Size stride);
1102
1103
1104
1105             // Coefficients of the separating plane
1106
1107             float free_coef;
1108
1109             oclMat detector;
1110
1111
1112
1113             // Results of the last classification step
1114
1115             oclMat labels;
1116
1117             Mat labels_host;
1118
1119
1120
1121             // Results of the last histogram evaluation step
1122
1123             oclMat block_hists;
1124
1125
1126
1127             // Gradients computation results
1128
1129             oclMat grad, qangle;
1130
1131
1132
1133             // scaled image
1134
1135             oclMat image_scale;
1136
1137
1138
1139             // effective size of the input image (might be different from the original size after scaling)
1140
1141             Size effect_size;
1142
1143         };
1144
1145
1146         ////////////////////////feature2d_ocl/////////////////
1147         /****************************************************************************************\
1148         *                                      Distance                                          *
1149         \****************************************************************************************/
             //! Maps an element type T to the type exposed as Accumulator<T>::Type.
             // The primary template yields T itself; the specializations below yield float for
             // unsigned char, unsigned short, char and short. The L1 and L2 functors in this header
             // use Accumulator<T>::Type as their ResultType.
1150         template<typename T>
1151         struct CV_EXPORTS Accumulator
1152         {
1153             typedef T Type;
1154         };
             // 8-bit unsigned elements -> float
1155         template<> struct Accumulator<unsigned char>
1156         {
1157             typedef float Type;
1158         };
             // 16-bit unsigned elements -> float
1159         template<> struct Accumulator<unsigned short>
1160         {
1161             typedef float Type;
1162         };
             // char elements -> float
1163         template<> struct Accumulator<char>
1164         {
1165             typedef float Type;
1166         };
             // short elements -> float
1167         template<> struct Accumulator<short>
1168         {
1169             typedef float Type;
1170         };
1171
1172         /*
1173          * Manhattan distance (city block distance) functor
1174          */
1175         template<class T>
1176         struct CV_EXPORTS L1
1177         {
1178             enum { normType = NORM_L1 };
1179             typedef T ValueType;
1180             typedef typename Accumulator<T>::Type ResultType;
1181
1182             ResultType operator()( const T *a, const T *b, int size ) const
1183             {
1184                 return normL1<ValueType, ResultType>(a, b, size);
1185             }
1186         };
1187
1188         /*
1189          * Euclidean distance functor
1190          */
1191         template<class T>
1192         struct CV_EXPORTS L2
1193         {
1194             enum { normType = NORM_L2 };
1195             typedef T ValueType;
1196             typedef typename Accumulator<T>::Type ResultType;
1197
1198             ResultType operator()( const T *a, const T *b, int size ) const
1199             {
1200                 return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1201             }
1202         };
1203
1204         /*
1205          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1206          * bit count of A exclusive XOR'ed with B
1207          */
1208         struct CV_EXPORTS Hamming
1209         {
1210             enum { normType = NORM_HAMMING };
1211             typedef unsigned char ValueType;
1212             typedef int ResultType;
1213
1214             /** this will count the bits in a ^ b
1215              */
1216             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1217             {
1218                 return normHamming(a, b, size);
1219             }
1220         };
1221
1222         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1223
1224         class CV_EXPORTS BruteForceMatcher_OCL_base
1225         {
1226         public:
1227             enum DistType {L1Dist = 0, L2Dist, HammingDist};
1228             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1229
1230             // Add descriptors to train descriptor collection
1231             void add(const std::vector<oclMat> &descCollection);
1232
1233             // Get train descriptors collection
1234             const std::vector<oclMat> &getTrainDescriptors() const;
1235
1236             // Clear train descriptors collection
1237             void clear();
1238
1239             // Return true if there are not train descriptors in collection
1240             bool empty() const;
1241
1242             // Return true if the matcher supports mask in match methods
1243             bool isMaskSupported() const;
1244
1245             // Find one best match for each query descriptor
1246             void matchSingle(const oclMat &query, const oclMat &train,
1247                              oclMat &trainIdx, oclMat &distance,
1248                              const oclMat &mask = oclMat());
1249
1250             // Download trainIdx and distance and convert it to CPU vector with DMatch
1251             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1252             // Convert trainIdx and distance to vector with DMatch
1253             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1254
1255             // Find one best match for each query descriptor
1256             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1257
1258             // Make gpu collection of trains and masks in suitable format for matchCollection function
1259             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1260
1261             // Find one best match from train collection for each query descriptor
1262             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1263                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1264                                  const oclMat &masks = oclMat());
1265
1266             // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
1267             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1268             // Convert trainIdx, imgIdx and distance to vector with DMatch
1269             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1270
1271             // Find one best match from train collection for each query descriptor.
1272             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1273
1274             // Find k best matches for each query descriptor (in increasing order of distances)
1275             void knnMatchSingle(const oclMat &query, const oclMat &train,
1276                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1277                                 const oclMat &mask = oclMat());
1278
1279             // Download trainIdx and distance and convert it to vector with DMatch
1280             // compactResult is used when mask is not empty. If compactResult is false matches
1281             // vector will have the same size as queryDescriptors rows. If compactResult is true
1282             // matches vector will not contain matches for fully masked out query descriptors.
1283             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1284                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1285             // Convert trainIdx and distance to vector with DMatch
1286             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1287                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1288
1289             // Find k best matches for each query descriptor (in increasing order of distances).
1290             // compactResult is used when mask is not empty. If compactResult is false matches
1291             // vector will have the same size as queryDescriptors rows. If compactResult is true
1292             // matches vector will not contain matches for fully masked out query descriptors.
1293             void knnMatch(const oclMat &query, const oclMat &train,
1294                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1295                           bool compactResult = false);
1296
1297             // Find k best matches from train collection for each query descriptor (in increasing order of distances)
1298             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1299                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1300                                      const oclMat &maskCollection = oclMat());
1301
1302             // Download trainIdx and distance and convert it to vector with DMatch
1303             // compactResult is used when mask is not empty. If compactResult is false matches
1304             // vector will have the same size as queryDescriptors rows. If compactResult is true
1305             // matches vector will not contain matches for fully masked out query descriptors.
1306             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1307                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1308             // Convert trainIdx and distance to vector with DMatch
1309             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1310                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1311
1312             // Find k best matches  for each query descriptor (in increasing order of distances).
1313             // compactResult is used when mask is not empty. If compactResult is false matches
1314             // vector will have the same size as queryDescriptors rows. If compactResult is true
1315             // matches vector will not contain matches for fully masked out query descriptors.
1316             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1317                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1318
1319             // Find best matches for each query descriptor which have distance less than maxDistance.
1320             // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
1321             // Note that nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
1322             // because it didn't have enough memory.
1323             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1324             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1325             // Matches are not sorted.
1326             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1327                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1328                                    const oclMat &mask = oclMat());
1329
1330             // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
1331             // matches will be sorted in increasing order of distances.
1332             // compactResult is used when mask is not empty. If compactResult is false matches
1333             // vector will have the same size as queryDescriptors rows. If compactResult is true
1334             // matches vector will not contain matches for fully masked out query descriptors.
1335             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1336                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1337             // Convert trainIdx, nMatches and distance to vector with DMatch.
1338             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1339                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1340
1341             // Find best matches for each query descriptor which have distance less than maxDistance
1342             // (in increasing order of distances).
1343             void radiusMatch(const oclMat &query, const oclMat &train,
1344                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1345                              const oclMat &mask = oclMat(), bool compactResult = false);
1346
1347             // Find best matches for each query descriptor which have distance less than maxDistance.
1348             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
1349             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1350             // Matches are not sorted.
1351             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1352                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1353
1354             // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
1355             // matches will be sorted in increasing order of distances.
1356             // compactResult is used when mask is not empty. If compactResult is false matches
1357             // vector will have the same size as queryDescriptors rows. If compactResult is true
1358             // matches vector will not contain matches for fully masked out query descriptors.
1359             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1360                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1361             // Convert trainIdx, nMatches and distance to vector with DMatch.
1362             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1363                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1364
1365             // Find best matches from train collection for each query descriptor which have distance less than
1366             // maxDistance (in increasing order of distances).
1367             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1368                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1369
1370             DistType distType;
1371
1372         private:
1373             std::vector<oclMat> trainDescCollection;
1374         };
1375
//! Primary template of the brute-force matcher parameterised by a distance functor type.
//! It is declared only; the specializations for particular functors supply the definitions.
template <typename Distance>
class CV_EXPORTS BruteForceMatcher_OCL;
1378
1379         template <typename T>
1380         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1381         {
1382         public:
1383             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
1384             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1385         };
1386         template <typename T>
1387         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1388         {
1389         public:
1390             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
1391             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1392         };
1393         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1394         {
1395         public:
1396             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
1397             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1398         };
1399
1400         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1401         {
1402         public:
1403             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1404         };
1405
1406         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1407         {
1408         public:
1409             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1410                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1411
1412             //! return 1 rows matrix with CV_32FC2 type
1413             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1414             //! download points of type Point2f to a vector. the vector's content will be erased
1415             void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
1416
1417             int maxCorners;
1418             double qualityLevel;
1419             double minDistance;
1420
1421             int blockSize;
1422             bool useHarrisDetector;
1423             double harrisK;
1424             void releaseMemory()
1425             {
1426                 Dx_.release();
1427                 Dy_.release();
1428                 eig_.release();
1429                 minMaxbuf_.release();
1430                 tmpCorners_.release();
1431             }
1432         private:
1433             oclMat Dx_;
1434             oclMat Dy_;
1435             oclMat eig_;
1436             oclMat minMaxbuf_;
1437             oclMat tmpCorners_;
1438         };
1439
1440         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1441             int blockSize_, bool useHarrisDetector_, double harrisK_)
1442         {
1443             maxCorners = maxCorners_;
1444             qualityLevel = qualityLevel_;
1445             minDistance = minDistance_;
1446             blockSize = blockSize_;
1447             useHarrisDetector = useHarrisDetector_;
1448             harrisK = harrisK_;
1449         }
1450
1451         ////////////////////////////////// FAST Feature Detector //////////////////////////////////
1452         class CV_EXPORTS FAST_OCL
1453         {
1454         public:
1455             enum
1456             {
1457                 X_ROW = 0,
1458                 Y_ROW,
1459                 RESPONSE_ROW,
1460                 ROWS_COUNT
1461             };
1462
1463             // all features have same size
1464             static const int FEATURE_SIZE = 7;
1465
1466             explicit FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
1467
1468             //! finds the keypoints using FAST detector
1469             //! supports only CV_8UC1 images
1470             void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
1471             void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
1472
1473             //! download keypoints from device to host memory
1474             static void downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
1475
1476             //! convert keypoints to KeyPoint vector
1477             static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
1478
1479             //! release temporary buffer's memory
1480             void release();
1481
1482             bool nonmaxSupression;
1483
1484             int threshold;
1485
1486             //! max keypoints = keypointsRatio * img.size().area()
1487             double keypointsRatio;
1488
1489             //! find keypoints and compute it's response if nonmaxSupression is true
1490             //! return count of detected keypoints
1491             int calcKeyPointsLocation(const oclMat& image, const oclMat& mask);
1492
1493             //! get final array of keypoints
1494             //! performs nonmax supression if needed
1495             //! return final count of keypoints
1496             int getKeyPoints(oclMat& keypoints);
1497
1498         private:
1499             oclMat kpLoc_;
1500             int count_;
1501
1502             oclMat score_;
1503
1504             oclMat d_keypoints_;
1505
1506             int calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints);
1507             int nonmaxSupressionOCL(oclMat& keypoints);
1508         };
1509
1510         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1511
1512         class CV_EXPORTS PyrLKOpticalFlow
1513         {
1514         public:
1515             PyrLKOpticalFlow()
1516             {
1517                 winSize = Size(21, 21);
1518                 maxLevel = 3;
1519                 iters = 30;
1520                 derivLambda = 0.5;
1521                 useInitialFlow = false;
1522                 minEigThreshold = 1e-4f;
1523                 getMinEigenVals = false;
1524                 isDeviceArch11_ = false;
1525             }
1526
1527             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1528                         oclMat &status, oclMat *err = 0);
1529
1530             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1531
1532             Size winSize;
1533             int maxLevel;
1534             int iters;
1535             double derivLambda;
1536             bool useInitialFlow;
1537             float minEigThreshold;
1538             bool getMinEigenVals;
1539
1540             void releaseMemory()
1541             {
1542                 dx_calcBuf_.release();
1543                 dy_calcBuf_.release();
1544
1545                 prevPyr_.clear();
1546                 nextPyr_.clear();
1547
1548                 dx_buf_.release();
1549                 dy_buf_.release();
1550             }
1551
1552         private:
1553             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1554
1555             void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
1556
1557             oclMat dx_calcBuf_;
1558             oclMat dy_calcBuf_;
1559
1560             std::vector<oclMat> prevPyr_;
1561             std::vector<oclMat> nextPyr_;
1562
1563             oclMat dx_buf_;
1564             oclMat dy_buf_;
1565
1566             oclMat uPyr_[2];
1567             oclMat vPyr_[2];
1568
1569             bool isDeviceArch11_;
1570         };
1571
1572         class CV_EXPORTS FarnebackOpticalFlow
1573         {
1574         public:
1575             FarnebackOpticalFlow();
1576
1577             int numLevels;
1578             double pyrScale;
1579             bool fastPyramids;
1580             int winSize;
1581             int numIters;
1582             int polyN;
1583             double polySigma;
1584             int flags;
1585
1586             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
1587
1588             void releaseMemory();
1589
1590         private:
1591             void prepareGaussian(
1592                 int n, double sigma, float *g, float *xg, float *xxg,
1593                 double &ig11, double &ig03, double &ig33, double &ig55);
1594
1595             void setPolynomialExpansionConsts(int n, double sigma);
1596
1597             void updateFlow_boxFilter(
1598                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1599                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1600
1601             void updateFlow_gaussianBlur(
1602                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1603                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1604
1605             oclMat frames_[2];
1606             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1607             std::vector<oclMat> pyramid0_, pyramid1_;
1608         };
1609
1610         //////////////// build warping maps ////////////////////
1611         //! builds plane warping maps
1612         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1613         //! builds cylindrical warping maps
1614         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1615         //! builds spherical warping maps
1616         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1617         //! builds Affine warping maps
1618         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1619
1620         //! builds Perspective warping maps
1621         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1622
1623         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1624         //! Interpolate frames (images) using provided optical flow (displacement field).
1625         //! frame0   - frame 0 (32-bit floating point images, single channel)
1626         //! frame1   - frame 1 (the same type and size)
1627         //! fu       - forward horizontal displacement
1628         //! fv       - forward vertical displacement
1629         //! bu       - backward horizontal displacement
1630         //! bv       - backward vertical displacement
1631         //! pos      - new frame position
1632         //! newFrame - new frame
1633         //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
1634         //!            occlusion masks            0, occlusion masks            1,
1635         //!            interpolated forward flow  0, interpolated forward flow  1,
1636         //!            interpolated backward flow 0, interpolated backward flow 1
1637         //!
1638         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1639                                           const oclMat &fu, const oclMat &fv,
1640                                           const oclMat &bu, const oclMat &bv,
1641                                           float pos, oclMat &newFrame, oclMat &buf);
1642
1643         //! computes moments of the rasterized shape or a vector of points
1644         //! _array should be a vector a points standing for the contour
1645         CV_EXPORTS Moments ocl_moments(InputArray contour);
1646         //! src should be a general image uploaded to the GPU.
1647         //! the supported oclMat type are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1
1648         //! to use type of CV_64FC1, the GPU should support CV_64FC1
1649         CV_EXPORTS Moments ocl_moments(oclMat& src, bool binary);
1650
1651         class CV_EXPORTS StereoBM_OCL
1652         {
1653         public:
1654             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1655
1656             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1657
1658             //! the default constructor
1659             StereoBM_OCL();
1660             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
1661             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1662
1663             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
1664             //! Output disparity has CV_8U type.
1665             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1666
1667             //! Some heuristics that tries to estmate
1668             // if current GPU will be faster then CPU in this algorithm.
1669             // It queries current active device.
1670             static bool checkIfGpuCallReasonable();
1671
1672             int preset;
1673             int ndisp;
1674             int winSize;
1675
1676             // If avergeTexThreshold  == 0 => post procesing is disabled
1677             // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
1678             // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
1679             // i.e. input left image is low textured.
1680             float avergeTexThreshold;
1681         private:
1682             oclMat minSSD, leBuf, riBuf;
1683         };
1684
1685         class CV_EXPORTS StereoBeliefPropagation
1686         {
1687         public:
1688             enum { DEFAULT_NDISP  = 64 };
1689             enum { DEFAULT_ITERS  = 5  };
1690             enum { DEFAULT_LEVELS = 5  };
1691             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
1692             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1693                                              int iters  = DEFAULT_ITERS,
1694                                              int levels = DEFAULT_LEVELS,
1695                                              int msg_type = CV_16S);
1696             StereoBeliefPropagation(int ndisp, int iters, int levels,
1697                                     float max_data_term, float data_weight,
1698                                     float max_disc_term, float disc_single_jump,
1699                                     int msg_type = CV_32F);
1700             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1701             void operator()(const oclMat &data, oclMat &disparity);
1702             int ndisp;
1703             int iters;
1704             int levels;
1705             float max_data_term;
1706             float data_weight;
1707             float max_disc_term;
1708             float disc_single_jump;
1709             int msg_type;
1710         private:
1711             oclMat u, d, l, r, u2, d2, l2, r2;
1712             std::vector<oclMat> datas;
1713             oclMat out;
1714         };
1715
1716         class CV_EXPORTS StereoConstantSpaceBP
1717         {
1718         public:
1719             enum { DEFAULT_NDISP    = 128 };
1720             enum { DEFAULT_ITERS    = 8   };
1721             enum { DEFAULT_LEVELS   = 4   };
1722             enum { DEFAULT_NR_PLANE = 4   };
1723             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
1724             explicit StereoConstantSpaceBP(
1725                 int ndisp    = DEFAULT_NDISP,
1726                 int iters    = DEFAULT_ITERS,
1727                 int levels   = DEFAULT_LEVELS,
1728                 int nr_plane = DEFAULT_NR_PLANE,
1729                 int msg_type = CV_32F);
1730             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1731                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1732                 int min_disp_th = 0,
1733                 int msg_type = CV_32F);
1734             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1735             int ndisp;
1736             int iters;
1737             int levels;
1738             int nr_plane;
1739             float max_data_term;
1740             float data_weight;
1741             float max_disc_term;
1742             float disc_single_jump;
1743             int min_disp_th;
1744             int msg_type;
1745             bool use_local_init_data_cost;
1746         private:
1747             oclMat u[2], d[2], l[2], r[2];
1748             oclMat disp_selected_pyr[2];
1749             oclMat data_cost;
1750             oclMat data_cost_selected;
1751             oclMat temp;
1752             oclMat out;
1753         };
1754
1755         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1756         //
1757         // see reference:
1758         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1759         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
1760         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1761         {
1762         public:
1763             OpticalFlowDual_TVL1_OCL();
1764
1765             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1766
1767             void collectGarbage();
1768
1769             /**
1770             * Time step of the numerical scheme.
1771             */
1772             double tau;
1773
1774             /**
1775             * Weight parameter for the data term, attachment parameter.
1776             * This is the most relevant parameter, which determines the smoothness of the output.
1777             * The smaller this parameter is, the smoother the solutions we obtain.
1778             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1779             */
1780             double lambda;
1781
1782             /**
1783             * Weight parameter for (u - v)^2, tightness parameter.
1784             * It serves as a link between the attachment and the regularization terms.
1785             * In theory, it should have a small value in order to maintain both parts in correspondence.
1786             * The method is stable for a large range of values of this parameter.
1787             */
1788             double theta;
1789
1790             /**
1791             * Number of scales used to create the pyramid of images.
1792             */
1793             int nscales;
1794
1795             /**
1796             * Number of warpings per scale.
1797             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1798             * This is a parameter that assures the stability of the method.
1799             * It also affects the running time, so it is a compromise between speed and accuracy.
1800             */
1801             int warps;
1802
1803             /**
1804             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1805             * A small value will yield more accurate solutions at the expense of a slower convergence.
1806             */
1807             double epsilon;
1808
1809             /**
1810             * Stopping criterion iterations number used in the numerical scheme.
1811             */
1812             int iterations;
1813
1814             bool useInitialFlow;
1815
1816         private:
1817             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1818
1819             std::vector<oclMat> I0s;
1820             std::vector<oclMat> I1s;
1821             std::vector<oclMat> u1s;
1822             std::vector<oclMat> u2s;
1823
1824             oclMat I1x_buf;
1825             oclMat I1y_buf;
1826
1827             oclMat I1w_buf;
1828             oclMat I1wx_buf;
1829             oclMat I1wy_buf;
1830
1831             oclMat grad_buf;
1832             oclMat rho_c_buf;
1833
1834             oclMat p11_buf;
1835             oclMat p12_buf;
1836             oclMat p21_buf;
1837             oclMat p22_buf;
1838
1839             oclMat diff_buf;
1840             oclMat norm_buf;
1841         };
// sorting methods that are currently supported
enum
{
    SORT_BITONIC   = 0, // supports only power-of-2 buffer sizes
    SORT_SELECTION = 1, // cannot sort duplicate keys
    SORT_MERGE     = 2,
    SORT_RADIX     = 3  // supports only signed int/float keys (CV_32S/CV_32F)
};
1850         //! Returns the sorted result of all the elements in input based on equivalent keys.
1851         //
1852         //  The element unit in the values to be sorted is determined from the data type,
1853         //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless its
1854         //  matrix dimension.
1855         //  both keys and values will be sorted inplace
1856         //  Key needs to be single channel oclMat.
1857         //
1858         //  Example:
1859         //  input -
1860         //    keys   = {2,    3,   1}   (CV_8UC1)
1861         //    values = {10,5, 4,3, 6,2} (CV_8UC2)
1862         //  sortByKey(keys, values, SORT_SELECTION, false);
1863         //  output -
1864         //    keys   = {1,    2,   3}   (CV_8UC1)
1865         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
1866         CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
1867         /*!Base class for MOG and MOG2!*/
1868         class CV_EXPORTS BackgroundSubtractor
1869         {
1870         public:
1871             //! the virtual destructor
1872             virtual ~BackgroundSubtractor();
1873             //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
1874             virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
1875
1876             //! computes a background image
1877             virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
1878         };
1879                 /*!
1880         Gaussian Mixture-based Background/Foreground Segmentation Algorithm
1881
1882         The class implements the following algorithm:
1883         "An improved adaptive background mixture model for real-time tracking with shadow detection"
1884         P. KadewTraKuPong and R. Bowden,
1885         Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
1886         http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
1887         */
1888         class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
1889         {
1890         public:
1891             //! the default constructor (nmixtures = -1 presumably selects a built-in default mixture count - TODO confirm)
1892             MOG(int nmixtures = -1);
1893
1894             //! re-initialization method for the given frame size and frame type
1895             void initialize(Size frameSize, int frameType);
1896
1897             //! the update operator: processes the next frame and outputs the foreground mask to fgmask; learningRate defaults to 0.f
1898             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
1899
1900             //! computes a background image which is the mean of all background gaussians
1901             void getBackgroundImage(oclMat& backgroundImage) const;
1902
1903             //! releases all inner buffers
1904             void release();
1905
1906             int history; // public tuning parameter (undocumented here; presumably the number of frames kept as history - TODO confirm)
1907             float varThreshold; // public tuning parameter (undocumented here - TODO confirm its meaning for MOG)
1908             float backgroundRatio; // public tuning parameter (undocumented here - TODO confirm its meaning for MOG)
1909             float noiseSigma; // public tuning parameter (undocumented here - TODO confirm its meaning for MOG)
1910
1911         private:
1912             int nmixtures_; // mixture count (presumably derived from the constructor's nmixtures argument - TODO confirm)
1913
1914             Size frameSize_; // presumably the frameSize passed to initialize() - TODO confirm
1915             int frameType_; // presumably the frameType passed to initialize() - TODO confirm
1916             int nframes_; // presumably the number of frames processed so far - TODO confirm
1917
1918             oclMat weight_; // weight_, sortKey_, mean_, var_: oclMat model buffers (presumably the inner buffers freed by release() - TODO confirm)
1919             oclMat sortKey_;
1920             oclMat mean_;
1921             oclMat var_;
1922         };
1923
1924         /*!
1925         The class implements the following algorithm:
1926         "Improved adaptive Gaussian mixture model for background subtraction"
1927         Z.Zivkovic
1928         International Conference Pattern Recognition, UK, August, 2004.
1929         http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
1930         */
1931         class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
1932         {
1933         public:
1934             //! the default constructor (nmixtures = -1 presumably selects a built-in default mixture count - TODO confirm)
1935             MOG2(int nmixtures = -1);
1936
1937             //! re-initialization method for the given frame size and frame type
1938             void initialize(Size frameSize, int frameType);
1939
1940             //! the update operator: processes the next frame and outputs the foreground mask to fgmask; learningRate defaults to -1.0f
1941             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
1942
1943             //! computes a background image which is the mean of all background gaussians
1944             void getBackgroundImage(oclMat& backgroundImage) const;
1945
1946             //! releases all inner buffers
1947             void release();
1948
1949             // parameters
1950             // you should call initialize after changing parameters
1951
1952             int history;
1953
1954             // NOTE(review): the next sentence describes the mixture-count limit (presumably the nmixtures constructor argument), NOT varThreshold below:
1955             // it is the maximum allowed number of mixture components; the actual number is determined dynamically per pixel.
1956             float varThreshold;
1957             // varThreshold: threshold on the squared Mahalanobis distance to decide if it is well described
1958             // by the background model or not. Related to Cthr from the paper.
1959             // This does not influence the update of the background. A typical value could be 4 sigma
1960             // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
1961
1962             /////////////////////////
1963             // less important parameters - things you might change but be careful
1964             ////////////////////////
1965
1966             float backgroundRatio;
1967             // backgroundRatio: corresponds to fTB=1-cf from the paper
1968             // TB - threshold when the component becomes significant enough to be included into
1969             // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9.
1970             // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
1971             // it is considered foreground (NOTE(review): probably meant "background" - confirm against the paper)
1972             // float noiseSigma;
1973             float varThresholdGen;
1974
1975             // varThresholdGen: corresponds to Tg - threshold on the squared Mahalan. dist. to decide
1976             // when a sample is close to the existing components. If it is not close
1977             // to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
1978             // Smaller Tg leads to more generated components and higher Tg might
1979             // lead to small number of components but they can grow too large
1980             float fVarInit;
1981             float fVarMin;
1982             float fVarMax;
1983
1984             // fVarInit: initial variance for the newly generated components.
1985             // It will influence the speed of adaptation. A good guess should be made.
1986             // A simple way is to estimate the typical standard deviation from the images.
1987             // I used here 10 as a reasonable value
1988             // fVarMin and fVarMax: min and max can be used to further control the variance
1989             float fCT; //CT - complexity reduction prior
1990             // fCT: this is related to the number of samples needed to accept that a component
1991             // actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
1992             // the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
1993
1994             //shadow detection parameters
1995             bool bShadowDetection; //default 1 - do shadow detection
1996             unsigned char nShadowDetection; //when doing shadow detection, insert this value as the detection result - 127 default value
1997             float fTau;
1998             // fTau: Tau - shadow threshold. The shadow is detected if the pixel is a darker
1999             // version of the background. Tau is a threshold on how much darker the shadow can be.
2000             // Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
2001             // See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
2002
2003         private:
2004             int nmixtures_; // mixture count (presumably derived from the constructor's nmixtures argument - TODO confirm)
2005
2006             Size frameSize_; // presumably the frameSize passed to initialize() - TODO confirm
2007             int frameType_; // presumably the frameType passed to initialize() - TODO confirm
2008             int nframes_; // presumably the number of frames processed so far - TODO confirm
2009
2010             oclMat weight_; // weight_, variance_, mean_: oclMat model buffers (presumably the inner buffers freed by release() - TODO confirm)
2011             oclMat variance_;
2012             oclMat mean_;
2013
2014             oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
2015         };
2016
2017         /*!*************** Kalman Filter whose state and covariance matrices are stored as oclMat *************!*/
2018         class CV_EXPORTS KalmanFilter
2019         {
2020         public:
2021             KalmanFilter(); //!< default constructor (presumably leaves the filter unconfigured until init() is called - TODO confirm)
2022             //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector; type defaults to CV_32F
2023             KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
2024             //! re-initializes Kalman filter. The previous content is destroyed.
2025             void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
2026
2027             const oclMat& predict(const oclMat& control=oclMat()); //!< prediction step; control is optional and defaults to an empty oclMat (presumably returns statePre - TODO confirm)
2028             const oclMat& correct(const oclMat& measurement); //!< correction step using the given measurement (presumably returns statePost - TODO confirm)
2029
2030             oclMat statePre;           //!< predicted state (x'(k)): x'(k)=A*x(k-1)+B*u(k)
2031             oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
2032             oclMat transitionMatrix;   //!< state transition matrix (A)
2033             oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
2034             oclMat measurementMatrix;  //!< measurement matrix (H)
2035             oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
2036             oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
2037             oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
2038             oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
2039             oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
2040         private:
2041             oclMat temp1; // temp1..temp5: private scratch matrices (presumably intermediates of predict()/correct() - TODO confirm)
2042             oclMat temp2;
2043             oclMat temp3;
2044             oclMat temp4;
2045             oclMat temp5;
2046         };
2047
2048         /*!*************** K Nearest Neighbour: extends CvKNearest with an oclMat-based find_nearest *************!*/
2049         class CV_EXPORTS KNearestNeighbour: public CvKNearest
2050         {
2051         public:
2052             KNearestNeighbour();
2053             ~KNearestNeighbour();
2054
2055             bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
2056                 bool isRegression = false, int max_k = 32, bool updateBase = false); // NOTE(review): the sampleIdx default binds a non-const reference to a temporary Mat (via setTo's return value); it only lives for the duration of the call expression
2057
2058             void clear(); // presumably also releases samples_ocl - TODO confirm
2059
2060             void find_nearest(const oclMat& samples, int k, oclMat& lables); // looks up the k nearest neighbours of samples; NOTE(review): "lables" is a typo of "labels" (presumably the output labels - TODO confirm)
2061
2062         private:
2063             oclMat samples_ocl; // presumably the OpenCL-side copy of the training samples - TODO confirm
2064         };
2065
2066         /*!***************  SVM: extends CvSVM with its own predict overloads and kernel/solver creation hooks  *************!*/
2067         class CV_EXPORTS CvSVM_OCL : public CvSVM
2068         {
2069         public:
2070             CvSVM_OCL(); // default constructor
2071
2072             CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses,
2073                       const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
2074                       CvSVMParams params=CvSVMParams()); // constructor taking training data; presumably trains the model immediately - TODO confirm
2075             CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const; // presumably predicts for row row_index of src - TODO confirm
2076             CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const; // batch form: predictions for samples are written to results
2077             CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const; // single-sample form; returnDFVal presumably requests the decision-function value - TODO confirm
2078             float predict( const CvMat* samples, CV_OUT CvMat* results ) const; // legacy CvMat form; results is an output (CV_OUT)
2079
2080         protected:
2081             float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const; // internal variant that additionally takes the row length
2082             void create_kernel(); // presumably overrides the CvSVM kernel creation hook - TODO confirm
2083             void create_solver(); // presumably overrides the CvSVM solver creation hook - TODO confirm
2084         };
2085
2086         /*!***************  END  *************!*/
2087     }
2088 }
2089 #if defined _MSC_VER && _MSC_VER >= 1200
2090 #  pragma warning( push)
2091 #  pragma warning( disable: 4267)
2092 #endif
2093 #include "opencv2/ocl/matrix_operations.hpp"
2094 #if defined _MSC_VER && _MSC_VER >= 1200
2095 #  pragma warning( pop)
2096 #endif
2097
2098 #endif /* __OPENCV_OCL_HPP__ */