modules/ocl/include/opencv2/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core.hpp"
  51 #include "opencv2/imgproc.hpp"
  52 #include "opencv2/objdetect.hpp"
  53
  54 namespace cv
  55 {
  56     namespace ocl
  57     {
  58         enum // device-type bit flags; getDevice() takes one as its 'devicetype' argument
  59         {
  60             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  61             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  62             CVCL_DEVICE_TYPE_GPU         = (1 << 2), // default 'devicetype' of getDevice()
  63             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  64             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)   (currently disabled)
  65             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF // every bit set; NOTE(review): values appear to mirror OpenCL's CL_DEVICE_TYPE_* bitfield - confirm
  66         };
  67
  68         enum DevMemRW // read/write access mode of device memory; see getDevMemType()/setDevMemType()
  69         {
  70             DEVICE_MEM_R_W = 0,  // read-write; default 'rw_type' of setDevMemType()
  71             DEVICE_MEM_R_ONLY,   // read-only
  72             DEVICE_MEM_W_ONLY    // write-only
  73         };
  74
  75         enum DevMemType // kind of device memory allocation; see getDevMemType()/setDevMemType()/oclMat::createEx()
  76         {
  77             DEVICE_MEM_DEFAULT = 0, //default 'mem_type' of setDevMemType()
  78             DEVICE_MEM_AHP,         //alloc host pointer
  79             DEVICE_MEM_UHP,         //use host pointer
  80             DEVICE_MEM_CHP,         //copy host pointer
  81             DEVICE_MEM_PM           //persistent memory
  82         };
  83
  84         //Gets the global device memory type and read/write type (written to mem_type and rw_type).
  85         //Returns 1 if a unified memory system is supported, otherwise returns 0.
  86         CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
  87
  88         //Sets the global device memory type and read/write type;
  89         //every oclMat created afterwards will use these types.
  90         //Returns -1 if the target type is unsupported, otherwise returns 0.
  91         CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
  92
  93         //This class contains OpenCL runtime information; getDevice() fills a vector of Info objects.
  94         class CV_EXPORTS Info
  95         {
  96         public:
  97             struct Impl; // opaque implementation type, defined outside this header
  98             Impl *impl;  // NOTE(review): raw pointer; ownership is handled by the copy ctor/assignment/release() defined elsewhere - confirm
  99
 100             Info();
 101             Info(const Info &m);
 102             ~Info();
 103             void release();
 104             Info &operator = (const Info &m);
 105             std::vector<String> DeviceName; // device names; setDevice()'s 'devnum' is an index into this vector
 106             String PlatformName;            // presumably the name of the OpenCL platform this Info relates to - confirm
 107         };
 108         //////////////////////////////// Initialization & Info ////////////////////////
 109         //the commented-out declaration below may be obsolete
 110         //CV_EXPORTS cl_device_id getDevice();
 111         //This function must be called before any other cv::ocl function; it initializes the OpenCL runtime.
 112         //Each Info relates to one OpenCL platform.
 113         //Each platform has one or more devices, and each device has its own name.
 114         CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
 115
 116         //Selects the device to use. Optional; call it after getDevice().
 117         //devnum is the index of the selected device in the DeviceName vector of Info.
 118         CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
 119
 120         //The two functions below let other OpenCL programs use the ocl module's cl_context and cl_command_queue.
 121         //returns cl_context *
 122         CV_EXPORTS void* getoclContext();
 123         //returns cl_command_queue *
 124         CV_EXPORTS void* getoclCommandQueue();
 125
 126         //Explicitly calls clFinish. The global command queue is used.
 127         CV_EXPORTS void finish();
 128
 129         //This function lets the ocl module use a customized cl_context and cl_command_queue.
 130         //getDevice() must also be called before this function.
 131         CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
 132
 133         //Returns true when the global OpenCL context is initialized.
 134         CV_EXPORTS bool initialized();
 135
 136         //////////////////////////////// OpenCL context ////////////////////////
 137         //This is a global singleton class used to represent an OpenCL context.
 138         class CV_EXPORTS Context
 139         {
 140         protected:
 141             Context(); // not publicly constructible; obtain the instance through getContext()
 142             friend class std::auto_ptr<Context>; // NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17
 143             friend bool initialized();
 144         private:
 145             static std::auto_ptr<Context> clCxt; // holder of the singleton instance
 146             static int val;
 147         public:
 148             ~Context();
 149             void release();
 150             Info::Impl* impl; // same opaque implementation type that Info uses
 151
 152             static Context *getContext();
 153             static void setContext(Info &oclinfo);
 154
 155             enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; // presumably the feature ids accepted by supportsFeature() - confirm
 156             bool supportsFeature(int ftype);
 157             size_t computeUnits();
 158             size_t maxWorkGroupSize();
 159             void* oclContext();      // presumably a cl_context*, like getoclContext() - confirm
 160             void* oclCommandQueue(); // presumably a cl_command_queue*, like getoclCommandQueue() - confirm
 161         };
 162
 163         //! Calls a kernel given its source string(s). Pass globalThreads = NULL and cleanUp = true to clean up without executing.
 164         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 165                                                         const char **source, String kernelName,
 166                                                         size_t globalThreads[3], size_t localThreads[3],
 167                                                         std::vector< std::pair<size_t, const void *> > &args,
 168                                                         int channels, int depth, const char *build_options,
 169                                                         bool finish = true, bool measureKernelTime = false,
 170                                                         bool cleanUp = true);
 171
 172         //! Calls a kernel given its source file(s). Pass globalThreads = NULL and cleanUp = true to clean up without executing.
 173         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 174                                                         const char **fileName, const int numFiles, String kernelName,
 175                                                         size_t globalThreads[3], size_t localThreads[3],
 176                                                         std::vector< std::pair<size_t, const void *> > &args,
 177                                                         int channels, int depth, const char *build_options,
 178                                                         bool finish = true, bool measureKernelTime = false,
 179                                                         bool cleanUp = true);
 180
 181         //! Enables or disables caching of OpenCL program binaries on the local disk
 182         // After a program (*.cl files in opencl/ folder) is built at runtime, the
 183         // compiled OpenCL program can be cached automatically as a "path/*.clb"
 184         // binary file, which is reused the next time the OpenCV executable is started.
 185         //
 186         // The caching mode is controlled by the following enums
 187         // Notes
 188         //   1. the feature is enabled by default when OpenCV is built in release mode.
 189         //   2. the CACHE_DEBUG / CACHE_RELEASE flags only take effect with the MSVC compiler;
 190         //      for GNU compilers, the function always treats the build as release mode (enabled by default).
 191         enum
 192         {
 193             CACHE_NONE    = 0,        // do not cache OpenCL binary
 194             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only works with MSVC)
 195             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only works with MSVC)
 196             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache OpenCL binary
 197             CACHE_UPDATE  = 0x1 << 2  // if a binary cache file with the same name is already on the disk, it will be updated.
 198         };
 199         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 200
 201         //! sets the path where the binary cache is saved
 202         CV_EXPORTS void setBinpath(const char *path);
 203
 204         class CV_EXPORTS oclMatExpr; // forward declaration; used by oclMat::operator=(const oclMatExpr&) and the arithmetic operators
 205         //////////////////////////////// oclMat ////////////////////////////////
 206         class CV_EXPORTS oclMat
 207         {
 208         public:
 209             //! default constructor
 210             oclMat();
 211             //! constructs an oclMatrix of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 212             oclMat(int rows, int cols, int type);
 213             oclMat(Size size, int type);
 214             //! constructs an oclMatrix and fills it with the specified value s.
 215             oclMat(int rows, int cols, int type, const Scalar &s);
 216             oclMat(Size size, int type, const Scalar &s);
 217             //! copy constructor
 218             oclMat(const oclMat &m);
 219
 220             //! constructor for oclMatrix headers pointing to user-allocated data
 221             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 222             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 223
 224             //! creates a matrix header for a part of the bigger matrix
 225             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 226             oclMat(const oclMat &m, const Rect &roi);
 227
 228             //! builds an oclMat from a Mat. Performs a blocking upload to the device.
 229             explicit oclMat (const Mat &m);
 230
 231             //! destructor - calls release()
 232             ~oclMat();
 233
 234             //! assignment operators
 235             oclMat &operator = (const oclMat &m);
 236             //! assignment operator. Performs a blocking upload to the device.
 237             oclMat &operator = (const Mat &m);
 238             oclMat &operator = (const oclMatExpr& expr);
 239
 240             //! performs a blocking upload of data to the oclMat.
 241             void upload(const cv::Mat &m);
 242
 243
 244             //! downloads data from the device to host memory. Blocking calls.
 245             operator Mat() const;
 246             void download(cv::Mat &m) const;
 247
 248             //! convert to _InputArray
 249             operator _InputArray();
 250
 251             //! convert to _OutputArray
 252             operator _OutputArray();
 253
 254             //! returns a new oclMatrix header for the specified row
 255             oclMat row(int y) const;
 256             //! returns a new oclMatrix header for the specified column
 257             oclMat col(int x) const;
 258             //! ... for the specified row span
 259             oclMat rowRange(int startrow, int endrow) const;
 260             oclMat rowRange(const Range &r) const;
 261             //! ... for the specified column span
 262             oclMat colRange(int startcol, int endcol) const;
 263             oclMat colRange(const Range &r) const;
 264
 265             //! returns a deep copy of the oclMatrix, i.e. the data is copied
 266             oclMat clone() const;
 267             //! copies the oclMatrix content to "m".
 268             // It calls m.create(this->size(), this->type()).
 269             // It supports any data type
 270             void copyTo( oclMat &m ) const;
 271             //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
 272             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 273             void copyTo( oclMat &m, const oclMat &mask ) const;
 274             //! converts the oclMatrix to another datatype with optional scaling. See cvConvertScale.
 275             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 276             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 277
 278             void assignTo( oclMat &m, int type = -1 ) const;
 279
 280             //! sets every oclMatrix element to s
 281             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 282             oclMat& operator = (const Scalar &s);
 283             //! sets some of the oclMatrix elements to s, according to the mask
 284             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 285             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 286             //! creates an alternative oclMatrix header for the same data, with a different
 287             // number of channels and/or a different number of rows. See cvReshape.
 288             oclMat reshape(int cn, int rows = 0) const;
 289
 290             //! allocates new oclMatrix data unless the oclMatrix already has the specified size and type.
 291             // previous data is unreferenced if needed.
 292             void create(int rows, int cols, int type);
 293             void create(Size size, int type);
 294
 295             //! allocates a new oclMatrix with the specified device memory type.
 296             void createEx(int rows, int cols, int type,
 297                           DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
 298             void createEx(Size size, int type, DevMemRW rw_type,
 299                           DevMemType mem_type, void* hptr = 0);
 300
 301             //! decreases the reference counter;
 302             // deallocates the data when the reference counter reaches 0.
 303             void release();
 304
 305             //! swaps with other smart pointer
 306             void swap(oclMat &mat);
 307
 308             //! locates the oclMatrix header within a parent oclMatrix. See below
 309             void locateROI( Size &wholeSize, Point &ofs ) const;
 310             //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
 311             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 312             //! extracts a rectangular sub-oclMatrix
 313             // (this is a generalized form of row, rowRange etc.)
 314             oclMat operator()( Range rowRange, Range colRange ) const;
 315             oclMat operator()( const Rect &roi ) const;
 316
                 //! compound assignment with another oclMat; NOTE(review): presumably element-wise - confirm in the implementation
 317             oclMat& operator+=( const oclMat& m );
 318             oclMat& operator-=( const oclMat& m );
 319             oclMat& operator*=( const oclMat& m );
 320             oclMat& operator/=( const oclMat& m );
 321
 322             //! returns true if the oclMatrix data is continuous
 323             // (i.e. when there are no gaps between successive rows).
 324             // similar to CV_IS_MAT_CONT(cvmat->type)
 325             bool isContinuous() const;
 326             //! returns element size in bytes,
 327             // similar to CV_ELEM_SIZE(cvMat->type)
 328             size_t elemSize() const;
 329             //! returns the size of element channel in bytes.
 330             size_t elemSize1() const;
 331             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 332             int type() const;
 333             //! returns element type as used by ocl, e.g. 8UC3 returns 8UC4 because in ocl
 334             //! a 3-channel element actually uses 4-channel space
 335             int ocltype() const;
 336             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 337             int depth() const;
 338             //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
 339             int channels() const;
 340             //! returns the number of channels as used by ocl: returns 4 for a 3-channel element,
 341             //! because a 3-channel element actually uses 4-channel space
 342             int oclchannels() const;
 343             //! returns step/elemSize1()
 344             size_t step1() const;
 345             //! returns oclMatrix size:
 346             // width == number of columns, height == number of rows
 347             Size size() const;
 348             //! returns true if oclMatrix data is NULL
 349             bool empty() const;
 350
 351             //! returns pointer to y-th row
 352             uchar* ptr(int y = 0);
 353             const uchar *ptr(int y = 0) const;
 354
 355             //! template version of the above method
 356             template<typename _Tp> _Tp *ptr(int y = 0);
 357             template<typename _Tp> const _Tp *ptr(int y = 0) const;
 358
 359             //! matrix transposition
 360             oclMat t() const;
 361
 362             /*! includes several bit-fields:
 363               - the magic signature
 364               - continuity flag
 365               - depth
 366               - number of channels
 367               */
 368             int flags;
 369             //! the number of rows and columns
 370             int rows, cols;
 371             //! a distance between successive rows in bytes; includes the gap if any
 372             size_t step;
 373             //! pointer to the data (OCL memory object)
 374             uchar *data;
 375
 376             //! pointer to the reference counter;
 377             // when the oclMatrix points to user-allocated data, the pointer is NULL
 378             int *refcount;
 379
 380             //! helper fields used in locateROI and adjustROI
 381             //datastart and dataend are not used in the current version
 382             uchar *datastart;
 383             uchar *dataend;
 384
 385             //! OpenCL context associated with the oclMat object.
 386             Context *clCxt;
 387             //offset used to handle ROI, measured in bytes
 388             int offset;
 389             //wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
 390             int wholerows;
 391             int wholecols;
 392         };
 393
 394         // converts an InputArray/OutputArray to a reference to the underlying oclMat
 395         CV_EXPORTS oclMat& getOclMatRef(InputArray src);
 396         CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 397
 398         ///////////////////// mat split and merge /////////////////////////////////
 399         //! Composes a multi-channel array from several single-channel arrays
 400         // Supports all types
 401         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
 402         CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
 403
 404         //! Divides a multi-channel array into several single-channel arrays
 405         // Supports all types
 406         CV_EXPORTS void split(const oclMat &src, oclMat *dst);
 407         CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
 408
 409         ////////////////////////////// Arithmetics ///////////////////////////////////
 410         //#if defined DOUBLE_SUPPORT
 411         //typedef double F;
 412         //#else
 413         //typedef float F;
 414         //#endif
 415         //      CV_EXPORTS void addWeighted(const oclMat& a,F  alpha, const oclMat& b,F beta,F gama, oclMat& c);
             //! weighted sum of two matrices; NOTE(review): presumably c = a*alpha + b*beta + gama, as in cv::addWeighted - confirm
 416         CV_EXPORTS void addWeighted(const oclMat &a, double  alpha, const oclMat &b, double beta, double gama, oclMat &c);
 417         //! adds one matrix to another (c = a + b)
 418         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 419         CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
 420         //! adds one matrix to another (c = a + b), with a mask
 421         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 422         CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
 423         //! adds scalar to a matrix (c = a + s)
 424         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 425         CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
 426         //! subtracts one matrix from another (c = a - b)
 427         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 428         CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
 429         //! subtracts one matrix from another (c = a - b), with a mask
 430         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 431         CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
 432         //! subtracts scalar from a matrix (c = a - s)
 433         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 434         CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
 435         //! scalar-first overload; NOTE(review): presumably subtracts a matrix from a scalar (c = s - a), given the argument order - confirm
 436         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 437         CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
 438         //! computes element-wise product of the two arrays (c = a * b)
 439         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 440         CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
 441         //! multiplies a matrix by a number (dst = scalar * src)
 442         // supports CV_32FC1 only
 443         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 444         //! computes element-wise quotient of the two arrays (c = a / b)
 445         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 446         CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
 447         //! scalar-first overload; NOTE(review): presumably computes c = scale / b element-wise, given the signature - confirm
 448         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 449         CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
 450
 451         //! compares elements of two arrays (c = a <cmpop> b)
 452         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 453         CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
 454
 455         //! transposes the matrix
 456         // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1 (the same as cuda)
 457         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 458
 459         //! computes element-wise absolute difference of two arrays (c = abs(a - b))
 460         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 461         CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
 462         //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
 463         // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
 464         CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
 465
 466         //! computes mean value and standard deviation of all array elements
 467         // supports all types except CV_32F,CV_64F
 468         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 469
 470         //! computes norm of array
 471         // supports NORM_INF, NORM_L1, NORM_L2
 472         // supports only CV_8UC1 type
 473         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 474
 475         //! computes norm of the difference between two arrays
 476         // supports NORM_INF, NORM_L1, NORM_L2
 477         // supports only CV_8UC1 type
 478         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 479
 480         //! reverses the order of the rows, columns or both in a matrix
 481         // supports all types
 482         CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
 483
 484         //! computes sum of array elements
 485         // disabled until fix crash (NOTE(review): the declarations below are active - this remark may be stale)
 486         // supports all types
 487         CV_EXPORTS Scalar sum(const oclMat &m);
 488         CV_EXPORTS Scalar absSum(const oclMat &m);
 489         CV_EXPORTS Scalar sqrSum(const oclMat &m);
 490
 491         //! finds global minimum and maximum array elements and returns their values
 492         // supports all C1 types
 493
 494         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
 495         CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
 496
 497         //! finds global minimum and maximum array elements and returns their values with locations
 498         // supports all C1 types
 499
 500         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 501                                   const oclMat &mask = oclMat());
 502
 503         //! counts non-zero array elements
 504         // supports all types
 505         CV_EXPORTS int countNonZero(const oclMat &src);
 506
 507         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
 508         // the destination array will have the same depth as lut and the same number of channels as the source
 509         //It supports 8UC1 8UC4 only
 510         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 511
 512         //! only 8UC1 and 256 bins are supported now
 513         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 514         //! only 8UC1 and 256 bins are supported now
 515         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 516
 517         //! only 8UC1 is supported now
 518         CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 519
 520         //! bilateralFilter (NOTE(review): parameter name 'sigmaSpave' is presumably a typo for 'sigmaSpace')
 521         // supports 8UC1 8UC4
 522         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
 523         //! computes exponent of each matrix element (b = e**a)
 524         // supports only CV_32FC1 type
 525         CV_EXPORTS void exp(const oclMat &a, oclMat &b);
 526
 527         //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
 528         // supports only CV_32FC1 type
 529         CV_EXPORTS void log(const oclMat &a, oclMat &b);
 530
 531         //! computes magnitude of each (x(i), y(i)) vector
 532         // supports only CV_32F CV_64F type
 533         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 534         CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
 535
 536         CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
 537
 538         //! computes angle (angle(i)) of each (x(i), y(i)) vector
 539         // supports only CV_32F CV_64F type
 540         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 541
 542         //! raises every element of the input array to the power p
 543         //! supports only CV_32F CV_64F type
 544         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 545
 546         //! converts Cartesian coordinates to polar
 547         // supports only CV_32F CV_64F type
 548         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 549
 550         //! converts polar coordinates to Cartesian
 551         // supports only CV_32F CV_64F type
 552         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 553
 554         //! performs per-element bit-wise inversion
 555         // supports all types
 556         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 557         //! calculates per-element bit-wise disjunction of two arrays (or of an array and a scalar)
 558         // supports all types
 559         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 560         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 561         //! calculates per-element bit-wise conjunction of two arrays (or of an array and a scalar)
 562         // supports all types
 563         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 564         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 565         //! calculates per-element bit-wise "exclusive or" of two arrays (or of an array and a scalar)
 566         // supports all types
 567         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 568         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 569
 570         //! Logical operators; these return an oclMat directly
 571         CV_EXPORTS oclMat operator ~ (const oclMat &);
 572         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 573         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 574         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 575
 576
 577         //! Mathematics operators; these return an oclMatExpr, which can be assigned to an oclMat
 578         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 579         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 580         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 581         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 582
 583         struct CV_EXPORTS ConvolveBuf // buffer object accepted by the five-argument convolve() overload
 584         {
 585             Size result_size;
 586             Size block_size;
 587             Size user_block_size;
 588             Size dft_size;
 589
 590             oclMat image_spect, templ_spect, result_spect; // NOTE(review): presumably spectra of the image/template/result - confirm
 591             oclMat image_block, templ_block, result_data;
 592
 593             void create(Size image_size, Size templ_size);
 594             static Size estimateBlockSize(Size result_size, Size templ_size);
 595         };
 596
        //! computes convolution of two images, may use discrete Fourier transform
        //! supports only the CV_32FC1 type
        // the second overload takes a caller-provided ConvolveBuf; there ccorr has no default value
        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);

        //! Performs a per-element multiplication of two Fourier spectrums.
        //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
        // conjB defaults to false
        CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);

        //! converts src to dst according to the conversion code; dcn defaults to 0
        // NOTE(review): presumably mirrors cv::cvtColor -- confirm which conversion codes are supported
        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
 608
 609         //////////////////////////////// Filter Engine ////////////////////////////////
 610
 611         /*!
 612           The Base Class for 1D or Row-wise Filters
 613
 614           This is the base class for linear or non-linear filters that process 1D data.
 615           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 616           */
 617         class CV_EXPORTS BaseRowFilter_GPU
 618         {
 619         public:
 620             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 621             virtual ~BaseRowFilter_GPU() {}
 622             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 623             int ksize, anchor, bordertype;
 624         };
 625
 626         /*!
 627           The Base Class for Column-wise Filters
 628
 629           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 630           Such filters are used for the "vertical" filtering parts in separable filters.
 631           */
 632         class CV_EXPORTS BaseColumnFilter_GPU
 633         {
 634         public:
 635             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 636             virtual ~BaseColumnFilter_GPU() {}
 637             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 638             int ksize, anchor, bordertype;
 639         };
 640
 641         /*!
 642           The Base Class for Non-Separable 2D Filters.
 643
 644           This is the base class for linear or non-linear 2D filters.
 645           */
 646         class CV_EXPORTS BaseFilter_GPU
 647         {
 648         public:
 649             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
 650                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
 651             virtual ~BaseFilter_GPU() {}
 652             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 653             Size ksize;
 654             Point anchor;
 655             int borderType;
 656         };
 657
        /*!
          The Base Class for Filter Engine.

          The class can be used to apply an arbitrary filtering operation to an image.
          It contains all the necessary intermediate buffers.
          */
        class CV_EXPORTS FilterEngine_GPU
        {
        public:
            virtual ~FilterEngine_GPU() {}

            //! applies the filter to src and writes the result to dst; implemented by the concrete engine
            // roi defaults to Rect(0, 0, -1, -1)
            // NOTE(review): that default presumably means "the whole image" -- confirm in the implementations
            virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
        };
 671
        //! returns the non-separable filter engine with the specified filter
        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);

        //! returns the primitive row filter with the specified kernel
        // anchor defaults to -1, bordertype to BORDER_DEFAULT
        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
                int anchor = -1, int bordertype = BORDER_DEFAULT);

        //! returns the primitive column filter with the specified kernel
        // anchor defaults to -1, bordertype to BORDER_DEFAULT, delta to 0.0
        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);

        //! returns the separable linear filter engine
        // anchor defaults to Point(-1, -1), delta to 0.0, bordertype to BORDER_DEFAULT
        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! returns the separable filter engine with the specified filters
        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
                const Ptr<BaseColumnFilter_GPU> &columnFilter);

        //! returns the Gaussian filter engine
        // sigma2 defaults to 0, bordertype to BORDER_DEFAULT
        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);

        //! returns filter engine for the generalized Sobel operator
        // borderType defaults to BORDER_DEFAULT
        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 696
        //! applies the Laplacian operator to the image
        // supports only ksize = 1 and ksize = 3, and the 8UC1, 8UC4, 32FC1 and 32FC4 data types
        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);

        //! returns 2D box filter
        // supports CV_8UC1 and CV_8UC4 source types; the dst type must be the same as the source type
        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! returns box filter engine
        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! returns 2D filter with the specified kernel
        // supports CV_8UC1 and CV_8UC4 types
        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
                Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! returns the non-separable linear filter engine
        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! smooths the image using the normalized box filter
        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101, BORDER_WRAP
        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 724
        //! returns 2D morphological filter
        //! only MORPH_ERODE and MORPH_DILATE are supported
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        // kernel must be of type CV_8UC1 and have a single row with cols == ksize.width * ksize.height
        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
                Point anchor = Point(-1, -1));

        //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
        // anchor defaults to Point(-1, -1), iterations to 1
        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
                const Point &anchor = Point(-1, -1), int iterations = 1);
 735
 736         //! a synonym for normalized box filter
 737         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 738         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 739         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 740                                 int borderType = BORDER_CONSTANT)
 741         {
 742             boxFilter(src, dst, -1, ksize, anchor, borderType);
 743         }
 744
        //! applies non-separable 2D linear filter to the image
        //  Note: at the moment this function only works when the anchor point is in the kernel center
        //  and the kernel size is either 3x3 or 5x5; otherwise the function will fail to output a valid result
        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
                                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! applies separable 2D linear filter to the image
        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! applies generalized Sobel operator to the image
        // the dst type must equal the src type
        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! applies the vertical or horizontal Scharr operator to the image
        // the dst type must equal the src type
        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! smooths the image using Gaussian filter.
        // the dst type must equal the src type
        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);

        //! erodes the image (applies the local minimum operator)
        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,

                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());


        //! dilates the image (applies the local maximum operator)
        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,

                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());


        //! applies an advanced morphological operation (selected by op) to the image
        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,

                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 791
 792
 793         ////////////////////////////// Image processing //////////////////////////////
        //! Does mean shift filtering on GPU.
        // criteria defaults to at most 5 iterations or an epsilon of 1 (MAX_ITER + EPS)
        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));

        //! Does mean shift procedure on GPU.
        // writes two outputs, dstr and dstsp; criteria has the same default as above
        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));

        //! Does mean shift segmentation with elimination of small regions.
        // note: dst is a host-side Mat, not an oclMat
        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 805
        //! applies fixed threshold to the image.
        // supports CV_8UC1 and CV_32FC1 data types
        // supports threshold types: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
        // note: type defaults to THRESH_TRUNC
        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);

        //! resizes the image
        // supports INTER_NEAREST and INTER_LINEAR
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);

        //! Applies a generic geometrical transformation to an image.

        // Supports INTER_NEAREST and INTER_LINEAR.

        // map1 supports the CV_16SC2 and CV_32FC2 types.

        // src supports CV_8UC1, CV_8UC2 and CV_8UC4.

        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());

        //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
        // supports CV_8UC1, CV_8UC4 and CV_32SC1 types
        // the parameter spelled "boardtype" is the border type
        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
 829
        //! Smooths the image using a median filter
        // src is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);

        //! warps the image using affine transformation
        // supports INTER_NEAREST, INTER_LINEAR and INTER_CUBIC
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);

        //! warps the image using perspective transformation
        // supports INTER_NEAREST, INTER_LINEAR and INTER_CUBIC
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);

        //! computes the integral image and, in the three-argument overload, the integral of the squared image
        // sum will have CV_32S type, sqsum - CV_32F type
        // supports only the CV_8UC1 source type
        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
        CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
        // corner detectors; the *_dxdy variants additionally take Dx and Dy as non-const oclMat references
        // NOTE(review): Dx/Dy are presumably outputs holding the image derivatives -- confirm in the implementation
        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
        CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
            int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
        CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
            int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 855
 856         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 857         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 858         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        //! Cascade classifier derived from cv::CascadeClassifier that adds a detectMultiScale() taking an oclMat
        class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
        {
        public:
            //! runs multi-scale detection on image and stores the detected rectangles in faces
            // defaults: scaleFactor = 1.1, minNeighbors = 3, flags = 0, minSize and maxSize = Size()
            // note: image is taken by non-const reference
            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
                                  double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
                                  Size minSize = Size(), Size maxSize = Size());
        };
 866
 867         /////////////////////////////// Pyramid /////////////////////////////////////
        // NOTE(review): pyrDown has no description here; presumably the downsampling counterpart of pyrUp -- confirm
        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);

        //! upsamples the source image and then smooths it
        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);

        //! performs linear blending of two images
        //! to avoid accuracy errors the sum of the weights shouldn't be very close to zero
        // supports only the CV_8UC1 source type
        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);

        //! computes vertical sum, supports only CV_32FC1 images
        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 880
 881         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
        //! Buffers taken by the matchTemplate() overload that accepts a MatchTemplateBuf
        // NOTE(review): the roles of the individual members are not visible in this header -- see the implementation
        struct CV_EXPORTS MatchTemplateBuf
        {
            Size user_block_size;
            oclMat imagef, templf;
            std::vector<oclMat> images;
            std::vector<oclMat> image_sums;
            std::vector<oclMat> image_sqsums;
        };
 890
        //! computes the proximity map for the raster template and the image where the template is searched for
        // supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF and TM_CCOEFF_NORMED for types 8UC1 and 8UC4
        // supports TM_SQDIFF and TM_CCORR for types 32FC1 and 32FC4
        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);

        //! same as above, but takes a caller-provided MatchTemplateBuf
        // supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF and TM_CCOEFF_NORMED for types 8UC1 and 8UC4
        // supports TM_SQDIFF and TM_CCORR for types 32FC1 and 32FC4
        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 900
 901
 902
 903         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
        // forward declaration; the struct is defined after the Canny overloads below
        struct CV_EXPORTS CannyBuf;

        //! computes edges of the input image using the Canny operator
        // supports CV_8UC1 only
        // overloads: from an image or from a dx/dy pair, each with or without a caller-provided CannyBuf
        // NOTE(review): dx and dy are presumably precomputed x/y derivative images -- confirm
        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 912
        //! Buffers taken by the Canny() overloads that accept a CannyBuf
        // NOTE(review): the destructor calls release() and counter is a raw void*, yet no copy
        // constructor or copy assignment is declared. Confirm that copying a CannyBuf cannot make
        // release() act on the same counter twice (release() is defined out of line).
        struct CV_EXPORTS CannyBuf
        {
            //! creates an empty buffer set; counter starts as NULL
            CannyBuf() : counter(NULL) {}
            //! calls release()
            ~CannyBuf()
            {
                release();
            }
            //! sets counter to NULL, then calls create(image_size, apperture_size)
            explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
            {
                create(image_size, apperture_size);
            }
            //! defined out of line; takes a dx/dy pair
            CannyBuf(const oclMat &dx_, const oclMat &dy_);
            //! defined out of line; apperture_size defaults to 3
            void create(const Size &image_size, int apperture_size = 3);
            //! defined out of line; invoked by the destructor
            void release();

            oclMat dx, dy;
            oclMat dx_buf, dy_buf;
            oclMat magBuf, mapBuf;
            oclMat trackBuf1, trackBuf2;
            // untyped handle, NULL after construction by the two inline constructors above
            void *counter;
            Ptr<FilterEngine_GPU> filterDX, filterDY;
        };
 935
 936         ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
        //! Buffers taken by the HoughCircles() overload that accepts a HoughCirclesBuf
        // contains a CannyBuf by value
        struct HoughCirclesBuf
        {
            oclMat edges;
            oclMat accum;
            oclMat srcPoints;
            oclMat centers;
            CannyBuf cannyBuf;
        };
 946
        //! finds circles in src and writes them to the device matrix circles; maxCircles defaults to 4096
        // the second overload takes a caller-provided HoughCirclesBuf
        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
        //! takes the device-side circles (d_circles) and a host-side output array (h_circles)
        // NOTE(review): presumably downloads d_circles into h_circles -- confirm in the implementation
        CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
 950
 951
 952         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
        //! Param dft_size is the size of DFT transform.
        //!
        //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
        // supports src types CV_32FC1 and CV_32FC2
        // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
        // dft_size is the size of the original input, which is used for the transformation from complex to real.
        // dft_size must be powers of 2, 3 and 5
        // real-to-complex dft requires at least v1.8 of clAmdFft
        // real-to-complex dft output is not the same as that of the cpu version
        // real-to-complex and complex-to-real transforms do not support DFT_ROWS
        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);

        //! implements generalized matrix product algorithm GEMM from BLAS
        // this functionality requires the clAmdBlas library
        // supports only the CV_32FC1 type
        // flag GEMM_3_T is not supported
        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
 972
 973         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 974
 975         struct CV_EXPORTS HOGDescriptor
 976
 977         {
 978
 979             enum { DEFAULT_WIN_SIGMA = -1 };
 980
 981             enum { DEFAULT_NLEVELS = 64 };
 982
 983             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
 984
 985
 986
 987             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
 988
 989                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
 990
 991                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
 992
 993                           double threshold_L2hys = 0.2, bool gamma_correction = true,
 994
 995                           int nlevels = DEFAULT_NLEVELS);
 996
 997
 998
 999             size_t getDescriptorSize() const;
1000
1001             size_t getBlockHistogramSize() const;
1002
1003
1004
1005             void setSVMDetector(const std::vector<float> &detector);
1006
1007
1008
1009             static std::vector<float> getDefaultPeopleDetector();
1010
1011             static std::vector<float> getPeopleDetector48x96();
1012
1013             static std::vector<float> getPeopleDetector64x128();
1014
1015
1016
1017             void detect(const oclMat &img, std::vector<Point> &found_locations,
1018
1019                         double hit_threshold = 0, Size win_stride = Size(),
1020
1021                         Size padding = Size());
1022
1023
1024
1025             void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
1026
1027                                   double hit_threshold = 0, Size win_stride = Size(),
1028
1029                                   Size padding = Size(), double scale0 = 1.05,
1030
1031                                   int group_threshold = 2);
1032
1033
1034
1035             void getDescriptors(const oclMat &img, Size win_stride,
1036
1037                                 oclMat &descriptors,
1038
1039                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
1040
1041
1042
1043             Size win_size;
1044
1045             Size block_size;
1046
1047             Size block_stride;
1048
1049             Size cell_size;
1050
1051             int nbins;
1052
1053             double win_sigma;
1054
1055             double threshold_L2hys;
1056
1057             bool gamma_correction;
1058
1059             int nlevels;
1060
1061
1062
1063         protected:
1064
1065             // initialize buffers; only need to do once in case of multiscale detection
1066
1067             void init_buffer(const oclMat &img, Size win_stride);
1068
1069
1070
1071             void computeBlockHistograms(const oclMat &img);
1072
1073             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1074
1075
1076
1077             double getWinSigma() const;
1078
1079             bool checkDetectorSize() const;
1080
1081
1082
1083             static int numPartsWithin(int size, int part_size, int stride);
1084
1085             static Size numPartsWithin(Size size, Size part_size, Size stride);
1086
1087
1088
1089             // Coefficients of the separating plane
1090
1091             float free_coef;
1092
1093             oclMat detector;
1094
1095
1096
1097             // Results of the last classification step
1098
1099             oclMat labels;
1100
1101             Mat labels_host;
1102
1103
1104
1105             // Results of the last histogram evaluation step
1106
1107             oclMat block_hists;
1108
1109
1110
1111             // Gradients conputation results
1112
1113             oclMat grad, qangle;
1114
1115
1116
1117             // scaled image
1118
1119             oclMat image_scale;
1120
1121
1122
1123             // effect size of input image (might be different from original size after scaling)
1124
1125             Size effect_size;
1126
1127         };
1128
1129
1130         ////////////////////////feature2d_ocl/////////////////
1131         /****************************************************************************************\
1132         *                                      Distance                                          *
1133         \****************************************************************************************/
1134         template<typename T>
1135         struct CV_EXPORTS Accumulator
1136         {
1137             typedef T Type;
1138         };
1139         template<> struct Accumulator<unsigned char>
1140         {
1141             typedef float Type;
1142         };
1143         template<> struct Accumulator<unsigned short>
1144         {
1145             typedef float Type;
1146         };
1147         template<> struct Accumulator<char>
1148         {
1149             typedef float Type;
1150         };
1151         template<> struct Accumulator<short>
1152         {
1153             typedef float Type;
1154         };
1155
1156         /*
1157          * Manhattan distance (city block distance) functor
1158          */
1159         template<class T>
1160         struct CV_EXPORTS L1
1161         {
1162             enum { normType = NORM_L1 };
1163             typedef T ValueType;
1164             typedef typename Accumulator<T>::Type ResultType;
1165
1166             ResultType operator()( const T *a, const T *b, int size ) const
1167             {
1168                 return normL1<ValueType, ResultType>(a, b, size);
1169             }
1170         };
1171
1172         /*
1173          * Euclidean distance functor
1174          */
1175         template<class T>
1176         struct CV_EXPORTS L2
1177         {
1178             enum { normType = NORM_L2 };
1179             typedef T ValueType;
1180             typedef typename Accumulator<T>::Type ResultType;
1181
1182             ResultType operator()( const T *a, const T *b, int size ) const
1183             {
1184                 return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1185             }
1186         };
1187
1188         /*
1189          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1190          * bit count of A exclusive XOR'ed with B
1191          */
1192         struct CV_EXPORTS Hamming
1193         {
1194             enum { normType = NORM_HAMMING };
1195             typedef unsigned char ValueType;
1196             typedef int ResultType;
1197
1198             /** this will count the bits in a ^ b
1199              */
1200             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1201             {
1202                 return normHamming(a, b, size);
1203             }
1204         };
1205
1206         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1207
             // Base class of the OpenCL brute-force descriptor matchers.
             // It declares three families of queries (single best match, k best matches, radius matches),
             // each against either one explicit train matrix or the stored train descriptor collection.
             // For every family there is a call that writes its results into oclMat outputs, static
             // *Download / *Convert helpers that turn those results into DMatch vectors, and a convenience
             // overload that returns DMatch vectors directly.
             // The distance measure is selected through the public distType member.
1208         class CV_EXPORTS BruteForceMatcher_OCL_base
1209         {
1210         public:
                 // Supported distance measures.
1211             enum DistType {L1Dist = 0, L2Dist, HammingDist};
                 // Construct a matcher using the given distance measure (L2Dist by default).
1212             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1213
1214             // Add descriptors to the train descriptor collection
1215             void add(const std::vector<oclMat> &descCollection);
1216
1217             // Get the train descriptor collection
1218             const std::vector<oclMat> &getTrainDescriptors() const;
1219
1220             // Clear the train descriptor collection
1221             void clear();
1222
1223             // Return true if there are no train descriptors in the collection
1224             bool empty() const;
1225
1226             // Return true if the matcher supports a mask in the match methods
1227             bool isMaskSupported() const;
1228
1229             // Find one best match for each query descriptor
1230             void matchSingle(const oclMat &query, const oclMat &train,
1231                              oclMat &trainIdx, oclMat &distance,
1232                              const oclMat &mask = oclMat());
1233
1234             // Download trainIdx and distance and convert them to a CPU vector of DMatch
1235             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1236             // Convert trainIdx and distance to a vector of DMatch
1237             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1238
1239             // Find one best match for each query descriptor
1240             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1241
1242             // Make a gpu collection of trains and masks in a format suitable for the matchCollection function
1243             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1244
1245             // Find one best match from the train collection for each query descriptor
1246             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1247                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1248                                  const oclMat &masks = oclMat());
1249
1250             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
1251             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1252             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1253             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1254
1255             // Find one best match from the train collection for each query descriptor.
1256             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1257
1258             // Find the k best matches for each query descriptor (in increasing order of distances)
1259             void knnMatchSingle(const oclMat &query, const oclMat &train,
1260                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1261                                 const oclMat &mask = oclMat());
1262
1263             // Download trainIdx and distance and convert them to a vector of DMatch.
1264             // compactResult is used when the mask is not empty. If compactResult is false, the matches
1265             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1266             // matches vector will not contain matches for fully masked-out query descriptors.
1267             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1268                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1269             // Convert trainIdx and distance to a vector of DMatch
1270             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1271                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1272
1273             // Find the k best matches for each query descriptor (in increasing order of distances).
1274             // compactResult is used when the mask is not empty. If compactResult is false, the matches
1275             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1276             // matches vector will not contain matches for fully masked-out query descriptors.
1277             void knnMatch(const oclMat &query, const oclMat &train,
1278                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1279                           bool compactResult = false);
1280
1281             // Find the k best matches from the train collection for each query descriptor (in increasing order of distances)
1282             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1283                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1284                                      const oclMat &maskCollection = oclMat());
1285
1286             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch.
1287             // compactResult is used when the mask is not empty. If compactResult is false, the matches
1288             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1289             // matches vector will not contain matches for fully masked-out query descriptors.
1290             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1291                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1292             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1293             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1294                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1295
1296             // Find the k best matches for each query descriptor (in increasing order of distances).
1297             // compactResult is used when the mask is not empty. If compactResult is false, the matches
1298             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1299             // matches vector will not contain matches for fully masked-out query descriptors.
1300             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1301                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1302
1303             // Find the best matches for each query descriptor which have a distance less than maxDistance.
1304             // nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
1305             // Be careful: nMatches can be greater than trainIdx.cols - this means that the matcher did not
1306             // find all matches because it did not have enough memory.
1307             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1308             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1309             // Matches are not sorted.
1310             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1311                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1312                                    const oclMat &mask = oclMat());
1313
1314             // Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
1315             // matches will be sorted in increasing order of distances.
1316             // compactResult is used when the mask is not empty. If compactResult is false, the matches
1317             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1318             // matches vector will not contain matches for fully masked-out query descriptors.
1319             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1320                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1321             // Convert trainIdx, nMatches and distance to a vector of DMatch.
1322             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1323                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1324
1325             // Find the best matches for each query descriptor which have a distance less than maxDistance
1326             // (in increasing order of distances).
1327             void radiusMatch(const oclMat &query, const oclMat &train,
1328                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1329                              const oclMat &mask = oclMat(), bool compactResult = false);
1330
1331             // Find the best matches for each query descriptor which have a distance less than maxDistance.
1332             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
1333             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1334             // Matches are not sorted.
1335             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1336                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1337
1338             // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
1339             // matches will be sorted in increasing order of distances.
1340             // compactResult is used when the mask is not empty. If compactResult is false, the matches
1341             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1342             // matches vector will not contain matches for fully masked-out query descriptors.
1343             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1344                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1345             // Convert trainIdx, imgIdx, nMatches and distance to a vector of DMatch.
1346             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1347                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1348
1349             // Find the best matches from the train collection for each query descriptor which have a distance
1350             // less than maxDistance (in increasing order of distances).
1351             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1352                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1353
                 // Distance measure used by this matcher (public, may be changed after construction).
1354             DistType distType;
1355
1356         private:
                 // Train descriptor collection managed through add() / getTrainDescriptors() / clear().
1357             std::vector<oclMat> trainDescCollection;
1358         };
1359
             // Distance-functor-typed front end for BruteForceMatcher_OCL_base.
             // Only the specializations below are defined; each one maps its distance functor type
             // (L1<T>, L2<T> or Hamming) to the corresponding DistType of the base class.
1360         template <class Distance>
1361         class CV_EXPORTS BruteForceMatcher_OCL;
1362
             // Specialization for the L1<T> functor: constructs the base with L1Dist.
1363         template <typename T>
1364         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1365         {
1366         public:
1367             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
                 // The functor argument is ignored; it only selects this overload.
1368             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1369         };
             // Specialization for the L2<T> functor: constructs the base with L2Dist.
1370         template <typename T>
1371         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1372         {
1373         public:
1374             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
                 // The functor argument is ignored; it only selects this overload.
1375             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1376         };
             // Specialization for the Hamming functor: constructs the base with HammingDist.
1377         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1378         {
1379         public:
1380             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
                 // The functor argument is ignored; it only selects this overload.
1381             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1382         };
1383
             // Brute-force matcher whose distance is chosen at run time from a norm constant.
1384         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1385         {
1386         public:
                 // NORM_L1 selects L1Dist and NORM_L2 selects L2Dist; every other value of norm
                 // (not only NORM_HAMMING) falls through to HammingDist.
1387             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1388         };
1389
             // OpenCL corner detector functor. The detection parameters are public data members that
             // are set by the constructor and can be modified between calls; intermediate oclMat buffers
             // are kept as private members and can be dropped with releaseMemory().
1390         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1391         {
1392         public:
                 // Stores the given parameters in the public members of the same name (defined inline below the class).
1393             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1394                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1395
1396             //! detects corners in image; corners is returned as a 1-row matrix of type CV_32FC2
1397             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1398             //! downloads points of type Point2f to a vector. The vector's previous content will be erased
1399             void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
1400
                 // Detection parameters (see the constructor for their default values).
1401             int maxCorners;
1402             double qualityLevel;
1403             double minDistance;
1404
1405             int blockSize;
1406             bool useHarrisDetector;
1407             double harrisK;
                 // Releases all internal buffers held by this object.
1408             void releaseMemory()
1409             {
1410                 Dx_.release();
1411                 Dy_.release();
1412                 eig_.release();
1413                 minMaxbuf_.release();
1414                 tmpCorners_.release();
1415             }
1416         private:
                 // Internal working buffers. NOTE(review): judging by the names these hold x/y derivatives,
                 // the eigenvalue/response map, a min-max scratch buffer and temporary corners - confirm in the implementation.
1417             oclMat Dx_;
1418             oclMat Dy_;
1419             oclMat eig_;
1420             oclMat minMaxbuf_;
1421             oclMat tmpCorners_;
1422         };
1423
1424         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1425             int blockSize_, bool useHarrisDetector_, double harrisK_)
1426         {
1427             maxCorners = maxCorners_;
1428             qualityLevel = qualityLevel_;
1429             minDistance = minDistance_;
1430             blockSize = blockSize_;
1431             useHarrisDetector = useHarrisDetector_;
1432             harrisK = harrisK_;
1433         }
1434
1435         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1436
1437         class CV_EXPORTS PyrLKOpticalFlow
1438         {
1439         public:
1440             PyrLKOpticalFlow()
1441             {
1442                 winSize = Size(21, 21);
1443                 maxLevel = 3;
1444                 iters = 30;
1445                 derivLambda = 0.5;
1446                 useInitialFlow = false;
1447                 minEigThreshold = 1e-4f;
1448                 getMinEigenVals = false;
1449                 isDeviceArch11_ = false;
1450             }
1451
1452             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1453                         oclMat &status, oclMat *err = 0);
1454
1455             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1456
1457             Size winSize;
1458             int maxLevel;
1459             int iters;
1460             double derivLambda;
1461             bool useInitialFlow;
1462             float minEigThreshold;
1463             bool getMinEigenVals;
1464
1465             void releaseMemory()
1466             {
1467                 dx_calcBuf_.release();
1468                 dy_calcBuf_.release();
1469
1470                 prevPyr_.clear();
1471                 nextPyr_.clear();
1472
1473                 dx_buf_.release();
1474                 dy_buf_.release();
1475             }
1476
1477         private:
1478             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1479
1480             void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
1481
1482             oclMat dx_calcBuf_;
1483             oclMat dy_calcBuf_;
1484
1485             std::vector<oclMat> prevPyr_;
1486             std::vector<oclMat> nextPyr_;
1487
1488             oclMat dx_buf_;
1489             oclMat dy_buf_;
1490
1491             oclMat uPyr_[2];
1492             oclMat vPyr_[2];
1493
1494             bool isDeviceArch11_;
1495         };
1496
             // Farneback dense optical flow functor operating on oclMat frames.
             // Parameters are public data members; their defaults are assigned by the out-of-line
             // constructor, which is not visible in this header.
1497         class CV_EXPORTS FarnebackOpticalFlow
1498         {
1499         public:
1500             FarnebackOpticalFlow();
1501
                 // Algorithm parameters, set directly by the caller.
1502             int numLevels;
1503             double pyrScale;
1504             bool fastPyramids;
1505             int winSize;
1506             int numIters;
1507             int polyN;
1508             double polySigma;
1509             int flags;
1510
                 // Computes the flow between frame0 and frame1 and writes its two components to flowx and flowy.
1511             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
1512
                 // NOTE(review): presumably frees the cached buffers declared below - confirm in the implementation.
1513             void releaseMemory();
1514
1515         private:
1516             void prepareGaussian(
1517                 int n, double sigma, float *g, float *xg, float *xxg,
1518                 double &ig11, double &ig03, double &ig33, double &ig55);
1519
1520             void setPolynomialExpansionConsts(int n, double sigma);
1521
1522             void updateFlow_boxFilter(
1523                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1524                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1525
1526             void updateFlow_gaussianBlur(
1527                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1528                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1529
                 // Internal working buffers kept between calls.
1530             oclMat frames_[2];
1531             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1532             std::vector<oclMat> pyramid0_, pyramid1_;
1533         };
1534
1535         //////////////// build warping maps ////////////////////
             // Each function below writes its result into the two output oclMat arguments
             // (map_x / map_y, or xmap / ymap).
1536         //! builds plane warping maps
1537         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1538         //! builds cylindrical warping maps
1539         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1540         //! builds spherical warping maps
1541         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1542         //! builds affine warping maps for the transform M (inverse selects the direction of M; dsize is the size of the maps)
1543         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1544
1545         //! builds perspective warping maps for the transform M (inverse selects the direction of M; dsize is the size of the maps)
1546         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1547
1548         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1549         //! Interpolates frames (images) using the provided optical flow (displacement field).
1550         //! frame0   - frame 0 (32-bit floating point image, single channel)
1551         //! frame1   - frame 1 (the same type and size)
1552         //! fu       - forward horizontal displacement
1553         //! fv       - forward vertical displacement
1554         //! bu       - backward horizontal displacement
1555         //! bv       - backward vertical displacement
1556         //! pos      - new frame position
1557         //! newFrame - new frame (output)
1558         //! buf      - temporary buffer; will have width x 6*height size and CV_32FC1 type, and will contain 6 oclMat:
1559         //!            occlusion masks            0, occlusion masks            1,
1560         //!            interpolated forward flow  0, interpolated forward flow  1,
1561         //!            interpolated backward flow 0, interpolated backward flow 1
1562         //!
1563         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1564                                           const oclMat &fu, const oclMat &fv,
1565                                           const oclMat &bu, const oclMat &bv,
1566                                           float pos, oclMat &newFrame, oclMat &buf);
1567
1568         //! computes moments of the rasterized shape or a vector of points and returns them as a Moments value
             //! _array      - input raster image or point set
             //! binaryImage - NOTE(review): presumably treats every non-zero pixel as 1, as in cv::moments - confirm in the implementation
1569         CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
1570
             // Block-matching stereo correspondence functor operating on oclMat images.
1571         class CV_EXPORTS StereoBM_OCL
1572         {
1573         public:
                 // Values accepted for the preset member / constructor argument.
1574             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1575
                 // Default number of disparities and default SAD window size.
1576             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1577
1578             //! the default constructor
1579             StereoBM_OCL();
1580             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
1581             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1582
1583             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
1584             //! Output disparity has CV_8U type.
1585             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1586
1587             //! Heuristic that tries to estimate
1588             // whether the current GPU will be faster than the CPU for this algorithm.
1589             // It queries the currently active device.
1590             static bool checkIfGpuCallReasonable();
1591
                 // Parameters corresponding to the full constructor's arguments.
1592             int preset;
1593             int ndisp;
1594             int winSize;
1595
1596             // If avergeTexThreshold == 0, post-processing is disabled.
1597             // If avergeTexThreshold != 0, the disparity is set to 0 at each point (x,y) where, for the left image,
1598             // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
1599             // i.e. where the input left image is low-textured.
1600             float avergeTexThreshold;
1601         private:
                 // Internal working buffers.
1602             oclMat minSSD, leBuf, riBuf;
1603         };
1604
             // Belief-propagation stereo correspondence functor operating on oclMat images.
             // All parameters are public data members mirroring the constructor arguments.
1605         class CV_EXPORTS StereoBeliefPropagation
1606         {
1607         public:
                 // Default values used by the first constructor.
1608             enum { DEFAULT_NDISP  = 64 };
1609             enum { DEFAULT_ITERS  = 5  };
1610             enum { DEFAULT_LEVELS = 5  };
                 // Writes recommended ndisp / iters / levels values for an image of the given size into the output references.
1611             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
                 // Constructor taking only the disparity count, iteration count, level count and message type (CV_16S by default).
1612             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1613                                              int iters  = DEFAULT_ITERS,
1614                                              int levels = DEFAULT_LEVELS,
1615                                              int msg_type = CV_16S);
                 // Constructor that additionally takes the data / discontinuity term settings; msg_type defaults to CV_32F here.
1616             StereoBeliefPropagation(int ndisp, int iters, int levels,
1617                                     float max_data_term, float data_weight,
1618                                     float max_disc_term, float disc_single_jump,
1619                                     int msg_type = CV_32F);
                 // Computes the disparity for the given left/right pair.
1620             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 // Overload taking a single data matrix instead of an image pair.
                 // NOTE(review): presumably a user-supplied data cost - confirm in the implementation.
1621             void operator()(const oclMat &data, oclMat &disparity);
1622             int ndisp;
1623             int iters;
1624             int levels;
1625             float max_data_term;
1626             float data_weight;
1627             float max_disc_term;
1628             float disc_single_jump;
1629             int msg_type;
1630         private:
                 // Internal working buffers.
1631             oclMat u, d, l, r, u2, d2, l2, r2;
1632             std::vector<oclMat> datas;
1633             oclMat out;
1634         };
1635
             // Constant-space belief-propagation stereo correspondence functor operating on oclMat images.
             // All parameters are public data members.
1636         class CV_EXPORTS StereoConstantSpaceBP
1637         {
1638         public:
                 // Default values used by the first constructor.
1639             enum { DEFAULT_NDISP    = 128 };
1640             enum { DEFAULT_ITERS    = 8   };
1641             enum { DEFAULT_LEVELS   = 4   };
1642             enum { DEFAULT_NR_PLANE = 4   };
                 // Writes recommended ndisp / iters / levels / nr_plane values for an image of the given size into the output references.
1643             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
                 // Constructor taking only the disparity count, iteration count, level count, plane count and message type.
1644             explicit StereoConstantSpaceBP(
1645                 int ndisp    = DEFAULT_NDISP,
1646                 int iters    = DEFAULT_ITERS,
1647                 int levels   = DEFAULT_LEVELS,
1648                 int nr_plane = DEFAULT_NR_PLANE,
1649                 int msg_type = CV_32F);
                 // Constructor that additionally takes the data / discontinuity term settings and min_disp_th.
1650             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1651                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1652                 int min_disp_th = 0,
1653                 int msg_type = CV_32F);
                 // Computes the disparity for the given left/right pair.
1654             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1655             int ndisp;
1656             int iters;
1657             int levels;
1658             int nr_plane;
1659             float max_data_term;
1660             float data_weight;
1661             float max_disc_term;
1662             float disc_single_jump;
1663             int min_disp_th;
1664             int msg_type;
                 // Not a constructor argument; NOTE(review): its initial value is set in the implementation - confirm there.
1665             bool use_local_init_data_cost;
1666         private:
                 // Internal working buffers.
1667             oclMat u[2], d[2], l[2], r[2];
1668             oclMat disp_selected_pyr[2];
1669             oclMat data_cost;
1670             oclMat data_cost_selected;
1671             oclMat temp;
1672             oclMat out;
1673         };
1674
1675         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1676         //
1677         // see reference:
1678         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1679         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
             //
             // Parameters are public data members; their defaults are assigned by the out-of-line
             // constructor, which is not visible in this header.
1680         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1681         {
1682         public:
1683             OpticalFlowDual_TVL1_OCL();
1684
                 // Computes the flow between I0 and I1 and writes its two components to flowx and flowy.
1685             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1686
                 // NOTE(review): presumably releases the internal buffers declared below - confirm in the implementation.
1687             void collectGarbage();
1688
1689             /**
1690             * Time step of the numerical scheme.
1691             */
1692             double tau;
1693
1694             /**
1695             * Weight parameter for the data term, attachment parameter.
1696             * This is the most relevant parameter, which determines the smoothness of the output.
1697             * The smaller this parameter is, the smoother the solutions we obtain.
1698             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1699             */
1700             double lambda;
1701
1702             /**
1703             * Weight parameter for (u - v)^2, tightness parameter.
1704             * It serves as a link between the attachment and the regularization terms.
1705             * In theory, it should have a small value in order to maintain both parts in correspondence.
1706             * The method is stable for a large range of values of this parameter.
1707             */
1708             double theta;
1709
1710             /**
1711             * Number of scales used to create the pyramid of images.
1712             */
1713             int nscales;
1714
1715             /**
1716             * Number of warpings per scale.
1717             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1718             * This is a parameter that assures the stability of the method.
1719             * It also affects the running time, so it is a compromise between speed and accuracy.
1720             */
1721             int warps;
1722
1723             /**
1724             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1725             * A small value will yield more accurate solutions at the expense of a slower convergence.
1726             */
1727             double epsilon;
1728
1729             /**
1730             * Stopping criterion iterations number used in the numerical scheme.
1731             */
1732             int iterations;
1733
                 // NOTE(review): presumably, when true, the incoming flowx / flowy are used as the initial estimate - confirm in the implementation.
1734             bool useInitialFlow;
1735
1736         private:
                 // Processes a single pyramid scale, updating the flow components u1 and u2.
1737             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1738
                 // Per-scale images and flow components.
1739             std::vector<oclMat> I0s;
1740             std::vector<oclMat> I1s;
1741             std::vector<oclMat> u1s;
1742             std::vector<oclMat> u2s;
1743
                 // Internal working buffers.
1744             oclMat I1x_buf;
1745             oclMat I1y_buf;
1746
1747             oclMat I1w_buf;
1748             oclMat I1wx_buf;
1749             oclMat I1wy_buf;
1750
1751             oclMat grad_buf;
1752             oclMat rho_c_buf;
1753
1754             oclMat p11_buf;
1755             oclMat p12_buf;
1756             oclMat p21_buf;
1757             oclMat p22_buf;
1758
1759             oclMat diff_buf;
1760             oclMat norm_buf;
1761         };
1762     }
1763 }
1764 #if defined _MSC_VER && _MSC_VER >= 1200
1765 #  pragma warning( push)
1766 #  pragma warning( disable: 4267)
1767 #endif
1768 #include "opencv2/ocl/matrix_operations.hpp"
1769 #if defined _MSC_VER && _MSC_VER >= 1200
1770 #  pragma warning( pop)
1771 #endif
1772
1773 #endif /* __OPENCV_OCL_HPP__ */