modules/ocl/include/opencv2/ocl/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core/core.hpp"
  51 #include "opencv2/imgproc/imgproc.hpp"
  52 #include "opencv2/objdetect/objdetect.hpp"
  53 #include "opencv2/features2d/features2d.hpp"
  54
  55 namespace cv
  56 {
  57     namespace ocl
  58     {
  59         using std::auto_ptr; // auto_ptr holds the Context singleton declared below
             //! Device-type selector bits, passed as the devicetype argument of getDevice().
             // Each named type occupies its own bit; CVCL_DEVICE_TYPE_ALL has all 32 bits set.
  60         enum
  61         {
  62             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  63             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  64             CVCL_DEVICE_TYPE_GPU         = (1 << 2), // default devicetype of getDevice()
  65             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  66             // disabled for now: CVCL_DEVICE_TYPE_CUSTOM = (1 << 4)
  67             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
  68         };
  69
  70         enum DevMemRW // read/write type of device memory; see getDevMemType() / setDevMemType()
  71         {
  72             DEVICE_MEM_R_W = 0,   // default rw_type of setDevMemType(); presumably read-write access
  73             DEVICE_MEM_R_ONLY,    // presumably read-only access -- confirm against the implementation
  74             DEVICE_MEM_W_ONLY     // presumably write-only access -- confirm against the implementation
  75         };
  76
  77         enum DevMemType // device memory type; see getDevMemType() / setDevMemType() / oclMat::createEx()
  78         {
  79             DEVICE_MEM_DEFAULT = 0, // default mem_type of setDevMemType()
  80             DEVICE_MEM_AHP,         // alloc host pointer
  81             DEVICE_MEM_UHP,         // use host pointer
  82             DEVICE_MEM_CHP,         // copy host pointer
  83             DEVICE_MEM_PM           // persistent memory
  84         };
  85
  86         //! Gets the global device memory type and read/write type (both are output parameters).
  87         // Returns 1 if a unified memory system is supported, otherwise returns 0.
  88         CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
  89
  90         //! Sets the global device memory type and read/write type.
  91         // Every oclMat created afterwards will use these settings.
  92         // Returns -1 if the target type is unsupported, otherwise returns 0.
  93         CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
  94
  95         //! This class contains OCL runtime information.
             // Per the notes on getDevice(), each Info relates to one OpenCL platform.
  96         class CV_EXPORTS Info
  97         {
  98         public:
  99             struct Impl;          // only forward-declared in this header
 100             Impl *impl;           // pointer to the implementation data
 101
 102             Info();
 103             Info(const Info &m);
 104             ~Info();
 105             void release();
 106             Info &operator = (const Info &m);
 107             std::vector<string> DeviceName; // device names; setDevice()'s devnum indexes into this vector
 108         };
 109         //////////////////////////////// Initialization & Info ////////////////////////
 110         // The commented-out declaration below may be obsolete:
 111         //CV_EXPORTS cl_device_id getDevice();
 112         //! This function must be called before any other cv::ocl function; it initializes the OCL runtime.
 113         // Each Info relates to an OpenCL platform.
 114         // There are one or more devices in each platform, and each device has a separate name.
 115         CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
 116
 117         //! Sets the device you want to use; optional, to be called after getDevice().
 118         // devnum is the index of the selected device in the DeviceName vector of Info.
 119         CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
 120
 121         // The two functions below enable other OpenCL programs to use the ocl module's cl_context and cl_command_queue.
 122         //! returns cl_context *
 123         CV_EXPORTS void* getoclContext();
 124         //! returns cl_command_queue *
 125         CV_EXPORTS void* getoclCommandQueue();
 126
 127         //! Explicitly calls clFinish. The global command queue will be used.
 128         CV_EXPORTS void finish();
 129
 130         //! This function enables the ocl module to use a customized cl_context and cl_command_queue.
 131         // getDevice() also needs to be called before this function.
 132         CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
 133
 134         //! Returns true when the global OpenCL context is initialized.
 135         CV_EXPORTS bool initialized();
 136
 137         //////////////////////////////// Error handling ////////////////////////
             //! Error-reporting entry point of the module.
             // NOTE(review): presumably file/line/func identify the call site that raised
             // error_string -- confirm against the implementation.
 138         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
 139
 140         //////////////////////////////// OpenCL context ////////////////////////
 141         //! This is a global singleton class used to represent an OpenCL context.
 142         class CV_EXPORTS Context
 143         {
 144         protected:
 145             Context();                      // not publicly constructible; obtain the instance via getContext()
 146             friend class auto_ptr<Context>;
 147             friend bool initialized();
 148         private:
 149             static auto_ptr<Context> clCxt; // holder of the singleton instance
 150             static int val;
 151         public:
 152             ~Context();
 153             void release();
 154             Info::Impl* impl;               // same implementation type that Info points to
 155
 156             static Context* getContext();
 157             static void setContext(Info &oclinfo);
 158
                 //! feature identifiers accepted by supportsFeature()
 159             enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
 160             bool supportsFeature(int ftype);
 161             size_t computeUnits();
                 // NOTE(review): presumably these mirror getoclContext() / getoclCommandQueue()
                 // (cl_context * / cl_command_queue *) -- confirm against the implementation.
 162             void* oclContext();
 163             void* oclCommandQueue();
 164         };
 165
 166         //! Calls a kernel, by string. Pass globalThreads = NULL and cleanUp = true to finally clean up without executing.
             // NOTE(review): the returned double is presumably the kernel time when
             // measureKernelTime is true -- confirm against the implementation.
 167         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 168                                                         const char **source, string kernelName,
 169                                                         size_t globalThreads[3], size_t localThreads[3],
 170                                                         std::vector< std::pair<size_t, const void *> > &args,
 171                                                         int channels, int depth, const char *build_options,
 172                                                         bool finish = true, bool measureKernelTime = false,
 173                                                         bool cleanUp = true);
 174
 175         //! Calls a kernel, by file. Pass globalThreads = NULL and cleanUp = true to finally clean up without executing.
             // Same parameters as the overload above, except that the program is given as
             // numFiles entries in fileName.
 176         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 177                                                         const char **fileName, const int numFiles, string kernelName,
 178                                                         size_t globalThreads[3], size_t localThreads[3],
 179                                                         std::vector< std::pair<size_t, const void *> > &args,
 180                                                         int channels, int depth, const char *build_options,
 181                                                         bool finish = true, bool measureKernelTime = false,
 182                                                         bool cleanUp = true);
 183
 184         //! Enables or disables caching of OpenCL program binaries on the local disk.
 185         // After a program (*.cl files in opencl/ folder) is built at runtime, the
 186         // compiled OpenCL program can be cached automatically under the path as a "path/*.clb"
 187         // binary file, which will be reused when the OpenCV executable is started again.
 188         //
 189         // The caching mode is controlled by the following enums.
 190         // Notes
 191         //   1. the feature is enabled by default when OpenCV is built in release mode.
 192         //   2. the CACHE_DEBUG / CACHE_RELEASE flags only work effectively with the MSVC compiler;
 193         //      for GNU compilers, the function always treats the build as release mode (enabled by default).
 194         enum
 195         {
 196             CACHE_NONE    = 0,        // do not cache OpenCL binary
 197             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only works with MSVC)
 198             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only works with MSVC)
 199             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache OpenCL binary
 200             CACHE_UPDATE  = 0x1 << 2  // if a binary cache file with the same name is already on the disk, it will be updated
 201         };
 202         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 203
 204         //! Sets where the binary cache is saved to.
 205         CV_EXPORTS void setBinpath(const char *path);
 206
 207         class CV_EXPORTS oclMatExpr; // forward declaration; used by oclMat::operator= and the arithmetic operators
 208         //////////////////////////////// oclMat ////////////////////////////////
 209         class CV_EXPORTS oclMat
 210         {
 211         public:
 212             //! default constructor
 213             oclMat();
 214             //! constructs oclMatrix of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 215             oclMat(int rows, int cols, int type);
 216             oclMat(Size size, int type);
 217             //! constructs oclMatrix and fills it with the specified value s.
 218             oclMat(int rows, int cols, int type, const Scalar &s);
 219             oclMat(Size size, int type, const Scalar &s);
 220             //! copy constructor
 221             oclMat(const oclMat &m);
 222
 223             //! constructor for oclMatrix headers pointing to user-allocated data
 224             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 225             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 226
 227             //! creates a matrix header for a part of the bigger matrix
 228             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 229             oclMat(const oclMat &m, const Rect &roi);
 230
 231             //! builds oclMat from Mat. Performs a blocking upload to the device.
 232             explicit oclMat (const Mat &m);
 233
 234             //! destructor - calls release()
 235             ~oclMat();
 236
 237             //! assignment operators
 238             oclMat &operator = (const oclMat &m);
 239             //! assignment operator. Performs a blocking upload to the device.
 240             oclMat &operator = (const Mat &m);
 241             oclMat &operator = (const oclMatExpr& expr);
 242
 243             //! performs a blocking upload of data to the oclMat.
 244             void upload(const cv::Mat &m);
 245
 246
 247             //! downloads data from device to host memory. Blocking calls.
 248             operator Mat() const;
 249             void download(cv::Mat &m) const;
 250
 251
 252             //! returns a new oclMatrix header for the specified row
 253             oclMat row(int y) const;
 254             //! returns a new oclMatrix header for the specified column
 255             oclMat col(int x) const;
 256             //! ... for the specified row span
 257             oclMat rowRange(int startrow, int endrow) const;
 258             oclMat rowRange(const Range &r) const;
 259             //! ... for the specified column span
 260             oclMat colRange(int startcol, int endcol) const;
 261             oclMat colRange(const Range &r) const;
 262
 263             //! returns deep copy of the oclMatrix, i.e. the data is copied
 264             oclMat clone() const;
 265             //! copies the oclMatrix content to "m".
 266             // It calls m.create(this->size(), this->type()).
 267             // It supports any data type
 268             void copyTo( oclMat &m ) const;
 269             //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
 270             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 271             void copyTo( oclMat &m, const oclMat &mask ) const;
 272             //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
 273             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 274             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 275
 276             void assignTo( oclMat &m, int type = -1 ) const;
 277
 278             //! sets every oclMatrix element to s
 279             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 280             oclMat& operator = (const Scalar &s);
 281             //! sets some of the oclMatrix elements to s, according to the mask
 282             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 283             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 284             //! creates alternative oclMatrix header for the same data, with different
 285             // number of channels and/or different number of rows. see cvReshape.
 286             oclMat reshape(int cn, int rows = 0) const;
 287
 288             //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
 289             // previous data is unreferenced if needed.
 290             void create(int rows, int cols, int type);
 291             void create(Size size, int type);
 292
 293             //! allocates new oclMatrix with specified device memory type.
 294             void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
 295             void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 296
 297             //! decreases reference counter;
 298             // deallocates the data when the reference counter reaches 0.
 299             void release();
 300
 301             //! swaps with other smart pointer
 302             void swap(oclMat &mat);
 303
 304             //! locates oclMatrix header within a parent oclMatrix. See below
 305             void locateROI( Size &wholeSize, Point &ofs ) const;
 306             //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
 307             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 308             //! extracts a rectangular sub-oclMatrix
 309             // (this is a generalized form of row, rowRange etc.)
 310             oclMat operator()( Range rowRange, Range colRange ) const;
 311             oclMat operator()( const Rect &roi ) const;
 312
                 //! compound assignment with another oclMatrix
 313             oclMat& operator+=( const oclMat& m );
 314             oclMat& operator-=( const oclMat& m );
 315             oclMat& operator*=( const oclMat& m );
 316             oclMat& operator/=( const oclMat& m );
 317
 318             //! returns true if the oclMatrix data is continuous
 319             // (i.e. when there are no gaps between successive rows).
 320             // similar to CV_IS_MAT_CONT(cvMat->type)
 321             bool isContinuous() const;
 322             //! returns element size in bytes,
 323             // similar to CV_ELEM_SIZE(cvMat->type)
 324             size_t elemSize() const;
 325             //! returns the size of element channel in bytes.
 326             size_t elemSize1() const;
 327             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 328             int type() const;
 329             //! returns element type as used by ocl, e.g. 8UC3 returns 8UC4 because in ocl
 330             //! a 3-channel element actually uses 4-channel space
 331             int ocltype() const;
 332             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 333             int depth() const;
 334             //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
 335             int channels() const;
 336             //! returns number of channels as used by ocl: returns 4 for a 3-channel element,
 337             //! because a 3-channel element actually uses 4-channel space
 338             int oclchannels() const;
 339             //! returns step/elemSize1()
 340             size_t step1() const;
 341             //! returns oclMatrix size:
 342             // width == number of columns, height == number of rows
 343             Size size() const;
 344             //! returns true if oclMatrix data is NULL
 345             bool empty() const;
 346
 347             //! returns pointer to y-th row
 348             uchar* ptr(int y = 0);
 349             const uchar *ptr(int y = 0) const;
 350
 351             //! template version of the above method
 352             template<typename _Tp> _Tp *ptr(int y = 0);
 353             template<typename _Tp> const _Tp *ptr(int y = 0) const;
 354
 355             //! matrix transposition
 356             oclMat t() const;
 357
 358             /*! includes several bit-fields:
 359               - the magic signature
 360               - continuity flag
 361               - depth
 362               - number of channels
 363               */
 364             int flags;
 365             //! the number of rows and columns
 366             int rows, cols;
 367             //! a distance between successive rows in bytes; includes the gap if any
 368             size_t step;
 369             //! pointer to the data (OCL memory object)
 370             uchar *data;
 371
 372             //! pointer to the reference counter;
 373             // when oclMatrix points to user-allocated data, the pointer is NULL
 374             int *refcount;
 375
 376             //! helper fields used in locateROI and adjustROI
 377             // datastart and dataend are not used in the current version
 378             uchar *datastart;
 379             uchar *dataend;
 380
 381             //! OpenCL context associated with the oclMat object.
 382             Context *clCxt;
 383             //! offset used to handle ROI, calculated in bytes
 384             int offset;
 385             //! rows and columns of the whole matrix; they replace datastart and dataend, which are no longer used
 386             int wholerows;
 387             int wholecols;
 388         };
 389
 390
 391         ///////////////////// mat split and merge /////////////////////////////////
 392         //! Composes a multi-channel array from several single-channel arrays
 393         // Supports all types
 394         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
 395         CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
 396
 397         //! Divides a multi-channel array into several single-channel arrays
 398         // Supports all types
 399         CV_EXPORTS void split(const oclMat &src, oclMat *dst);
 400         CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
 401
 402         ////////////////////////////// Arithmetics ///////////////////////////////////
 403         //#if defined DOUBLE_SUPPORT
 404         //typedef double F;
 405         //#else
 406         //typedef float F;
 407         //#endif
 408         //      CV_EXPORTS void addWeighted(const oclMat& a,F  alpha, const oclMat& b,F beta,F gama, oclMat& c);
             // NOTE(review): presumably c = a * alpha + b * beta + gama, mirroring cv::addWeighted -- confirm.
 409         CV_EXPORTS void addWeighted(const oclMat &a, double  alpha, const oclMat &b, double beta, double gama, oclMat &c);
 410         //! adds one matrix to another (c = a + b)
 411         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 412         CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
 413         //! adds one matrix to another (c = a + b), masked variant
 414         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 415         CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
 416         //! adds scalar to a matrix (c = a + s)
 417         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 418         CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
 419         //! subtracts one matrix from another (c = a - b)
 420         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 421         CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
 422         //! subtracts one matrix from another (c = a - b), masked variant
 423         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 424         CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
 425         //! subtracts scalar from a matrix (c = a - s)
 426         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 427         CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
 428         //! scalar-first overload of subtract
 429         // NOTE(review): the original comment read "c = a - s"; the argument order suggests c = s - a -- confirm.
             // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 430         CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
 431         //! computes element-wise product of the two arrays (c = a * b)
 432         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 433         CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
 434         //! multiplies a matrix by a number (dst = scalar * src)
 435         // supports CV_32FC1 only
 436         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 437         //! computes element-wise quotient of the two arrays (c = a / b)
 438         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 439         CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
 440         //! scalar-numerator overload of divide
 441         // NOTE(review): the original comment read "c = a / b"; the signature suggests c = scale / b -- confirm.
             // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 442         CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
 443
 444         //! compares elements of two arrays (c = a <cmpop> b)
 445         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 446         CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
 447
 448         //! transposes the matrix
 449         // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1 (the same as cuda)
 450         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 451
 452         //! computes element-wise absolute difference of two arrays (c = abs(a - b))
 453         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 454         CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
 455         //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
 456         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 457         CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
 458
 459         //! computes mean value and standard deviation of the array elements (this overload takes no mask)
 460         // supports all types except CV_32F, CV_64F
 461         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 462
 463         //! computes norm of array
 464         // supports NORM_INF, NORM_L1, NORM_L2
 465         // supports only CV_8UC1 type
 466         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 467
 468         //! computes norm of the difference between two arrays
 469         // supports NORM_INF, NORM_L1, NORM_L2
 470         // supports only CV_8UC1 type
 471         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 472
 473         //! reverses the order of the rows, columns or both in a matrix
 474         // supports all types
 475         CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
 476
 477         //! computes sum of array elements
 478         // NOTE(review): an older note here said "disabled until fix crash", yet the declarations are active -- confirm status.
 479         // supports all types
             // NOTE(review): absSum / sqrSum presumably sum absolute values / squares of the elements -- confirm.
 480         CV_EXPORTS Scalar sum(const oclMat &m);
 481         CV_EXPORTS Scalar absSum(const oclMat &m);
 482         CV_EXPORTS Scalar sqrSum(const oclMat &m);
 483
 484         //! finds global minimum and maximum array elements and returns their values
 485         // supports all C1 types
 486
 487         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
             // variant of minMax taking a caller-provided oclMat buf (presumably scratch storage -- confirm)
 488         CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
 489
 490         //! finds global minimum and maximum array elements and returns their values with locations
 491         // supports all C1 types
 492
 493         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 494                                   const oclMat &mask = oclMat());
 495
 496         //! counts non-zero array elements
 497         // supports all types
 498         CV_EXPORTS int countNonZero(const oclMat &src);
 499
 500         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
 501         // destination array will have the same depth as lut and the same number of channels as source
 502         // It supports 8UC1 8UC4 only
 503         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 504
 505         //! only 8UC1 and 256 bins are supported now
 506         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 507         //! only 8UC1 and 256 bins are supported now
 508         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 509
 510         //! only 8UC1 is supported now
 511         class CV_EXPORTS CLAHE
 512         {
 513         public:
 514             virtual void apply(const oclMat &src, oclMat &dst) = 0;
 515
 516             virtual void setClipLimit(double clipLimit) = 0;
 517             virtual double getClipLimit() const = 0;
 518
 519             virtual void setTilesGridSize(Size tileGridSize) = 0;
 520             virtual Size getTilesGridSize() const = 0;
 521
 522             virtual void collectGarbage() = 0;
 523         };
 524         CV_EXPORTS Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 525
 526         //! applies a bilateral filter to src and stores the result in dst
 527         // supports 8UC1 8UC4
 528         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
 529         //! computes exponent of each matrix element (b = e**a)
 530         // supports only CV_32FC1 type
 531         CV_EXPORTS void exp(const oclMat &a, oclMat &b);
 532
 533         //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
 534         // supports only CV_32FC1 type
 535         CV_EXPORTS void log(const oclMat &a, oclMat &b);
 536
 537         //! computes magnitude of each (x(i), y(i)) vector
 538         // supports only CV_32F and CV_64F types
             // NOTE(review): the magnitudeSqr overloads presumably return the squared magnitude -- confirm.
 539         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 540         CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
 541
 542         CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
 543
 544         //! computes angle (angle(i)) of each (x(i), y(i)) vector
 545         // supports only CV_32F and CV_64F types
 546         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 547
 548         //! raises every element of the input array to the power p
 549         //! supports only CV_32F and CV_64F types
 550         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 551
 552         //! converts Cartesian coordinates to polar
 553         // supports only CV_32F and CV_64F types
 554         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 555
 556         //! converts polar coordinates to Cartesian
 557         // supports only CV_32F and CV_64F types
 558         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 559
 560         //! performs per-element bit-wise inversion
 561         // supports all types
 562         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 563         //! calculates per-element bit-wise disjunction of two arrays, or of an array and a scalar
 564         // supports all types
 565         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 566         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 567         //! calculates per-element bit-wise conjunction of two arrays, or of an array and a scalar
 568         // supports all types
 569         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 570         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 571         //! calculates per-element bit-wise "exclusive or" of two arrays, or of an array and a scalar
 572         // supports all types
 573         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 574         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 575
 576         //! Logical operators; these return an oclMat directly
             // NOTE(review): presumably implemented via bitwise_not / bitwise_or / bitwise_and / bitwise_xor -- confirm.
 577         CV_EXPORTS oclMat operator ~ (const oclMat &);
 578         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 579         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 580         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 581
 582
 583         //! Mathematics operators; these return an oclMatExpr, which oclMat::operator= accepts
 584         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 585         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 586         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 587         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 588
 589         //! computes convolution of two images
 590         //! support only CV_32FC1 type
 591         CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
 592
 593         CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
 594
 595         //////////////////////////////// Filter Engine ////////////////////////////////
 596
 597         /*!
 598           The Base Class for 1D or Row-wise Filters
 599
 600           This is the base class for linear or non-linear filters that process 1D data.
 601           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 602           */
 603         class CV_EXPORTS BaseRowFilter_GPU
 604         {
 605         public:
 606             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 607             virtual ~BaseRowFilter_GPU() {}
 608             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 609             int ksize, anchor, bordertype;
 610         };
 611
 612         /*!
 613           The Base Class for Column-wise Filters
 614
 615           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 616           Such filters are used for the "vertical" filtering parts in separable filters.
 617           */
 618         class CV_EXPORTS BaseColumnFilter_GPU
 619         {
 620         public:
 621             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 622             virtual ~BaseColumnFilter_GPU() {}
 623             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 624             int ksize, anchor, bordertype;
 625         };
 626
 627         /*!
 628           The Base Class for Non-Separable 2D Filters.
 629
 630           This is the base class for linear or non-linear 2D filters.
 631           */
 632         class CV_EXPORTS BaseFilter_GPU
 633         {
 634         public:
 635             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
 636                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
 637             virtual ~BaseFilter_GPU() {}
 638             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 639             Size ksize;
 640             Point anchor;
 641             int borderType;
 642         };
 643
 644         /*!
 645           The Base Class for Filter Engine.
 646
 647           The class can be used to apply an arbitrary filtering operation to an image.
 648           It contains all the necessary intermediate buffers.
 649           */
 650         class CV_EXPORTS FilterEngine_GPU
 651         {
 652         public:
 653             virtual ~FilterEngine_GPU() {}
 654
 655             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
 656         };
 657
 658         //! returns the non-separable filter engine with the specified filter
 659         CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
 660
 661         //! returns the primitive row filter with the specified kernel
 662         CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
 663                 int anchor = -1, int bordertype = BORDER_DEFAULT);
 664
 665         //! returns the primitive column filter with the specified kernel
 666         CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
 667                 int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 668
 669         //! returns the separable linear filter engine
 670         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
 671                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 672
 673         //! returns the separable filter engine with the specified filters
 674         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
 675                 const Ptr<BaseColumnFilter_GPU> &columnFilter);
 676
 677         //! returns the Gaussian filter engine
 678         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 679
 680         //! returns filter engine for the generalized Sobel operator
 681         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 682
 683         //! applies Laplacian operator to the image
 684         // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
 685         CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
 686
 687         //! returns 2D box filter
 688         // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
 689         CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
 690                 const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 691
 692         //! returns box filter engine
 693         CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
 694                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 695
 696         //! returns 2D filter with the specified kernel
 697         // supports CV_8UC1 and CV_8UC4 types
 698         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
 699                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 700
 701         //! returns the non-separable linear filter engine
 702         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
 703                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 704
 705         //! smooths the image using the normalized box filter
 706         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 707         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
 708         CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
 709                                   Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 710
 711         //! returns 2D morphological filter
 712         //! only MORPH_ERODE and MORPH_DILATE are supported
 713         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 714         // kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
 715         CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
 716                 Point anchor = Point(-1, -1));
 717
 718         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
 719         CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
 720                 const Point &anchor = Point(-1, -1), int iterations = 1);
 721
 722         //! a synonym for normalized box filter
 723         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 724         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 725         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 726                                 int borderType = BORDER_CONSTANT)
 727         {
 728             boxFilter(src, dst, -1, ksize, anchor, borderType);
 729         }
 730
 731         //! applies non-separable 2D linear filter to the image
 732         //  Note, at the moment this function only works when anchor point is in the kernel center
 733         //  and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result
 734         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
 735                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 736
 737         //! applies separable 2D linear filter to the image
 738         CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
 739                                     Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 740
 741         //! applies generalized Sobel operator to the image
 742         // dst.type must equalize src.type
 743         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 744         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 745         CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 746
 747         //! applies the vertical or horizontal Scharr operator to the image
 748         // dst.type must equalize src.type
 749         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 750         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 751         CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 752
 753         //! smooths the image using Gaussian filter.
 754         // dst.type must equalize src.type
 755         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 756         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 757         CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 758
 759         //! erodes the image (applies the local minimum operator)
 760         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 761         CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 762
 763                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 764
 765
 766         //! dilates the image (applies the local maximum operator)
 767         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 768         CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 769
 770                                 int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 771
 772
 773         //! applies an advanced morphological operation to the image
 774         CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 775
 776                                       int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 777
 778
 779         ////////////////////////////// Image processing //////////////////////////////
 780         //! Does mean shift filtering on GPU.
 781         CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
 782                                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 783
 784         //! Does mean shift procedure on GPU.
 785         CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
 786                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 787
 788         //! Does mean shift segmentation with elimiation of small regions.
 789         CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
 790                                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 791
 792         //! applies fixed threshold to the image.
 793         // supports CV_8UC1 and CV_32FC1 data type
 794         // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
 795         CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
 796
 797         //! resizes the image
 798         // Supports INTER_NEAREST, INTER_LINEAR
 799         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 800         CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 801
 802         //! Applies a generic geometrical transformation to an image.
 803
 804         // Supports INTER_NEAREST, INTER_LINEAR.
 805
 806         // Map1 supports CV_16SC2, CV_32FC2  types.
 807
 808         // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
 809
 810         CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
 811
 812         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 813         // supports CV_8UC1, CV_8UC4, CV_32SC1 types
 814         CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
 815
 816         //! Smoothes image using median filter
 817         // The source 1- or 4-channel image. When m is 3 or 5, the image depth should be CV 8U or CV 32F.
 818         CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 819
 820         //! warps the image using affine transformation
 821         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 822         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 823         CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 824
 825         //! warps the image using perspective transformation
 826         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 827         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 828         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 829
 830         //! computes the integral image and integral for the squared image
 831         // sum will have CV_32S type, sqsum - CV32F type
 832         // supports only CV_8UC1 source type
 833         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
 834         CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
 835         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 836         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 837             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 838         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 839         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 840             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 841
 842         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 843         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 844         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 845
 846         class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier
 847         {
 848         public:
 849             OclCascadeClassifier() {};
 850             ~OclCascadeClassifier() {};
 851
 852             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
 853                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
 854         };
 855
 856         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
 857         {
 858         public:
 859             OclCascadeClassifierBuf() :
 860                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 861
 862             ~OclCascadeClassifierBuf() { release(); }
 863
 864             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 865                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 866                                   Size minSize = Size(), Size maxSize = Size());
 867             void release();
 868
 869         private:
 870             void Init(const int rows, const int cols, double scaleFactor, int flags,
 871                       const int outputsz, const size_t localThreads[],
 872                       CvSize minSize, CvSize maxSize);
 873             void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
 874             void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
 875                                          const double scaleFactor, const size_t localThreads[],
 876                                          CvSize minSize, CvSize maxSize);
 877             void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
 878
 879             int m_rows;
 880             int m_cols;
 881             int m_flags;
 882             int m_loopcount;
 883             int m_nodenum;
 884             bool findBiggestObject;
 885             bool initialized;
 886             double m_scaleFactor;
 887             Size m_minSize;
 888             Size m_maxSize;
 889             vector<CvSize> sizev;
 890             vector<float> scalev;
 891             oclMat gimg1, gsum, gsqsum;
 892             void * buffers;
 893         };
 894
 895
 896         /////////////////////////////// Pyramid /////////////////////////////////////
 897         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
 898
 899         //! upsamples the source image and then smoothes it
 900         CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
 901
 902         //! performs linear blending of two images
 903         //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
 904         // supports only CV_8UC1 source type
 905         CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
 906
 907         //! computes vertical sum, supports only CV_32FC1 images
 908         CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 909
 910         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
 911         struct CV_EXPORTS MatchTemplateBuf
 912         {
 913             Size user_block_size;
 914             oclMat imagef, templf;
 915             std::vector<oclMat> images;
 916             std::vector<oclMat> image_sums;
 917             std::vector<oclMat> image_sqsums;
 918         };
 919
 920         //! computes the proximity map for the raster template and the image where the template is searched for
 921         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 922         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 923         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
 924
 925         //! computes the proximity map for the raster template and the image where the template is searched for
 926         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 927         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 928         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 929
 930         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
 931         struct CV_EXPORTS CannyBuf;
 932         //! compute edges of the input image using Canny operator
 933         // Support CV_8UC1 only
 934         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 935         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 936         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 937         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 938
 939         struct CV_EXPORTS CannyBuf
 940         {
 941             CannyBuf() : counter(NULL) {}
 942             ~CannyBuf()
 943             {
 944                 release();
 945             }
 946             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
 947             {
 948                 create(image_size, apperture_size);
 949             }
 950             CannyBuf(const oclMat &dx_, const oclMat &dy_);
 951
 952             void create(const Size &image_size, int apperture_size = 3);
 953             void release();
 954             oclMat dx, dy;
 955             oclMat dx_buf, dy_buf;
 956             oclMat edgeBuf;
 957             oclMat trackBuf1, trackBuf2;
 958             void *counter;
 959             Ptr<FilterEngine_GPU> filterDX, filterDY;
 960         };
 961
 962         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
 963         //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
 964         //! Param dft_size is the size of DFT transform.
 965         //!
 966         //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
 967         // support src type of CV32FC1, CV32FC2
 968         // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
 969         // dft_size is the size of original input, which is used for transformation from complex to real.
 970         // dft_size must be powers of 2, 3 and 5
 971         // real to complex dft requires at least v1.8 clAmdFft
 972         // real to complex dft output is not the same with cpu version
 973         // real to complex and complex to real does not support DFT_ROWS
 974         CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
 975
 976         //! implements generalized matrix product algorithm GEMM from BLAS
 977         // The functionality requires clAmdBlas library
 978         // only support type CV_32FC1
 979         // flag GEMM_3_T is not supported
 980         CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
 981                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
 982
 983         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 984         struct CV_EXPORTS HOGDescriptor
 985         {
 986             enum { DEFAULT_WIN_SIGMA = -1 };
 987             enum { DEFAULT_NLEVELS = 64 };
 988             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
 989             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
 990                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
 991                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
 992                           double threshold_L2hys = 0.2, bool gamma_correction = true,
 993                           int nlevels = DEFAULT_NLEVELS);
 994
 995             size_t getDescriptorSize() const;
 996             size_t getBlockHistogramSize() const;
 997             void setSVMDetector(const vector<float> &detector);
 998             static vector<float> getDefaultPeopleDetector();
 999             static vector<float> getPeopleDetector48x96();
1000             static vector<float> getPeopleDetector64x128();
1001             void detect(const oclMat &img, vector<Point> &found_locations,
1002                         double hit_threshold = 0, Size win_stride = Size(),
1003                         Size padding = Size());
1004             void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
1005                                   double hit_threshold = 0, Size win_stride = Size(),
1006                                   Size padding = Size(), double scale0 = 1.05,
1007                                   int group_threshold = 2);
1008             void getDescriptors(const oclMat &img, Size win_stride,
1009                                 oclMat &descriptors,
1010                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
1011             Size win_size;
1012             Size block_size;
1013             Size block_stride;
1014             Size cell_size;
1015
1016             int nbins;
1017             double win_sigma;
1018             double threshold_L2hys;
1019             bool gamma_correction;
1020             int nlevels;
1021
1022         protected:
1023             // initialize buffers; only need to do once in case of multiscale detection
1024             void init_buffer(const oclMat &img, Size win_stride);
1025             void computeBlockHistograms(const oclMat &img);
1026             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1027             double getWinSigma() const;
1028             bool checkDetectorSize() const;
1029
1030             static int numPartsWithin(int size, int part_size, int stride);
1031             static Size numPartsWithin(Size size, Size part_size, Size stride);
1032
1033             // Coefficients of the separating plane
1034             float free_coef;
1035             oclMat detector;
1036             // Results of the last classification step
1037             oclMat labels;
1038             Mat labels_host;
1039             // Results of the last histogram evaluation step
1040             oclMat block_hists;
1041             // Gradients conputation results
1042             oclMat grad, qangle;
1043             // scaled image
1044             oclMat image_scale;
1045             // effect size of input image (might be different from original size after scaling)
1046             Size effect_size;
1047         };
1048
1049
1050         ////////////////////////feature2d_ocl/////////////////
1051         /****************************************************************************************\
1052         *                                      Distance                                          *
1053         \****************************************************************************************/
1054         template<typename T>
1055         struct CV_EXPORTS Accumulator
1056         {
1057             typedef T Type;
1058         };
1059         template<> struct Accumulator<unsigned char>
1060         {
1061             typedef float Type;
1062         };
1063         template<> struct Accumulator<unsigned short>
1064         {
1065             typedef float Type;
1066         };
1067         template<> struct Accumulator<char>
1068         {
1069             typedef float Type;
1070         };
1071         template<> struct Accumulator<short>
1072         {
1073             typedef float Type;
1074         };
1075
1076         /*
1077          * Manhattan distance (city block distance) functor
1078          */
1079         template<class T>
1080         struct CV_EXPORTS L1
1081         {
1082             enum { normType = NORM_L1 };
1083             typedef T ValueType;
1084             typedef typename Accumulator<T>::Type ResultType;
1085
1086             ResultType operator()( const T *a, const T *b, int size ) const
1087             {
1088                 return normL1<ValueType, ResultType>(a, b, size);
1089             }
1090         };
1091
1092         /*
1093          * Euclidean distance functor
1094          */
1095         template<class T>
1096         struct CV_EXPORTS L2
1097         {
1098             enum { normType = NORM_L2 };
1099             typedef T ValueType;
1100             typedef typename Accumulator<T>::Type ResultType;
1101
1102             ResultType operator()( const T *a, const T *b, int size ) const
1103             {
1104                 return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1105             }
1106         };
1107
1108         /*
1109          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1110          * bit count of A exclusive XOR'ed with B
1111          */
1112         struct CV_EXPORTS Hamming
1113         {
1114             enum { normType = NORM_HAMMING };
1115             typedef unsigned char ValueType;
1116             typedef int ResultType;
1117
1118             /** this will count the bits in a ^ b
1119              */
1120             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1121             {
1122                 return normHamming(a, b, size);
1123             }
1124         };
1125
1126         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1127
1128         class CV_EXPORTS BruteForceMatcher_OCL_base
1129         {
                 // Interface of the OpenCL brute-force descriptor matcher. For each matching mode
                 // (best match, k best matches, radius match; single train matrix or train collection)
                 // the class declares a device-side call writing into oclMat outputs, Download/Convert
                 // helpers that turn those outputs into DMatch vectors, and a convenience wrapper.
                 // Only declarations are visible here; the implementations live elsewhere.
1130         public:
1131             enum DistType {L1Dist = 0, L2Dist, HammingDist};
1132             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1133             // Add descriptors to train descriptor collection
1134             void add(const std::vector<oclMat> &descCollection);
1135             // Get train descriptors collection
1136             const std::vector<oclMat> &getTrainDescriptors() const;
1137             // Clear train descriptors collection
1138             void clear();
1139             // Return true if there are no train descriptors in the collection
1140             bool empty() const;
1141
1142             // Return true if the matcher supports mask in match methods
1143             bool isMaskSupported() const;
1144
1145             // Find one best match for each query descriptor
1146             void matchSingle(const oclMat &query, const oclMat &train,
1147                              oclMat &trainIdx, oclMat &distance,
1148                              const oclMat &mask = oclMat());
1149
1150             // Download trainIdx and distance and convert it to CPU vector with DMatch
1151             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1152             // Convert trainIdx and distance to vector with DMatch
1153             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1154
1155             // Find one best match for each query descriptor
1156             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1157
1158             // Make gpu collection of trains and masks in suitable format for matchCollection function
1159             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1160
1161
1162             // Find one best match from train collection for each query descriptor
1163             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1164                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1165                                  const oclMat &masks = oclMat());
1166
1167             // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
1168             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1169             // Convert trainIdx, imgIdx and distance to vector with DMatch
1170             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1171
1172             // Find one best match from train collection for each query descriptor.
1173             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1174
1175             // Find k best matches for each query descriptor (in increasing order of distances)
1176             void knnMatchSingle(const oclMat &query, const oclMat &train,
1177                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1178                                 const oclMat &mask = oclMat());
1179
1180             // Download trainIdx and distance and convert it to vector with DMatch
1181             // compactResult is used when mask is not empty. If compactResult is false matches
1182             // vector will have the same size as queryDescriptors rows. If compactResult is true
1183             // matches vector will not contain matches for fully masked out query descriptors.
1184             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1185                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1186
1187             // Convert trainIdx and distance to vector with DMatch
1188             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1189                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1190
1191             // Find k best matches for each query descriptor (in increasing order of distances).
1192             // compactResult is used when mask is not empty. If compactResult is false matches
1193             // vector will have the same size as queryDescriptors rows. If compactResult is true
1194             // matches vector will not contain matches for fully masked out query descriptors.
1195             void knnMatch(const oclMat &query, const oclMat &train,
1196                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1197                           bool compactResult = false);
1198
1199             // Find k best matches from train collection for each query descriptor (in increasing order of distances)
                 // NOTE(review): unlike knnMatchSingle there is no k parameter; the "2" in the name suggests k is fixed at 2 - confirm.
1200             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1201                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1202                                      const oclMat &maskCollection = oclMat());
1203
1204             // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
1205             // compactResult is used when mask is not empty. If compactResult is false matches
1206             // vector will have the same size as queryDescriptors rows. If compactResult is true
1207             // matches vector will not contain matches for fully masked out query descriptors.
1208             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1209                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1210
1211             // Convert trainIdx, imgIdx and distance to vector with DMatch
1212             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1213                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1214
1215             // Find k best matches for each query descriptor (in increasing order of distances).
1216             // compactResult is used when mask is not empty. If compactResult is false matches
1217             // vector will have the same size as queryDescriptors rows. If compactResult is true
1218             // matches vector will not contain matches for fully masked out query descriptors.
1219             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1220                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1221
1222             // Find best matches for each query descriptor which have distance less than maxDistance.
1223             // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
1224             // Caution: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
1225             // because it didn't have enough memory.
1226             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1227             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1228             // Matches are not sorted.
1229             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1230                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1231                                    const oclMat &mask = oclMat());
1232
1233             // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
1234             // matches will be sorted in increasing order of distances.
1235             // compactResult is used when mask is not empty. If compactResult is false matches
1236             // vector will have the same size as queryDescriptors rows. If compactResult is true
1237             // matches vector will not contain matches for fully masked out query descriptors.
1238             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1239                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1240             // Convert trainIdx, nMatches and distance to vector with DMatch.
1241             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1242                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1243             // Find best matches for each query descriptor which have distance less than maxDistance
1244             // (in increasing order of distances).
1245             void radiusMatch(const oclMat &query, const oclMat &train,
1246                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1247                              const oclMat &mask = oclMat(), bool compactResult = false);
1248             // Find best matches for each query descriptor which have distance less than maxDistance.
1249             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
                 // NOTE(review): radiusMatchSingle documents max((nTrain / 100), 10) for the same default - confirm which is intended here.
1250             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1251             // Matches are not sorted.
1252             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1253                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1254             // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
1255             // matches will be sorted in increasing order of distances.
1256             // compactResult is used when mask is not empty. If compactResult is false matches
1257             // vector will have the same size as queryDescriptors rows. If compactResult is true
1258             // matches vector will not contain matches for fully masked out query descriptors.
1259             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1260                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1261             // Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
1262             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1263                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1264             // Find best matches from train collection for each query descriptor which have distance less than
1265             // maxDistance (in increasing order of distances).
1266             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1267                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
                 // Publicly writable distance selector; presumably initialised from the constructor argument - confirm in the implementation.
1268             DistType distType;
1269         private:
                 // Presumably the collection that add(), getTrainDescriptors(), clear() and empty() operate on - confirm in the implementation.
1270             std::vector<oclMat> trainDescCollection;
1271         };
1272
1273         template <class Distance>
             // Primary template: declared only. Usable matchers come from the specialisations
             // for L1<T>, L2<T> and Hamming that follow in this header.
1274         class CV_EXPORTS BruteForceMatcher_OCL;
1275
1276         template <typename T>
1277         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1278         {
                 // Specialisation for L1<T>: both constructors pass L1Dist to the base class.
1279         public:
1280             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
                 // The distance object is accepted but unused (parameter left unnamed).
1281             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1282         };
1283
1284         template <typename T>
1285         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1286         {
                 // Specialisation for L2<T>: both constructors pass L2Dist to the base class.
1287         public:
1288             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
                 // The distance object is accepted but unused (parameter left unnamed).
1289             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1290         };
1291
1292         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1293         {
                 // Full specialisation for Hamming: both constructors pass HammingDist to the base class.
1294         public:
1295             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
                 // The distance object is accepted but unused (parameter left unnamed).
1296             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1297         };
1298
1299         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1300         {
                 // Non-template matcher selected by a norm code instead of a distance type.
1301         public:
                 // Maps norm to the base-class DistType: NORM_L1 -> L1Dist, NORM_L2 -> L2Dist,
                 // and every other value (not only NORM_HAMMING) -> HammingDist.
1302             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1303         };
1304
1305         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1306         {
                 // Corner detector functor. Configuration lives in public fields that the constructor
                 // fills from its arguments; intermediate results are kept in private oclMat buffers
                 // that releaseMemory() frees.
1307         public:
                 // Defaults: maxCorners 1000, qualityLevel 0.01, minDistance 0.0, blockSize 3,
                 // useHarrisDetector false, harrisK 0.04.
1308             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1309                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1310
1311             //! return 1 rows matrix with CV_32FC2 type
1312             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1313             //! download points of type Point2f to a vector. the vector's content will be erased
1314             void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
1315
                 // Detection parameters, publicly adjustable after construction.
1316             int maxCorners;
1317             double qualityLevel;
1318             double minDistance;
1319
1320             int blockSize;
1321             bool useHarrisDetector;
1322             double harrisK;
                 // Releases all five private buffers declared below.
1323             void releaseMemory()
1324             {
1325                 Dx_.release();
1326                 Dy_.release();
1327                 eig_.release();
1328                 minMaxbuf_.release();
1329                 tmpCorners_.release();
1330             }
1331         private:
                 // Internal work buffers; NOTE(review): presumably reused across operator() calls - confirm in the implementation.
1332             oclMat Dx_;
1333             oclMat Dy_;
1334             oclMat eig_;
1335             oclMat minMaxbuf_;
1336             oclMat tmpCorners_;
1337         };
1338
1339         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1340             int blockSize_, bool useHarrisDetector_, double harrisK_)
1341         {
                 // Copies each argument into the public field of the same name (arguments carry a
                 // trailing underscore). No validation is done; the oclMat buffers stay default-constructed.
1342             maxCorners = maxCorners_;
1343             qualityLevel = qualityLevel_;
1344             minDistance = minDistance_;
1345             blockSize = blockSize_;
1346             useHarrisDetector = useHarrisDetector_;
1347             harrisK = harrisK_;
1348         }
1349
1350         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1351         class CV_EXPORTS PyrLKOpticalFlow
1352         {
1353         public:
1354             PyrLKOpticalFlow()
1355             {
1356                 winSize = Size(21, 21);
1357                 maxLevel = 3;
1358                 iters = 30;
1359                 derivLambda = 0.5;
1360                 useInitialFlow = false;
1361                 minEigThreshold = 1e-4f;
1362                 getMinEigenVals = false;
1363                 isDeviceArch11_ = false;
1364             }
1365
1366             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1367                         oclMat &status, oclMat *err = 0);
1368             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1369             Size winSize;
1370             int maxLevel;
1371             int iters;
1372             double derivLambda;
1373             bool useInitialFlow;
1374             float minEigThreshold;
1375             bool getMinEigenVals;
1376             void releaseMemory()
1377             {
1378                 dx_calcBuf_.release();
1379                 dy_calcBuf_.release();
1380
1381                 prevPyr_.clear();
1382                 nextPyr_.clear();
1383
1384                 dx_buf_.release();
1385                 dy_buf_.release();
1386             }
1387         private:
1388             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1389             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
1390
1391             oclMat dx_calcBuf_;
1392             oclMat dy_calcBuf_;
1393
1394             vector<oclMat> prevPyr_;
1395             vector<oclMat> nextPyr_;
1396
1397             oclMat dx_buf_;
1398             oclMat dy_buf_;
1399             oclMat uPyr_[2];
1400             oclMat vPyr_[2];
1401             bool isDeviceArch11_;
1402         };
1403         //////////////// build warping maps ////////////////////
             // Each function below takes its inputs by value or const reference and has two
             // non-const oclMat reference parameters (map_x/map_y or xmap/ymap), presumably the
             // outputs receiving the x and y coordinate maps - confirm in the implementation.
1404         //! builds plane warping maps
1405         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1406         //! builds cylindrical warping maps
1407         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1408         //! builds spherical warping maps
1409         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1410         //! builds affine warping maps
1411         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1412
1413         //! builds perspective warping maps
1414         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1415
1416         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1417         //! Interpolate frames (images) using provided optical flow (displacement field).
1418         //! frame0   - frame 0 (32-bit floating point image, single channel)
1419         //! frame1   - frame 1 (the same type and size)
1420         //! fu       - forward horizontal displacement
1421         //! fv       - forward vertical displacement
1422         //! bu       - backward horizontal displacement
1423         //! bv       - backward vertical displacement
1424         //! pos      - new frame position
1425         //! newFrame - new frame
1426         //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat:
1427         //!            occlusion masks            0, occlusion masks            1,
1428         //!            interpolated forward flow  0, interpolated forward flow  1,
1429         //!            interpolated backward flow 0, interpolated backward flow 1
1430         //!
1431         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1432                                           const oclMat &fu, const oclMat &fv,
1433                                           const oclMat &bu, const oclMat &bv,
1434                                           float pos, oclMat &newFrame, oclMat &buf);
1435
1436         //! computes moments of the rasterized shape or a vector of points
             //! NOTE(review): binaryImage presumably has the same meaning as in cv::moments
             //! (non-zero pixels treated as 1) - confirm in the implementation.
1437         CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
1438
1439         class CV_EXPORTS StereoBM_OCL
1440         {
                 // Stereo correspondence functor configured through the public preset / ndisp /
                 // winSize / avergeTexThreshold fields; work buffers are private.
1441         public:
                 // Values accepted for the preset field / constructor argument.
1442             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1443
                 // Defaults used by the full constructor for ndisparities and winSize.
1444             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1445
1446             //! the default constructor
1447             StereoBM_OCL();
1448             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
1449             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1450
1451             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
1452             //! Output disparity has CV_8U type.
1453             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1454
1455             //! Heuristic that tries to estimate
1456             // whether the current GPU will be faster than the CPU for this algorithm.
1457             // It queries the currently active device.
1458             static bool checkIfGpuCallReasonable();
1459
1460             int preset;
1461             int ndisp;
1462             int winSize;
1463
1464             // If avergeTexThreshold == 0 => post processing is disabled
1465             // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where for left image
1466             // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
1467             // i.e. input left image is low textured.
1468             float avergeTexThreshold;
1469         private:
                 // Internal work buffers; their exact roles are defined by the implementation.
1470             oclMat minSSD, leBuf, riBuf;
1471         };
1472
1473         class CV_EXPORTS StereoBeliefPropagation
1474         {
                 // Stereo matcher functor (belief propagation, per the class name). Parameters are
                 // public fields; message and data buffers are private.
1475         public:
                 // Default argument values of the first constructor.
1476             enum { DEFAULT_NDISP  = 64 };
1477             enum { DEFAULT_ITERS  = 5  };
1478             enum { DEFAULT_LEVELS = 5  };
                 // Writes suggested ndisp / iters / levels for an image of the given size into the reference arguments.
1479             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
                 // Note the different msg_type defaults: CV_16S here, CV_32F in the full constructor below.
1480             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1481                                              int iters  = DEFAULT_ITERS,
1482                                              int levels = DEFAULT_LEVELS,
1483                                              int msg_type = CV_16S);
1484             StereoBeliefPropagation(int ndisp, int iters, int levels,
1485                                     float max_data_term, float data_weight,
1486                                     float max_disc_term, float disc_single_jump,
1487                                     int msg_type = CV_32F);
                 // Computes disparity from a left/right image pair.
1488             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 // Overload taking a single data matrix; NOTE(review): presumably a precomputed data cost - confirm in the implementation.
1489             void operator()(const oclMat &data, oclMat &disparity);
1490             int ndisp;
1491             int iters;
1492             int levels;
1493             float max_data_term;
1494             float data_weight;
1495             float max_disc_term;
1496             float disc_single_jump;
1497             int msg_type;
1498         private:
                 // Internal buffers; roles are defined by the implementation.
1499             oclMat u, d, l, r, u2, d2, l2, r2;
1500             std::vector<oclMat> datas;
1501             oclMat out;
1502         };
1503
1504         class CV_EXPORTS StereoConstantSpaceBP
1505         {
                 // Stereo matcher functor (constant-space belief propagation, per the class name).
                 // Parameters are public fields; work buffers are private.
1506         public:
                 // Default argument values of the first constructor.
1507             enum { DEFAULT_NDISP    = 128 };
1508             enum { DEFAULT_ITERS    = 8   };
1509             enum { DEFAULT_LEVELS   = 4   };
1510             enum { DEFAULT_NR_PLANE = 4   };
                 // Writes suggested ndisp / iters / levels / nr_plane for an image of the given size into the reference arguments.
1511             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
1512             explicit StereoConstantSpaceBP(
1513                 int ndisp    = DEFAULT_NDISP,
1514                 int iters    = DEFAULT_ITERS,
1515                 int levels   = DEFAULT_LEVELS,
1516                 int nr_plane = DEFAULT_NR_PLANE,
1517                 int msg_type = CV_32F);
1518             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1519                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1520                 int min_disp_th = 0,
1521                 int msg_type = CV_32F);
                 // Computes disparity from a left/right image pair.
1522             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1523             int ndisp;
1524             int iters;
1525             int levels;
1526             int nr_plane;
1527             float max_data_term;
1528             float data_weight;
1529             float max_disc_term;
1530             float disc_single_jump;
1531             int min_disp_th;
1532             int msg_type;
                 // Not a constructor parameter in either overload; it can only be changed through this field.
1533             bool use_local_init_data_cost;
1534         private:
                 // Internal buffers; roles are defined by the implementation.
1535             oclMat u[2], d[2], l[2], r[2];
1536             oclMat disp_selected_pyr[2];
1537             oclMat data_cost;
1538             oclMat data_cost_selected;
1539             oclMat temp;
1540             oclMat out;
1541         };
1542
1543         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1544         //
1545         // see reference:
1546         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1547         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
1548         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1549         {
1550         public:
                 // Defined elsewhere; NOTE(review): presumably assigns defaults to the public parameters below - confirm.
1551             OpticalFlowDual_TVL1_OCL();
1552
                 // Computes the flow between I0 and I1 and writes its two components into flowx and flowy.
1553             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1554
                 // Defined elsewhere; NOTE(review): presumably releases the private buffers below - confirm.
1555             void collectGarbage();
1556
1557             /**
1558             * Time step of the numerical scheme.
1559             */
1560             double tau;
1561
1562             /**
1563             * Weight parameter for the data term, attachment parameter.
1564             * This is the most relevant parameter, which determines the smoothness of the output.
1565             * The smaller this parameter is, the smoother the solutions we obtain.
1566             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1567             */
1568             double lambda;
1569
1570             /**
1571             * Weight parameter for (u - v)^2, tightness parameter.
1572             * It serves as a link between the attachment and the regularization terms.
1573             * In theory, it should have a small value in order to maintain both parts in correspondence.
1574             * The method is stable for a large range of values of this parameter.
1575             */
1576             double theta;
1577
1578             /**
1579             * Number of scales used to create the pyramid of images.
1580             */
1581             int nscales;
1582
1583             /**
1584             * Number of warpings per scale.
1585             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1586             * This is a parameter that assures the stability of the method.
1587             * It also affects the running time, so it is a compromise between speed and accuracy.
1588             */
1589             int warps;
1590
1591             /**
1592             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1593             * A small value will yield more accurate solutions at the expense of a slower convergence.
1594             */
1595             double epsilon;
1596
1597             /**
1598             * Stopping criterion iterations number used in the numerical scheme.
1599             */
1600             int iterations;
1601
                 // NOTE(review): presumably makes operator() start from the flow already stored in flowx/flowy - confirm in the implementation.
1602             bool useInitialFlow;
1603
1604         private:
                 // Per-scale step of the solver; defined elsewhere.
1605             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1606
                 // Per-scale image and flow storage (one entry per pyramid level, judging by the nscales parameter - confirm).
1607             std::vector<oclMat> I0s;
1608             std::vector<oclMat> I1s;
1609             std::vector<oclMat> u1s;
1610             std::vector<oclMat> u2s;
1611
                 // Scratch buffers; their roles are defined by the implementation.
1612             oclMat I1x_buf;
1613             oclMat I1y_buf;
1614
1615             oclMat I1w_buf;
1616             oclMat I1wx_buf;
1617             oclMat I1wy_buf;
1618
1619             oclMat grad_buf;
1620             oclMat rho_c_buf;
1621
1622             oclMat p11_buf;
1623             oclMat p12_buf;
1624             oclMat p21_buf;
1625             oclMat p22_buf;
1626
1627             oclMat diff_buf;
1628             oclMat norm_buf;
1629         };
1630     }
1631 }
1632 #if defined _MSC_VER && _MSC_VER >= 1200
1633 #  pragma warning( push)
1634 #  pragma warning( disable: 4267)
1635 #endif
1636 #include "opencv2/ocl/matrix_operations.hpp"
1637 #if defined _MSC_VER && _MSC_VER >= 1200
1638 #  pragma warning( pop)
1639 #endif
1640
1641 #endif /* __OPENCV_OCL_HPP__ */