modules/ocl/include/opencv2/ocl/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core/core.hpp"
  51 #include "opencv2/imgproc/imgproc.hpp"
  52 #include "opencv2/objdetect/objdetect.hpp"
  53 #include "opencv2/features2d/features2d.hpp"
  54
  55 namespace cv
  56 {
  57     namespace ocl
  58     {
  59         using std::auto_ptr;
  60         enum // device-type selection flags, one bit per type; used as the `devicetype` argument of getDevice()
  61         {
  62             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  63             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  64             CVCL_DEVICE_TYPE_GPU         = (1 << 2), // default value of getDevice()'s devicetype argument
  65             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  66             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)   (not enabled)
  67             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF // all 32 bits set
  68         };
  69
  70         enum DevMemRW // read/write mode of device memory; see getDevMemType() / setDevMemType()
  71         {
  72             DEVICE_MEM_R_W = 0, // default argument of setDevMemType()
  73             DEVICE_MEM_R_ONLY,
  74             DEVICE_MEM_W_ONLY
  75         };
  76
  77         enum DevMemType // kind of device memory; see getDevMemType() / setDevMemType()
  78         {
  79             DEVICE_MEM_DEFAULT = 0, // default argument of setDevMemType()
  80             DEVICE_MEM_AHP,         // alloc host pointer
  81             DEVICE_MEM_UHP,         // use host pointer
  82             DEVICE_MEM_CHP,         // copy host pointer
  83             DEVICE_MEM_PM           // persistent memory
  84         };
  85
  86         // Gets the global device memory type and read/write mode (written to the two reference arguments).
  87         // Returns 1 if a unified memory system is supported, otherwise returns 0.
  88         CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
  89
  90         // Sets the global device memory type and read/write mode;
  91         // every oclMat created afterwards will use these settings.
  92         // Returns -1 if the target type is unsupported, otherwise returns 0.
  93         CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
  94
  95         // This class contains OpenCL runtime information; instances are filled in by getDevice().
  96         class CV_EXPORTS Info
  97         {
  98         public:
  99             struct Impl; // only forward-declared in this header
 100             Impl *impl;
 101
 102             Info();
 103             Info(const Info &m);
 104             ~Info();
 105             void release();
 106             Info &operator = (const Info &m);
 107             std::vector<string> DeviceName; // device names; setDevice() selects a device by its index in this vector
 108         };
 109         //////////////////////////////// Initialization & Info ////////////////////////
 110         // this function may become obsolete
 111         //CV_EXPORTS cl_device_id getDevice();
 112         // This function must be called before any other cv::ocl function; it initializes the OpenCL runtime.
 113         // Each Info relates to one OpenCL platform.
 114         // Each platform has one or more devices, and each device has a separate name.
 115         CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
 116
 117         // Sets the device you want to use; optional, to be called after getDevice().
 118         // devnum is the index of the selected device in the DeviceName vector of Info.
 119         CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
 120
 121         // The two functions below let other OpenCL programs use the ocl module's cl_context and cl_command_queue.
 122         // returns cl_context *
 123         CV_EXPORTS void* getoclContext();
 124         // returns cl_command_queue *
 125         CV_EXPORTS void* getoclCommandQueue();
 126
 127         // Explicitly calls clFinish. The global command queue is used.
 128         CV_EXPORTS void finish();
 129
 130         // This function lets the ocl module use a customized cl_context and cl_command_queue.
 131         // getDevice() also needs to be called before this function.
 132         CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
 133
 134         // Returns true when the global OpenCL context is initialized.
 135         CV_EXPORTS bool initialized();
 136
 137         //////////////////////////////// Error handling ////////////////////////
 138         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); // takes a message plus a source location (file, line, function name)
 139
 140         //////////////////////////////// OpenCL context ////////////////////////
 141         // This is a global singleton class used to represent an OpenCL context.
 142         class CV_EXPORTS Context
 143         {
 144         protected:
 145             Context(); // not publicly constructible; use the static getContext()
 146             friend class auto_ptr<Context>;
 147             friend bool initialized();
 148         private:
 149             static auto_ptr<Context> clCxt; // class-wide context holder
 150             static int val;
 151         public:
 152             ~Context();
 153             void release();
 154             Info::Impl* impl;
 155
 156             static Context* getContext();
 157             static void setContext(Info &oclinfo);
 158
 159             enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; // NOTE(review): presumably the values accepted by supportsFeature() - confirm
 160             bool supportsFeature(int ftype);
 161             size_t computeUnits();
 162             void* oclContext();
 163             void* oclCommandQueue();
 164         };
 165
 166         //! Calls a kernel given as source strings. Pass globalThreads = NULL and cleanUp = true to only clean up, without executing.
 167         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 168                                                         const char **source, string kernelName,
 169                                                         size_t globalThreads[3], size_t localThreads[3],
 170                                                         std::vector< std::pair<size_t, const void *> > &args,
 171                                                         int channels, int depth, const char *build_options,
 172                                                         bool finish = true, bool measureKernelTime = false,
 173                                                         bool cleanUp = true);
 174
 175         //! Calls a kernel given as numFiles file names. Pass globalThreads = NULL and cleanUp = true to only clean up, without executing.
 176         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 177                                                         const char **fileName, const int numFiles, string kernelName,
 178                                                         size_t globalThreads[3], size_t localThreads[3],
 179                                                         std::vector< std::pair<size_t, const void *> > &args,
 180                                                         int channels, int depth, const char *build_options,
 181                                                         bool finish = true, bool measureKernelTime = false,
 182                                                         bool cleanUp = true);
 183
 184         //! Enables or disables caching of OpenCL program binaries on the local disk.
 185         // After a program (*.cl files in opencl/ folder) is built at runtime, the
 186         // compiled OpenCL program can be cached automatically under the path as a "path/*.clb"
 187         // binary file, which is reused when the OpenCV executable is started again.
 188         //
 189         // The caching mode is controlled by the following enum values.
 190         // Notes
 191         //   1. the feature is enabled by default when OpenCV is built in release mode.
 192         //   2. the CACHE_DEBUG / CACHE_RELEASE flags are only effective with the MSVC compiler;
 193         //      for GNU compilers, the function always treats the build as release mode (enabled by default).
 194         enum
 195         {
 196             CACHE_NONE    = 0,        // do not cache OpenCL binary
 197             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only works with MSVC)
 198             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only works with MSVC)
 199             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache OpenCL binary
 200             CACHE_UPDATE  = 0x1 << 2  // if a binary cache file with the same name is already on the disk, it will be updated.
 201         };
 202         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 203
 204         //! sets where the binary cache is saved to
 205         CV_EXPORTS void setBinpath(const char *path);
 206
 207         class CV_EXPORTS oclMatExpr;
 208         //////////////////////////////// oclMat ////////////////////////////////
 209         class CV_EXPORTS oclMat
 210         {
 211         public:
 212             //! default constructor
 213             oclMat();
 214             //! constructs oclMat of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 215             oclMat(int rows, int cols, int type);
 216             oclMat(Size size, int type);
 217             //! constructs oclMat and fills it with the specified value s.
 218             oclMat(int rows, int cols, int type, const Scalar &s);
 219             oclMat(Size size, int type, const Scalar &s);
 220             //! copy constructor
 221             oclMat(const oclMat &m);
 222
 223             //! constructor for oclMat headers pointing to user-allocated data
 224             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 225             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 226
 227             //! creates a matrix header for a part of the bigger matrix
 228             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 229             oclMat(const oclMat &m, const Rect &roi);
 230
 231             //! builds oclMat from Mat. Performs a blocking upload to the device.
 232             explicit oclMat (const Mat &m);
 233
 234             //! destructor - calls release()
 235             ~oclMat();
 236
 237             //! assignment operators
 238             oclMat &operator = (const oclMat &m);
 239             //! assignment operator. Performs a blocking upload to the device.
 240             oclMat &operator = (const Mat &m);
 241             oclMat &operator = (const oclMatExpr& expr);
 242
 243             //! performs a blocking upload of data to oclMat.
 244             void upload(const cv::Mat &m);
 245
 246
 247             //! downloads data from device to host memory. Blocking calls.
 248             operator Mat() const;
 249             void download(cv::Mat &m) const;
 250
 251             //! convert to _InputArray
 252             operator _InputArray();
 253
 254             //! convert to _OutputArray
 255             operator _OutputArray();
 256
 257             //! returns a new oclMat header for the specified row
 258             oclMat row(int y) const;
 259             //! returns a new oclMat header for the specified column
 260             oclMat col(int x) const;
 261             //! ... for the specified row span
 262             oclMat rowRange(int startrow, int endrow) const;
 263             oclMat rowRange(const Range &r) const;
 264             //! ... for the specified column span
 265             oclMat colRange(int startcol, int endcol) const;
 266             oclMat colRange(const Range &r) const;
 267
 268             //! returns deep copy of the oclMat, i.e. the data is copied
 269             oclMat clone() const;
 270             //! copies the oclMat content to "m".
 271             // It calls m.create(this->size(), this->type()).
 272             // It supports any data type
 273             void copyTo( oclMat &m ) const;
 274             //! copies those oclMat elements to "m" that are marked with non-zero mask elements.
 275             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 276             void copyTo( oclMat &m, const oclMat &mask ) const;
 277             //! converts oclMat to another datatype with optional scaling. See cvConvertScale.
 278             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 279             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 280
 281             void assignTo( oclMat &m, int type = -1 ) const;
 282
 283             //! sets every oclMat element to s
 284             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 285             oclMat& operator = (const Scalar &s);
 286             //! sets some of the oclMat elements to s, according to the mask
 287             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 288             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 289             //! creates alternative oclMat header for the same data, with different
 290             // number of channels and/or different number of rows. see cvReshape.
 291             oclMat reshape(int cn, int rows = 0) const;
 292
 293             //! allocates new oclMat data unless the oclMat already has specified size and type.
 294             // previous data is unreferenced if needed.
 295             void create(int rows, int cols, int type);
 296             void create(Size size, int type);
 297
 298             //! allocates new oclMat with specified device memory type.
 299             void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
 300             void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 301
 302             //! decreases reference counter;
 303             // deallocates the data when reference counter reaches 0.
 304             void release();
 305
 306             //! swaps with other smart pointer
 307             void swap(oclMat &mat);
 308
 309             //! locates oclMat header within a parent oclMat. See below
 310             void locateROI( Size &wholeSize, Point &ofs ) const;
 311             //! moves/resizes the current oclMat ROI inside the parent oclMat.
 312             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 313             //! extracts a rectangular sub-oclMat
 314             // (this is a generalized form of row, rowRange etc.)
 315             oclMat operator()( Range rowRange, Range colRange ) const;
 316             oclMat operator()( const Rect &roi ) const;
 317
 318             oclMat& operator+=( const oclMat& m );
 319             oclMat& operator-=( const oclMat& m );
 320             oclMat& operator*=( const oclMat& m );
 321             oclMat& operator/=( const oclMat& m );
 322
 323             //! returns true if the oclMat data is continuous
 324             // (i.e. when there are no gaps between successive rows).
 325             // similar to CV_IS_MAT_CONT(cvMat->type)
 326             bool isContinuous() const;
 327             //! returns element size in bytes,
 328             // similar to CV_ELEM_SIZE(cvMat->type)
 329             size_t elemSize() const;
 330             //! returns the size of element channel in bytes.
 331             size_t elemSize1() const;
 332             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 333             int type() const;
 334             //! returns the element type as used in ocl, i.e. 8UC3 returns 8UC4 because in ocl
 335             //! a 3-channel element actually uses the space of 4 channels
 336             int ocltype() const;
 337             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 338             int depth() const;
 339             //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
 340             int channels() const;
 341             //! returns number of channels as used in ocl: returns 4 for a 3-channel element,
 342             //! because a 3-channel element actually uses the space of 4 channels
 343             int oclchannels() const;
 344             //! returns step/elemSize1()
 345             size_t step1() const;
 346             //! returns oclMat size:
 347             // width == number of columns, height == number of rows
 348             Size size() const;
 349             //! returns true if oclMat data is NULL
 350             bool empty() const;
 351
 352             //! returns pointer to y-th row
 353             uchar* ptr(int y = 0);
 354             const uchar *ptr(int y = 0) const;
 355
 356             //! template version of the above method
 357             template<typename _Tp> _Tp *ptr(int y = 0);
 358             template<typename _Tp> const _Tp *ptr(int y = 0) const;
 359
 360             //! matrix transposition
 361             oclMat t() const;
 362
 363             /*! includes several bit-fields:
 364               - the magic signature
 365               - continuity flag
 366               - depth
 367               - number of channels
 368               */
 369             int flags;
 370             //! the number of rows and columns
 371             int rows, cols;
 372             //! a distance between successive rows in bytes; includes the gap if any
 373             size_t step;
 374             //! pointer to the data (OCL memory object)
 375             uchar *data;
 376
 377             //! pointer to the reference counter;
 378             // when oclMat points to user-allocated data, the pointer is NULL
 379             int *refcount;
 380
 381             //! helper fields used in locateROI and adjustROI
 382             // datastart and dataend are not used in the current version
 383             uchar *datastart;
 384             uchar *dataend;
 385
 386             //! OpenCL context associated with the oclMat object.
 387             Context *clCxt;
 388             // offset added to handle ROI, measured in bytes
 389             int offset;
 390             // wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
 391             int wholerows;
 392             int wholecols;
 393         };
 394
 395         // converts InputArray/OutputArray to oclMat references
 396         CV_EXPORTS oclMat& getOclMatRef(InputArray src);
 397         CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 398
 399         ///////////////////// mat split and merge /////////////////////////////////
 400         //! Composes a multi-channel array from several single-channel arrays
 401         // Supports all types
 402         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
 403         CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
 404
 405         //! Divides a multi-channel array into several single-channel arrays
 406         // Supports all types
 407         CV_EXPORTS void split(const oclMat &src, oclMat *dst);
 408         CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
 409
 410         ////////////////////////////// Arithmetics ///////////////////////////////////
 411         //#if defined DOUBLE_SUPPORT
 412         //typedef double F;
 413         //#else
 414         //typedef float F;
 415         //#endif
 416         //      CV_EXPORTS void addWeighted(const oclMat& a,F  alpha, const oclMat& b,F beta,F gama, oclMat& c);
 417         CV_EXPORTS void addWeighted(const oclMat &a, double  alpha, const oclMat &b, double beta, double gama, oclMat &c);
 418         //! adds one matrix to another (c = a + b)
 419         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 420         CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
 421         //! adds one matrix to another (c = a + b), with a mask argument
 422         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 423         CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
 424         //! adds scalar to a matrix (c = a + s)
 425         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 426         CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
 427         //! subtracts one matrix from another (c = a - b)
 428         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 429         CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
 430         //! subtracts one matrix from another (c = a - b), with a mask argument
 431         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 432         CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
 433         //! subtracts scalar from a matrix (c = a - s)
 434         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 435         CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
 436         //! scalar-first overload; NOTE(review): presumably c = s - a, given the argument order - confirm
 437         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 438         CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
 439         //! computes element-wise product of the two arrays (c = a * b)
 440         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 441         CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
 442         //! multiplies a matrix by a number (dst = scalar * src)
 443         // supports CV_32FC1 only
 444         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 445         //! computes element-wise quotient of the two arrays (c = a / b)
 446         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 447         CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
 448         //! scalar-first overload; NOTE(review): presumably c = scale / b, given the argument order - confirm
 449         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 450         CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
 451
 452         //! compares elements of two arrays (c = a <cmpop> b)
 453         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 454         CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
 455
 456         //! transposes the matrix
 457         // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1 (the same as cuda)
 458         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 459
 460         //! computes element-wise absolute difference of two arrays (c = abs(a - b))
 461         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 462         CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
 463         //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
 464         // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
 465         CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
 466
 467         //! computes mean value and standard deviation of all or selected array elements
 468         // supports all types except CV_32F and CV_64F
 469         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 470
 471         //! computes norm of array
 472         // supports NORM_INF, NORM_L1, NORM_L2
 473         // supports only CV_8UC1 type
 474         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 475
 476         //! computes norm of the difference between two arrays
 477         // supports NORM_INF, NORM_L1, NORM_L2
 478         // supports only CV_8UC1 type
 479         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 480
 481         //! reverses the order of the rows, columns or both in a matrix
 482         // supports all types
 483         CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
 484
 485         //! computes sum of array elements
 486         // disabled until fix crash  (NOTE(review): the functions are still declared below - confirm whether this remark is stale)
 487         // supports all types
 488         CV_EXPORTS Scalar sum(const oclMat &m);
 489         CV_EXPORTS Scalar absSum(const oclMat &m);
 490         CV_EXPORTS Scalar sqrSum(const oclMat &m);
 490         CV_EXPORTS Scalar sqrSum(const oclMat &m);
 491
 492         //! finds global minimum and maximum array elements and returns their values
 493         // supports all C1 types
 494
 495         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
 496         CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
 497
 498         //! finds global minimum and maximum array elements and returns their values with locations
 499         // supports all C1 types
 500
 501         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 502                                   const oclMat &mask = oclMat());
 503
 504         //! counts non-zero array elements
 505         // supports all types
 506         CV_EXPORTS int countNonZero(const oclMat &src);
 507
 508         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
 509         // destination array will have the same depth as lut and the same number of channels as source
 510         // It supports 8UC1 8UC4 only
 511         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 512
 513         //! only 8UC1 and 256 bins are supported now
 514         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 515         //! only 8UC1 and 256 bins are supported now
 516         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 517
 518         //! only 8UC1 is supported now
 519         CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 520
 521         //! bilateralFilter
 522         // supports 8UC1 8UC4
 523         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
 524         //! computes exponent of each matrix element (b = e**a)
 525         // supports only CV_32FC1 type
 526         CV_EXPORTS void exp(const oclMat &a, oclMat &b);
 527
 528         //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
 529         // supports only CV_32FC1 type
 530         CV_EXPORTS void log(const oclMat &a, oclMat &b);
 531
 532         //! computes magnitude of each (x(i), y(i)) vector
 533         // supports only CV_32F and CV_64F types
 534         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 535         CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
 536
 537         CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
 538
 539         //! computes angle (angle(i)) of each (x(i), y(i)) vector
 540         // supports only CV_32F and CV_64F types
 541         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 542
 543         //! raises every element of the input array to the power p
 544         //! supports only CV_32F and CV_64F types
 545         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 546
 547         //! converts Cartesian coordinates to polar
 548         // supports only CV_32F and CV_64F types
 549         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 550
 551         //! converts polar coordinates to Cartesian
 552         // supports only CV_32F and CV_64F types
 553         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 554
 555         //! perfroms per-elements bit-wise inversion
 556         // supports all types
 557         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 558         //! calculates per-element bit-wise disjunction of two arrays
 559         // supports all types
 560         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 561         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 562         //! calculates per-element bit-wise conjunction of two arrays
 563         // supports all types
 564         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 565         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 566         //! calculates per-element bit-wise "exclusive or" operation
 567         // supports all types
 568         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 569         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 570
 571         //! Logical operators
 572         CV_EXPORTS oclMat operator ~ (const oclMat &);
 573         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 574         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 575         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 576
 577
 578         //! Mathematics operators
 579         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 580         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 581         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 582         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 583
        //! computes convolution of two images
        //! support only CV_32FC1 type
        // image and temp1 are the two inputs, result receives the output
        // NOTE(review): temp1 is presumably the kernel/template operand - confirm
        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
 587
        // NOTE(review): presumably the OpenCL counterpart of cv::cvtColor (code = conversion code,
        // dcn = number of destination channels, 0 by default) - confirm against the implementation
        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
 589
 590         //////////////////////////////// Filter Engine ////////////////////////////////
 591
 592         /*!
 593           The Base Class for 1D or Row-wise Filters
 594
 595           This is the base class for linear or non-linear filters that process 1D data.
 596           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 597           */
 598         class CV_EXPORTS BaseRowFilter_GPU
 599         {
 600         public:
 601             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 602             virtual ~BaseRowFilter_GPU() {}
 603             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 604             int ksize, anchor, bordertype;
 605         };
 606
 607         /*!
 608           The Base Class for Column-wise Filters
 609
 610           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 611           Such filters are used for the "vertical" filtering parts in separable filters.
 612           */
 613         class CV_EXPORTS BaseColumnFilter_GPU
 614         {
 615         public:
 616             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 617             virtual ~BaseColumnFilter_GPU() {}
 618             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 619             int ksize, anchor, bordertype;
 620         };
 621
 622         /*!
 623           The Base Class for Non-Separable 2D Filters.
 624
 625           This is the base class for linear or non-linear 2D filters.
 626           */
 627         class CV_EXPORTS BaseFilter_GPU
 628         {
 629         public:
 630             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
 631                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
 632             virtual ~BaseFilter_GPU() {}
 633             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
 634             Size ksize;
 635             Point anchor;
 636             int borderType;
 637         };
 638
 639         /*!
 640           The Base Class for Filter Engine.
 641
 642           The class can be used to apply an arbitrary filtering operation to an image.
 643           It contains all the necessary intermediate buffers.
 644           */
 645         class CV_EXPORTS FilterEngine_GPU
 646         {
 647         public:
 648             virtual ~FilterEngine_GPU() {}
 649
 650             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
 651         };
 652
        //! returns the non-separable filter engine with the specified filter
        // filter2D - the 2D filter used by the returned engine (the Ptr is taken by value)
        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
 655
        //! returns the primitive row filter with the specified kernel
        // srcType / bufType - types of the filter input and of its output buffer
        // anchor defaults to -1 (NOTE(review): presumably "kernel center" - confirm)
        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
                int anchor = -1, int bordertype = BORDER_DEFAULT);

        //! returns the primitive column filter with the specified kernel
        // bufType / dstType - types of the filter input buffer and of its output
        // anchor defaults to -1 (NOTE(review): presumably "kernel center" - confirm); delta defaults to 0.0
        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 663
        //! returns the separable linear filter engine
        // built from a row kernel and a column kernel that are supplied as host-side Mat objects
        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! returns the separable filter engine with the specified filters
        // rowFilter / columnFilter - the already constructed row and column filters to combine
        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
                const Ptr<BaseColumnFilter_GPU> &columnFilter);
 671
        //! returns the Gaussian filter engine
        // sigma2 defaults to 0 (NOTE(review): presumably "use sigma1 for both directions" - confirm)
        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);

        //! returns filter engine for the generalized Sobel operator
        // dx / dy - derivative orders, ksize - kernel size (NOTE(review): meanings inferred from the names - confirm)
        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 677
        //! applies Laplacian operator to the image
        // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
        // ksize and scale both default to 1
        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
 681
        //! returns 2D box filter
        // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
        // anchor defaults to Point(-1, -1), borderType to BORDER_DEFAULT
        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! returns box filter engine
        // takes the same arguments as getBoxFilter_GPU but returns a FilterEngine_GPU
        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 690
        //! returns 2D filter with the specified kernel
        // supports CV_8UC1 and CV_8UC4 types
        // kernel is a host-side Mat; its size is passed separately as ksize
        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
                Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! returns the non-separable linear filter engine
        // unlike getLinearFilter_GPU, no separate ksize argument is taken
        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 699
        //! smooths the image using the normalized box filter
        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
        // anchor defaults to Point(-1, -1), borderType to BORDER_DEFAULT
        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 705
        //! returns 2D morphological filter
        //! only MORPH_ERODE and MORPH_DILATE are supported
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        // kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
                Point anchor = Point(-1, -1));

        //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
        // iterations defaults to 1
        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
                const Point &anchor = Point(-1, -1), int iterations = 1);
 716
 717         //! a synonym for normalized box filter
 718         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 719         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 720         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 721                                 int borderType = BORDER_CONSTANT)
 722         {
 723             boxFilter(src, dst, -1, ksize, anchor, borderType);
 724         }
 725
        //! applies non-separable 2D linear filter to the image
        //  Note: at the moment this function only works when the anchor point is in the kernel center
        //  and the kernel size is either 3x3 or 5x5; otherwise the function fails to produce a valid result
        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
                                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

        //! applies separable 2D linear filter to the image
        //  kernelX and kernelY are host-side Mat kernels; delta defaults to 0.0
        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 735
        //! applies generalized Sobel operator to the image
        // dst.type must equal src.type
        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
        // defaults: ksize = 3, scale = 1, delta = 0.0, bordertype = BORDER_DEFAULT
        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! applies the vertical or horizontal Scharr operator to the image
        // dst.type must equal src.type
        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
        // defaults: scale = 1, delta = 0.0, bordertype = BORDER_DEFAULT
        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);

        //! smooths the image using Gaussian filter.
        // dst.type must equal src.type
        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
        // defaults: sigma2 = 0, bordertype = BORDER_DEFAULT
        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 753
        //! erodes the image (applies the local minimum operator)
        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // defaults: anchor = Point(-1, -1), iterations = 1, borderType = BORDER_CONSTANT,
        // borderValue = morphologyDefaultBorderValue()
        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,

                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());


        //! dilates the image (applies the local maximum operator)
        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
        // defaults: anchor = Point(-1, -1), iterations = 1, borderType = BORDER_CONSTANT,
        // borderValue = morphologyDefaultBorderValue()
        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,

                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());


        //! applies an advanced morphological operation to the image
        // op selects the operation; the remaining parameters and defaults match erode / dilate
        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,

                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 772
 773
 774         ////////////////////////////// Image processing //////////////////////////////
        //! Does mean shift filtering on GPU.
        // criteria defaults to TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)
        // NOTE(review): sp / sr are presumably the spatial and color window radii - confirm
        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));

        //! Does mean shift procedure on GPU.
        // writes two outputs, dstr and dstsp; criteria has the same default as meanShiftFiltering
        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));

        //! Does mean shift segmentation with elimination of small regions.
        // the result is written to a host-side Mat (dst), not to an oclMat
        // NOTE(review): minsize is presumably the smallest region size that is kept - confirm
        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 786
        //! applies fixed threshold to the image.
        // supports CV_8UC1 and CV_32FC1 data type
        // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
        // type defaults to THRESH_TRUNC
        // NOTE(review): the returned double is presumably the threshold value that was applied - confirm
        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);

        //! resizes the image
        // Supports INTER_NEAREST, INTER_LINEAR
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        // fx and fy default to 0, interpolation to INTER_LINEAR
        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 796
        //! Applies a generic geometrical transformation to an image.
        // Supports INTER_NEAREST, INTER_LINEAR.
        // Map1 supports CV_16SC2, CV_32FC2  types.
        // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
        // map1 and map2 are taken by non-const reference; value defaults to Scalar()
        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
 806
        //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
        // supports CV_8UC1, CV_8UC4, CV_32SC1 types
        // top / bottom / left / right - border widths on each side; boardtype (sic) - border type;
        // value defaults to Scalar()
        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());

        //! Smoothes image using median filter
        // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 814
        //! warps the image using affine transformation
        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        // M is a host-side Mat; flags defaults to INTER_LINEAR
        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);

        //! warps the image using perspective transformation
        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
        // M is a host-side Mat; flags defaults to INTER_LINEAR
        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 824
        //! computes the integral image and integral for the squared image
        // sum will have CV_32S type, sqsum - CV_32F type
        // supports only CV_8UC1 source type
        // the second overload computes only the integral image (no sqsum output)
        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
        CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
        // NOTE(review): presumably the Harris corner response, as in cv::cornerHarris - confirm
        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
        // same parameters as cornerHarris plus two extra output matrices, Dx and Dy
        // NOTE(review): Dx / Dy presumably receive the image derivatives - confirm
        CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
            int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
        // NOTE(review): presumably the minimal-eigenvalue corner measure, as in cv::cornerMinEigenVal - confirm
        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
        // same parameters as cornerMinEigenVal plus two extra output matrices, Dx and Dy
        CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
            int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 836
 837         /////////////////////////////////// ML ///////////////////////////////////////////
 838
        //! Computes the closest center for each line in the source and labels it with that center's index
        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
        // the two outputs (dists, labels) come first in the parameter list, followed by the inputs
        CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers);

        //! Does k-means procedure on GPU
        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
        // NOTE(review): parameters presumably mirror cv::kmeans (attemps = number of attempts,
        // return value = compactness) - confirm
        CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
                                     TermCriteria criteria, int attemps, int flags, oclMat &centers);
 847
 848
 849         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 850         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 851         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 852
 853         class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier
 854         {
 855         public:
 856             OclCascadeClassifier() {};
 857             ~OclCascadeClassifier() {};
 858
 859             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
 860                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
 861         };
 862
 863         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
 864         {
 865         public:
 866             OclCascadeClassifierBuf() :
 867                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 868
 869             ~OclCascadeClassifierBuf() { release(); }
 870
 871             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 872                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 873                                   Size minSize = Size(), Size maxSize = Size());
 874             void release();
 875
 876         private:
 877             void Init(const int rows, const int cols, double scaleFactor, int flags,
 878                       const int outputsz, const size_t localThreads[],
 879                       CvSize minSize, CvSize maxSize);
 880             void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
 881             void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
 882                                          const double scaleFactor, const size_t localThreads[],
 883                                          CvSize minSize, CvSize maxSize);
 884             void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
 885
 886             int m_rows;
 887             int m_cols;
 888             int m_flags;
 889             int m_loopcount;
 890             int m_nodenum;
 891             bool findBiggestObject;
 892             bool initialized;
 893             double m_scaleFactor;
 894             Size m_minSize;
 895             Size m_maxSize;
 896             vector<CvSize> sizev;
 897             vector<float> scalev;
 898             oclMat gimg1, gsum, gsqsum;
 899             void * buffers;
 900         };
 901
 902
 903         /////////////////////////////// Pyramid /////////////////////////////////////
        // NOTE(review): presumably smooths the source image and then downsamples it (counterpart of pyrUp) - confirm
        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);

        //! upsamples the source image and then smoothes it
        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);

        //! performs linear blending of two images
        //! to avoid accuracy errors sum of weights shouldn't be very close to zero
        // supports only CV_8UC1 source type
        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);

        //! computes vertical sum, supports only CV_32FC1 images
        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 916
 917         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
        // Buffer bundle accepted by the matchTemplate overload that takes a MatchTemplateBuf.
        // NOTE(review): the member roles below are inferred from their names - confirm in the implementation.
        struct CV_EXPORTS MatchTemplateBuf
        {
            Size user_block_size;             // presumably a caller-chosen block size
            oclMat imagef, templf;            // presumably floating-point copies of image / template
            std::vector<oclMat> images;       // presumably per-channel images
            std::vector<oclMat> image_sums;   // presumably integral images
            std::vector<oclMat> image_sqsums; // presumably squared integral images
        };
 926
        //! computes the proximity map for the raster template and the image where the template is searched for
        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);

        //! computes the proximity map for the raster template and the image where the template is searched for
        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
        // this overload additionally takes a caller-provided MatchTemplateBuf
        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 936
 937         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
        // forward declaration; the struct is defined right after the Canny overloads
        struct CV_EXPORTS CannyBuf;
        //! compute edges of the input image using Canny operator
        // Support CV_8UC1 only
        // four overloads: input is either an image or a pair of precomputed derivatives (dx, dy),
        // each with or without a caller-provided CannyBuf
        // apperture_size defaults to 3 and L2gradient to false
        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 945
 946         struct CV_EXPORTS CannyBuf
 947         {
 948             CannyBuf() : counter(NULL) {}
 949             ~CannyBuf()
 950             {
 951                 release();
 952             }
 953             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
 954             {
 955                 create(image_size, apperture_size);
 956             }
 957             CannyBuf(const oclMat &dx_, const oclMat &dy_);
 958
 959             void create(const Size &image_size, int apperture_size = 3);
 960             void release();
 961             oclMat dx, dy;
 962             oclMat dx_buf, dy_buf;
 963             oclMat edgeBuf;
 964             oclMat trackBuf1, trackBuf2;
 965             void *counter;
 966             Ptr<FilterEngine_GPU> filterDX, filterDY;
 967         };
 968
 969         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
        //! Param dft_size is the size of DFT transform.
        //!
        //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
        // supports src types CV_32FC1, CV_32FC2
        // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
        // dft_size is the size of the original input, which is used for the transformation from complex to real.
        // dft_size must be a product of powers of 2, 3 and 5
        // real to complex dft requires at least v1.8 clAmdFft
        // real to complex dft output is not the same as the cpu version's
        // real to complex and complex to real do not support DFT_ROWS
        // defaults: dft_size = Size(0, 0), flags = 0
        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
 982
        //! implements generalized matrix product algorithm GEMM from BLAS
        // The functionality requires clAmdBlas library
        // only support type CV_32FC1
        // flag GEMM_3_T is not supported
        // flags defaults to 0
        // NOTE(review): presumably computes dst = alpha * src1 * src2 + beta * src3, as in cv::gemm - confirm
        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
 989
 990         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
        // HOG descriptor and object detector operating on oclMat images.
        // Public data members hold the configuration passed to the constructor; protected members
        // hold intermediate results and buffers.
        struct CV_EXPORTS HOGDescriptor
        {
            // default for the win_sigma constructor argument
            // NOTE(review): -1 presumably means "derive sigma automatically" (see getWinSigma) - confirm
            enum { DEFAULT_WIN_SIGMA = -1 };
            // default for the nlevels constructor argument
            enum { DEFAULT_NLEVELS = 64 };
            // layouts accepted by getDescriptors() through descr_format
            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
            // all arguments are optional; the defaults are spelled out in the signature
            HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
                          Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
                          int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
                          double threshold_L2hys = 0.2, bool gamma_correction = true,
                          int nlevels = DEFAULT_NLEVELS);

            size_t getDescriptorSize() const;
            size_t getBlockHistogramSize() const;
            // sets the detector coefficients (see free_coef / detector below)
            void setSVMDetector(const vector<float> &detector);
            // ready-made people-detector coefficient sets
            static vector<float> getDefaultPeopleDetector();
            static vector<float> getPeopleDetector48x96();
            static vector<float> getPeopleDetector64x128();
            // single-scale detection; results are returned as points in found_locations
            void detect(const oclMat &img, vector<Point> &found_locations,
                        double hit_threshold = 0, Size win_stride = Size(),
                        Size padding = Size());
            // multi-scale detection; results are returned as rectangles in found_locations
            // defaults: scale0 = 1.05, group_threshold = 2
            void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
                                  double hit_threshold = 0, Size win_stride = Size(),
                                  Size padding = Size(), double scale0 = 1.05,
                                  int group_threshold = 2);
            // writes descriptors for img into `descriptors`; descr_format defaults to DESCR_FORMAT_COL_BY_COL
            void getDescriptors(const oclMat &img, Size win_stride,
                                oclMat &descriptors,
                                int descr_format = DESCR_FORMAT_COL_BY_COL);
            // configuration (same names as the constructor arguments)
            Size win_size;
            Size block_size;
            Size block_stride;
            Size cell_size;

            int nbins;
            double win_sigma;
            double threshold_L2hys;
            bool gamma_correction;
            int nlevels;

        protected:
            // initialize buffers; only need to do once in case of multiscale detection
            void init_buffer(const oclMat &img, Size win_stride);
            void computeBlockHistograms(const oclMat &img);
            void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
            double getWinSigma() const;
            bool checkDetectorSize() const;

            // scalar and Size overloads of the same helper
            static int numPartsWithin(int size, int part_size, int stride);
            static Size numPartsWithin(Size size, Size part_size, Size stride);

            // Coefficients of the separating plane
            float free_coef;
            oclMat detector;
            // Results of the last classification step
            oclMat labels;
            Mat labels_host;
            // Results of the last histogram evaluation step
            oclMat block_hists;
            // Gradients computation results
            oclMat grad, qangle;
            // scaled image
            oclMat image_scale;
            // effective size of input image (might be different from original size after scaling)
            Size effect_size;
        };
1055
1056
1057         ////////////////////////feature2d_ocl/////////////////
1058         /****************************************************************************************\
1059         *                                      Distance                                          *
1060         \****************************************************************************************/
        // Maps an element type T to the type used as the result of the distance functors below
        // (see the ResultType typedefs in L1 and L2).
        // The primary template uses T itself; the specializations select float for
        // unsigned char, unsigned short, char and short.
        template<typename T>
        struct CV_EXPORTS Accumulator
        {
            typedef T Type;
        };
        template<> struct Accumulator<unsigned char>
        {
            typedef float Type;
        };
        template<> struct Accumulator<unsigned short>
        {
            typedef float Type;
        };
        template<> struct Accumulator<char>
        {
            typedef float Type;
        };
        template<> struct Accumulator<short>
        {
            typedef float Type;
        };
1082
1083         /*
1084          * Manhattan distance (city block distance) functor
1085          */
1086         template<class T>
1087         struct CV_EXPORTS L1
1088         {
1089             enum { normType = NORM_L1 };
1090             typedef T ValueType;
1091             typedef typename Accumulator<T>::Type ResultType;
1092
1093             ResultType operator()( const T *a, const T *b, int size ) const
1094             {
1095                 return normL1<ValueType, ResultType>(a, b, size);
1096             }
1097         };
1098
1099         /*
1100          * Euclidean distance functor
1101          */
1102         template<class T>
1103         struct CV_EXPORTS L2
1104         {
1105             enum { normType = NORM_L2 };
1106             typedef T ValueType;
1107             typedef typename Accumulator<T>::Type ResultType;
1108
1109             ResultType operator()( const T *a, const T *b, int size ) const
1110             {
1111                 return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1112             }
1113         };
1114
1115         /*
1116          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1117          * bit count of A exclusive XOR'ed with B
1118          */
1119         struct CV_EXPORTS Hamming
1120         {
1121             enum { normType = NORM_HAMMING };
1122             typedef unsigned char ValueType;
1123             typedef int ResultType;
1124
1125             /** this will count the bits in a ^ b
1126              */
1127             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1128             {
1129                 return normHamming(a, b, size);
1130             }
1131         };
1132
1133         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1134
1135         class CV_EXPORTS BruteForceMatcher_OCL_base // matcher interface inherited by BruteForceMatcher_OCL<> and BFMatcher_OCL
1136         {
1137         public:
1138             enum DistType {L1Dist = 0, L2Dist, HammingDist}; // distance types selectable at construction
1139             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1140             // Add descriptors to the train descriptor collection
1141             void add(const std::vector<oclMat> &descCollection);
1142             // Get the train descriptor collection
1143             const std::vector<oclMat> &getTrainDescriptors() const;
1144             // Clear the train descriptor collection
1145             void clear();
1146             // Return true if there are no train descriptors in the collection
1147             bool empty() const;
1148
1149             // Return true if the matcher supports a mask in the match methods
1150             bool isMaskSupported() const;
1151
1152             // Find one best match for each query descriptor
1153             void matchSingle(const oclMat &query, const oclMat &train,
1154                              oclMat &trainIdx, oclMat &distance,
1155                              const oclMat &mask = oclMat());
1156
1157             // Download trainIdx and distance and convert them to a CPU vector of DMatch
1158             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1159             // Convert trainIdx and distance to a vector of DMatch
1160             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1161
1162             // Find one best match for each query descriptor
1163             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1164
1165             // Make gpu collection of trains and masks in a format suitable for the matchCollection function
1166             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1167
1168
1169             // Find one best match from the train collection for each query descriptor
1170             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1171                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1172                                  const oclMat &masks = oclMat());
1173
1174             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
1175             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1176             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1177             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1178
1179             // Find one best match from the train collection for each query descriptor.
1180             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1181
1182             // Find k best matches for each query descriptor (in increasing order of distances)
1183             void knnMatchSingle(const oclMat &query, const oclMat &train,
1184                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1185                                 const oclMat &mask = oclMat());
1186
1187             // Download trainIdx and distance and convert them to a vector of DMatch.
1188             // compactResult is used when mask is not empty. If compactResult is false, the matches
1189             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1190             // matches vector will not contain matches for fully masked out query descriptors.
1191             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1192                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1193
1194             // Convert trainIdx and distance to a vector of DMatch
1195             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1196                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1197
1198             // Find k best matches for each query descriptor (in increasing order of distances).
1199             // compactResult is used when mask is not empty. If compactResult is false, the matches
1200             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1201             // matches vector will not contain matches for fully masked out query descriptors.
1202             void knnMatch(const oclMat &query, const oclMat &train,
1203                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1204                           bool compactResult = false);
1205
1206             // Find k best matches from the train collection for each query descriptor (in increasing order of distances)
1207             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1208                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1209                                      const oclMat &maskCollection = oclMat());
1210
1211             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch.
1212             // compactResult is used when mask is not empty. If compactResult is false, the matches
1213             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1214             // matches vector will not contain matches for fully masked out query descriptors.
1215             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1216                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1217
1218             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1219             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1220                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1221
1222             // Find k best matches for each query descriptor (in increasing order of distances).
1223             // compactResult is used when mask is not empty. If compactResult is false, the matches
1224             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1225             // matches vector will not contain matches for fully masked out query descriptors.
1226             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1227                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1228
1229             // Find best matches for each query descriptor which have distance less than maxDistance.
1230             // nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
1231             // Be careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
1232             // because it didn't have enough memory.
1233             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1234             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1235             // Matches are not sorted.
1236             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1237                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1238                                    const oclMat &mask = oclMat());
1239
1240             // Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
1241             // matches will be sorted in increasing order of distances.
1242             // compactResult is used when mask is not empty. If compactResult is false, the matches
1243             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1244             // matches vector will not contain matches for fully masked out query descriptors.
1245             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1246                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1247             // Convert trainIdx, nMatches and distance to a vector of DMatch.
1248             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1249                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1250             // Find best matches for each query descriptor which have distance less than maxDistance
1251             // (in increasing order of distances).
1252             void radiusMatch(const oclMat &query, const oclMat &train,
1253                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1254                              const oclMat &mask = oclMat(), bool compactResult = false);
1255             // Find best matches for each query descriptor which have distance less than maxDistance.
1256             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
1257             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1258             // Matches are not sorted.
1259             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1260                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1261             // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
1262             // matches will be sorted in increasing order of distances.
1263             // compactResult is used when mask is not empty. If compactResult is false, the matches
1264             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1265             // matches vector will not contain matches for fully masked out query descriptors.
1266             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1267                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1268             // Convert trainIdx, imgIdx, nMatches and distance to a vector of DMatch.
1269             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1270                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1271             // Find best matches from the train collection for each query descriptor which have distance less than
1272             // maxDistance (in increasing order of distances).
1273             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1274                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1275             DistType distType; // public distance-type setting (one of DistType)
1276         private:
1277             std::vector<oclMat> trainDescCollection; // presumably the storage behind add()/getTrainDescriptors()/clear() - confirm in the .cpp
1278         };
1279
1280         template <class Distance> // Distance: a distance functor type such as L1<T>, L2<T> or Hamming
1281         class CV_EXPORTS BruteForceMatcher_OCL; // primary template is declared only; no definition appears in this part of the header
1282
1283         template <typename T>
1284         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base // specialization that constructs the base with L1Dist
1285         {
1286         public:
1287             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
1288             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} // the functor argument is unnamed and unused
1289         };
1290
1291         template <typename T>
1292         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base // specialization that constructs the base with L2Dist
1293         {
1294         public:
1295             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
1296             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} // the functor argument is unnamed and unused
1297         };
1298
1299         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base // full specialization that constructs the base with HammingDist
1300         {
1301         public:
1302             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
1303             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} // the functor argument is unnamed and unused
1304         };
1305
1306         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base // matcher whose distance type is chosen from an integer norm code
1307         {
1308         public: // norm mapping: NORM_L1 -> L1Dist, NORM_L2 -> L2Dist, any other value -> HammingDist
1309             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1310         };
1311
1312         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL // corner detector functor; parameters are public members, work buffers are private
1313         {
1314         public:
1315             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1316                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1317
1318             //! returns a single-row matrix of CV_32FC2 type in corners
1319             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1320             //! download points of type Point2f to a vector. the vector's content will be erased
1321             void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
1322
1323             int maxCorners; // parameters below are copied from the same-named constructor arguments
1324             double qualityLevel;
1325             double minDistance;
1326
1327             int blockSize;
1328             bool useHarrisDetector;
1329             double harrisK;
1330             void releaseMemory() // releases all five private work buffers
1331             {
1332                 Dx_.release();
1333                 Dy_.release();
1334                 eig_.release();
1335                 minMaxbuf_.release();
1336                 tmpCorners_.release();
1337             }
1338         private:
1339             oclMat Dx_; // private buffers; their exact roles are defined by the implementation, not this header
1340             oclMat Dy_;
1341             oclMat eig_;
1342             oclMat minMaxbuf_;
1343             oclMat tmpCorners_;
1344         };
1345
1346         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1347             int blockSize_, bool useHarrisDetector_, double harrisK_)
1348         { // stores every argument in the public member of the same name (without the trailing underscore)
1349             maxCorners = maxCorners_;
1350             qualityLevel = qualityLevel_;
1351             minDistance = minDistance_;
1352             blockSize = blockSize_;
1353             useHarrisDetector = useHarrisDetector_;
1354             harrisK = harrisK_;
1355         }
1356
1357         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1358         class CV_EXPORTS PyrLKOpticalFlow // PyrLK optical flow with sparse() and dense() entry points; parameters are public members
1359         {
1360         public:
1361             PyrLKOpticalFlow() // assigns the default value of every public parameter
1362             {
1363                 winSize = Size(21, 21);
1364                 maxLevel = 3;
1365                 iters = 30;
1366                 derivLambda = 0.5;
1367                 useInitialFlow = false;
1368                 minEigThreshold = 1e-4f;
1369                 getMinEigenVals = false;
1370                 isDeviceArch11_ = false;
1371             }
1372
1373             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1374                         oclMat &status, oclMat *err = 0); // err is optional and defaults to a null pointer
1375             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0); // err is optional and defaults to a null pointer
1376             Size winSize; // default Size(21, 21)
1377             int maxLevel; // default 3
1378             int iters; // default 30
1379             double derivLambda; // default 0.5
1380             bool useInitialFlow; // default false
1381             float minEigThreshold; // default 1e-4f
1382             bool getMinEigenVals; // default false
1383             void releaseMemory() // NOTE(review): uPyr_ and vPyr_ are not released here - confirm whether that is intended
1384             {
1385                 dx_calcBuf_.release();
1386                 dy_calcBuf_.release();
1387
1388                 prevPyr_.clear();
1389                 nextPyr_.clear();
1390
1391                 dx_buf_.release();
1392                 dy_buf_.release();
1393             }
1394         private:
1395             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1396             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
1397
1398             oclMat dx_calcBuf_;
1399             oclMat dy_calcBuf_;
1400
1401             vector<oclMat> prevPyr_;
1402             vector<oclMat> nextPyr_;
1403
1404             oclMat dx_buf_;
1405             oclMat dy_buf_;
1406             oclMat uPyr_[2];
1407             oclMat vPyr_[2];
1408             bool isDeviceArch11_; // initialised to false by the constructor
1409         };
1410
1411         class CV_EXPORTS FarnebackOpticalFlow // optical flow functor; parameters are public members, work buffers are private
1412         {
1413         public:
1414             FarnebackOpticalFlow(); // defined out of line; the default parameter values are not visible in this header
1415
1416             int numLevels;
1417             double pyrScale;
1418             bool fastPyramids;
1419             int winSize;
1420             int numIters;
1421             int polyN;
1422             double polySigma;
1423             int flags;
1424
1425             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy); // inputs frame0/frame1, outputs flowx/flowy
1426
1427             void releaseMemory(); // defined out of line
1428
1429         private:
1430             void prepareGaussian(
1431                 int n, double sigma, float *g, float *xg, float *xxg,
1432                 double &ig11, double &ig03, double &ig33, double &ig55);
1433
1434             void setPolynomialExpansionConsts(int n, double sigma);
1435
1436             void updateFlow_boxFilter(
1437                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1438                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1439
1440             void updateFlow_gaussianBlur(
1441                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1442                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1443
1444             oclMat frames_[2]; // private buffers; their exact roles are defined by the implementation, not this header
1445             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1446             std::vector<oclMat> pyramid0_, pyramid1_;
1447         };
1448
1449         //////////////// build warping maps ////////////////////
1450         //! builds plane warping maps into map_x / map_y
1451         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1452         //! builds cylindrical warping maps into map_x / map_y
1453         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1454         //! builds spherical warping maps into map_x / map_y
1455         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1456         //! builds affine warping maps into xmap / ymap
1457         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1458
1459         //! builds perspective warping maps into xmap / ymap
1460         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1461
1462         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1463         //! Interpolate frames (images) using the provided optical flow (displacement field).
1464         //! frame0   - frame 0 (32-bit floating point image, single channel)
1465         //! frame1   - frame 1 (the same type and size)
1466         //! fu       - forward horizontal displacement
1467         //! fv       - forward vertical displacement
1468         //! bu       - backward horizontal displacement
1469         //! bv       - backward vertical displacement
1470         //! pos      - new frame position
1471         //! newFrame - new frame
1472         //! buf      - temporary buffer, will have width x 6*height size and CV_32FC1 type, and will contain 6 oclMat:
1473         //!            occlusion masks            0, occlusion masks            1,
1474         //!            interpolated forward flow  0, interpolated forward flow  1,
1475         //!            interpolated backward flow 0, interpolated backward flow 1
1476         //!
1477         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1478                                           const oclMat &fu, const oclMat &fv,
1479                                           const oclMat &bu, const oclMat &bv,
1480                                           float pos, oclMat &newFrame, oclMat &buf);
1481
1482         //! computes moments of the rasterized shape or a vector of points (NOTE(review): binaryImage presumably treats all non-zero pixels as 1 - confirm)
1483         CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
1484
1485         class CV_EXPORTS StereoBM_OCL // stereo correspondence functor; parameters are public members
1486         {
1487         public:
1488             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1489
1490             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1491
1492             //! the default constructor
1493             StereoBM_OCL();
1494             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
1495             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1496
1497             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
1498             //! Output disparity has CV_8U type.
1499             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1500
1501             //! Heuristic that tries to estimate
1502             // whether the current GPU will be faster than the CPU for this algorithm.
1503             // It queries the currently active device.
1504             static bool checkIfGpuCallReasonable();
1505
1506             int preset;
1507             int ndisp;
1508             int winSize;
1509
1510             // If avergeTexThreshold == 0 => post-processing is disabled.
1511             // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where, for the left image,
1512             // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
1513             // i.e. the input left image is low textured.
1514             float avergeTexThreshold;
1515         private:
1516             oclMat minSSD, leBuf, riBuf;
1517         };
1518
1519         class CV_EXPORTS StereoBeliefPropagation // stereo matcher functor; parameters are public members
1520         {
1521         public:
1522             enum { DEFAULT_NDISP  = 64 };
1523             enum { DEFAULT_ITERS  = 5  };
1524             enum { DEFAULT_LEVELS = 5  };
1525             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); // writes its results into ndisp, iters and levels
1526             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1527                                              int iters  = DEFAULT_ITERS,
1528                                              int levels = DEFAULT_LEVELS,
1529                                              int msg_type = CV_16S); // note: msg_type defaults to CV_16S here but to CV_32F in the full constructor
1530             StereoBeliefPropagation(int ndisp, int iters, int levels,
1531                                     float max_data_term, float data_weight,
1532                                     float max_disc_term, float disc_single_jump,
1533                                     int msg_type = CV_32F);
1534             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); // computes disparity from a left/right pair
1535             void operator()(const oclMat &data, oclMat &disparity); // variant taking one 'data' matrix (presumably a precomputed data cost - confirm)
1536             int ndisp;
1537             int iters;
1538             int levels;
1539             float max_data_term;
1540             float data_weight;
1541             float max_disc_term;
1542             float disc_single_jump;
1543             int msg_type;
1544         private:
1545             oclMat u, d, l, r, u2, d2, l2, r2; // private buffers; their exact roles are defined by the implementation
1546             std::vector<oclMat> datas;
1547             oclMat out;
1548         };
1549
1550         class CV_EXPORTS StereoConstantSpaceBP // stereo matcher functor; parameters are public members
1551         {
1552         public:
1553             enum { DEFAULT_NDISP    = 128 };
1554             enum { DEFAULT_ITERS    = 8   };
1555             enum { DEFAULT_LEVELS   = 4   };
1556             enum { DEFAULT_NR_PLANE = 4   };
1557             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); // writes its results into the reference arguments
1558             explicit StereoConstantSpaceBP(
1559                 int ndisp    = DEFAULT_NDISP,
1560                 int iters    = DEFAULT_ITERS,
1561                 int levels   = DEFAULT_LEVELS,
1562                 int nr_plane = DEFAULT_NR_PLANE,
1563                 int msg_type = CV_32F);
1564             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1565                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1566                 int min_disp_th = 0,
1567                 int msg_type = CV_32F);
1568             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); // computes disparity from a left/right pair
1569             int ndisp;
1570             int iters;
1571             int levels;
1572             int nr_plane;
1573             float max_data_term;
1574             float data_weight;
1575             float max_disc_term;
1576             float disc_single_jump;
1577             int min_disp_th;
1578             int msg_type;
1579             bool use_local_init_data_cost; // not exposed as a constructor argument in this header
1580         private:
1581             oclMat u[2], d[2], l[2], r[2]; // private buffers; their exact roles are defined by the implementation
1582             oclMat disp_selected_pyr[2];
1583             oclMat data_cost;
1584             oclMat data_cost_selected;
1585             oclMat temp;
1586             oclMat out;
1587         };
1588
1589         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1590         //
1591         // see references:
1592         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1593         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
1594         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1595         {
1596         public:
1597             OpticalFlowDual_TVL1_OCL(); // defined out of line; the default parameter values are not visible in this header
1598
1599             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); // inputs I0/I1, outputs flowx/flowy
1600
1601             void collectGarbage(); // defined out of line (presumably releases the private buffers - confirm)
1602
1603             /**
1604             * Time step of the numerical scheme.
1605             */
1606             double tau;
1607
1608             /**
1609             * Weight parameter for the data term, attachment parameter.
1610             * This is the most relevant parameter, which determines the smoothness of the output.
1611             * The smaller this parameter is, the smoother the solutions we obtain.
1612             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1613             */
1614             double lambda;
1615
1616             /**
1617             * Weight parameter for (u - v)^2, tightness parameter.
1618             * It serves as a link between the attachment and the regularization terms.
1619             * In theory, it should have a small value in order to maintain both parts in correspondence.
1620             * The method is stable for a large range of values of this parameter.
1621             */
1622             double theta;
1623
1624             /**
1625             * Number of scales used to create the pyramid of images.
1626             */
1627             int nscales;
1628
1629             /**
1630             * Number of warpings per scale.
1631             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1632             * This is a parameter that assures the stability of the method.
1633             * It also affects the running time, so it is a compromise between speed and accuracy.
1634             */
1635             int warps;
1636
1637             /**
1638             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1639             * A small value will yield more accurate solutions at the expense of a slower convergence.
1640             */
1641             double epsilon;
1642
1643             /**
1644             * Stopping criterion iterations number used in the numerical scheme.
1645             */
1646             int iterations;
1647
1648             bool useInitialFlow; // NOTE(review): presumably makes operator() start from the incoming flowx/flowy - confirm in the .cpp
1649
1650         private:
1651             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1652
1653             std::vector<oclMat> I0s; // private buffers below; their exact roles are defined by the implementation
1654             std::vector<oclMat> I1s;
1655             std::vector<oclMat> u1s;
1656             std::vector<oclMat> u2s;
1657
1658             oclMat I1x_buf;
1659             oclMat I1y_buf;
1660
1661             oclMat I1w_buf;
1662             oclMat I1wx_buf;
1663             oclMat I1wy_buf;
1664
1665             oclMat grad_buf;
1666             oclMat rho_c_buf;
1667
1668             oclMat p11_buf;
1669             oclMat p12_buf;
1670             oclMat p21_buf;
1671             oclMat p22_buf;
1672
1673             oclMat diff_buf;
1674             oclMat norm_buf;
1675         };
1676         // currently supported sorting methods (passed as the 'method' argument of sortByKey)
1677         enum
1678         {
1679             SORT_BITONIC,   // only supports power-of-2 buffer sizes
1680             SORT_SELECTION, // cannot sort duplicate keys
1681             SORT_MERGE,
1682             SORT_RADIX      // only supports signed int/float keys (CV_32S/CV_32F)
1683         };
1684         //! Returns the sorted result of all the elements in input based on equivalent keys.
1685         //
1686         //  The element unit in the values to be sorted is determined from the data type,
1687         //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
1688         //  matrix dimensions.
1689         //  Both keys and values will be sorted in place.
1690         //  keys needs to be a single-channel oclMat.
1691         //  method is one of the SORT_* values; isGreaterThan = false yields ascending keys, as in the example.
1692         //  Example:
1693         //  input -
1694         //    keys   = {2,    3,   1}   (CV_8UC1)
1695         //    values = {10,5, 4,3, 6,2} (CV_8UC2)
1696         //  sortByKey(keys, values, SORT_SELECTION, false);
1697         //  output -
1698         //    keys   = {1,    2,   3}   (CV_8UC1)
1699         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
1700         void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
1701     }
1702 }
1703 #if defined _MSC_VER && _MSC_VER >= 1200
1704 #  pragma warning( push)
1705 #  pragma warning( disable: 4267)
1706 #endif
1707 #include "opencv2/ocl/matrix_operations.hpp"
1708 #if defined _MSC_VER && _MSC_VER >= 1200
1709 #  pragma warning( pop)
1710 #endif
1711
1712 #endif /* __OPENCV_GPU_HPP__ */