modules/ocl/include/opencv2/ocl/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core/core.hpp"
  51 #include "opencv2/imgproc/imgproc.hpp"
  52 #include "opencv2/objdetect/objdetect.hpp"
  53 #include "opencv2/features2d/features2d.hpp"
  54 #include "opencv2/ml/ml.hpp"
  55
  56 namespace cv
  57 {
  58     namespace ocl
  59     {
  60         using std::auto_ptr;
  61         enum
  62         {
                 // Device-type selectors; getDevice() takes one of these as its devicetype argument
                 // (default CVCL_DEVICE_TYPE_GPU). Each named type occupies its own bit, and
                 // CVCL_DEVICE_TYPE_ALL sets all 32 bits.
                 // NOTE(review): the names presumably mirror OpenCL's CL_DEVICE_TYPE_* flags - confirm.
  63             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  64             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  65             CVCL_DEVICE_TYPE_GPU         = (1 << 2),
  66             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  67             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
  68             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
  69         };
  70
  71         enum DevMemRW
  72         {
                 // Read/write access mode of device memory; used by getDevMemType(),
                 // setDevMemType() and oclMat::createEx().
  73             DEVICE_MEM_R_W = 0,     // read and write (default argument of setDevMemType)
  74             DEVICE_MEM_R_ONLY,      // read only
  75             DEVICE_MEM_W_ONLY       // write only
  76         };
  77
  78         enum DevMemType
  79         {
                 // Device memory allocation type; used by getDevMemType(), setDevMemType()
                 // and oclMat::createEx().
                 // NOTE(review): AHP/UHP/CHP presumably map to the OpenCL CL_MEM_*_HOST_PTR
                 // allocation flags - confirm against the implementation.
  80             DEVICE_MEM_DEFAULT = 0, // default (default argument of setDevMemType)
  81             DEVICE_MEM_AHP,         // alloc host pointer
  82             DEVICE_MEM_UHP,         // use host pointer
  83             DEVICE_MEM_CHP,         // copy host pointer
  84             DEVICE_MEM_PM           // persistent memory
  85         };
  86
  87         // Gets the global device memory type and read/write type.
             // Both values are written to the reference parameters rw_type and mem_type.
  88         // Returns 1 if a unified memory system is supported, otherwise returns 0.
  89         CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
  90
  91         // Sets the global device memory type and read/write type.
  92         // Every oclMat created afterwards will use this type.
  93         // Returns -1 if the target type is unsupported, otherwise returns 0.
  94         CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
  95
  96         // Holds OpenCL runtime information.
             // Per the notes on getDevice(), each Info relates to one OpenCL platform.
  97         class CV_EXPORTS Info
  98         {
  99         public:
 100             struct Impl;            // opaque implementation type, defined elsewhere
 101             Impl *impl;             // pointer to the implementation object
 102
 103             Info();
 104             Info(const Info &m);
 105             ~Info();
                 // NOTE(review): presumably releases what impl refers to - confirm in the implementation.
 106             void release();
 107             Info &operator = (const Info &m);
                 // Device names; setDevice()'s devnum argument is an index into this vector.
 108             std::vector<string> DeviceName;
 109         };
 110         //////////////////////////////// Initialization & Info ////////////////////////
 111         //this function may be obsoleted
 112         //CV_EXPORTS cl_device_id getDevice();
 113         // Must be called before any other cv::ocl:: function; it initializes the OpenCL runtime.
 114         // Each Info in oclinfo relates to one OpenCL platform.
 115         // Each platform has one or more devices, and each device has its own name.
             // devicetype is one of the CVCL_DEVICE_TYPE_* values (default: GPU).
             // NOTE(review): the meaning of the int return value is not stated here - confirm.
 116         CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
 117
 118         // Selects the device to use; optional, call it after getDevice().
 119         // devnum is the index of the selected device in the DeviceName vector of Info.
 120         CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
 121
 122         // The two functions below let other OpenCL programs use the ocl module's cl_context and cl_command_queue.
 123         // Returns a cl_context * (typed as void * in this header).
 124         CV_EXPORTS void* getoclContext();
 125         // Returns a cl_command_queue * (typed as void * in this header).
 126         CV_EXPORTS void* getoclCommandQueue();
 127
 128         // Explicitly calls clFinish; the global command queue is used.
 129         CV_EXPORTS void finish();
 130
 131         // Lets the ocl module use a customized cl_context and cl_command_queue.
 132         // getDevice() also needs to be called before this function.
             // NOTE(review): ctx and qu are presumably the cl_context and cl_command_queue
             // (passed as void *); devnum presumably indexes Info::DeviceName as in setDevice() - confirm.
 133         CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
 134
 135         // Returns true when the global OpenCL context is initialized.
 136         CV_EXPORTS bool initialized();
 137
 138         //////////////////////////////// Error handling ////////////////////////
             // Error-reporting entry point of the module.
             // Takes an error message plus the source file, line and function name.
             // NOTE(review): presumably these identify where the error was raised, and whether
             // this throws or only logs is not visible in this header - confirm.
 139         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
 140
 141         //////////////////////////////// OpenCL context ////////////////////////
 142         // Global singleton class used to represent an OpenCL context.
 143         class CV_EXPORTS Context
 144         {
 145         protected:
                 // The constructor is not public; obtain the instance through getContext().
 146             Context();
                 // NOTE(review): auto_ptr is std::auto_ptr (see the using-declaration above), which is
                 // deprecated since C++11 and removed in C++17.
 147             friend class auto_ptr<Context>;
 148             friend bool initialized();
 149         private:
 150             static auto_ptr<Context> clCxt;    // owning pointer to the singleton instance
 151             static int val;                    // NOTE(review): purpose not visible in this header
 152         public:
 153             ~Context();
 154             void release();
 155             Info::Impl* impl;                  // same implementation type that Info points to
 156
                 // Static accessors for the global context.
                 // NOTE(review): setContext presumably installs the context described by oclinfo - confirm.
 157             static Context* getContext();
 158             static void setContext(Info &oclinfo);
 159
                 // Feature identifiers; the names match the ftype argument of supportsFeature().
 160             enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
 161             bool supportsFeature(int ftype) const;
 162             size_t computeUnits() const;
                 // NOTE(review): presumably the same handles as getoclContext() /
                 // getoclCommandQueue() (cl_context *, cl_command_queue *) - confirm.
 163             void* oclContext();
 164             void* oclCommandQueue();
 165         };
 166
 167         //! Calls a kernel given as a source string. Pass globalThreads = NULL and cleanUp = true to clean up without executing.
             // args is a list of (size, pointer) pairs.
             // NOTE(review): args presumably holds one pair per kernel argument, and the double
             // return value is presumably the kernel time when measureKernelTime is true - confirm.
 168         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 169                                                         const char **source, string kernelName,
 170                                                         size_t globalThreads[3], size_t localThreads[3],
 171                                                         std::vector< std::pair<size_t, const void *> > &args,
 172                                                         int channels, int depth, const char *build_options,
 173                                                         bool finish = true, bool measureKernelTime = false,
 174                                                         bool cleanUp = true);
 175
 176         //! Calls a kernel given as files. Pass globalThreads = NULL and cleanUp = true to clean up without executing.
             // fileName is an array of numFiles file names; the remaining parameters match the
             // source-string overload.
             // NOTE(review): the meaning of the double return value is not stated here - confirm.
 177         CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
 178                                                         const char **fileName, const int numFiles, string kernelName,
 179                                                         size_t globalThreads[3], size_t localThreads[3],
 180                                                         std::vector< std::pair<size_t, const void *> > &args,
 181                                                         int channels, int depth, const char *build_options,
 182                                                         bool finish = true, bool measureKernelTime = false,
 183                                                         bool cleanUp = true);
 184
 185         //! Enables or disables caching of OpenCL program binaries on local disk
 186         // After a program (*.cl files in the opencl/ folder) is built at runtime, the
 187         // compiled OpenCL program can be cached automatically to the path as a "path/*.clb"
 188         // binary file, which will be reused when the OpenCV executable is started again.
 189         //
 190         // The caching mode is controlled by the following enums.
 191         // Notes
 192         //   1. The feature is enabled by default when OpenCV is built in release mode.
 193         //   2. The CACHE_DEBUG / CACHE_RELEASE flags only take effect with the MSVC compiler;
 194         //      for GNU compilers, the function always treats the build as release mode (enabled by default).
 195         enum
 196         {
 197             CACHE_NONE    = 0,        // do not cache OpenCL binary
 198             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only works with MSVC)
 199             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only works with MSVC)
 200             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache OpenCL binary
 201             CACHE_UPDATE  = 0x1 << 2  // if a binary cache file with the same name is already on disk, it will be updated.
 202         };
             // mode is a combination of the CACHE_* flags above; path is the cache directory (default "./").
 203         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 204
 205         //! sets the path where the binary cache is saved
 206         CV_EXPORTS void setBinpath(const char *path);
 207
 208         class CV_EXPORTS oclMatExpr; // forward declaration; used by oclMat::operator=(const oclMatExpr&)
 209         //////////////////////////////// oclMat ////////////////////////////////
             // Matrix type of the ocl module. Its data member points to an OCL memory object and
             // it carries a reference counter; upload()/download() and the Mat conversions move
             // data between host and device with blocking calls.
 210         class CV_EXPORTS oclMat
 211         {
 212         public:
 213             //! default constructor
 214             oclMat();
 215             //! constructs an oclMatrix of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 216             oclMat(int rows, int cols, int type);
 217             oclMat(Size size, int type);
 218             //! constructs an oclMatrix and fills it with the specified value s.
 219             oclMat(int rows, int cols, int type, const Scalar &s);
 220             oclMat(Size size, int type, const Scalar &s);
 221             //! copy constructor
 222             oclMat(const oclMat &m);
 223
 224             //! constructor for oclMatrix headers pointing to user-allocated data
 225             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 226             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 227
 228             //! creates a matrix header for a part of a bigger matrix
 229             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 230             oclMat(const oclMat &m, const Rect &roi);
 231
 232             //! builds an oclMat from a Mat. Performs a blocking upload to the device.
 233             explicit oclMat (const Mat &m);
 234
 235             //! destructor - calls release()
 236             ~oclMat();
 237
 238             //! assignment operators
 239             oclMat &operator = (const oclMat &m);
 240             //! assignment operator. Performs a blocking upload to the device.
 241             oclMat &operator = (const Mat &m);
                 //! assignment from an oclMatExpr (the type returned by the +, -, *, / operators on oclMat)
 242             oclMat &operator = (const oclMatExpr& expr);
 243
 244             //! performs a blocking upload of data to the oclMat.
 245             void upload(const cv::Mat &m);
 246
 247
 248             //! downloads data from device to host memory. Blocking calls.
 249             operator Mat() const;
 250             void download(cv::Mat &m) const;
 251
 252             //! converts to _InputArray
 253             operator _InputArray();
 254
 255             //! converts to _OutputArray
 256             operator _OutputArray();
 257
 258             //! returns a new oclMatrix header for the specified row
 259             oclMat row(int y) const;
 260             //! returns a new oclMatrix header for the specified column
 261             oclMat col(int x) const;
 262             //! ... for the specified row span
 263             oclMat rowRange(int startrow, int endrow) const;
 264             oclMat rowRange(const Range &r) const;
 265             //! ... for the specified column span
 266             oclMat colRange(int startcol, int endcol) const;
 267             oclMat colRange(const Range &r) const;
 268
 269             //! returns a deep copy of the oclMatrix, i.e. the data is copied
 270             oclMat clone() const;
 271
 272             //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
 273             // It calls m.create(this->size(), this->type()).
 274             // It supports any data type
 275             void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
 276
 277             //! converts the oclMatrix to another datatype with optional scaling. See cvConvertScale.
 278             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 279             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 280
                 // NOTE(review): undocumented in this header; presumably behaves like Mat::assignTo - confirm.
 281             void assignTo( oclMat &m, int type = -1 ) const;
 282
 283             //! sets every oclMatrix element to s
 284             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 285             oclMat& operator = (const Scalar &s);
 286             //! sets some of the oclMatrix elements to s, according to the mask
 287             // It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 288             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 289             //! creates an alternative oclMatrix header for the same data, with a different
 290             // number of channels and/or a different number of rows. See cvReshape.
 291             oclMat reshape(int cn, int rows = 0) const;
 292
 293             //! allocates new oclMatrix data unless the oclMatrix already has the specified size and type.
 294             // The previous data is unreferenced if needed.
 295             void create(int rows, int cols, int type);
 296             void create(Size size, int type);
 297
 298             //! allocates a new oclMatrix with the specified device memory type.
 299             void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
 300             void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 301
 302             //! decreases the reference counter;
 303             // deallocates the data when the reference counter reaches 0.
 304             void release();
 305
 306             //! swaps with another oclMat
 307             void swap(oclMat &mat);
 308
 309             //! locates the oclMatrix header within a parent oclMatrix. See below
 310             void locateROI( Size &wholeSize, Point &ofs ) const;
 311             //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
 312             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 313             //! extracts a rectangular sub-oclMatrix
 314             // (this is a generalized form of row, rowRange etc.)
 315             oclMat operator()( Range rowRange, Range colRange ) const;
 316             oclMat operator()( const Rect &roi ) const;
 317
                 //! compound assignment operators taking another oclMat
 318             oclMat& operator+=( const oclMat& m );
 319             oclMat& operator-=( const oclMat& m );
 320             oclMat& operator*=( const oclMat& m );
 321             oclMat& operator/=( const oclMat& m );
 322
 323             //! returns true if the oclMatrix data is continuous
 324             // (i.e. when there are no gaps between successive rows).
 325             // similar to CV_IS_MAT_CONT(cvmat->type)
 326             bool isContinuous() const;
 327             //! returns element size in bytes,
 328             // similar to CV_ELEM_SIZE(cvMat->type)
 329             size_t elemSize() const;
 330             //! returns the size of an element channel in bytes.
 331             size_t elemSize1() const;
 332             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 333             int type() const;
 334             //! returns the element type as stored by ocl, e.g. 8UC3 returns 8UC4 because in ocl
 335             //! a 3-channel element actually uses 4-channel space
 336             int ocltype() const;
 337             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 338             int depth() const;
 339             //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
 340             int channels() const;
 341             //! returns the number of channels as stored by ocl: 4 for a 3-channel element,
 342             //! because a 3-channel element actually uses 4-channel space
 343             int oclchannels() const;
 344             //! returns step/elemSize1()
 345             size_t step1() const;
 346             //! returns oclMatrix size:
 347             // width == number of columns, height == number of rows
 348             Size size() const;
 349             //! returns true if oclMatrix data is NULL
 350             bool empty() const;
 351
 352             //! returns pointer to y-th row
 353             uchar* ptr(int y = 0);
 354             const uchar *ptr(int y = 0) const;
 355
 356             //! template version of the above method
 357             template<typename _Tp> _Tp *ptr(int y = 0);
 358             template<typename _Tp> const _Tp *ptr(int y = 0) const;
 359
 360             //! matrix transposition
 361             oclMat t() const;
 362
 363             /*! includes several bit-fields:
 364               - the magic signature
 365               - continuity flag
 366               - depth
 367               - number of channels
 368               */
 369             int flags;
 370             //! the number of rows and columns
 371             int rows, cols;
 372             //! the distance between successive rows in bytes; includes the gap if any
 373             size_t step;
 374             //! pointer to the data (OCL memory object)
 375             uchar *data;
 376
 377             //! pointer to the reference counter;
 378             // when the oclMatrix points to user-allocated data, the pointer is NULL
 379             int *refcount;
 380
 381             //! helper fields used in locateROI and adjustROI
 382             // datastart and dataend are not used in the current version
 383             uchar *datastart;
 384             uchar *dataend;
 385
 386             //! OpenCL context associated with the oclMat object.
 387             Context *clCxt;
 388             // offset used to handle ROI, calculated in bytes
 389             int offset;
 390             // wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
 391             int wholerows;
 392             int wholecols;
 393         };
 394
 395         // Converts an InputArray/OutputArray to an oclMat reference.
 396         CV_EXPORTS oclMat& getOclMatRef(InputArray src);
 397         CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 398
 399         ///////////////////// mat split and merge /////////////////////////////////
 400         //! Composes a multi-channel array from several single-channel arrays
 401         // Supports all types
             // The pointer overload takes n source arrays; the vector overload takes them from src.
 402         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
 403         CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
 404
 405         //! Divides a multi-channel array into several single-channel arrays
 406         // Supports all types
             // NOTE(review): for the pointer overload, dst presumably must point to at least
             // src.channels() oclMat objects - confirm.
 407         CV_EXPORTS void split(const oclMat &src, oclMat *dst);
 408         CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
 409
 410         ////////////////////////////// Arithmetics ///////////////////////////////////
 411
 412         //! computes the weighted sum of two matrices (dst = src1 * alpha + src2 * beta + gama)
 413         CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
 414
 415         //! adds one matrix to another (dst = src1 + src2)
 416         // supports all data types
             // mask defaults to an empty oclMat.
             // NOTE(review): an empty mask presumably means every element is processed - confirm.
 417         CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 418         //! adds a scalar to a matrix (dst = src1 + s)
 419         // supports all data types
 420         CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 421
 422         //! subtracts one matrix from another (dst = src1 - src2)
 423         // supports all data types
 424         CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 425         //! subtracts a scalar from a matrix (dst = src1 - s)
 426         // supports all data types
 427         CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 428
 429         //! computes the element-wise product of two arrays (dst = src1 * scale * src2)
 430         // supports all data types
 431         CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 432         //! multiplies a matrix by a number (dst = scalar * src)
 433         // supports all data types
 434         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 435
 436         //! computes the element-wise quotient of two arrays (dst = src1 * scale / src2)
 437         // supports all data types
 438         CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 439         //! divides a number by each array element (dst = scale / src1)
 440         // supports all data types
 441         CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
 442
 443         //! compares the elements of two arrays (dst = src1 <cmpop> src2)
 444         // supports all data types
             // NOTE(review): cmpop is presumably one of the cv::CMP_* codes - confirm.
 445         CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);
 446
 447         //! transposes the matrix
 448         // supports all data types
 449         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 450
 451         //! computes the element-wise absolute difference of two arrays (dst = abs(src1 - src2))
 452         // supports all data types
 453         CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
 454         //! computes the element-wise absolute difference of an array and a scalar (dst = abs(src1 - s))
 455         // supports all data types
 456         CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
 457
 458         //! computes the mean value and standard deviation of all or selected array elements
 459         // supports all types except CV_32F, CV_64F
             // NOTE(review): this overload has no mask parameter, so "selected" elements cannot be
             // chosen through this signature.
 460         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 461
 462         //! computes the norm of an array
 463         // supports NORM_INF, NORM_L1, NORM_L2
 464         // supports only the CV_8UC1 type
 465         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 466
 467         //! computes the norm of the difference between two arrays
 468         // supports NORM_INF, NORM_L1, NORM_L2
 469         // supports only the CV_8UC1 type
 470         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 471
 472         //! reverses the order of the rows, columns or both in a matrix
 473         // supports all types
             // NOTE(review): flipCode presumably follows cv::flip (0: rows, >0: columns, <0: both) - confirm.
 474         CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
 475
 476         //! computes the sum of array elements
 477         // disabled until fix crash
 478         // supports all types
             // NOTE(review): the "disabled" remark above conflicts with the live declarations below;
             // it may be stale - confirm.
             // NOTE(review): absSum and sqrSum presumably sum absolute values and squares - confirm.
 479         CV_EXPORTS Scalar sum(const oclMat &m);
 480         CV_EXPORTS Scalar absSum(const oclMat &m);
 481         CV_EXPORTS Scalar sqrSum(const oclMat &m);
 482
 483         //! finds the global minimum and maximum array elements and returns their values
 484         // supports all C1 types
 485         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
             // Variant of minMax that takes an extra oclMat buf and has no default arguments.
             // NOTE(review): buf is presumably a reusable scratch buffer - confirm.
 486         CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
 487
 488         //! finds the global minimum and maximum array elements and returns their values with locations
 489         // supports all C1 types
 490         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 491                                   const oclMat &mask = oclMat());
 492
 493         //! counts non-zero array elements
 494         // supports all types
 495         CV_EXPORTS int countNonZero(const oclMat &src);
 496
 497         //! transforms 8-bit unsigned integers using a lookup table: dst(i)=lut(src(i))
 498         // the destination array has the same depth as lut and the same number of channels as the source
 499         // It supports 8UC1 8UC4 only
 500         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 501
 502         //! only 8UC1 and 256 bins are supported now
 503         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 504         //! only 8UC1 and 256 bins are supported now
 505         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 506
 507         //! only 8UC1 is supported now
             // Returns a cv::CLAHE object created with the given clip limit and tile grid size.
 508         CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 509
 510         //! applies a bilateral filter to the input image
 511         // supports 8UC1 8UC4
 512         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
 513
 514         //! Applies an adaptive bilateral filter to the input image
 515         //  This is not truly a bilateral filter. Instead of using user-provided fixed parameters,
 516         //  the function calculates a constant at each window based on the local standard deviation,
 517         //  and uses this constant to do the filtering.
 518         //  supports 8UC1, 8UC3
 519         CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
 520
 521         //! computes the exponent of each matrix element (dst = e**src)
 522         // supports only CV_32FC1, CV_64FC1 types
 523         CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
 524
 525         //! computes the natural logarithm of the absolute value of each matrix element: dst = log(abs(src))
 526         // supports only CV_32FC1, CV_64FC1 types
 527         CV_EXPORTS void log(const oclMat &src, oclMat &dst);
 528
 529         //! computes the magnitude of each (x(i), y(i)) vector
 530         // supports only CV_32F, CV_64F types
 531         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 532
 533         //! computes the angle (angle(i)) of each (x(i), y(i)) vector
 534         // supports only CV_32F, CV_64F types
 535         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 536
 537         //! raises every element of the input array x to the power p; the result is written to y
 538         // supports only CV_32F, CV_64F types
 539         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 540
 541         //! converts Cartesian coordinates to polar
 542         // supports only CV_32F, CV_64F types
 543         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 544
 545         //! converts polar coordinates to Cartesian
 546         // supports only CV_32F, CV_64F types
 547         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 548
 549         //! performs per-element bit-wise inversion
 550         // supports all types
 551         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 552
 553         //! calculates the per-element bit-wise disjunction of two arrays, or of an array and a scalar
 554         // supports all types
 555         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 556         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 557
 558         //! calculates the per-element bit-wise conjunction of two arrays, or of an array and a scalar
 559         // supports all types
 560         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 561         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 562
 563         //! calculates the per-element bit-wise "exclusive or" of two arrays, or of an array and a scalar
 564         // supports all types
 565         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 566         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 567
 568         //! Logical (bit-wise) operators; each returns a new oclMat
             // NOTE(review): presumably implemented with bitwise_not / bitwise_or / bitwise_and /
             // bitwise_xor - confirm.
 569         CV_EXPORTS oclMat operator ~ (const oclMat &);
 570         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 571         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 572         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 573
 574
 575         //! Mathematics operators; each returns an oclMatExpr, which can be assigned to an oclMat
 576         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 577         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 578         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 579         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 580
 581         //! computes the convolution of two images
 582         // supports only the CV_32FC1 type
 583         CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
 584
             //! converts an image from one color space to another
             // NOTE(review): code and dcn presumably have the same meaning as in cv::cvtColor - confirm.
 585         CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
 586
             // NOTE(review): presumably sets src to an identity matrix scaled by val, like
             // cv::setIdentity - confirm.
 587         CV_EXPORTS void setIdentity(oclMat& src, double val);
 588
 589         //////////////////////////////// Filter Engine ////////////////////////////////
 590
 591         /*!
 592           The Base Class for 1D or Row-wise Filters
 593
 594           This is the base class for linear or non-linear filters that process 1D data.
 595           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 596           */
 597         class CV_EXPORTS BaseRowFilter_GPU
 598         {
 599         public:
 600             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} // only stores the three arguments in the public members below
 601             virtual ~BaseRowFilter_GPU() {} // virtual: the class is meant to be derived from and used polymorphically
 602             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // pure virtual: derived filters implement the processing of src into dst
 603             int ksize, anchor, bordertype; // values exactly as passed to the constructor (no validation is done here)
 604         };
 605
 606         /*!
 607           The Base Class for Column-wise Filters
 608
 609           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 610           Such filters are used for the "vertical" filtering parts in separable filters.
 611           */
 612         class CV_EXPORTS BaseColumnFilter_GPU
 613         {
 614         public:
 615             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} // only stores the three arguments in the public members below
 616             virtual ~BaseColumnFilter_GPU() {} // virtual: the class is meant to be derived from and used polymorphically
 617             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // pure virtual: derived filters implement the processing of src into dst
 618             int ksize, anchor, bordertype; // values exactly as passed to the constructor (no validation is done here)
 619         };
 620
 621         /*!
 622           The Base Class for Non-Separable 2D Filters.
 623
 624           This is the base class for linear or non-linear 2D filters.
 625           */
 626         class CV_EXPORTS BaseFilter_GPU
 627         {
 628         public:
 629             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_) // NOTE(review): borderType_ is an int taken by const reference; the row/column base classes take it by value
 630                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {} // copies the three arguments into the public members
 631             virtual ~BaseFilter_GPU() {} // virtual: the class is meant to be derived from and used polymorphically
 632             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // pure virtual: derived filters implement the processing of src into dst
 633             Size ksize; // 2D kernel size as passed to the constructor
 634             Point anchor; // 2D anchor as passed to the constructor
 635             int borderType; // border type as passed to the constructor (spelled bordertype in the row/column base classes)
 636         };
 637
 638         /*!
 639           The Base Class for Filter Engine.
 640
 641           The class can be used to apply an arbitrary filtering operation to an image.
 642           Concrete engines hold whatever intermediate buffers they need; this interface itself declares no data members.
 643           */
 644         class CV_EXPORTS FilterEngine_GPU
 645         {
 646         public:
 647             virtual ~FilterEngine_GPU() {} // virtual: engines are handled through Ptr<FilterEngine_GPU>
 648
 649             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0; // pure virtual; the default roi Rect(0, 0, -1, -1) presumably means "whole image" - TODO confirm
 650         };
 651
 652         //! returns the non-separable filter engine with the specified filter
 653         CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D); // NOTE(review): Ptr is taken by const value here, while createSeparableFilter_GPU takes its Ptr arguments by const reference
 654
 655         //! returns the primitive row filter with the specified kernel
 656         CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
 657                 int anchor = -1, int bordertype = BORDER_DEFAULT); // anchor = -1 presumably selects the kernel center - TODO confirm
 658
 659         //! returns the primitive column filter with the specified kernel
 660         CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
 661                 int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 662
 663         //! returns the separable linear filter engine
 664         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
 665                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 666
 667         //! returns the separable filter engine with the specified filters
 668         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
 669                 const Ptr<BaseColumnFilter_GPU> &columnFilter);
 670
 671         //! returns the Gaussian filter engine
 672         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 673
 674         //! returns filter engine for the generalized Sobel operator
 675         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 676
 677         //! applies Laplacian operator to the image
 678         // supports only ksize = 1 and ksize = 3; supported data types: 8UC1, 8UC4, 32FC1, 32FC4
 679         CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
 680
 681         //! returns 2D box filter
 682         // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
 683         CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
 684                 const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 685
 686         //! returns box filter engine
 687         CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
 688                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 689
 690         //! returns 2D filter with the specified kernel
 691         // supports CV_8UC1 and CV_8UC4 types
 692         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
 693                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 694
 695         //! returns the non-separable linear filter engine
 696         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
 697                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 698
 699         //! smooths the image using the normalized box filter
 700         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 701         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101, BORDER_WRAP
 702         CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
 703                                   Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 704
 705         //! returns 2D morphological filter
 706         //! only MORPH_ERODE and MORPH_DILATE are supported
 707         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 708         // kernel must have CV_8UC1 type, one row and cols == ksize.width * ksize.height
 709         CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
 710                 Point anchor = Point(-1, -1));
 711
 712         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
 713         CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
 714                 const Point &anchor = Point(-1, -1), int iterations = 1);
 715
 716         //! a synonym for the normalized box filter: forwards to boxFilter with ddepth = -1
 717         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 718         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 719         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 720                                 int borderType = BORDER_CONSTANT) // NOTE: this default is BORDER_CONSTANT, whereas boxFilter itself defaults to BORDER_DEFAULT
 721         {
 722             boxFilter(src, dst, -1, ksize, anchor, borderType); // -1 is passed as boxFilter's ddepth argument
 723         }
 724
 725         //! applies non-separable 2D linear filter to the image
 726         //  Note, at the moment this function only works when the anchor point is in the kernel center
 727         //  and the kernel size is either 3x3 or 5x5; otherwise the function will fail to output a valid result
 728         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
 729                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 730
 731         //! applies separable 2D linear filter to the image
 732         CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
 733                                     Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 734
 735         //! applies generalized Sobel operator to the image
 736         // dst.type must equal src.type
 737         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 738         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 739         CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 740
 741         //! applies the vertical or horizontal Scharr operator to the image
 742         // dst.type must equal src.type
 743         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 744         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 745         CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 746
 747         //! smooths the image using Gaussian filter.
 748         // dst.type must equal src.type
 749         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 750         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 751         CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 752
 753         //! erodes the image (applies the local minimum operator)
 754         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 755         CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 756
 757                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); // the kernel is a host-side Mat, not an oclMat
 758
 759
 760         //! dilates the image (applies the local maximum operator)
 761         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 762         CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 763
 764                                 int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); // same parameter list and defaults as erode
 765
 766
 767         //! applies an advanced morphological operation to the image
 768         CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 769
 770                                       int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); // op selects the operation; the accepted values are not visible in this header
 771
 772
 773         ////////////////////////////// Image processing //////////////////////////////
 774         //! Does mean shift filtering on GPU.
 775         CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
 776                                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); // default criteria: MAX_ITER + EPS with arguments (5, 1)
 777
 778         //! Does mean shift procedure on GPU.
 779         CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
 780                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); // two outputs: dstr and dstsp
 781
 782         //! Does mean shift segmentation with elimination of small regions.
 783         CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
 784                                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); // note: dst is a host-side Mat, unlike the two functions above
 785
 786         //! applies fixed threshold to the image.
 787         // supports CV_8UC1 and CV_32FC1 data type
 788         // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
 789         CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC); // note: the default type is THRESH_TRUNC
 790
 791         //! resizes the image
 792         // Supports INTER_NEAREST, INTER_LINEAR
 793         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 794         CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 795
 796         //! Applies a generic geometrical transformation to an image.
 797
 798         // Supports INTER_NEAREST, INTER_LINEAR.
 799
 800         // Map1 supports CV_16SC2, CV_32FC2  types.
 801
 802         // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
 803
 804         CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar()); // NOTE(review): map1/map2 are non-const references; interpolation and bordertype have no defaults
 805
 806         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 807         // supports CV_8UC1, CV_8UC4, CV_32SC1 types
 808         CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar()); // "boardtype" is presumably a misspelling of "bordertype"
 809
 810         //! Smoothes image using median filter
 811         // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
 812         CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 813
 814         //! warps the image using affine transformation
 815         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 816         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 817         CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR); // M is a host-side Mat
 818
 819         //! warps the image using perspective transformation
 820         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 821         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 822         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR); // M is a host-side Mat
 823
 824         //! computes the integral image and integral for the squared image
 825         // sum will have CV_32S type, sqsum - CV_32F type
 826         // supports only CV_8UC1 source type
 827         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
 828         CV_EXPORTS void integral(const oclMat &src, oclMat &sum); // overload without the sqsum output
 829         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); // not covered by the integral comment above; presumably the Harris corner response - TODO confirm
 830         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 831             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); // as cornerHarris plus non-const Dx/Dy matrices; whether they are inputs or outputs is not visible here
 832         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); // same parameter list as cornerHarris minus k
 833         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 834             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); // as cornerMinEigenVal plus non-const Dx/Dy matrices
 835
 836         /////////////////////////////////// ML ///////////////////////////////////////////
 837
 838         //! Computes the closest center for each line (presumably row) of src and labels it with that center's index
 839         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
 840         CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers); // note: the outputs (dists, labels) come first, the inputs last
 841
 842         //! Does k-means procedure on GPU
 843         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
 844         CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
 845                                      TermCriteria criteria, int attemps, int flags, oclMat &centers); // "attemps" is presumably "attempts"; the meaning of the returned double is not visible here
 846
 847
 848         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 849         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 850         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 851
 852         class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier
 853         {
 854         public:
 855             OclCascadeClassifier() {};
 856             ~OclCascadeClassifier() {};
 857
 858             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
 859                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
 860         };
 861
 862         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
 863         {
 864         public:
 865             OclCascadeClassifierBuf() :
 866                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 867
 868             ~OclCascadeClassifierBuf() { release(); }
 869
 870             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 871                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 872                                   Size minSize = Size(), Size maxSize = Size());
 873             void release();
 874
 875         private:
 876             void Init(const int rows, const int cols, double scaleFactor, int flags,
 877                       const int outputsz, const size_t localThreads[],
 878                       CvSize minSize, CvSize maxSize);
 879             void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
 880             void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
 881                                          const double scaleFactor, const size_t localThreads[],
 882                                          CvSize minSize, CvSize maxSize);
 883             void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
 884
 885             int m_rows;
 886             int m_cols;
 887             int m_flags;
 888             int m_loopcount;
 889             int m_nodenum;
 890             bool findBiggestObject;
 891             bool initialized;
 892             double m_scaleFactor;
 893             Size m_minSize;
 894             Size m_maxSize;
 895             vector<CvSize> sizev;
 896             vector<float> scalev;
 897             oclMat gimg1, gsum, gsqsum;
 898             void * buffers;
 899         };
 900
 901
 902         /////////////////////////////// Pyramid /////////////////////////////////////
 903         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); // presumably the downsampling counterpart of pyrUp below - TODO confirm
 904
 905         //! upsamples the source image and then smoothes it
 906         CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
 907
 908         //! performs linear blending of two images
 909         //! to avoid accuracy errors, the sum of weights shouldn't be very close to zero
 910         // supports only CV_8UC1 source type
 911         CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
 912
 913         //! computes vertical sum, supports only CV_32FC1 images
 914         CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 915
 916         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
 917         struct CV_EXPORTS MatchTemplateBuf // buffer aggregate accepted by the matchTemplate overload that takes a MatchTemplateBuf&
 918         {
 919             Size user_block_size; // default-constructed Size unless the caller sets it; how it is used is not visible in this header
 920             oclMat imagef, templf; // presumably float versions of the image and template - TODO confirm
 921             std::vector<oclMat> images;
 922             std::vector<oclMat> image_sums;
 923             std::vector<oclMat> image_sqsums;
 924         };
 925
 926         //! computes the proximity map for the raster template and the image where the template is searched for
 927         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 928         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 929         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
 930
 931         //! same as above, but takes a caller-provided MatchTemplateBuf
 932         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 933         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 934         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 935
 936         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
 937         struct CV_EXPORTS CannyBuf; // forward declaration; the definition follows the Canny overloads
 938         //! compute edges of the input image using Canny operator
 939         // Support CV_8UC1 only
 940         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); // from an image
 941         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); // from an image, with a caller-provided CannyBuf
 942         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); // from dx/dy matrices instead of an image (no apperture_size)
 943         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); // from dx/dy matrices, with a caller-provided CannyBuf
 944
 945         struct CV_EXPORTS CannyBuf // working buffers accepted by the Canny overloads that take a CannyBuf&
 946         {
 947             CannyBuf() : counter(NULL) {} // empty buffer; nothing is created until create() is called
 948             ~CannyBuf()
 949             {
 950                 release(); // all cleanup is delegated to release()
 951             }
 952             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
 953             {
 954                 create(image_size, apperture_size); // counter is set to NULL first, then create() is called
 955             }
 956             CannyBuf(const oclMat &dx_, const oclMat &dy_); // defined out of line; presumably adopts existing derivative matrices - TODO confirm
 957
 958             void create(const Size &image_size, int apperture_size = 3);
 959             void release(); // also invoked by the destructor
 960             oclMat dx, dy;
 961             oclMat dx_buf, dy_buf;
 962             oclMat edgeBuf;
 963             oclMat trackBuf1, trackBuf2;
 964             void *counter; // opaque pointer, NULL after the inline constructors. NOTE(review): if release() frees it, copying a CannyBuf would be unsafe - confirm
 965             Ptr<FilterEngine_GPU> filterDX, filterDY;
 966         };
 967
 968         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
 969         //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
 970         //! Param dft_size is the size of DFT transform.
 971         //!
 972         //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
 973         // supports src types CV_32FC1 and CV_32FC2
 974         // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
 975         // dft_size is the size of the original input, which is used for the transformation from complex to real.
 976         // dft_size must be a product of powers of 2, 3 and 5
 977         // real to complex dft requires at least v1.8 clAmdFft
 978         // real to complex dft output is not the same as the cpu version's
 979         // real to complex and complex to real do not support DFT_ROWS
 980         CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
 981
 982         //! implements generalized matrix product algorithm GEMM from BLAS
 983         // The functionality requires the clAmdBlas library
 984         // only supports type CV_32FC1
 985         // flag GEMM_3_T is not supported
 986         CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
 987                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
 988
 989         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 990         struct CV_EXPORTS HOGDescriptor // HOG descriptor and object detector operating on oclMat images
 991         {
 992             enum { DEFAULT_WIN_SIGMA = -1 }; // default for the win_sigma constructor argument
 993             enum { DEFAULT_NLEVELS = 64 }; // default for the nlevels constructor argument
 994             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; // values for getDescriptors' descr_format
 995             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
 996                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
 997                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
 998                           double threshold_L2hys = 0.2, bool gamma_correction = true,
 999                           int nlevels = DEFAULT_NLEVELS);
1000
1001             size_t getDescriptorSize() const;
1002             size_t getBlockHistogramSize() const;
1003             void setSVMDetector(const vector<float> &detector);
1004             static vector<float> getDefaultPeopleDetector();
1005             static vector<float> getPeopleDetector48x96();
1006             static vector<float> getPeopleDetector64x128();
1007             void detect(const oclMat &img, vector<Point> &found_locations, // single-scale: results are Points
1008                         double hit_threshold = 0, Size win_stride = Size(),
1009                         Size padding = Size());
1010             void detectMultiScale(const oclMat &img, vector<Rect> &found_locations, // multi-scale: results are Rects
1011                                   double hit_threshold = 0, Size win_stride = Size(),
1012                                   Size padding = Size(), double scale0 = 1.05,
1013                                   int group_threshold = 2);
1014             void getDescriptors(const oclMat &img, Size win_stride,
1015                                 oclMat &descriptors,
1016                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
1017             Size win_size; // public parameters mirroring the constructor arguments of the same names
1018             Size block_size;
1019             Size block_stride;
1020             Size cell_size;
1021
1022             int nbins;
1023             double win_sigma;
1024             double threshold_L2hys;
1025             bool gamma_correction;
1026             int nlevels;
1027
1028         protected:
1029             // initialize buffers; only needs to be done once in case of multiscale detection
1030             void init_buffer(const oclMat &img, Size win_stride);
1031             void computeBlockHistograms(const oclMat &img);
1032             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1033             double getWinSigma() const;
1034             bool checkDetectorSize() const;
1035
1036             static int numPartsWithin(int size, int part_size, int stride);
1037             static Size numPartsWithin(Size size, Size part_size, Size stride);
1038
1039             // Coefficients of the separating plane
1040             float free_coef;
1041             oclMat detector;
1042             // Results of the last classification step
1043             oclMat labels;
1044             Mat labels_host;
1045             // Results of the last histogram evaluation step
1046             oclMat block_hists;
1047             // Gradients computation results
1048             oclMat grad, qangle;
1049             // scaled image
1050             oclMat image_scale;
1051             // effective size of the input image (might be different from the original size after scaling)
1052             Size effect_size;
1053         };
1054
1055
1056         ////////////////////////feature2d_ocl/////////////////
1057         /****************************************************************************************\
1058         *                                      Distance                                          *
1059         \****************************************************************************************/
1060         template<typename T> // trait mapping an element type T to the type used for distance results
1061         struct CV_EXPORTS Accumulator
1062         {
1063             typedef T Type; // primary template: the result type is T itself
1064         };
1065         template<> struct Accumulator<unsigned char> // the four specializations below all select float
1066         {
1067             typedef float Type;
1068         };
1069         template<> struct Accumulator<unsigned short>
1070         {
1071             typedef float Type;
1072         };
1073         template<> struct Accumulator<char> // note: no separate specialization for signed char appears here
1074         {
1075             typedef float Type;
1076         };
1077         template<> struct Accumulator<short>
1078         {
1079             typedef float Type;
1080         };
1081
1082         /*
1083          * Manhattan distance (city block distance) functor; forwards to normL1
1084          */
1085         template<class T>
1086         struct CV_EXPORTS L1
1087         {
1088             enum { normType = NORM_L1 }; // norm identifier exposed as a compile-time constant
1089             typedef T ValueType; // element type of the compared arrays
1090             typedef typename Accumulator<T>::Type ResultType; // result type chosen by the Accumulator trait
1091
1092             ResultType operator()( const T *a, const T *b, int size ) const // a and b are arrays; size is passed straight through to normL1
1093             {
1094                 return normL1<ValueType, ResultType>(a, b, size);
1095             }
1096         };
1097
1098         /*
1099          * Euclidean distance functor
1100          */
1101         template<class T>
1102         struct CV_EXPORTS L2
1103         {
1104             enum { normType = NORM_L2 };
1105             typedef T ValueType;
1106             typedef typename Accumulator<T>::Type ResultType;
1107
1108             ResultType operator()( const T *a, const T *b, int size ) const
1109             {
1110                 return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1111             }
1112         };
1113
1114         /*
1115          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1116          * i.e. the bit count of A XOR B; forwards to normHamming
1117          */
1118         struct CV_EXPORTS Hamming
1119         {
1120             enum { normType = NORM_HAMMING }; // norm identifier exposed as a compile-time constant
1121             typedef unsigned char ValueType; // operates on byte arrays
1122             typedef int ResultType; // the distance is returned as an int
1123
1124             /** this will count the bits in a ^ b
1125              */
1126             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const // size is passed straight through to normHamming
1127             {
1128                 return normHamming(a, b, size);
1129             }
1130         };
1131
1132         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1133
1134         class CV_EXPORTS BruteForceMatcher_OCL_base
1135         {
1136         public:
1137             enum DistType {L1Dist = 0, L2Dist, HammingDist};
1138             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1139             // Add descriptors to train descriptor collection
1140             void add(const std::vector<oclMat> &descCollection);
1141             // Get train descriptors collection
1142             const std::vector<oclMat> &getTrainDescriptors() const;
1143             // Clear train descriptors collection
1144             void clear();
1145             // Return true if there are not train descriptors in collection
1146             bool empty() const;
1147
1148             // Return true if the matcher supports mask in match methods
1149             bool isMaskSupported() const;
1150
1151             // Find one best match for each query descriptor
1152             void matchSingle(const oclMat &query, const oclMat &train,
1153                              oclMat &trainIdx, oclMat &distance,
1154                              const oclMat &mask = oclMat());
1155
1156             // Download trainIdx and distance and convert it to CPU vector with DMatch
1157             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1158             // Convert trainIdx and distance to vector with DMatch
1159             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1160
1161             // Find one best match for each query descriptor
1162             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1163
1164             // Make gpu collection of trains and masks in suitable format for matchCollection function
1165             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1166
1167
1168             // Find one best match from train collection for each query descriptor
1169             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1170                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1171                                  const oclMat &masks = oclMat());
1172
1173             // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
1174             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1175             // Convert trainIdx, imgIdx and distance to vector with DMatch
1176             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1177
1178             // Find one best match from train collection for each query descriptor.
1179             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1180
1181             // Find k best matches for each query descriptor (in increasing order of distances)
1182             void knnMatchSingle(const oclMat &query, const oclMat &train,
1183                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1184                                 const oclMat &mask = oclMat());
1185
1186             // Download trainIdx and distance and convert it to vector with DMatch
1187             // compactResult is used when mask is not empty. If compactResult is false matches
1188             // vector will have the same size as queryDescriptors rows. If compactResult is true
1189             // matches vector will not contain matches for fully masked out query descriptors.
1190             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1191                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1192
1193             // Convert trainIdx and distance to vector with DMatch
1194             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1195                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1196
1197             // Find k best matches for each query descriptor (in increasing order of distances).
1198             // compactResult is used when mask is not empty. If compactResult is false matches
1199             // vector will have the same size as queryDescriptors rows. If compactResult is true
1200             // matches vector will not contain matches for fully masked out query descriptors.
1201             void knnMatch(const oclMat &query, const oclMat &train,
1202                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1203                           bool compactResult = false);
1204
1205             // Find k best matches from train collection for each query descriptor (in increasing order of distances)
1206             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1207                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1208                                      const oclMat &maskCollection = oclMat());
1209
1210             // Download trainIdx and distance and convert it to vector with DMatch
1211             // compactResult is used when mask is not empty. If compactResult is false matches
1212             // vector will have the same size as queryDescriptors rows. If compactResult is true
1213             // matches vector will not contain matches for fully masked out query descriptors.
1214             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1215                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1216
1217             // Convert trainIdx and distance to vector with DMatch
1218             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1219                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1220
1221             // Find k best matches  for each query descriptor (in increasing order of distances).
1222             // compactResult is used when mask is not empty. If compactResult is false matches
1223             // vector will have the same size as queryDescriptors rows. If compactResult is true
1224             // matches vector will not contain matches for fully masked out query descriptors.
1225             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1226                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1227
1228             // Find best matches for each query descriptor which have distance less than maxDistance.
1229             // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
1230             // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
1231             // because it didn't have enough memory.
1232             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1233             // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
1234             // Matches doesn't sorted.
1235             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1236                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1237                                    const oclMat &mask = oclMat());
1238
1239             // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
1240             // matches will be sorted in increasing order of distances.
1241             // compactResult is used when mask is not empty. If compactResult is false matches
1242             // vector will have the same size as queryDescriptors rows. If compactResult is true
1243             // matches vector will not contain matches for fully masked out query descriptors.
1244             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1245                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1246             // Convert trainIdx, nMatches and distance to vector with DMatch.
1247             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1248                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1249             // Find best matches for each query descriptor which have distance less than maxDistance
1250             // in increasing order of distances).
1251             void radiusMatch(const oclMat &query, const oclMat &train,
1252                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1253                              const oclMat &mask = oclMat(), bool compactResult = false);
1254             // Find best matches for each query descriptor which have distance less than maxDistance.
1255             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
1256             // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
1257             // Matches doesn't sorted.
1258             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1259                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1260             // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
1261             // matches will be sorted in increasing order of distances.
1262             // compactResult is used when mask is not empty. If compactResult is false matches
1263             // vector will have the same size as queryDescriptors rows. If compactResult is true
1264             // matches vector will not contain matches for fully masked out query descriptors.
1265             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1266                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1267             // Convert trainIdx, nMatches and distance to vector with DMatch.
1268             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1269                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1270             // Find best matches from train collection for each query descriptor which have distance less than
1271             // maxDistance (in increasing order of distances).
1272             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1273                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1274             DistType distType;
1275         private:
1276             std::vector<oclMat> trainDescCollection;
1277         };
1278
//! Primary template; only the specializations for particular distance
//! functors are defined.
template <typename Distance> class CV_EXPORTS BruteForceMatcher_OCL;
1281
1282         template <typename T>
1283         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1284         {
1285         public:
1286             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
1287             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1288         };
1289
1290         template <typename T>
1291         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1292         {
1293         public:
1294             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
1295             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1296         };
1297
1298         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1299         {
1300         public:
1301             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
1302             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1303         };
1304
1305         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1306         {
1307         public:
1308             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1309         };
1310
1311         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1312         {
1313         public:
1314             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1315                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1316
1317             //! return 1 rows matrix with CV_32FC2 type
1318             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1319             //! download points of type Point2f to a vector. the vector's content will be erased
1320             void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
1321
1322             int maxCorners;
1323             double qualityLevel;
1324             double minDistance;
1325
1326             int blockSize;
1327             bool useHarrisDetector;
1328             double harrisK;
1329             void releaseMemory()
1330             {
1331                 Dx_.release();
1332                 Dy_.release();
1333                 eig_.release();
1334                 minMaxbuf_.release();
1335                 tmpCorners_.release();
1336             }
1337         private:
1338             oclMat Dx_;
1339             oclMat Dy_;
1340             oclMat eig_;
1341             oclMat minMaxbuf_;
1342             oclMat tmpCorners_;
1343         };
1344
1345         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1346             int blockSize_, bool useHarrisDetector_, double harrisK_)
1347         {
1348             maxCorners = maxCorners_;
1349             qualityLevel = qualityLevel_;
1350             minDistance = minDistance_;
1351             blockSize = blockSize_;
1352             useHarrisDetector = useHarrisDetector_;
1353             harrisK = harrisK_;
1354         }
1355
1356         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1357         class CV_EXPORTS PyrLKOpticalFlow
1358         {
1359         public:
1360             PyrLKOpticalFlow()
1361             {
1362                 winSize = Size(21, 21);
1363                 maxLevel = 3;
1364                 iters = 30;
1365                 derivLambda = 0.5;
1366                 useInitialFlow = false;
1367                 minEigThreshold = 1e-4f;
1368                 getMinEigenVals = false;
1369                 isDeviceArch11_ = false;
1370             }
1371
1372             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1373                         oclMat &status, oclMat *err = 0);
1374             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1375             Size winSize;
1376             int maxLevel;
1377             int iters;
1378             double derivLambda;
1379             bool useInitialFlow;
1380             float minEigThreshold;
1381             bool getMinEigenVals;
1382             void releaseMemory()
1383             {
1384                 dx_calcBuf_.release();
1385                 dy_calcBuf_.release();
1386
1387                 prevPyr_.clear();
1388                 nextPyr_.clear();
1389
1390                 dx_buf_.release();
1391                 dy_buf_.release();
1392             }
1393         private:
1394             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1395             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
1396
1397             oclMat dx_calcBuf_;
1398             oclMat dy_calcBuf_;
1399
1400             vector<oclMat> prevPyr_;
1401             vector<oclMat> nextPyr_;
1402
1403             oclMat dx_buf_;
1404             oclMat dy_buf_;
1405             oclMat uPyr_[2];
1406             oclMat vPyr_[2];
1407             bool isDeviceArch11_;
1408         };
1409
1410         class CV_EXPORTS FarnebackOpticalFlow
1411         {
1412         public:
1413             FarnebackOpticalFlow();
1414
1415             int numLevels;
1416             double pyrScale;
1417             bool fastPyramids;
1418             int winSize;
1419             int numIters;
1420             int polyN;
1421             double polySigma;
1422             int flags;
1423
1424             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
1425
1426             void releaseMemory();
1427
1428         private:
1429             void prepareGaussian(
1430                 int n, double sigma, float *g, float *xg, float *xxg,
1431                 double &ig11, double &ig03, double &ig33, double &ig55);
1432
1433             void setPolynomialExpansionConsts(int n, double sigma);
1434
1435             void updateFlow_boxFilter(
1436                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1437                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1438
1439             void updateFlow_gaussianBlur(
1440                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1441                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1442
1443             oclMat frames_[2];
1444             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1445             std::vector<oclMat> pyramid0_, pyramid1_;
1446         };
1447
1448         //////////////// build warping maps ////////////////////
1449         //! builds plane warping maps
1450         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1451         //! builds cylindrical warping maps
1452         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1453         //! builds spherical warping maps
1454         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1455         //! builds Affine warping maps
1456         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1457
1458         //! builds Perspective warping maps
1459         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1460
1461         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1462         //! Interpolate frames (images) using provided optical flow (displacement field).
1463         //! frame0   - frame 0 (32-bit floating point images, single channel)
1464         //! frame1   - frame 1 (the same type and size)
1465         //! fu       - forward horizontal displacement
1466         //! fv       - forward vertical displacement
1467         //! bu       - backward horizontal displacement
1468         //! bv       - backward vertical displacement
1469         //! pos      - new frame position
1470         //! newFrame - new frame
1471         //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
1472         //!            occlusion masks            0, occlusion masks            1,
1473         //!            interpolated forward flow  0, interpolated forward flow  1,
1474         //!            interpolated backward flow 0, interpolated backward flow 1
1475         //!
1476         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1477                                           const oclMat &fu, const oclMat &fv,
1478                                           const oclMat &bu, const oclMat &bv,
1479                                           float pos, oclMat &newFrame, oclMat &buf);
1480
1481         //! computes moments of the rasterized shape or a vector of points
1482         CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
1483
1484         class CV_EXPORTS StereoBM_OCL
1485         {
1486         public:
1487             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1488
1489             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1490
1491             //! the default constructor
1492             StereoBM_OCL();
1493             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
1494             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1495
1496             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
1497             //! Output disparity has CV_8U type.
1498             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1499
1500             //! Some heuristics that tries to estmate
1501             // if current GPU will be faster then CPU in this algorithm.
1502             // It queries current active device.
1503             static bool checkIfGpuCallReasonable();
1504
1505             int preset;
1506             int ndisp;
1507             int winSize;
1508
1509             // If avergeTexThreshold  == 0 => post procesing is disabled
1510             // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
1511             // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
1512             // i.e. input left image is low textured.
1513             float avergeTexThreshold;
1514         private:
1515             oclMat minSSD, leBuf, riBuf;
1516         };
1517
1518         class CV_EXPORTS StereoBeliefPropagation
1519         {
1520         public:
1521             enum { DEFAULT_NDISP  = 64 };
1522             enum { DEFAULT_ITERS  = 5  };
1523             enum { DEFAULT_LEVELS = 5  };
1524             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
1525             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1526                                              int iters  = DEFAULT_ITERS,
1527                                              int levels = DEFAULT_LEVELS,
1528                                              int msg_type = CV_16S);
1529             StereoBeliefPropagation(int ndisp, int iters, int levels,
1530                                     float max_data_term, float data_weight,
1531                                     float max_disc_term, float disc_single_jump,
1532                                     int msg_type = CV_32F);
1533             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1534             void operator()(const oclMat &data, oclMat &disparity);
1535             int ndisp;
1536             int iters;
1537             int levels;
1538             float max_data_term;
1539             float data_weight;
1540             float max_disc_term;
1541             float disc_single_jump;
1542             int msg_type;
1543         private:
1544             oclMat u, d, l, r, u2, d2, l2, r2;
1545             std::vector<oclMat> datas;
1546             oclMat out;
1547         };
1548
1549         class CV_EXPORTS StereoConstantSpaceBP
1550         {
1551         public:
1552             enum { DEFAULT_NDISP    = 128 };
1553             enum { DEFAULT_ITERS    = 8   };
1554             enum { DEFAULT_LEVELS   = 4   };
1555             enum { DEFAULT_NR_PLANE = 4   };
1556             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
1557             explicit StereoConstantSpaceBP(
1558                 int ndisp    = DEFAULT_NDISP,
1559                 int iters    = DEFAULT_ITERS,
1560                 int levels   = DEFAULT_LEVELS,
1561                 int nr_plane = DEFAULT_NR_PLANE,
1562                 int msg_type = CV_32F);
1563             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1564                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1565                 int min_disp_th = 0,
1566                 int msg_type = CV_32F);
1567             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1568             int ndisp;
1569             int iters;
1570             int levels;
1571             int nr_plane;
1572             float max_data_term;
1573             float data_weight;
1574             float max_disc_term;
1575             float disc_single_jump;
1576             int min_disp_th;
1577             int msg_type;
1578             bool use_local_init_data_cost;
1579         private:
1580             oclMat u[2], d[2], l[2], r[2];
1581             oclMat disp_selected_pyr[2];
1582             oclMat data_cost;
1583             oclMat data_cost_selected;
1584             oclMat temp;
1585             oclMat out;
1586         };
1587
1588         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1589         //
1590         // see reference:
1591         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1592         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
1593         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1594         {
1595         public:
1596             OpticalFlowDual_TVL1_OCL();
1597
1598             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1599
1600             void collectGarbage();
1601
1602             /**
1603             * Time step of the numerical scheme.
1604             */
1605             double tau;
1606
1607             /**
1608             * Weight parameter for the data term, attachment parameter.
1609             * This is the most relevant parameter, which determines the smoothness of the output.
1610             * The smaller this parameter is, the smoother the solutions we obtain.
1611             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1612             */
1613             double lambda;
1614
1615             /**
1616             * Weight parameter for (u - v)^2, tightness parameter.
1617             * It serves as a link between the attachment and the regularization terms.
1618             * In theory, it should have a small value in order to maintain both parts in correspondence.
1619             * The method is stable for a large range of values of this parameter.
1620             */
1621             double theta;
1622
1623             /**
1624             * Number of scales used to create the pyramid of images.
1625             */
1626             int nscales;
1627
1628             /**
1629             * Number of warpings per scale.
1630             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1631             * This is a parameter that assures the stability of the method.
1632             * It also affects the running time, so it is a compromise between speed and accuracy.
1633             */
1634             int warps;
1635
1636             /**
1637             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1638             * A small value will yield more accurate solutions at the expense of a slower convergence.
1639             */
1640             double epsilon;
1641
1642             /**
1643             * Stopping criterion iterations number used in the numerical scheme.
1644             */
1645             int iterations;
1646
1647             bool useInitialFlow;
1648
1649         private:
1650             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1651
1652             std::vector<oclMat> I0s;
1653             std::vector<oclMat> I1s;
1654             std::vector<oclMat> u1s;
1655             std::vector<oclMat> u2s;
1656
1657             oclMat I1x_buf;
1658             oclMat I1y_buf;
1659
1660             oclMat I1w_buf;
1661             oclMat I1wx_buf;
1662             oclMat I1wy_buf;
1663
1664             oclMat grad_buf;
1665             oclMat rho_c_buf;
1666
1667             oclMat p11_buf;
1668             oclMat p12_buf;
1669             oclMat p21_buf;
1670             oclMat p22_buf;
1671
1672             oclMat diff_buf;
1673             oclMat norm_buf;
1674         };
// Sorting methods currently supported (the `method` argument of sortByKey).
enum
{
    SORT_BITONIC   = 0, // only supports power-of-2 buffer sizes
    SORT_SELECTION = 1, // cannot sort duplicate keys
    SORT_MERGE     = 2,
    SORT_RADIX     = 3  // only supports signed int/float keys (CV_32S/CV_32F)
};
1683         //! Returns the sorted result of all the elements in input based on equivalent keys.
1684         //
1685         //  The element unit in the values to be sorted is determined from the data type,
1686         //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless its
1687         //  matrix dimension.
1688         //  both keys and values will be sorted inplace
1689         //  Key needs to be single channel oclMat.
1690         //
1691         //  Example:
1692         //  input -
1693         //    keys   = {2,    3,   1}   (CV_8UC1)
1694         //    values = {10,5, 4,3, 6,2} (CV_8UC2)
1695         //  sortByKey(keys, values, SORT_SELECTION, false);
1696         //  output -
1697         //    keys   = {1,    2,   3}   (CV_8UC1)
1698         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
1699         void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
1700         /*!Base class for MOG and MOG2!*/
1701         class CV_EXPORTS BackgroundSubtractor
1702         {
1703         public:
1704             //! the virtual destructor
1705             virtual ~BackgroundSubtractor();
1706             //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
1707             virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
1708
1709             //! computes a background image
1710             virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
1711         };
1712                 /*!
1713         Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
1714
1715         The class implements the following algorithm:
1716         "An improved adaptive background mixture model for real-time tracking with shadow detection"
1717         P. KadewTraKuPong and R. Bowden,
1718         Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
1719         http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
1720         */
1721         class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
1722         {
1723         public:
1724             //! the default constructor
1725             MOG(int nmixtures = -1);
1726
1727             //! re-initiaization method
1728             void initialize(Size frameSize, int frameType);
1729
1730             //! the update operator
1731             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
1732
1733             //! computes a background image which are the mean of all background gaussians
1734             void getBackgroundImage(oclMat& backgroundImage) const;
1735
1736             //! releases all inner buffers
1737             void release();
1738
1739             int history;
1740             float varThreshold;
1741             float backgroundRatio;
1742             float noiseSigma;
1743
1744         private:
1745             int nmixtures_;
1746
1747             Size frameSize_;
1748             int frameType_;
1749             int nframes_;
1750
1751             oclMat weight_;
1752             oclMat sortKey_;
1753             oclMat mean_;
1754             oclMat var_;
1755         };
1756
1757         /*!
1758         Gaussian mixture-based background/foreground segmentation on oclMat frames. The class implements the following algorithm:
1759         "Improved adaptive Gaussian mixture model for background subtraction"
1760         Z.Zivkovic
1761         International Conference Pattern Recognition, UK, August, 2004.
1762         http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
1763         */
1764         class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
1765         {
1766         public:
1767             //! Default constructor. nmixtures is the requested maximum number of mixture components; how the default of -1 is resolved is decided by the implementation (not visible in this header).
1768             MOG2(int nmixtures = -1);
1769
1770             //! Re-initialization method; takes the size and type of the frames that will be processed.
1771             void initialize(Size frameSize, int frameType);
1772
1773             //! The update operator: feeds the next frame to the model; fgmask is the output argument for the foreground mask. learningRate defaults to -1.0f (its interpretation is defined by the implementation).
1774             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
1775
1776             //! Computes a background image, which is the mean of all background Gaussians.
1777             void getBackgroundImage(oclMat& backgroundImage) const;
1778
1779             //! Releases all inner buffers.
1780             void release();
1781
1782             // Parameters.
1783             // You should call initialize() after changing any of them. Each explanatory comment below FOLLOWS the member it describes.
1784
1785             int history; // presumably the length of the history in frames -- confirm in the implementation
1786
1787             // NOTE: the maximum allowed number of mixture components is given by the constructor's nmixtures argument;
1788             // the actual number is determined dynamically per pixel (see bgmodelUsedModes_ below).
1789             float varThreshold;
1790             // Threshold on the squared Mahalanobis distance used to decide whether a sample is well described
1791             // by the background model or not. Related to Cthr from the paper.
1792             // This does not influence the update of the background. A typical value could be 4 sigma,
1793             // that is varThreshold=4*4=16. Corresponds to Tb in the paper.
1794
1795             /////////////////////////
1796             // Less important parameters - things you might change, but be careful.
1797             ////////////////////////
1798
1799             float backgroundRatio;
1800             // Corresponds to fTB=1-cf from the paper.
1801             // TB - threshold when the component becomes significant enough to be included into
1802             // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9.
1803             // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
1804             // it is considered background. NOTE(review): the original comment said "foreground", which contradicts the definition of TB above -- confirm.
1805             // float noiseSigma;   (commented-out member kept from the original source)
1806             float varThresholdGen;
1807
1808             // varThresholdGen corresponds to Tg - threshold on the squared Mahalanobis distance used to decide
1809             // when a sample is close to the existing components. If it is not close
1810             // to any, a new component will be generated. I use 3 sigma => Tg=3*3=9.
1811             // A smaller Tg leads to more generated components; a higher Tg might
1812             // lead to a small number of components, but they can grow too large.
1813             float fVarInit;
1814             float fVarMin;
1815             float fVarMax;
1816
1817             // fVarInit is the initial variance for the newly generated components.
1818             // It will influence the speed of adaptation. A good guess should be made.
1819             // A simple way is to estimate the typical standard deviation from the images.
1820             // I used here 10 as a reasonable value.
1821             // fVarMin and fVarMax can be used to further control the variance.
1822             float fCT; // CT - complexity reduction prior
1823             // This is related to the number of samples needed to accept that a component
1824             // actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
1825             // the standard Stauffer&Grimson algorithm (maybe not exact but very similar).
1826
1827             // Shadow detection parameters.
1828             bool bShadowDetection; // default 1 (per the original comment) - do shadow detection
1829             unsigned char nShadowDetection; // when shadow detection is on, this value is inserted as the detection result - 127 is the default value (per the original comment)
1830             float fTau;
1831             // Tau - shadow threshold. The shadow is detected if the pixel is a darker
1832             // version of the background. Tau is a threshold on how much darker the shadow can be.
1833             // Tau=0.5 means that if the pixel is more than 2 times darker then it is not shadow.
1834             // See: Prati, Mikic, Trivedi, Cucchiara, "Detecting Moving Shadows...", IEEE PAMI, 2003.
1835
1836         private:
1837             int nmixtures_;     // maximum number of mixture components (presumably resolved from the constructor argument)
1838
1839             Size frameSize_;    // presumably the frame size passed to initialize()
1840             int frameType_;     // presumably the frame type passed to initialize()
1841             int nframes_;       // presumably the number of frames processed so far -- confirm
1842
1843             oclMat weight_;     // inner model buffer (contents and layout are defined by the implementation)
1844             oclMat variance_;   // inner model buffer (contents and layout are defined by the implementation)
1845             oclMat mean_;       // inner model buffer (contents and layout are defined by the implementation)
1846
1847             oclMat bgmodelUsedModes_; // keeps track of the number of modes per pixel
1848         };
1849
1850         /*!*************** Kalman Filter (all matrices are stored as oclMat) *************!*/
1851         class CV_EXPORTS KalmanFilter
1852         {
1853         public:
1854             KalmanFilter(); //!< default constructor; takes no dimensions (see the full constructor and init() for supplying them)
1855             //! The full constructor taking the dimensionality of the state, of the measurement and of the control vector, plus the matrix element type (CV_32F by default).
1856             KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1857             //! Re-initializes the Kalman filter with the same arguments as the full constructor. The previous content is destroyed.
1858             void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1859
1860             const oclMat& predict(const oclMat& control=oclMat());  //!< prediction step with an optional control input (empty oclMat by default); presumably returns statePre -- confirm in the implementation
1861             const oclMat& correct(const oclMat& measurement);       //!< correction step for the given measurement; presumably returns statePost -- confirm in the implementation
1862
1863             oclMat statePre;           //!< predicted state (x'(k)): x'(k)=A*x(k-1)+B*u(k)
1864             oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
1865             oclMat transitionMatrix;   //!< state transition matrix (A)
1866             oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
1867             oclMat measurementMatrix;  //!< measurement matrix (H)
1868             oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
1869             oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
1870             oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
1871             oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
1872             oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
1873         private:
1874             oclMat temp1;   // temp1..temp5: temporary matrices; presumably scratch storage for predict()/correct() -- confirm in the implementation
1875             oclMat temp2;
1876             oclMat temp3;
1877             oclMat temp4;
1878             oclMat temp5;
1879         };
1880
//! Ceiling division: returns the number of grain-sized chunks needed to cover
//! total elements, i.e. ceil(total / grain). Returns 0 when total is 0.
//! grain must be non-zero (division by zero otherwise).
static inline size_t divUp(size_t total, size_t grain)
{
    // Quotient plus one extra chunk for a non-zero remainder. Unlike the
    // classic (total + grain - 1) / grain form, this cannot wrap around
    // when total is close to the maximum value of size_t.
    return total / grain + (total % grain != 0 ? 1 : 0);
}
1885
1886         /*!*************** K Nearest Neighbour (extends CvKNearest with an oclMat-based search) *************!*/
1887         class CV_EXPORTS KNearestNeighbour: public CvKNearest
1888         {
1889         public:
1890             KNearestNeighbour();
1891             ~KNearestNeighbour();
1892
1893             bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)), // trains on trainData/labels; when sampleIdx is omitted it binds to a temporary Mat created just for this call
1894                 bool isRegression = false, int max_k = 32, bool updateBase = false); // the remaining arguments presumably mirror CvKNearest::train -- confirm against the base class
1895
1896             void clear(); // presumably releases the stored training data, including samples_ocl -- confirm in the implementation
1897
1898             void find_nearest(const oclMat& samples, int k, oclMat& lables); // nearest-neighbour query for samples using k neighbours; "lables" (sic) is the output argument, presumably receiving the resulting labels -- confirm
1899
1900         private:
1901             oclMat samples_ocl; // oclMat member; presumably the device-side copy of the training samples -- confirm in the implementation
1902         };
1903         /*!***************  SVM (extends CvSVM; presumably with OpenCL-backed prediction/training -- confirm in the implementation)  *************!*/
1904         class CV_EXPORTS CvSVM_OCL : public CvSVM
1905         {
1906         public:
1907             CvSVM_OCL(); // default constructor
1908
1909             CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses, // constructor taking the training data and responses (presumably trains the model right away, like the matching CvSVM constructor -- confirm)
1910                       const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(), // optional index matrices, empty by default
1911                       CvSVMParams params=CvSVMParams()); // SVM parameters, default-constructed when omitted
1912             CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const; // prediction addressed by row_index within src (presumably one sample per row -- confirm)
1913             CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const; // prediction for a set of samples; results is the output array
1914             CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const; // prediction for a single sample; returnDFVal presumably selects the decision-function value as in CvSVM::predict -- confirm
1915             float predict( const CvMat* samples, CV_OUT CvMat* results ) const; // legacy CvMat overload; results is an output argument (CV_OUT)
1916
1917         protected:
1918             float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const; // protected variant that additionally takes row_len (presumably the length of one sample row -- confirm)
1919             void create_kernel(); // presumably overrides the CvSVM hook that creates the kernel object -- confirm against the base class
1920             void create_solver(); // presumably overrides the CvSVM hook that creates the solver object -- confirm against the base class
1921         };
1922         /*!***************  END  *************!*/
1923     }
1924 }
1925 #if defined _MSC_VER && _MSC_VER >= 1200
1926 #  pragma warning( push)
1927 #  pragma warning( disable: 4267)
1928 #endif
1929 #include "opencv2/ocl/matrix_operations.hpp"
1930 #if defined _MSC_VER && _MSC_VER >= 1200
1931 #  pragma warning( pop)
1932 #endif
1933
1934 #endif /* __OPENCV_OCL_HPP__ */