modules/ocl/include/opencv2/ocl/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core/core.hpp"
  51 #include "opencv2/imgproc/imgproc.hpp"
  52 #include "opencv2/objdetect/objdetect.hpp"
  53 #include "opencv2/features2d/features2d.hpp"
  54 #include "opencv2/ml/ml.hpp"
  55
  56 namespace cv
  57 {
  58     namespace ocl
  59     {
  60         enum DeviceType // device-category selectors; each named category below occupies its own bit
  61         {
  62             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0), // bit 0
  63             CVCL_DEVICE_TYPE_CPU         = (1 << 1), // bit 1
  64             CVCL_DEVICE_TYPE_GPU         = (1 << 2), // bit 2; default deviceType argument of getOpenCLDevices()
  65             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3), // bit 3
  66             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
  67             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF // all 32 bits set, so it covers every flag above
  68         };
  69
  70         enum DevMemRW // access-mode selector; passed as rw_type to oclMat::createEx()
  71         {
  72             DEVICE_MEM_R_W = 0,   // value 0; presumably read-write access -- TODO confirm against implementation
  73             DEVICE_MEM_R_ONLY,    // value 1; presumably read-only access -- TODO confirm
  74             DEVICE_MEM_W_ONLY     // value 2; presumably write-only access -- TODO confirm
  75         };
  76
  77         enum DevMemType // allocation-kind selector; passed as mem_type to oclMat::createEx()
  78         {
  79             DEVICE_MEM_DEFAULT = 0, // value 0
  80             DEVICE_MEM_AHP,         // AHP = alloc host pointer
  81             DEVICE_MEM_UHP,         // UHP = use host pointer
  82             DEVICE_MEM_CHP,         // CHP = copy host pointer
  83             DEVICE_MEM_PM           // PM  = persistent memory
  84         };
  85
  86         // These structures contain OpenCL runtime information: DeviceInfo describes one device,
             // PlatformInfo one platform. PlatformInfo is forward-declared because DeviceInfo points back to it.
  87
  88         struct PlatformInfo;
  89
  90         struct DeviceInfo
  91         {
  92             int _id; // reserved, don't use it
  93
  94             DeviceType deviceType;           // category of this device (see enum DeviceType)
  95             std::string deviceProfile;
  96             std::string deviceVersion;
  97             std::string deviceName;
  98             std::string deviceVendor;
  99             int deviceVendorId;
 100             std::string deviceDriverVersion;
 101             std::string deviceExtensions;
 102
                 // Execution and memory limits. NOTE(review): presumably mirror the corresponding
                 // clGetDeviceInfo queries; units are not stated in this header -- confirm.
 103             size_t maxWorkGroupSize;
 104             std::vector<size_t> maxWorkItemSizes;
 105             int maxComputeUnits;
 106             size_t localMemorySize;
 107             size_t maxMemAllocSize;
 108
                 // presumably the numeric major/minor parts of deviceVersion -- TODO confirm
 109             int deviceVersionMajor;
 110             int deviceVersionMinor;
 111
 112             bool haveDoubleSupport;
 113             bool isUnifiedMemory; // true means integrated GPU, otherwise this value is false
 114
 115             std::string compilationExtraOptions;
 116
 117             const PlatformInfo* platform;    // read-only pointer to the platform record for this device
 118
 119             DeviceInfo();
 120         };
 121
 122         struct PlatformInfo // describes one OpenCL platform together with pointers to its device records
 123         {
 124             int _id; // reserved, don't use it
 125
 126             std::string platformProfile;
 127             std::string platformVersion;
 128             std::string platformName;
 129             std::string platformVendor;
 130             std::string platformExtensons; // (sic) misspelling of "Extensions"; it is a public field name, so renaming would break callers
 131
                 // presumably the numeric major/minor parts of platformVersion -- TODO confirm
 132             int platformVersionMajor;
 133             int platformVersionMinor;
 134
 135             std::vector<const DeviceInfo*> devices; // read-only pointers to DeviceInfo records (each DeviceInfo has a 'platform' back-pointer)
 136
 137             PlatformInfo();
 138         };
 139
 140         //////////////////////////////// Initialization & Info ////////////////////////
             // List of pointers to read-only PlatformInfo records.
 141         typedef std::vector<const PlatformInfo*> PlatformsInfo;
 142
             // Takes the list by non-const reference; presumably fills it with the available platforms
             // and returns their count -- TODO confirm the meaning of the int result.
 143         CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
 144
             // List of pointers to read-only DeviceInfo records.
 145         typedef std::vector<const DeviceInfo*> DevicesInfo;
 146
             // deviceType is declared as int and defaults to CVCL_DEVICE_TYPE_GPU (see enum DeviceType).
             // platform defaults to NULL -- presumably "do not restrict to one platform", TODO confirm.
             // The int result is presumably the number of devices found -- TODO confirm.
 147         CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
 148                 const PlatformInfo* platform = NULL);
 149
 150         // sets the device you want to use; info points to a read-only DeviceInfo record
 151         CV_EXPORTS void setDevice(const DeviceInfo* info);
 152
 153         //////////////////////////////// Error handling ////////////////////////
             // Error-reporting entry point. Takes the message plus the source location (file, line, function)
             // it refers to. NOTE(review): whether it throws, logs or aborts is not visible in this header.
 154         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
 155
 156         enum FEATURE_TYPE // feature identifiers accepted by supportsFeature(); numbering starts at 1
 157         {
 158             FEATURE_CL_DOUBLE = 1,    // value 1; presumably double-precision support (cf. DeviceInfo::haveDoubleSupport) -- TODO confirm
 159             FEATURE_CL_UNIFIED_MEM,   // value 2; presumably unified memory (cf. DeviceInfo::isUnifiedMemory) -- TODO confirm
 160             FEATURE_CL_VER_1_2        // value 3; presumably OpenCL version 1.2 availability -- TODO confirm
 161         };
 162
 163         // Represents an OpenCL context (interface class).
             // The constructor and destructor are protected, so client code cannot create or destroy
             // a Context directly; an instance is obtained through the static getContext().
 164         class CV_EXPORTS Context
 165         {
 166         protected:
 167             Context() { }
 168             ~Context() { }
 169         public:
                 // Returns a pointer to a Context. NOTE(review): presumably the context bound to the
                 // device chosen with setDevice() -- confirm in the implementation.
 170             static Context* getContext();
 171
                 // Feature query for this context; featureType is one of enum FEATURE_TYPE.
 172             bool supportsFeature(FEATURE_TYPE featureType) const;
                 // Read-only description of the device associated with this context.
 173             const DeviceInfo& getDeviceInfo() const;
 174
                 // Untyped accessors to the underlying OpenCL objects. They return const void*, so the
                 // caller must cast to the proper OpenCL type -- exact pointee types: TODO confirm.
 175             const void* getOpenCLContextPtr() const;
 176             const void* getOpenCLCommandQueuePtr() const;
 177             const void* getOpenCLDeviceIDPtr() const;
 178         };
 179
 180         inline const void *getClContextPtr()
 181         {
                 // Convenience shortcut: look up the Context through its static accessor,
                 // then hand back whatever getOpenCLContextPtr() reports for it.
 182             const Context *activeContext = Context::getContext();
                 return activeContext->getOpenCLContextPtr();
 183         }
 184
 185         inline const void *getClCommandQueuePtr()
 186         {
                 // Convenience shortcut: look up the Context through its static accessor,
                 // then hand back whatever getOpenCLCommandQueuePtr() reports for it.
 187             const Context *activeContext = Context::getContext();
                 return activeContext->getOpenCLCommandQueuePtr();
 188         }
 189
 190         bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType); // free-function form of the feature query; presumably forwards to Context::supportsFeature -- TODO confirm
 191
 192         void CV_EXPORTS finish(); // NOTE(review): presumably blocks until queued OpenCL work has completed -- confirm in implementation
 193
 194         enum BINARY_CACHE_MODE // bit flags for the 'mode' argument of setBinaryDiskCache()
 195         {
 196             CACHE_NONE    = 0,        // do not cache OpenCL binary
 197             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (bit 0)
 198             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (bit 1)
 199             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // cache OpenCL binary in both build modes (both bits set)
 200         };
 201         //! Enables or disables caching of OpenCL program binaries on local disk.
 202         // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
 203         // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
 204         // binary file, which will be reused when the OpenCV executable is started again.
 205         //
 206         // This feature is enabled by default.
             // mode: BINARY_CACHE_MODE flags (declared as int); defaults to CACHE_RELEASE.
             // path: location for the cached files; defaults to "./".
 207         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 208
 209         //! sets the location where the binary cache is saved (path is a C string)
 210         CV_EXPORTS void setBinaryPath(const char *path);
 211
 212         struct ProgramSource // descriptor of an OpenCL program; holds raw C-string pointers and copies nothing
 213         {
 214             const char* name;        // program name; used as the in-memory cache key, should be unique
 215             const char* programStr;  // pointer to the program string, stored exactly as passed in
 216             const char* programHash; // mark used for on-disk caching; NULL when built with the two-argument constructor
 217
 218             // Cache in memory by name (should be unique). Caching on disk disabled.
                 // programHash is initialised to NULL.
 219             inline ProgramSource(const char* _name, const char* _programStr)
 220                 : name(_name), programStr(_programStr), programHash(NULL)
 221             {
 222             }
 223
 224             // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
                 // All three pointers are stored as given; the caller must keep the strings alive.
 225             inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
 226                 : name(_name), programStr(_programStr), programHash(_programHash)
 227             {
 228             }
 229         };
 230
 231         //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
             //! NOTE(review): the signature below has no cleanUp parameter, so the sentence above looks stale -- confirm.
 232         //! Deprecated, will be replaced
             // clCxt: context to run in; source/kernelName: program descriptor and the kernel to call;
             // globalThreads/localThreads: three-element size arrays;
             // args: (size, pointer) pairs, one per kernel argument -- presumably byte size and address, TODO confirm;
             // channels, depth, build_options: meaning not stated in this header -- TODO confirm.
 233         CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
 234                 const cv::ocl::ProgramSource& source, string kernelName,
 235                 size_t globalThreads[3], size_t localThreads[3],
 236                 std::vector< std::pair<size_t, const void *> > &args,
 237                 int channels, int depth, const char *build_options);
 238
 239         class CV_EXPORTS oclMatExpr; // forward declaration; needed by oclMat::operator=(const oclMatExpr&)
 240         //////////////////////////////// oclMat ////////////////////////////////
             // Matrix class whose data pointer refers to an OCL memory object (see the 'data' member).
             // Host <-> device transfers go through upload()/download() and the Mat conversions,
             // which are documented below as blocking.
 241         class CV_EXPORTS oclMat
 242         {
 243         public:
 244             //! default constructor
 245             oclMat();
 246             //! constructs oclMatrix of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 247             oclMat(int rows, int cols, int type);
 248             oclMat(Size size, int type);
 249             //! constructs oclMatrix and fills it with the specified value s.
 250             oclMat(int rows, int cols, int type, const Scalar &s);
 251             oclMat(Size size, int type, const Scalar &s);
 252             //! copy constructor
 253             oclMat(const oclMat &m);
 254
 255             //! constructor for oclMatrix headers pointing to user-allocated data
                 // step defaults to Mat::AUTO_STEP
 256             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 257             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 258
 259             //! creates a matrix header for a part of the bigger matrix
 260             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 261             oclMat(const oclMat &m, const Rect &roi);
 262
 263             //! builds oclMat from Mat. Performs a blocking upload to the device.
                 // explicit: a Mat is never converted to oclMat implicitly
 264             explicit oclMat (const Mat &m);
 265
 266             //! destructor - calls release()
 267             ~oclMat();
 268
 269             //! assignment operators
 270             oclMat &operator = (const oclMat &m);
 271             //! assignment operator. Performs a blocking upload to the device.
 272             oclMat &operator = (const Mat &m);
                 //! assignment from an oclMatExpr (the type returned by the free operators +, -, *, / on oclMat)
 273             oclMat &operator = (const oclMatExpr& expr);
 274
 275             //! performs a blocking upload of data to oclMat.
 276             void upload(const cv::Mat &m);
 277
 278
 279             //! downloads data from device to host memory. Blocking calls.
                 // note: the conversion operator is implicit, so an oclMat can silently convert to Mat
 280             operator Mat() const;
 281             void download(cv::Mat &m) const;
 282
 283             //! convert to _InputArray
 284             operator _InputArray();
 285
 286             //! convert to _OutputArray
 287             operator _OutputArray();
 288
 289             //! returns a new oclMatrix header for the specified row
 290             oclMat row(int y) const;
 291             //! returns a new oclMatrix header for the specified column
 292             oclMat col(int x) const;
 293             //! ... for the specified row span
 294             oclMat rowRange(int startrow, int endrow) const;
 295             oclMat rowRange(const Range &r) const;
 296             //! ... for the specified column span
 297             oclMat colRange(int startcol, int endcol) const;
 298             oclMat colRange(const Range &r) const;
 299
 300             //! returns deep copy of the oclMatrix, i.e. the data is copied
 301             oclMat clone() const;
 302
 303             //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
 304             // It calls m.create(this->size(), this->type()).
 305             // It supports any data type
                 // mask defaults to an empty oclMat
 306             void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
 307
 308             //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
 309             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
                 // alpha defaults to 1 and beta to 0
 310             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 311
                 // type defaults to -1; NOTE(review): presumably "keep the current type" as in Mat::assignTo -- confirm
 312             void assignTo( oclMat &m, int type = -1 ) const;
 313
 314             //! sets every oclMatrix element to s
 315             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 316             oclMat& operator = (const Scalar &s);
 317             //! sets some of the oclMatrix elements to s, according to the mask
 318             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 319             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 320             //! creates alternative oclMatrix header for the same data, with different
 321             // number of channels and/or different number of rows. see cvReshape.
 322             oclMat reshape(int cn, int rows = 0) const;
 323
 324             //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
 325             // previous data is unreferenced if needed.
 326             void create(int rows, int cols, int type);
 327             void create(Size size, int type);
 328
 329             //! allocates new oclMatrix with specified device memory type.
                 // rw_type is a DevMemRW value, mem_type a DevMemType value
 330             void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
 331             void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 332
 333             //! decreases reference counter;
 334             // deallocate the data when reference counter reaches 0.
 335             void release();
 336
 337             //! swaps with another oclMat
 338             void swap(oclMat &mat);
 339
 340             //! locates oclMatrix header within a parent oclMatrix. See below
 341             void locateROI( Size &wholeSize, Point &ofs ) const;
 342             //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
 343             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 344             //! extracts a rectangular sub-oclMatrix
 345             // (this is a generalized form of row, rowRange etc.)
 346             oclMat operator()( Range rowRange, Range colRange ) const;
 347             oclMat operator()( const Rect &roi ) const;
 348
                 //! compound assignment with another oclMat
                 // NOTE(review): presumably element-wise, matching ocl::add/subtract/multiply/divide -- confirm
 349             oclMat& operator+=( const oclMat& m );
 350             oclMat& operator-=( const oclMat& m );
 351             oclMat& operator*=( const oclMat& m );
 352             oclMat& operator/=( const oclMat& m );
 353
 354             //! returns true if the oclMatrix data is continuous
 355             // (i.e. when there are no gaps between successive rows).
 356             // similar to CV_IS_MAT_CONT(cvMat->type)
 357             bool isContinuous() const;
 358             //! returns element size in bytes,
 359             // similar to CV_ELEM_SIZE(cvMat->type)
 360             size_t elemSize() const;
 361             //! returns the size of element channel in bytes.
 362             size_t elemSize1() const;
 363             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 364             int type() const;
 365             //! returns the element type as stored by ocl, e.g. 8UC3 is reported as 8UC4 because
 366             //! a 3-channel element actually uses 4-channel space
 367             int ocltype() const;
 368             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 369             int depth() const;
 370             //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
 371             int channels() const;
 372             //! returns the number of channels as stored by ocl: 4 for a 3-channel element,
 373             //! because a 3-channel element actually uses 4-channel space
 374             int oclchannels() const;
 375             //! returns step/elemSize1()
 376             size_t step1() const;
 377             //! returns oclMatrix size:
 378             // width == number of columns, height == number of rows
 379             Size size() const;
 380             //! returns true if oclMatrix data is NULL
 381             bool empty() const;
 382
 383             //! returns pointer to y-th row
                 // NOTE(review): 'data' is documented below as an OCL memory object, so the result is
                 // presumably not a directly dereferenceable host address -- confirm before use
 384             uchar* ptr(int y = 0);
 385             const uchar *ptr(int y = 0) const;
 386
 387             //! template version of the above method
 388             template<typename _Tp> _Tp *ptr(int y = 0);
 389             template<typename _Tp> const _Tp *ptr(int y = 0) const;
 390
 391             //! matrix transposition
 392             oclMat t() const;
 393
 394             /*! includes several bit-fields:
 395               - the magic signature
 396               - continuity flag
 397               - depth
 398               - number of channels
 399               */
 400             int flags;
 401             //! the number of rows and columns
 402             int rows, cols;
 403             //! a distance between successive rows in bytes; includes the gap if any
 404             size_t step;
 405             //! pointer to the data (OCL memory object)
 406             uchar *data;
 407
 408             //! pointer to the reference counter;
 409             // when oclMatrix points to user-allocated data, the pointer is NULL
 410             int *refcount;
 411
 412             //! helper fields used in locateROI and adjustROI
 413             // datastart and dataend are not used in the current version
 414             uchar *datastart;
 415             uchar *dataend;
 416
 417             //! OpenCL context associated with the oclMat object.
 418             Context *clCxt; // TODO clCtx
 419             //! offset used to handle ROI, measured in bytes
 420             int offset;
 421             //! row and column counts of the whole matrix; they replace datastart and dataend, which are no longer used
 422             int wholerows;
 423             int wholecols;
 424         };
 425
 426         // convert InputArray/OutputArray to oclMat references
             // Both overloads return a non-const oclMat&, including the InputArray one.
             // NOTE(review): behaviour when the array does not wrap an oclMat is not visible here -- confirm.
 427         CV_EXPORTS oclMat& getOclMatRef(InputArray src);
 428         CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 429
 430         ///////////////////// mat split and merge /////////////////////////////////
 431         //! Composes a multi-channel array from several single-channel arrays
 432         // Supports all types
             // pointer overload: src points to n input matrices; vector overload: one input per element
 433         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
 434         CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
 435
 436         //! Divides multi-channel array into several single-channel arrays
 437         // Supports all types
             // pointer overload: takes no element count, so dst presumably must already hold one
             // oclMat per source channel -- TODO confirm
 438         CV_EXPORTS void split(const oclMat &src, oclMat *dst);
 439         CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
 440
 441         ////////////////////////////// Arithmetics ///////////////////////////////////
 442
 443         //! computes the weighted sum of two matrices plus an offset (dst = src1 * alpha + src2 * beta + gama)
 444         // supports all data types
             // note: the offset parameter is spelled "gama" in this signature
 445         CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
 446
 447         //! adds one matrix to another (dst = src1 + src2)
 448         // supports all data types
             // mask defaults to an empty oclMat
 449         CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 450         //! adds scalar to a matrix (dst = src1 + s)
 451         // supports all data types
 452         CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 453
 454         //! subtracts one matrix from another (dst = src1 - src2)
 455         // supports all data types
 456         CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 457         //! subtracts scalar from a matrix (dst = src1 - s)
 458         // supports all data types
 459         CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 460
 461         //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
 462         // supports all data types
             // scale defaults to 1
 463         CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 464         //! multiplies a matrix by a number (dst = scalar * src)
 465         // supports all data types
 466         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 467
 468         //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
 469         // supports all data types
             // scale defaults to 1
 470         CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 471         //! computes element-wise quotient of a number and an array (dst = scale / src1)
 472         // supports all data types
 473         CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
 474
 475         //! computes element-wise minimum of the two arrays (dst = min(src1, src2))
 476         // supports all data types
 477         CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst);
 478
 479         //! computes element-wise maximum of the two arrays (dst = max(src1, src2))
 480         // supports all data types
 481         CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst);
 482
 483         //! compares elements of two arrays (dst = src1 <cmpop> src2)
 484         // supports all data types
             // cmpop is passed as int; NOTE(review): presumably one of the cv::CMP_* codes -- confirm
 485         CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);
 486
 487         //! transposes the matrix
 488         // supports all data types
 489         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 490
 491         //! computes element-wise absolute values of an array (dst = abs(src))
 492         // supports all data types
 493         CV_EXPORTS void abs(const oclMat &src, oclMat &dst);
 494
 495         //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
 496         // supports all data types
 497         CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
 498         //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
 499         // supports all data types
 500         CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
 501
 502         //! computes mean value and standard deviation of the array elements
 503         // supports all data types
             // this overload has no mask parameter; results are written to 'mean' and 'stddev'
 504         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 505
 506         //! computes norm of array
 507         // supports NORM_INF, NORM_L1, NORM_L2
 508         // supports all data types
             // normType defaults to NORM_L2
 509         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 510
 511         //! computes norm of the difference between two arrays
 512         // supports NORM_INF, NORM_L1, NORM_L2
 513         // supports all data types
             // normType defaults to NORM_L2
 514         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 515
 516         //! reverses the order of the rows, columns or both in a matrix
 517         // supports all types
             // flipCode selects which of the three; NOTE(review): presumably same convention as cv::flip -- confirm
 518         CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
 519
 520         //! computes sum of array elements
 521         // supports all types
             // NOTE(review): absSum/sqrSum presumably sum absolute values / squared values -- confirm
 522         CV_EXPORTS Scalar sum(const oclMat &m);
 523         CV_EXPORTS Scalar absSum(const oclMat &m);
 524         CV_EXPORTS Scalar sqrSum(const oclMat &m);
 525
 526         //! finds global minimum and maximum array elements and returns their values
 527         // supports all C1 types
             // minVal has no default; maxVal defaults to 0 (null pointer); mask defaults to an empty oclMat
 528         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
 529
 530         //! finds global minimum and maximum array elements and returns their values with locations
 531         // supports all C1 types
             // minVal has no default; maxVal, minLoc and maxLoc default to 0 (null pointer)
 532         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 533                                   const oclMat &mask = oclMat());
 534
 535         //! counts non-zero array elements
 536         // supports all types
 537         CV_EXPORTS int countNonZero(const oclMat &src);
 538
 539         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
 540         // destination array will have the same depth as lut and the same number of channels as source
 541         // It supports 8UC1 8UC4 only
 542         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 543
 544         //! only 8UC1 and 256 bins are supported now
 545         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 546         //! only 8UC1 and 256 bins are supported now
 547         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 548
 549         //! only 8UC1 is supported now
             // returns a Ptr<cv::CLAHE>; defaults: clipLimit = 40.0, tileGridSize = Size(8, 8)
 550         CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 551
 552         //! bilateralFilter
 553         // supports 8UC1 8UC4
             // borderType defaults to BORDER_DEFAULT
 554         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
 555
 556         //! Applies an adaptive bilateral filter to the input image
 557         //  This is not truly a bilateral filter. Instead of using user-provided fixed parameters,
 558         //  the function calculates a constant at each window based on local standard deviation,
 559         //  and uses this constant to do the filtering.
 560         //  supports 8UC1, 8UC3
             //  anchor defaults to Point(-1, -1); borderType defaults to BORDER_DEFAULT
 561         CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
 562
 563         //! computes exponent of each matrix element (dst = e**src)
 564         // supports only CV_32FC1, CV_64FC1 type
 565         CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
 566
 567         //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
 568         // supports only CV_32FC1, CV_64FC1 type
 569         CV_EXPORTS void log(const oclMat &src, oclMat &dst);
 570
 571         //! computes magnitude of each (x(i), y(i)) vector
 572         // supports only CV_32F, CV_64F type
 573         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 574
 575         //! computes angle (angle(i)) of each (x(i), y(i)) vector
 576         // supports only CV_32F, CV_64F type
             // angleInDegrees defaults to false
 577         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 578
 579         //! raises every element of the input array x to the power p; the result is written to y
 580         // supports only CV_32F, CV_64F type
 581         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 582
 583         //! converts Cartesian coordinates to polar
 584         // supports only CV_32F CV_64F type
             // angleInDegrees defaults to false
 585         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 586
 587         //! converts polar coordinates to Cartesian
 588         // supports only CV_32F CV_64F type
             // angleInDegrees defaults to false
 589         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 590
 591         //! performs per-element bit-wise inversion
 592         // supports all types
 593         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 594
 595         //! calculates per-element bit-wise disjunction of two arrays, or of an array and a scalar
 596         // supports all types
             // mask defaults to an empty oclMat
 597         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 598         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 599
 600         //! calculates per-element bit-wise conjunction of two arrays, or of an array and a scalar
 601         // supports all types
             // mask defaults to an empty oclMat
 602         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 603         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 604
 605         //! calculates per-element bit-wise "exclusive or" of two arrays, or of an array and a scalar
 606         // supports all types
             // mask defaults to an empty oclMat
 607         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 608         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 609
 610         //! Logical operators
             // These return an oclMat by value.
             // NOTE(review): presumably implemented with the bitwise_* functions above -- confirm.
 611         CV_EXPORTS oclMat operator ~ (const oclMat &);
 612         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 613         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 614         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 615
 616
 617         //! Mathematics operators
             // These return an oclMatExpr rather than an oclMat; oclMat::operator=(const oclMatExpr&)
             // accepts the result.
 618         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 619         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 620         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 621         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 622
 623         //! computes convolution of two images
 624         // supports only CV_32FC1 type
             // NOTE(review): temp1 is presumably the template/kernel image -- confirm
 625         CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
 626
             //! color-space conversion selected by 'code'; dcn defaults to 0
             // NOTE(review): presumably same meaning of code/dcn as cv::cvtColor; supported codes are not listed here -- confirm
 627         CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0);
 628
 629         //! initializes a scaled identity matrix
             // modifies src in place (non-const reference); val defaults to Scalar(1)
 630         CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
 631
 632         //////////////////////////////// Filter Engine ////////////////////////////////
 633
 634         /*!
 635           The Base Class for 1D or Row-wise Filters
 636
 637           This is the base class for linear or non-linear filters that process 1D data.
 638           In particular, such filters are used for the "horizontal" filtering parts in separable filters.

               The constructor only stores ksize_, anchor_ and bordertype_ in the public members of the
               same names; the filtering itself is the pure virtual operator(), supplied by subclasses.
 639           */
 640         class CV_EXPORTS BaseRowFilter_GPU
 641         {
 642         public:
 643             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
 644             virtual ~BaseRowFilter_GPU() {} // virtual, so deleting through a base pointer is well-defined
 645             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // applies the filter: reads src, writes dst
 646             int ksize, anchor, bordertype;
 647         };
 648
 649         /*!
 650           The Base Class for Column-wise Filters
 651
 652           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 653           Such filters are used for the "vertical" filtering parts in separable filters.
 654           */
 655         class CV_EXPORTS BaseColumnFilter_GPU
 656         {
 657         public:
                 //! Stores the given kernel size, anchor and border type in the public fields below.
 658             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
                 //! Virtual destructor: concrete filters derive from this abstract base.
 659             virtual ~BaseColumnFilter_GPU() {}
                 //! Pure virtual filtering entry point; takes a read-only source and a writable destination.
 660             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
                 //! Values passed to the constructor, stored unchanged.
 661             int ksize, anchor, bordertype;
 662         };
 663
 664         /*!
 665           The Base Class for Non-Separable 2D Filters.
 666
 667           This is the base class for linear or non-linear 2D filters.
 668           */
 669         class CV_EXPORTS BaseFilter_GPU
 670         {
 671         public:
                 //! Copies the kernel size, anchor point and border type into the public fields below.
                 //! The border type is accepted by const reference but stored by value.
 672             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
 673                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
                 //! Virtual destructor: concrete filters derive from this abstract base.
 674             virtual ~BaseFilter_GPU() {}
                 //! Pure virtual filtering entry point; takes a read-only source and a writable destination.
 675             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
                 //! 2D kernel size as passed to the constructor.
 676             Size ksize;
                 //! Anchor point as passed to the constructor.
 677             Point anchor;
                 //! Border type as passed to the constructor (spelled borderType here, bordertype in the 1D filter bases).
 678             int borderType;
 679         };
 680
 681         /*!
 682           The Base Class for Filter Engine.
 683
 684           The class can be used to apply an arbitrary filtering operation to an image.
 685           It contains all the necessary intermediate buffers.
 686           */
 687         class CV_EXPORTS FilterEngine_GPU
 688         {
 689         public:
                 //! Virtual destructor: engines are handed out through Ptr<FilterEngine_GPU> by the factory functions.
 690             virtual ~FilterEngine_GPU() {}
 691
                 //! Pure virtual entry point implemented by concrete engines; filters src into dst.
                 //! roi defaults to Rect(0, 0, -1, -1).
                 //! NOTE(review): presumably this sentinel selects the whole image - confirm against the implementations.
 692             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
 693         };
 694
 695         //! returns the non-separable filter engine with the specified filter
 696         CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
 697
 698         //! returns the primitive row filter with the specified kernel
 699         CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
 700                 int anchor = -1, int bordertype = BORDER_DEFAULT);
 701
 702         //! returns the primitive column filter with the specified kernel
 703         CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
 704                 int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 705
 706         //! returns the separable linear filter engine
 707         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
 708                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 709
 710         //! returns the separable filter engine with the specified filters
 711         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
 712                 const Ptr<BaseColumnFilter_GPU> &columnFilter);
 713
 714         //! returns the Gaussian filter engine
 715         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 716
 717         //! returns filter engine for the generalized Sobel operator
 718         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 719
 720         //! applies Laplacian operator to the image
 721         // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
 722         CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
 723
 724         //! returns 2D box filter
 725         // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
 726         CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
 727                 const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 728
 729         //! returns box filter engine
 730         CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
 731                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 732
 733         //! returns 2D filter with the specified kernel
 734         // supports CV_8UC1 and CV_8UC4 types
 735         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
 736                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 737
 738         //! returns the non-separable linear filter engine
 739         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
 740                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 741
 742         //! smooths the image using the normalized box filter
 743         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 744         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
 745         CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
 746                                   Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 747
 748         //! returns 2D morphological filter
 749         //! only MORPH_ERODE and MORPH_DILATE are supported
 750         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 751         // kernel must have CV_8UC1 type, a single row, and cols == ksize.width * ksize.height
 752         CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
 753                 Point anchor = Point(-1, -1));
 754
 755         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
 756         CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
 757                 const Point &anchor = Point(-1, -1), int iterations = 1);
 758
 759         //! a synonym for normalized box filter
 760         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 761         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 762         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 763                                 int borderType = BORDER_CONSTANT)
 764         {
                 // Thin wrapper: forwards to boxFilter with ddepth fixed to -1.
                 // Note that the default border here is BORDER_CONSTANT, whereas the boxFilter
                 // declaration itself defaults to BORDER_DEFAULT.
 765             boxFilter(src, dst, -1, ksize, anchor, borderType);
 766         }
 767
 768         //! applies non-separable 2D linear filter to the image
 769         //  Note, at the moment this function only works when anchor point is in the kernel center
 770         //  and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result
 771         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
 772                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 773
 774         //! applies separable 2D linear filter to the image
 775         CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
 776                                     Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 777
 778         //! applies generalized Sobel operator to the image
 779         // dst.type must be equal to src.type
 780         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 781         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 782         CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 783
 784         //! applies the vertical or horizontal Scharr operator to the image
 785         // dst.type must be equal to src.type
 786         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 787         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 788         CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 789
 790         //! smooths the image using Gaussian filter.
 791         // dst.type must be equal to src.type
 792         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 793         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
 794         CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 795
 796         //! erodes the image (applies the local minimum operator)
 797         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 798         CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 799
 800                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 801
 802
 803         //! dilates the image (applies the local maximum operator)
 804         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 805         CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 806
 807                                 int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 808
 809
 810         //! applies an advanced morphological operation to the image
 811         CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 812
 813                                       int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 814
 815
 816         ////////////////////////////// Image processing //////////////////////////////
 817         //! Does mean shift filtering on GPU.
 818         CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
 819                                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 820
 821         //! Does mean shift procedure on GPU.
 822         CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
 823                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 824
 825         //! Does mean shift segmentation with elimination of small regions.
 826         CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
 827                                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 828
 829         //! applies fixed threshold to the image.
 830         // supports CV_8UC1 and CV_32FC1 data type
 831         // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
 832         CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
 833
 834         //! resizes the image
 835         // Supports INTER_NEAREST, INTER_LINEAR
 836         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 837         CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 838
 839         //! Applies a generic geometrical transformation to an image.
 840
 841         // Supports INTER_NEAREST, INTER_LINEAR.
 842
 843         // Map1 supports CV_16SC2, CV_32FC2  types.
 844
 845         // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
 846
 847         CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
 848
 849         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 850         // supports CV_8UC1, CV_8UC4, CV_32SC1 types
 851         CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
 852
 853         //! Smoothes image using median filter
 854         // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
 855         CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 856
 857         //! warps the image using affine transformation
 858         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 859         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 860         CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 861
 862         //! warps the image using perspective transformation
 863         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 864         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 865         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 866
 867         //! computes the integral image and integral for the squared image
 868         // sum will have CV_32S type, sqsum - CV_32F type
 869         // supports only CV_8UC1 source type
 870         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
 871         CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
 872         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 873         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 874             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 875         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 876         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 877             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 878
 879         /////////////////////////////////// ML ///////////////////////////////////////////
 880
 881         //! Computes the closest center for each line in source and labels it with that center's index
 882         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
 883         CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers);
 884
 885         //!Does k-means procedure on GPU
 886         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
 887         CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
 888                                      TermCriteria criteria, int attemps, int flags, oclMat &centers);
 889
 890
 891         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 892         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 893         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 894
 895         class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier
 896         {
 897         public:
                 // Empty constructor and destructor: this class declares no data members of its own.
 898             OclCascadeClassifier() {};
 899             ~OclCascadeClassifier() {};
 900
                 // Detection entry point taking an oclMat image and a caller-supplied CvMemStorage; returns a CvSeq*.
                 // minSize and maxSize both default to cvSize(0, 0).
                 // NOTE(review): presumably (0, 0) means "no size limit" - confirm against the implementation.
 901             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
 902                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
 903         };
 904
 905         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
 906         {
 907         public:
 908             OclCascadeClassifierBuf() :
 909                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 910
 911             ~OclCascadeClassifierBuf() { release(); }
 912
 913             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 914                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 915                                   Size minSize = Size(), Size maxSize = Size());
 916             void release();
 917
 918         private:
 919             void Init(const int rows, const int cols, double scaleFactor, int flags,
 920                       const int outputsz, const size_t localThreads[],
 921                       CvSize minSize, CvSize maxSize);
 922             void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
 923             void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
 924                                          const double scaleFactor, const size_t localThreads[],
 925                                          CvSize minSize, CvSize maxSize);
 926             void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
 927
 928             int m_rows;
 929             int m_cols;
 930             int m_flags;
 931             int m_loopcount;
 932             int m_nodenum;
 933             bool findBiggestObject;
 934             bool initialized;
 935             double m_scaleFactor;
 936             Size m_minSize;
 937             Size m_maxSize;
 938             vector<CvSize> sizev;
 939             vector<float> scalev;
 940             oclMat gimg1, gsum, gsqsum;
 941             void * buffers;
 942         };
 943
 944
 945         /////////////////////////////// Pyramid /////////////////////////////////////
 946         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
 947
 948         //! upsamples the source image and then smoothes it
 949         CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
 950
 951         //! performs linear blending of two images
 952         //! to avoid accuracy errors the sum of weights shouldn't be very close to zero
 953         // supports only CV_8UC1 source type
 954         CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
 955
 956         //! computes vertical sum, supports only CV_32FC1 images
 957         CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 958
 959         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
 960         struct CV_EXPORTS MatchTemplateBuf
 961         {
                 // Buffer bundle accepted by the matchTemplate overload that takes a MatchTemplateBuf&.
                 // NOTE(review): presumably lets intermediate matrices be reused across calls - confirm.
 962             Size user_block_size;
                 // NOTE(review): names suggest float versions of the image and template - confirm.
 963             oclMat imagef, templf;
 964             std::vector<oclMat> images;
 965             std::vector<oclMat> image_sums;
 966             std::vector<oclMat> image_sqsums;
 967         };
 968
 969         //! computes the proximity map for the raster template and the image where the template is searched for
 970         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 971         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 972         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
 973
 974         //! computes the proximity map for the raster template and the image where the template is searched for
 975         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 976         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 977         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 978
 979         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
 980         struct CV_EXPORTS CannyBuf;
 981         //! compute edges of the input image using Canny operator
 982         // Support CV_8UC1 only
 983         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 984         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 985         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 986         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 987
 988         struct CV_EXPORTS CannyBuf
 989         {
                 // Buffer object taken by the Canny overloads that accept a CannyBuf&.
                 // The default constructor only nulls `counter`; no buffers are created.
 990             CannyBuf() : counter(NULL) {}
                 // The destructor calls release().
                 // NOTE(review): no copy constructor / assignment is declared although the struct holds
                 // a raw `void *counter`; if release() frees it, copies would be unsafe - confirm.
 991             ~CannyBuf()
 992             {
 993                 release();
 994             }
                 // Nulls `counter`, then calls create(image_size, apperture_size).
 995             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
 996             {
 997                 create(image_size, apperture_size);
 998             }
                 // Constructs from two existing matrices; defined outside this header.
 999             CannyBuf(const oclMat &dx_, const oclMat &dy_);
1000
                 // create() and release() are defined outside this header.
1001             void create(const Size &image_size, int apperture_size = 3);
1002             void release();
                 // NOTE(review): member roles below are inferred from names only (derivatives, edge
                 // and tracking buffers) - confirm in the implementation.
1003             oclMat dx, dy;
1004             oclMat dx_buf, dy_buf;
1005             oclMat edgeBuf;
1006             oclMat trackBuf1, trackBuf2;
                 // Opaque pointer, NULL after any of the inline constructors' initializer lists run.
1007             void *counter;
1008             Ptr<FilterEngine_GPU> filterDX, filterDY;
1009         };
1010
1011         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
1012         //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
1013         //! Param dft_size is the size of DFT transform.
1014         //!
1015         //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
1016         // support src type of CV_32FC1, CV_32FC2
1017         // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
1018         // dft_size is the size of original input, which is used for transformation from complex to real.
1019         // dft_size must be powers of 2, 3 and 5
1020         // real to complex dft requires at least v1.8 clAmdFft
1021         // real to complex dft output is not the same with cpu version
1022         // real to complex and complex to real does not support DFT_ROWS
1023         CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
1024
1025         //! implements generalized matrix product algorithm GEMM from BLAS
1026         // The functionality requires clAmdBlas library
1027         // only support type CV_32FC1
1028         // flag GEMM_3_T is not supported
1029         CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
1030                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
1031
1032         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
1033         struct CV_EXPORTS HOGDescriptor
1034         {
                 // Sentinel default for win_sigma. NOTE(review): presumably getWinSigma() derives a real value when it is -1 - confirm.
1035             enum { DEFAULT_WIN_SIGMA = -1 };
                 // Default value of the nlevels constructor argument.
1036             enum { DEFAULT_NLEVELS = 64 };
                 // Layout selectors for getDescriptors(); DESCR_FORMAT_COL_BY_COL is its default.
1037             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
                 // All parameters are defaulted: 64x128 window, 16x16 blocks, 8x8 block stride,
                 // 8x8 cells, 9 bins, L2-Hys threshold 0.2, gamma correction enabled.
1038             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
1039                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
1040                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
1041                           double threshold_L2hys = 0.2, bool gamma_correction = true,
1042                           int nlevels = DEFAULT_NLEVELS);
1043
1044             size_t getDescriptorSize() const;
1045             size_t getBlockHistogramSize() const;
                 // Installs the classifier coefficients, given as a host-side vector of floats.
1046             void setSVMDetector(const vector<float> &detector);
                 // Static factories returning detector coefficient vectors.
1047             static vector<float> getDefaultPeopleDetector();
1048             static vector<float> getPeopleDetector48x96();
1049             static vector<float> getPeopleDetector64x128();
                 // Single-scale detection; results are returned as points in found_locations.
1050             void detect(const oclMat &img, vector<Point> &found_locations,
1051                         double hit_threshold = 0, Size win_stride = Size(),
1052                         Size padding = Size());
                 // Multi-scale detection; results are returned as rectangles in found_locations.
1053             void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
1054                                   double hit_threshold = 0, Size win_stride = Size(),
1055                                   Size padding = Size(), double scale0 = 1.05,
1056                                   int group_threshold = 2);
                 // Writes descriptors for img into `descriptors` using the requested layout.
1057             void getDescriptors(const oclMat &img, Size win_stride,
1058                                 oclMat &descriptors,
1059                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
                 // Public configuration fields, named after the constructor parameters.
1060             Size win_size;
1061             Size block_size;
1062             Size block_stride;
1063             Size cell_size;
1064
1065             int nbins;
1066             double win_sigma;
1067             double threshold_L2hys;
1068             bool gamma_correction;
1069             int nlevels;
1070
1071         protected:
1072             // initialize buffers; only needs to be done once in case of multiscale detection
1073             void init_buffer(const oclMat &img, Size win_stride);
1074             void computeBlockHistograms(const oclMat &img);
1075             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1076             double getWinSigma() const;
1077             bool checkDetectorSize() const;
1078
                 // Two overloads: scalar (int) and per-dimension (Size).
1079             static int numPartsWithin(int size, int part_size, int stride);
1080             static Size numPartsWithin(Size size, Size part_size, Size stride);
1081
1082             // Coefficients of the separating plane
1083             float free_coef;
1084             oclMat detector;
1085             // Results of the last classification step
1086             oclMat labels;
1087             Mat labels_host;
1088             // Results of the last histogram evaluation step
1089             oclMat block_hists;
1090             // Gradients computation results
1091             oclMat grad, qangle;
1092             // scaled image
1093             oclMat image_scale;
1094             // effective size of input image (might be different from original size after scaling)
1095             Size effect_size;
1096         };
1097
1098
1099         ////////////////////////feature2d_ocl/////////////////
1100         /****************************************************************************************\
1101         *                                      Distance                                          *
1102         \****************************************************************************************/
1103         template<typename T>
1104         struct CV_EXPORTS Accumulator
1105         {
                 // Primary template: the accumulator type is T itself.
1106             typedef T Type;
1107         };
             // Specializations: the 8- and 16-bit integer types below use float as their accumulator type.
1108         template<> struct Accumulator<unsigned char>
1109         {
1110             typedef float Type;
1111         };
1112         template<> struct Accumulator<unsigned short>
1113         {
1114             typedef float Type;
1115         };
1116         template<> struct Accumulator<char>
1117         {
1118             typedef float Type;
1119         };
1120         template<> struct Accumulator<short>
1121         {
1122             typedef float Type;
1123         };
1124
1125         /*
1126          * Manhattan distance (city block distance) functor
1127          */
1128         template<class T>
1129         struct CV_EXPORTS L1
1130         {
                 // Compile-time tag identifying this functor's norm.
1131             enum { normType = NORM_L1 };
1132             typedef T ValueType;
                 // Result type is chosen by Accumulator<T> (float for 8/16-bit integer T, otherwise T).
1133             typedef typename Accumulator<T>::Type ResultType;
1134
                 // Delegates to normL1 over the first `size` elements of a and b.
1135             ResultType operator()( const T *a, const T *b, int size ) const
1136             {
1137                 return normL1<ValueType, ResultType>(a, b, size);
1138             }
1139         };
1140
1141         /*
1142          * Euclidean distance functor
1143          */
1144         template<class T>
1145         struct CV_EXPORTS L2
1146         {
                 // Compile-time tag identifying this functor's norm.
1147             enum { normType = NORM_L2 };
1148             typedef T ValueType;
                 // Result type is chosen by Accumulator<T> (float for 8/16-bit integer T, otherwise T).
1149             typedef typename Accumulator<T>::Type ResultType;
1150
                 // Takes the square root, in double precision, of normL2Sqr over the first `size`
                 // elements of a and b, then casts the result back to ResultType.
1151             ResultType operator()( const T *a, const T *b, int size ) const
1152             {
1153                 return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1154             }
1155         };
1156
1157         /*
1158          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1159          * bit count of A exclusive XOR'ed with B
1160          */
1161         struct CV_EXPORTS Hamming
1162         {
                 // Compile-time tag identifying this functor's norm.
1163             enum { normType = NORM_HAMMING };
                 // Operates on raw bytes and yields an integer result.
1164             typedef unsigned char ValueType;
1165             typedef int ResultType;
1166
1167             /** this will count the bits in a ^ b
1168              */
                 // Delegates to normHamming over `size` elements of a and b.
1169             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1170             {
1171                 return normHamming(a, b, size);
1172             }
1173         };
1174
1175         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1176
             // Shared base of the brute-force descriptor matchers declared in this header.
             // The distance metric is a DistType value passed to the constructor. Every matching
             // mode (match / knnMatch / radiusMatch) is offered at three levels:
             //   * a "...Single" / "...Collection" method that writes its results into oclMat outputs,
             //   * static "...Download" / "...Convert" helpers that turn such outputs into DMatch vectors,
             //   * a convenience overload that fills DMatch vectors directly.
1177         class CV_EXPORTS BruteForceMatcher_OCL_base
1178         {
1179         public:
                 // Distance metrics a matcher can be created with.
1180             enum DistType {L1Dist = 0, L2Dist, HammingDist};
                 // Create a matcher for the given metric (L2Dist when the argument is omitted).
1181             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1182             // Add descriptors to the train descriptor collection
1183             void add(const std::vector<oclMat> &descCollection);
1184             // Get the train descriptor collection
1185             const std::vector<oclMat> &getTrainDescriptors() const;
1186             // Clear the train descriptor collection
1187             void clear();
1188             // Return true if there are no train descriptors in the collection
1189             bool empty() const;
1190
1191             // Return true if the matcher supports masks in the match methods
1192             bool isMaskSupported() const;
1193
1194             // Find the single best match in train for each query descriptor.
                 // The result is written to trainIdx and distance (see matchDownload / matchConvert).
1195             void matchSingle(const oclMat &query, const oclMat &train,
1196                              oclMat &trainIdx, oclMat &distance,
1197                              const oclMat &mask = oclMat());
1198
1199             // Download trainIdx and distance and convert them to a CPU vector of DMatch
1200             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1201             // Convert trainIdx and distance (already in host Mat form) to a vector of DMatch
1202             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1203
1204             // Find the single best match in train for each query descriptor and return it as DMatch objects
1205             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1206
1207             // Build the collections of train descriptors and masks in the format expected by matchCollection
1208             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1209
1210
1211             // Find the single best match from the train collection for each query descriptor
1212             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1213                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1214                                  const oclMat &masks = oclMat());
1215
1216             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
1217             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1218             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1219             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1220
1221             // Find the single best match from the train collection for each query descriptor.
1222             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1223
1224             // Find the k best matches for each query descriptor (in increasing order of distances)
1225             void knnMatchSingle(const oclMat &query, const oclMat &train,
1226                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1227                                 const oclMat &mask = oclMat());
1228
1229             // Download trainIdx and distance and convert them to a vector of DMatch vectors.
1230             // compactResult is only relevant when mask is not empty. If compactResult is false, the matches
1231             // vector has as many entries as there are query descriptor rows. If compactResult is true, the
1232             // matches vector omits the entries for fully masked-out query descriptors.
1233             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1234                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1235
1236             // Convert trainIdx and distance to a vector of DMatch vectors
1237             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1238                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1239
1240             // Find the k best matches for each query descriptor (in increasing order of distances).
1241             // compactResult is only relevant when mask is not empty. If compactResult is false, the matches
1242             // vector has as many entries as there are query descriptor rows. If compactResult is true, the
1243             // matches vector omits the entries for fully masked-out query descriptors.
1244             void knnMatch(const oclMat &query, const oclMat &train,
1245                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1246                           bool compactResult = false);
1247
1248             // Find the k best matches from the train collection for each query descriptor (in increasing order of distances).
                 // NOTE(review): unlike knnMatchSingle there is no k parameter; the "2" in the name suggests k is fixed at 2 - confirm.
1249             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1250                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1251                                      const oclMat &maskCollection = oclMat());
1252
1253             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch vectors.
1254             // compactResult is only relevant when mask is not empty. If compactResult is false, the matches
1255             // vector has as many entries as there are query descriptor rows. If compactResult is true, the
1256             // matches vector omits the entries for fully masked-out query descriptors.
1257             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1258                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1259
1260             // Convert trainIdx, imgIdx and distance to a vector of DMatch vectors
1261             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1262                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1263
1264             // Find the k best matches for each query descriptor (in increasing order of distances).
1265             // compactResult is only relevant when mask is not empty. If compactResult is false, the matches
1266             // vector has as many entries as there are query descriptor rows. If compactResult is true, the
1267             // matches vector omits the entries for fully masked-out query descriptors.
1268             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1269                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1270
1271             // Find the matches for each query descriptor whose distance is less than maxDistance.
1272             // nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
1273             // Careful: nMatches can be greater than trainIdx.cols - that means the matcher did not find all
1274             // matches, because it did not have enough memory.
1275             // If trainIdx is empty, trainIdx and distance are created with size nQuery x max((nTrain / 100), 10);
1276             // otherwise the user can pass pre-allocated trainIdx and distance of size nQuery x nMaxMatches.
1277             // The matches are not sorted.
1278             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1279                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1280                                    const oclMat &mask = oclMat());
1281
1282             // Download trainIdx, nMatches and distance and convert them to a vector of DMatch vectors.
1283             // The matches will be sorted in increasing order of distances.
1284             // compactResult is only relevant when mask is not empty. If compactResult is false, the matches
1285             // vector has as many entries as there are query descriptor rows. If compactResult is true, the
1286             // matches vector omits the entries for fully masked-out query descriptors.
1287             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1288                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1289             // Convert trainIdx, nMatches and distance to a vector of DMatch vectors.
1290             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1291                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1292             // Find the matches for each query descriptor whose distance is less than maxDistance
1293             // (in increasing order of distances).
1294             void radiusMatch(const oclMat &query, const oclMat &train,
1295                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1296                              const oclMat &mask = oclMat(), bool compactResult = false);
1297             // Find the matches for each query descriptor whose distance is less than maxDistance.
1298             // If trainIdx is empty, trainIdx and distance are created with size nQuery x max((nQuery / 100), 10);
1299             // otherwise the user can pass pre-allocated trainIdx and distance of size nQuery x nMaxMatches.
1300             // The matches are not sorted.
                 // NOTE(review): radiusMatchSingle documents the default width as max((nTrain / 100), 10); confirm which applies here.
1301             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1302                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1303             // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch vectors.
1304             // The matches will be sorted in increasing order of distances.
1305             // compactResult is only relevant when mask is not empty. If compactResult is false, the matches
1306             // vector has as many entries as there are query descriptor rows. If compactResult is true, the
1307             // matches vector omits the entries for fully masked-out query descriptors.
1308             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1309                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1310             // Convert trainIdx, imgIdx, nMatches and distance to a vector of DMatch vectors.
1311             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1312                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1313             // Find the matches from the train collection for each query descriptor whose distance is less than
1314             // maxDistance (in increasing order of distances).
1315             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1316                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
                 // Distance metric of this matcher (public data member).
1317             DistType distType;
1318         private:
                 // Train descriptors; presumably what add(), clear(), empty() and getTrainDescriptors() operate on.
1319             std::vector<oclMat> trainDescCollection;
1320         };
1321
             // Distance-typed front end for BruteForceMatcher_OCL_base. The primary template is only
             // declared here; the specializations that follow each forward one fixed DistType to the base.
1322         template <class Distance>
1323         class CV_EXPORTS BruteForceMatcher_OCL;
1324
             // Specialization for L1<T>: always constructs the base with L1Dist.
1325         template <typename T>
1326         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1327         {
1328         public:
1329             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
                 // The distance object is accepted but not used (its parameter is unnamed).
1330             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1331         };
1332
             // Specialization for L2<T>: always constructs the base with L2Dist.
1333         template <typename T>
1334         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1335         {
1336         public:
1337             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
                 // The distance object is accepted but not used (its parameter is unnamed).
1338             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1339         };
1340
             // Specialization for Hamming: always constructs the base with HammingDist.
1341         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1342         {
1343         public:
1344             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
                 // The distance object is accepted but not used (its parameter is unnamed).
1345             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1346         };
1347
1348         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1349         {
1350         public:
1351             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1352         };
1353
             // Corner detector functor. Its tuning parameters are plain public fields that the
             // constructor fills in; the private oclMat members are internal buffers that
             // releaseMemory() frees.
1354         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1355         {
1356         public:
                 // Stores the given parameters in the public fields of the same name.
1357             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1358                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1359
1360             //! runs the detector on image; corners is returned as a 1-row matrix of type CV_32FC2
1361             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1362             //! downloads points of type Point2f to a vector; the previous content of the vector is erased
1363             void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
1364
                 // Detection parameters (defaults come from the constructor declaration above).
                 // NOTE(review): meanings presumably follow cv::goodFeaturesToTrack - confirm against the implementation.
1365             int maxCorners;
1366             double qualityLevel;
1367             double minDistance;
1368
1369             int blockSize;
1370             bool useHarrisDetector;
1371             double harrisK;
                 // Releases every internal buffer held by this object.
1372             void releaseMemory()
1373             {
1374                 Dx_.release();
1375                 Dy_.release();
1376                 eig_.release();
1377                 minMaxbuf_.release();
1378                 tmpCorners_.release();
1379             }
1380         private:
                 // Internal buffers; how each one is used is defined in the implementation file, not in this header.
1381             oclMat Dx_;
1382             oclMat Dy_;
1383             oclMat eig_;
1384             oclMat minMaxbuf_;
1385             oclMat tmpCorners_;
1386         };
1387
1388         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1389             int blockSize_, bool useHarrisDetector_, double harrisK_)
1390         {
1391             maxCorners = maxCorners_;
1392             qualityLevel = qualityLevel_;
1393             minDistance = minDistance_;
1394             blockSize = blockSize_;
1395             useHarrisDetector = useHarrisDetector_;
1396             harrisK = harrisK_;
1397         }
1398
1399         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1400         class CV_EXPORTS PyrLKOpticalFlow
1401         {
1402         public:
1403             PyrLKOpticalFlow()
1404             {
1405                 winSize = Size(21, 21);
1406                 maxLevel = 3;
1407                 iters = 30;
1408                 derivLambda = 0.5;
1409                 useInitialFlow = false;
1410                 minEigThreshold = 1e-4f;
1411                 getMinEigenVals = false;
1412                 isDeviceArch11_ = false;
1413             }
1414
1415             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1416                         oclMat &status, oclMat *err = 0);
1417             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1418             Size winSize;
1419             int maxLevel;
1420             int iters;
1421             double derivLambda;
1422             bool useInitialFlow;
1423             float minEigThreshold;
1424             bool getMinEigenVals;
1425             void releaseMemory()
1426             {
1427                 dx_calcBuf_.release();
1428                 dy_calcBuf_.release();
1429
1430                 prevPyr_.clear();
1431                 nextPyr_.clear();
1432
1433                 dx_buf_.release();
1434                 dy_buf_.release();
1435             }
1436         private:
1437             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1438             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
1439
1440             oclMat dx_calcBuf_;
1441             oclMat dy_calcBuf_;
1442
1443             vector<oclMat> prevPyr_;
1444             vector<oclMat> nextPyr_;
1445
1446             oclMat dx_buf_;
1447             oclMat dy_buf_;
1448             oclMat uPyr_[2];
1449             oclMat vPyr_[2];
1450             bool isDeviceArch11_;
1451         };
1452
             // Dense optical flow functor (Farneback's method, going by the class name).
             // Parameters are public fields; the constructor is defined elsewhere, so their
             // default values are not visible in this header.
1453         class CV_EXPORTS FarnebackOpticalFlow
1454         {
1455         public:
1456             FarnebackOpticalFlow();
1457
                 // Algorithm parameters.
                 // NOTE(review): presumably they mirror cv::calcOpticalFlowFarneback - confirm against the implementation.
1458             int numLevels;
1459             double pyrScale;
1460             bool fastPyramids;
1461             int winSize;
1462             int numIters;
1463             int polyN;
1464             double polySigma;
1465             int flags;
1466
                 // Computes the flow between frame0 and frame1 and writes it to flowx / flowy.
1467             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
1468
                 // Presumably frees the private buffers declared below (defined elsewhere - confirm).
1469             void releaseMemory();
1470
1471         private:
                 // Internal helpers; their behaviour is defined in the implementation file.
1472             void prepareGaussian(
1473                 int n, double sigma, float *g, float *xg, float *xxg,
1474                 double &ig11, double &ig03, double &ig33, double &ig55);
1475
1476             void setPolynomialExpansionConsts(int n, double sigma);
1477
1478             void updateFlow_boxFilter(
1479                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1480                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1481
1482             void updateFlow_gaussianBlur(
1483                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1484                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1485
                 // Internal work buffers.
1486             oclMat frames_[2];
1487             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1488             std::vector<oclMat> pyramid0_, pyramid1_;
1489         };
1490
1491         //////////////// build warping maps ////////////////////
             // Each function below writes a pair of coordinate maps into its two oclMat output arguments.
             // NOTE(review): the maps are presumably meant to be fed to a remap-style function - confirm.
1492         //! builds plane warping maps (map_x, map_y) for the given source size and destination ROI
1493         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1494         //! builds cylindrical warping maps (map_x, map_y) for the given source size and destination ROI
1495         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1496         //! builds spherical warping maps (map_x, map_y) for the given source size and destination ROI
1497         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1498         //! builds affine warping maps (xmap, ymap) of size dsize from the transformation matrix M
1499         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1500
1501         //! builds perspective warping maps (xmap, ymap) of size dsize from the transformation matrix M
1502         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1503
1504         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1505         //! Interpolates frames (images) using the provided optical flow (displacement field).
1506         //! frame0   - frame 0 (32-bit floating point image, single channel)
1507         //! frame1   - frame 1 (the same type and size)
1508         //! fu       - forward horizontal displacement
1509         //! fv       - forward vertical displacement
1510         //! bu       - backward horizontal displacement
1511         //! bv       - backward vertical displacement
1512         //! pos      - new frame position
1513         //! newFrame - new frame (output)
1514         //! buf      - temporary buffer; it will have width x 6*height size and CV_32FC1 type, and holds 6 matrices:
1515         //!            occlusion mask             0, occlusion mask             1,
1516         //!            interpolated forward flow  0, interpolated forward flow  1,
1517         //!            interpolated backward flow 0, interpolated backward flow 1
1518         //!
1519         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1520                                           const oclMat &fu, const oclMat &fv,
1521                                           const oclMat &bu, const oclMat &bv,
1522                                           float pos, oclMat &newFrame, oclMat &buf);
1523
1524         //! computes the moments of a rasterized shape or of a vector of points and returns them by value
             //! NOTE(review): binaryImage presumably makes every non-zero pixel count as 1, as in cv::moments - confirm
1525         CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
1526
             // Stereo correspondence functor configured by a preset, a number of disparities and a
             // SAD window size (block matching, going by the class name).
1527         class CV_EXPORTS StereoBM_OCL
1528         {
1529         public:
                 // Values accepted for preset.
1530             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1531
                 // Defaults used by the full constructor for ndisparities and winSize.
1532             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1533
1534             //! the default constructor
1535             StereoBM_OCL();
1536             //! the full constructor taking the camera-specific preset, the number of disparities and the SAD window size. ndisparities must be a multiple of 8.
1537             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1538
1539             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
1540             //! The output disparity has CV_8U type.
1541             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1542
1543             //! A heuristic that tries to estimate
1544             // whether the current GPU will be faster than the CPU for this algorithm.
1545             // It queries the currently active device.
1546             static bool checkIfGpuCallReasonable();
1547
                 // Parameters; see the full constructor above.
1548             int preset;
1549             int ndisp;
1550             int winSize;
1551
1552             // If avergeTexThreshold == 0, post-processing is disabled.
1553             // If avergeTexThreshold != 0, the disparity is set to 0 at each point (x,y) where, for the left image,
1554             // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
1555             // i.e. where the input left image is low textured.
1556             float avergeTexThreshold;
1557         private:
                 // Internal work buffers.
1558             oclMat minSSD, leBuf, riBuf;
1559         };
1560
             // Stereo correspondence functor (belief propagation, going by the class name).
             // All parameters are public fields; both constructors are defined elsewhere.
1561         class CV_EXPORTS StereoBeliefPropagation
1562         {
1563         public:
                 // Defaults used by the first constructor.
1564             enum { DEFAULT_NDISP  = 64 };
1565             enum { DEFAULT_ITERS  = 5  };
1566             enum { DEFAULT_LEVELS = 5  };
                 // Writes suggested ndisp / iters / levels values for an image of the given size.
1567             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
                 // Constructor taking only the basic parameters; msg_type defaults to CV_16S here.
1568             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1569                                              int iters  = DEFAULT_ITERS,
1570                                              int levels = DEFAULT_LEVELS,
1571                                              int msg_type = CV_16S);
                 // Constructor that additionally takes the cost-term parameters; msg_type defaults to CV_32F here.
1572             StereoBeliefPropagation(int ndisp, int iters, int levels,
1573                                     float max_data_term, float data_weight,
1574                                     float max_disc_term, float disc_single_jump,
1575                                     int msg_type = CV_32F);
                 // Computes disparity from a left/right image pair.
1576             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 // Computes disparity from a single data matrix.
                 // NOTE(review): presumably a user-supplied data cost - confirm against the implementation.
1577             void operator()(const oclMat &data, oclMat &disparity);
                 // Parameters, named after the constructor arguments.
1578             int ndisp;
1579             int iters;
1580             int levels;
1581             float max_data_term;
1582             float data_weight;
1583             float max_disc_term;
1584             float disc_single_jump;
1585             int msg_type;
1586         private:
                 // Internal work buffers.
1587             oclMat u, d, l, r, u2, d2, l2, r2;
1588             std::vector<oclMat> datas;
1589             oclMat out;
1590         };
1591
             // Stereo correspondence functor (constant-space belief propagation, going by the class name).
             // All parameters are public fields; both constructors are defined elsewhere.
1592         class CV_EXPORTS StereoConstantSpaceBP
1593         {
1594         public:
                 // Defaults used by the first constructor.
1595             enum { DEFAULT_NDISP    = 128 };
1596             enum { DEFAULT_ITERS    = 8   };
1597             enum { DEFAULT_LEVELS   = 4   };
1598             enum { DEFAULT_NR_PLANE = 4   };
                 // Writes suggested ndisp / iters / levels / nr_plane values for an image of the given size.
1599             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
                 // Constructor taking only the basic parameters.
1600             explicit StereoConstantSpaceBP(
1601                 int ndisp    = DEFAULT_NDISP,
1602                 int iters    = DEFAULT_ITERS,
1603                 int levels   = DEFAULT_LEVELS,
1604                 int nr_plane = DEFAULT_NR_PLANE,
1605                 int msg_type = CV_32F);
                 // Constructor that additionally takes the cost-term parameters and min_disp_th.
1606             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1607                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1608                 int min_disp_th = 0,
1609                 int msg_type = CV_32F);
                 // Computes disparity from a left/right image pair.
1610             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 // Parameters, named after the constructor arguments.
1611             int ndisp;
1612             int iters;
1613             int levels;
1614             int nr_plane;
1615             float max_data_term;
1616             float data_weight;
1617             float max_disc_term;
1618             float disc_single_jump;
1619             int min_disp_th;
1620             int msg_type;
                 // Not a constructor argument; its initial value is set in the implementation file.
1621             bool use_local_init_data_cost;
1622         private:
                 // Internal work buffers.
1623             oclMat u[2], d[2], l[2], r[2];
1624             oclMat disp_selected_pyr[2];
1625             oclMat data_cost;
1626             oclMat data_cost_selected;
1627             oclMat temp;
1628             oclMat out;
1629         };
1630
1631         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1632         //
1633         // see reference:
1634         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1635         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
             //
             // The parameters are public fields; the constructor is defined elsewhere, so their default
             // values are not visible in this header.
1636         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1637         {
1638         public:
1639             OpticalFlowDual_TVL1_OCL();
1640
                 // Computes the flow between I0 and I1 and writes it to flowx / flowy.
1641             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1642
                 // Presumably frees the private buffers declared below (defined elsewhere - confirm).
1643             void collectGarbage();
1644
1645             /**
1646             * Time step of the numerical scheme.
1647             */
1648             double tau;
1649
1650             /**
1651             * Weight parameter for the data term, attachment parameter.
1652             * This is the most relevant parameter, which determines the smoothness of the output.
1653             * The smaller this parameter is, the smoother the solutions we obtain.
1654             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1655             */
1656             double lambda;
1657
1658             /**
1659             * Weight parameter for (u - v)^2, tightness parameter.
1660             * It serves as a link between the attachment and the regularization terms.
1661             * In theory, it should have a small value in order to maintain both parts in correspondence.
1662             * The method is stable for a large range of values of this parameter.
1663             */
1664             double theta;
1665
1666             /**
1667             * Number of scales used to create the pyramid of images.
1668             */
1669             int nscales;
1670
1671             /**
1672             * Number of warpings per scale.
1673             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1674             * This is a parameter that assures the stability of the method.
1675             * It also affects the running time, so it is a compromise between speed and accuracy.
1676             */
1677             int warps;
1678
1679             /**
1680             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1681             * A small value will yield more accurate solutions at the expense of a slower convergence.
1682             */
1683             double epsilon;
1684
1685             /**
1686             * Stopping criterion iterations number used in the numerical scheme.
1687             */
1688             int iterations;
1689
                 // NOTE(review): presumably makes operator() start from the flow already stored in flowx / flowy - confirm.
1690             bool useInitialFlow;
1691
1692         private:
                 // Internal helper that processes a single scale; defined in the implementation file.
1693             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1694
                 // Per-scale storage (one entry per scale, judging by the names and nscales - confirm).
1695             std::vector<oclMat> I0s;
1696             std::vector<oclMat> I1s;
1697             std::vector<oclMat> u1s;
1698             std::vector<oclMat> u2s;
1699
                 // Internal work buffers.
1700             oclMat I1x_buf;
1701             oclMat I1y_buf;
1702
1703             oclMat I1w_buf;
1704             oclMat I1wx_buf;
1705             oclMat I1wy_buf;
1706
1707             oclMat grad_buf;
1708             oclMat rho_c_buf;
1709
1710             oclMat p11_buf;
1711             oclMat p12_buf;
1712             oclMat p21_buf;
1713             oclMat p22_buf;
1714
1715             oclMat diff_buf;
1716             oclMat norm_buf;
1717         };
1718         // currently supported sorting methods (values for the method argument of sortByKey)
1719         enum
1720         {
1721             SORT_BITONIC,   // only supports power-of-2 buffer sizes
1722             SORT_SELECTION, // cannot sort duplicate keys
1723             SORT_MERGE,
1724             SORT_RADIX      // only supports signed int/float keys (CV_32S/CV_32F)
1725         };
1726         //! Sorts all the elements of the input according to their keys; nothing is returned.
1727         //
1728         //  The element unit in the values to be sorted is determined from the data type,
1729         //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
1730         //  matrix dimension.
1731         //  Both keys and values are sorted in place.
1732         //  keys needs to be a single-channel oclMat.
             //  method is one of the SORT_* constants; isGreaterThan defaults to false.
1733         //
1734         //  Example:
1735         //  input -
1736         //    keys   = {2,    3,   1}   (CV_8UC1)
1737         //    values = {10,5, 4,3, 6,2} (CV_8UC2)
1738         //  sortByKey(keys, values, SORT_SELECTION, false);
1739         //  output -
1740         //    keys   = {1,    2,   3}   (CV_8UC1)
1741         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
1742         void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
1743         /*!Base class for MOG and MOG2!*/
             // Abstract interface: getBackgroundImage() is pure virtual, operator() is virtual with a
             // base-class implementation defined elsewhere.
1744         class CV_EXPORTS BackgroundSubtractor
1745         {
1746         public:
1747             //! the virtual destructor
1748             virtual ~BackgroundSubtractor();
1749             //! the update operator that takes the next video frame and returns the current foreground mask as an 8-bit binary image.
1750             virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
1751
1752             //! computes a background image
1753             virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
1754         };
1755                 /*!
1756         Gaussian Mixture-based Background/Foreground Segmentation Algorithm
1757
1758         The class implements the following algorithm:
1759         "An improved adaptive background mixture model for real-time tracking with shadow detection"
1760         P. KadewTraKuPong and R. Bowden,
1761         Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001."
1762         http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
1763         */
1764         class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
1765         {
1766         public:
1767             //! the default constructor; nmixtures defaults to -1
                 //! NOTE(review): -1 presumably selects a built-in default number of mixtures - confirm
1768             MOG(int nmixtures = -1);
1769
1770             //! re-initialization method
1771             void initialize(Size frameSize, int frameType);
1772
1773             //! the update operator; learningRate defaults to 0.f
1774             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
1775
1776             //! computes a background image which is the mean of all background gaussians
1777             void getBackgroundImage(oclMat& backgroundImage) const;
1778
1779             //! releases all inner buffers
1780             void release();
1781
                 // Public algorithm parameters; their initial values are set in the implementation file.
1782             int history;
1783             float varThreshold;
1784             float backgroundRatio;
1785             float noiseSigma;
1786
1787         private:
1788             int nmixtures_;
1789
1790             Size frameSize_;
1791             int frameType_;
1792             int nframes_;
1793
                 // Internal model buffers.
1794             oclMat weight_;
1795             oclMat sortKey_;
1796             oclMat mean_;
1797             oclMat var_;
1798         };
1799
1800         /*!
1801         The class implements the following algorithm:
1802         "Improved adaptive Gaussian mixture model for background subtraction"
1803         Z.Zivkovic
1804         International Conference Pattern Recognition, UK, August, 2004.
1805         http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
1806         */
1807         class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
1808         {
1809         public:
1810             //! the default constructor
1811             MOG2(int nmixtures = -1);
1812
1813             //! re-initiaization method
1814             void initialize(Size frameSize, int frameType);
1815
1816             //! the update operator
1817             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
1818
1819             //! computes a background image which are the mean of all background gaussians
1820             void getBackgroundImage(oclMat& backgroundImage) const;
1821
1822             //! releases all inner buffers
1823             void release();
1824
1825             // parameters
1826             // you should call initialize after parameters changes
1827
1828             int history;
1829
1830             //! here it is the maximum allowed number of mixture components.
1831             //! Actual number is determined dynamically per pixel
1832             float varThreshold;
1833             // threshold on the squared Mahalanobis distance to decide if it is well described
1834             // by the background model or not. Related to Cthr from the paper.
1835             // This does not influence the update of the background. A typical value could be 4 sigma
1836             // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
1837
1838             /////////////////////////
1839             // less important parameters - things you might change but be carefull
1840             ////////////////////////
1841
1842             float backgroundRatio;
1843             // corresponds to fTB=1-cf from the paper
1844             // TB - threshold when the component becomes significant enough to be included into
1845             // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
1846             // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
1847             // it is considered foreground
1848             // float noiseSigma;
1849             float varThresholdGen;
1850
1851             //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
1852             //when a sample is close to the existing components. If it is not close
1853             //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
1854             //Smaller Tg leads to more generated components and higher Tg might make
1855             //lead to small number of components but they can grow too large
1856             float fVarInit;
1857             float fVarMin;
1858             float fVarMax;
1859
1860             //initial variance  for the newly generated components.
1861             //It will will influence the speed of adaptation. A good guess should be made.
1862             //A simple way is to estimate the typical standard deviation from the images.
1863             //I used here 10 as a reasonable value
1864             // min and max can be used to further control the variance
1865             float fCT; //CT - complexity reduction prior
1866             //this is related to the number of samples needed to accept that a component
1867             //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
1868             //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
1869
1870             //shadow detection parameters
1871             bool bShadowDetection; //default 1 - do shadow detection
1872             unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
1873             float fTau;
1874             // Tau - shadow threshold. The shadow is detected if the pixel is darker
1875             //version of the background. Tau is a threshold on how much darker the shadow can be.
1876             //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
1877             //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
1878
1879         private:
1880             int nmixtures_;
1881
1882             Size frameSize_;
1883             int frameType_;
1884             int nframes_;
1885
1886             oclMat weight_;
1887             oclMat variance_;
1888             oclMat mean_;
1889
1890             oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
1891         };
1892
1893         /*!***************Kalman Filter*************!*/
1894         class CV_EXPORTS KalmanFilter
1895         {
1896         public:
1897             KalmanFilter(); //!< the default constructor
1898             //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
1899             KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1900             //! re-initializes Kalman filter. The previous content is destroyed.
1901             void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1902             //! predict() takes an optional control input (default: empty oclMat); predict() and correct() both return a const oclMat reference
1903             const oclMat& predict(const oclMat& control=oclMat());
1904             const oclMat& correct(const oclMat& measurement);
1905
1906             oclMat statePre;           //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
1907             oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
1908             oclMat transitionMatrix;   //!< state transition matrix (A)
1909             oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
1910             oclMat measurementMatrix;  //!< measurement matrix (H)
1911             oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
1912             oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
1913             oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
1914             oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
1915             oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
1916         private: // NOTE(review): temp1..temp5 are presumably scratch matrices for predict()/correct() - confirm in the .cpp
1917             oclMat temp1;
1918             oclMat temp2;
1919             oclMat temp3;
1920             oclMat temp4;
1921             oclMat temp5;
1922         };
1923
1924         /*!***************K Nearest Neighbour*************!*/
1925         class CV_EXPORTS KNearestNeighbour: public CvKNearest
1926         {
1927         public:
1928             KNearestNeighbour();
1929             ~KNearestNeighbour();
1930             //! train(): labels and sampleIdx are non-const references; sampleIdx defaults to the Mat& returned by setTo() on a temporary Mat
1931             bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
1932                 bool isRegression = false, int max_k = 32, bool updateBase = false);
1933
1934             void clear();
1935             //! find_nearest(): takes samples as an oclMat and the neighbour count k; the labels matrix is passed by non-const reference
1936             void find_nearest(const oclMat& samples, int k, oclMat& lables);
1937
1938         private:
1939             oclMat samples_ocl; // NOTE(review): presumably the device-side copy of the training samples - confirm in the .cpp
1940         };
1941         /*!***************  SVM  *************!*/
1942         class CV_EXPORTS CvSVM_OCL : public CvSVM
1943         {
1944         public:
1945             CvSVM_OCL(); //!< the default constructor
1946             //! constructor taking trainData and responses, optional varIdx/sampleIdx (default: empty Mat) and params (default: CvSVMParams())
1947             CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses,
1948                       const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
1949                       CvSVMParams params=CvSVMParams());
1950             CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const;
1951             CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const;
1952             CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
1953             float predict( const CvMat* samples, CV_OUT CvMat* results ) const;
1954             // protected helpers: a predict overload that additionally takes row_len, plus kernel/solver creation hooks
1955         protected:
1956             float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const;
1957             void create_kernel();
1958             void create_solver();
1959         };
1960         /*!***************  END  *************!*/
1961     }
1962 }
1963 #if defined _MSC_VER && _MSC_VER >= 1200
1964 #  pragma warning( push)
1965 #  pragma warning( disable: 4267)
1966 #endif
1967 #include "opencv2/ocl/matrix_operations.hpp"
1968 #if defined _MSC_VER && _MSC_VER >= 1200
1969 #  pragma warning( pop)
1970 #endif
1971
1972 #endif /* __OPENCV_OCL_HPP__ */