modules/ocl/include/opencv2/ocl/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core/core.hpp"
  51 #include "opencv2/imgproc/imgproc.hpp"
  52 #include "opencv2/objdetect/objdetect.hpp"
  53 #include "opencv2/features2d/features2d.hpp"
  54 #include "opencv2/ml/ml.hpp"
  55
  56 namespace cv
  57 {
  58     namespace ocl
  59     {
  60         enum DeviceType // bit-flag selectors for the kind of OpenCL device; one bit per type
  61         {
  62             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  63             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  64             CVCL_DEVICE_TYPE_GPU         = (1 << 2), // default deviceType argument of getOpenCLDevices()
  65             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  66             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)  -- disabled, so bit 4 has no named enumerator
  67             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF // all 32 bits set; NOTE(review): values appear to mirror OpenCL's CL_DEVICE_TYPE_* - confirm
  68         };
  69
  70         enum DevMemRW // read/write type of device memory (see getDevMemType / setDevMemType)
  71         {
  72             DEVICE_MEM_R_W = 0, // default rw_type argument of setDevMemType()
  73             DEVICE_MEM_R_ONLY,  // presumably read-only access - confirm against the implementation
  74             DEVICE_MEM_W_ONLY   // presumably write-only access - confirm against the implementation
  75         };
  76
  77         enum DevMemType // kind of device memory (see getDevMemType / setDevMemType)
  78         {
  79             DEVICE_MEM_DEFAULT = 0, // default mem_type argument of setDevMemType()
  80             DEVICE_MEM_AHP,         // alloc host pointer
  81             DEVICE_MEM_UHP,         // use host pointer
  82             DEVICE_MEM_CHP,         // copy host pointer
  83             DEVICE_MEM_PM           // persistent memory
  84         };
  85
  86         //! Gets the global device memory type and read/write type (written to mem_type and rw_type).
  87         // Returns 1 if a unified memory system is supported, otherwise returns 0.
  88         CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
  89
  90         //! Sets the global device memory type and read/write type;
  91         // every oclMat created afterwards will use this type.
  92         // Returns -1 if the target type is unsupported, otherwise returns 0.
  93         CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
  94
  95         // These structures contain OpenCL runtime information (devices and platforms).
  96
  97         struct PlatformInfo; // forward declaration: DeviceInfo holds a pointer to its PlatformInfo
  98
  99         struct DeviceInfo
 100         {
 101             int _id; // reserved, don't use it
 102
 103             DeviceType deviceType;
 104             std::string deviceProfile;
 105             std::string deviceVersion;
 106             std::string deviceName;
 107             std::string deviceVendor;
 108             int deviceVendorId;
 109             std::string deviceDriverVersion;
 110             std::string deviceExtensions;
 111
 112             size_t maxWorkGroupSize;
 113             std::vector<size_t> maxWorkItemSizes; // presumably one entry per work-item dimension - confirm
 114             int maxComputeUnits;
 115             size_t localMemorySize; // unit is not stated here; presumably bytes - confirm
 116
 117             int deviceVersionMajor; // NOTE(review): presumably parsed from deviceVersion - confirm
 118             int deviceVersionMinor;
 119
 120             bool haveDoubleSupport;
 121             bool isUnifiedMemory; // true means integrated GPU, otherwise this value is false
 122
 123             std::string compilationExtraOptions;
 124
 125             const PlatformInfo* platform; // platform associated with this device
 126
 127             DeviceInfo();
 128         };
 129
 130         struct PlatformInfo // description of one OpenCL platform and the devices listed for it
 131         {
 132             int _id; // reserved, don't use it
 133
 134             std::string platformProfile;
 135             std::string platformVersion;
 136             std::string platformName;
 137             std::string platformVendor;
 138             std::string platformExtensons; // (sic) misspelling of "Extensions"; the name is public API, so it is kept
 139
 140             int platformVersionMajor; // NOTE(review): presumably parsed from platformVersion - confirm
 141             int platformVersionMinor;
 142
 143             std::vector<const DeviceInfo*> devices; // pointers to the DeviceInfo entries of this platform
 144
 145             PlatformInfo();
 146         };
 147
 148         //////////////////////////////// Initialization & Info ////////////////////////
 149         typedef std::vector<const PlatformInfo*> PlatformsInfo; // list of pointers to platform descriptions
 150
 151         CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms); // fills 'platforms'; the meaning of the int result is not visible here - confirm
 152
 153         typedef std::vector<const DeviceInfo*> DevicesInfo; // list of pointers to device descriptions
 154
 155         CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, // deviceType takes DeviceType flags
 156                 const PlatformInfo* platform = NULL); // NULL presumably means "do not restrict to one platform" - confirm
 157
 158         //! sets the device you want to use
 159         CV_EXPORTS void setDevice(const DeviceInfo* info);
 160
 161         //////////////////////////////// Error handling ////////////////////////
 162         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); // reports error_string with its source location (file, line, func)
 163
 164         enum FEATURE_TYPE // capabilities that can be queried through supportsFeature()
 165         {
 166             FEATURE_CL_DOUBLE = 1,  // numbering starts at 1, not 0
 167             FEATURE_CL_UNIFIED_MEM,
 168             FEATURE_CL_VER_1_2
 169         };
 170
 171         // Interface that represents the OpenCL context.
 172         class CV_EXPORTS Context
 173         {
 174         protected:
 175             Context() { }  // protected: outside code cannot construct a Context and obtains one through getContext()
 176             ~Context() { } // protected and non-virtual: outside code cannot delete through a Context pointer
 177         public:
 178             static Context* getContext();
 179
 180             bool supportsFeature(FEATURE_TYPE featureType) const;
 181             const DeviceInfo& getDeviceInfo() const;
 182
 183             const void* getOpenCLContextPtr() const;      // untyped; presumably points to a cl_context - confirm
 184             const void* getOpenCLCommandQueuePtr() const; // untyped; presumably points to a cl_command_queue - confirm
 185             const void* getOpenCLDeviceIDPtr() const;     // untyped; presumably points to a cl_device_id - confirm
 186         };
 187
 188         inline const void *getClContextPtr() // shortcut for getOpenCLContextPtr() on the Context from Context::getContext()
 189         {
 190             return (*Context::getContext()).getOpenCLContextPtr();
 191         }
 192
 193         inline const void *getClCommandQueuePtr() // shortcut for getOpenCLCommandQueuePtr() on the Context from Context::getContext()
 194         {
 195             return (*Context::getContext()).getOpenCLCommandQueuePtr();
 196         }
 197
 198         bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType); // free-function form of Context::supportsFeature(); presumably queries the current context - confirm
 199
 200         void CV_EXPORTS finish(); // NOTE(review): presumably waits for queued OpenCL work to complete - confirm
 201
 202         //! Enable or disable OpenCL program binary caching onto local disk
 203         // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
 204         // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
 205         // binary file, which will be reused when the OpenCV executable is started again.
 206         //
 207         // Caching mode is controlled by the following enums
 208         // Notes
 209         //   1. the feature is by default enabled when OpenCV is built in release mode.
 210         //   2. the CACHE_DEBUG / CACHE_RELEASE flags only effectively work with MSVC compiler;
 211         //      for GNU compilers, the function always treats the build as release mode (enabled by default).
 212         enum
 213         {
 214             CACHE_NONE    = 0,        // do not cache OpenCL binary
 215             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC)
 216             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC)
 217             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary
 218         };
 219         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 220
 221         //! sets the path where the binary cache is saved
 222         CV_EXPORTS void setBinaryPath(const char *path);
 223
 224         class CV_EXPORTS oclMatExpr; // forward declaration, needed by oclMat::operator=(const oclMatExpr&)
 225         //////////////////////////////// oclMat ////////////////////////////////
 226         class CV_EXPORTS oclMat
 227         {
 228         public:
 229             //! default constructor
 230             oclMat();
 231             //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 232             oclMat(int rows, int cols, int type);
 233             oclMat(Size size, int type);
 234             //! constructs oclMatrix and fills it with the specified value s.
 235             oclMat(int rows, int cols, int type, const Scalar &s);
 236             oclMat(Size size, int type, const Scalar &s);
 237             //! copy constructor
 238             oclMat(const oclMat &m);
 239
 240             //! constructor for oclMatrix headers pointing to user-allocated data
 241             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 242             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 243
 244             //! creates a matrix header for a part of the bigger matrix
 245             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 246             oclMat(const oclMat &m, const Rect &roi);
 247
 248             //! builds oclMat from Mat. Performs a blocking upload to the device.
 249             explicit oclMat (const Mat &m);
 250
 251             //! destructor - calls release()
 252             ~oclMat();
 253
 254             //! assignment operators
 255             oclMat &operator = (const oclMat &m);
 256             //! assignment operator. Performs a blocking upload to the device.
 257             oclMat &operator = (const Mat &m);
 258             oclMat &operator = (const oclMatExpr& expr);
 259
 260             //! performs a blocking upload of data to the oclMat.
 261             void upload(const cv::Mat &m);
 262
 263
 264             //! downloads data from device to host memory. Blocking calls.
 265             operator Mat() const;
 266             void download(cv::Mat &m) const;
 267
 268             //! convert to _InputArray
 269             operator _InputArray();
 270
 271             //! convert to _OutputArray
 272             operator _OutputArray();
 273
 274             //! returns a new oclMatrix header for the specified row
 275             oclMat row(int y) const;
 276             //! returns a new oclMatrix header for the specified column
 277             oclMat col(int x) const;
 278             //! ... for the specified row span
 279             oclMat rowRange(int startrow, int endrow) const;
 280             oclMat rowRange(const Range &r) const;
 281             //! ... for the specified column span
 282             oclMat colRange(int startcol, int endcol) const;
 283             oclMat colRange(const Range &r) const;
 284
 285             //! returns deep copy of the oclMatrix, i.e. the data is copied
 286             oclMat clone() const;
 287
 288             //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
 289             // It calls m.create(this->size(), this->type()).
 290             // It supports any data type
 291             void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
 292
 293             //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
 294             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 295             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 296
 297             void assignTo( oclMat &m, int type = -1 ) const;
 298
 299             //! sets every oclMatrix element to s
 300             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 301             oclMat& operator = (const Scalar &s);
 302             //! sets some of the oclMatrix elements to s, according to the mask
 303             //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
 304             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 305             //! creates alternative oclMatrix header for the same data, with different
 306             // number of channels and/or different number of rows. see cvReshape.
 307             oclMat reshape(int cn, int rows = 0) const;
 308
 309             //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
 310             // previous data is unreferenced if needed.
 311             void create(int rows, int cols, int type);
 312             void create(Size size, int type);
 313
 314             //! allocates new oclMatrix with specified device memory type.
 315             void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
 316             void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 317
 318             //! decreases reference counter;
 319             // deallocate the data when reference counter reaches 0.
 320             void release();
 321
 322             //! swaps with other smart pointer
 323             void swap(oclMat &mat);
 324
 325             //! locates oclMatrix header within a parent oclMatrix. See below
 326             void locateROI( Size &wholeSize, Point &ofs ) const;
 327             //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
 328             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 329             //! extracts a rectangular sub-oclMatrix
 330             // (this is a generalized form of row, rowRange etc.)
 331             oclMat operator()( Range rowRange, Range colRange ) const;
 332             oclMat operator()( const Rect &roi ) const;
 333
 334             oclMat& operator+=( const oclMat& m );
 335             oclMat& operator-=( const oclMat& m );
 336             oclMat& operator*=( const oclMat& m );
 337             oclMat& operator/=( const oclMat& m );
 338
 339             //! returns true if the oclMatrix data is continuous
 340             // (i.e. when there are no gaps between successive rows).
 341             // similar to CV_IS_MAT_CONT(cvMat->type)
 342             bool isContinuous() const;
 343             //! returns element size in bytes,
 344             // similar to CV_ELEM_SIZE(cvMat->type)
 345             size_t elemSize() const;
 346             //! returns the size of element channel in bytes.
 347             size_t elemSize1() const;
 348             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 349             int type() const;
 350             //! returns the element type as stored by ocl, e.g. 8UC3 returns 8UC4 because in ocl
 351             //! a 3-channel element actually uses 4-channel space
 352             int ocltype() const;
 353             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 354             int depth() const;
 355             //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
 356             int channels() const;
 357             //! returns the number of channels as stored by ocl: 4 for a 3-channel element,
 358             //! because a 3-channel element actually uses 4-channel space
 359             int oclchannels() const;
 360             //! returns step/elemSize1()
 361             size_t step1() const;
 362             //! returns oclMatrix size:
 363             // width == number of columns, height == number of rows
 364             Size size() const;
 365             //! returns true if oclMatrix data is NULL
 366             bool empty() const;
 367
 368             //! returns pointer to y-th row
 369             uchar* ptr(int y = 0);
 370             const uchar *ptr(int y = 0) const;
 371
 372             //! template version of the above method
 373             template<typename _Tp> _Tp *ptr(int y = 0);
 374             template<typename _Tp> const _Tp *ptr(int y = 0) const;
 375
 376             //! matrix transposition
 377             oclMat t() const;
 378
 379             /*! includes several bit-fields:
 380               - the magic signature
 381               - continuity flag
 382               - depth
 383               - number of channels
 384               */
 385             int flags;
 386             //! the number of rows and columns
 387             int rows, cols;
 388             //! a distance between successive rows in bytes; includes the gap if any
 389             size_t step;
 390             //! pointer to the data (OCL memory object)
 391             uchar *data;
 392
 393             //! pointer to the reference counter;
 394             // when oclMatrix points to user-allocated data, the pointer is NULL
 395             int *refcount;
 396
 397             //! helper fields used in locateROI and adjustROI
 398             // datastart and dataend are not used in the current version
 399             uchar *datastart;
 400             uchar *dataend;
 401
 402             //! OpenCL context associated with the oclMat object.
 403             Context *clCxt; // TODO clCtx
 404             //! offset used to handle the ROI, measured in bytes
 405             int offset;
 406             //! rows and columns of the whole matrix; they replace datastart and dataend, which are no longer used
 407             int wholerows;
 408             int wholecols;
 409         };
 410
 411         //! converts InputArray/OutputArray to oclMat references
 412         CV_EXPORTS oclMat& getOclMatRef(InputArray src);  // NOTE(review): presumably src must wrap an oclMat - confirm
 413         CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 414
 415         ///////////////////// mat split and merge /////////////////////////////////
 416         //! Compose a multi-channel array from several single-channel arrays
 417         // Support all types
 418         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
 419         CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
 420
 421         //! Divides multi-channel array into several single-channel arrays
 422         // Support all types
 423         CV_EXPORTS void split(const oclMat &src, oclMat *dst);
 424         CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
 425
 426         ////////////////////////////// Arithmetics ///////////////////////////////////
 427
 428         //! computes the weighted sum of two matrices (dst = src1 * alpha + src2 * beta + gama)
 429         // supports all data types
 430         CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
 431
 432         //! adds one matrix to another (dst = src1 + src2)
 433         // supports all data types
 434         CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 435         //! adds scalar to a matrix (dst = src1 + s)
 436         // supports all data types
 437         CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 438
 439         //! subtracts one matrix from another (dst = src1 - src2)
 440         // supports all data types
 441         CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 442         //! subtracts scalar from a matrix (dst = src1 - s)
 443         // supports all data types
 444         CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 445
 446         //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
 447         // supports all data types
 448         CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 449         //! multiplies a matrix by a number (dst = scalar * src)
 450         // supports all data types
 451         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 452
 453         //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
 454         // supports all data types
 455         CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 456         //! divides a number by each element of the array (dst = scale / src1)
 457         // supports all data types
 458         CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
 459
 460         //! compares elements of two arrays (dst = src1 <cmpop> src2)
 461         // supports all data types
 462         CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);
 463
 464         //! transposes the matrix
 465         // supports all data types
 466         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 467
 468         //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
 469         // supports all data types
 470         CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
 471         //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
 472         // supports all data types
 473         CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
 474
 475         //! computes mean value and standard deviation of all array elements (this overload takes no mask)
 476         // supports all data types
 477         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 478
 479         //! computes norm of array
 480         // supports NORM_INF, NORM_L1, NORM_L2
 481         // supports all data types
 482         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 483
 484         //! computes norm of the difference between two arrays
 485         // supports NORM_INF, NORM_L1, NORM_L2
 486         // supports all data types
 487         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 488
 489         //! reverses the order of the rows, columns or both in a matrix
 490         // supports all types
 491         CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
 492
 493         //! computes sum of array elements
 494         // supports all types
 495         CV_EXPORTS Scalar sum(const oclMat &m);
 496         CV_EXPORTS Scalar absSum(const oclMat &m); // presumably the sum of absolute values - confirm
 497         CV_EXPORTS Scalar sqrSum(const oclMat &m); // presumably the sum of squared values - confirm
 498
 499         //! finds global minimum and maximum array elements and returns their values
 500         // supports all single-channel (C1) types
 501         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
 502
 503         //! finds global minimum and maximum array elements and returns their values with locations
 504         // supports all single-channel (C1) types
 505         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 506                                   const oclMat &mask = oclMat());
 507
 508         //! counts non-zero array elements
 509         // supports all types
 510         CV_EXPORTS int countNonZero(const oclMat &src);
 511
 512         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
 513         // the destination array will have the same depth as lut and the same number of channels as the source
 514         // It supports 8UC1 and 8UC4 only
 515         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 516
 517         //! only 8UC1 and 256 bins are supported now
 518         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 519         //! only 8UC1 and 256 bins are supported now
 520         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 521
 522         //! only 8UC1 is supported now
 523         CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 524
 525         //! bilateralFilter
 526         // supports 8UC1 8UC4
 527         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
 528
 529         //! Applies an adaptive bilateral filter to the input image
 530         //  This is not truly a bilateral filter. Instead of using user-provided fixed parameters,
 531         //  the function calculates a constant at each window based on the local standard deviation,
 532         //  and uses this constant to do the filtering.
 533         //  supports 8UC1, 8UC3
 534         CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
 535
 536         //! computes exponent of each matrix element (dst = e**src)
 537         // supports only CV_32FC1, CV_64FC1 type
 538         CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
 539
 540         //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
 541         // supports only CV_32FC1, CV_64FC1 type
 542         CV_EXPORTS void log(const oclMat &src, oclMat &dst);
 543
 544         //! computes magnitude of each (x(i), y(i)) vector
 545         // supports only CV_32F, CV_64F type
 546         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 547
 548         //! computes angle (angle(i)) of each (x(i), y(i)) vector
 549         // supports only CV_32F, CV_64F type
 550         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 551
 552         //! raises every element of the input array x to the power p and stores the result in y
 553         // supports only CV_32F, CV_64F type
 554         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 555
 556         //! converts Cartesian coordinates to polar
 557         // supports only CV_32F CV_64F type
 558         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 559
 560         //! converts polar coordinates to Cartesian
 561         // supports only CV_32F CV_64F type
 562         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 563
 564         //! performs per-element bit-wise inversion
 565         // supports all types
 566         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 567
 568         //! calculates per-element bit-wise disjunction of two arrays (or of an array and a scalar)
 569         // supports all types
 570         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 571         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 572
 573         //! calculates per-element bit-wise conjunction of two arrays (or of an array and a scalar)
 574         // supports all types
 575         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 576         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 577
 578         //! calculates per-element bit-wise "exclusive or" of two arrays (or of an array and a scalar)
 579         // supports all types
 580         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 581         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 582
 583         //! Logical operators (presumably the operator forms of bitwise_not/or/and/xor - confirm)
 584         CV_EXPORTS oclMat operator ~ (const oclMat &);
 585         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 586         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 587         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 588
 589
 590         //! Mathematics operators; each returns an oclMatExpr, which oclMat::operator= accepts
 591         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 592         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 593         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 594         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 595
 596         //! computes convolution of two images
 597         // supports only CV_32FC1 type
 598         CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
 599
 600         CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0); // presumably mirrors cv::cvtColor (code = conversion code, dcn = destination channels) - confirm
 601
 602         //! initializes a scaled identity matrix
 603         CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
 604
 605         //////////////////////////////// Filter Engine ////////////////////////////////
 606
 607         /*!
 608           The Base Class for 1D or Row-wise Filters
 609
 610           This is the base class for linear or non-linear filters that process 1D data.
 611           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 612           */
 613         class CV_EXPORTS BaseRowFilter_GPU
 614         {
 615         public:
 616             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} // only stores the three parameters
 617             virtual ~BaseRowFilter_GPU() {} // virtual, so derived filters can be destroyed through a base pointer
 618             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // the filtering operation itself; implemented by derived classes
 619             int ksize, anchor, bordertype; // values passed to the constructor
 620         };
 621
 622         /*!
 623           The Base Class for Column-wise Filters
 624
 625           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 626           Such filters are used for the "vertical" filtering parts in separable filters.
 627           */
 628         class CV_EXPORTS BaseColumnFilter_GPU
 629         {
 630         public:
 631             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} // only stores the three parameters
 632             virtual ~BaseColumnFilter_GPU() {} // virtual, so derived filters can be destroyed through a base pointer
 633             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // the filtering operation itself; implemented by derived classes
 634             int ksize, anchor, bordertype; // values passed to the constructor
 635         };
 636
 637         /*!
 638           The Base Class for Non-Separable 2D Filters.
 639
 640           This is the base class for linear or non-linear 2D filters.
 641           */
 642         class CV_EXPORTS BaseFilter_GPU
 643         {
 644         public:
 645             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_) // only stores the three parameters
 646                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
 647             virtual ~BaseFilter_GPU() {} // virtual, so derived filters can be destroyed through a base pointer
 648             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // the filtering operation itself; implemented by derived classes
 649             Size ksize;     // 2D kernel size passed to the constructor
 650             Point anchor;   // 2D anchor passed to the constructor
 651             int borderType; // NOTE: spelled borderType here, but bordertype in BaseRowFilter_GPU / BaseColumnFilter_GPU
 652         };
 653
 654         /*!
 655           The Base Class for Filter Engine.
 656
 657           The class can be used to apply an arbitrary filtering operation to an image.
 658           It contains all the necessary intermediate buffers.
               The class is an interface: apply() is pure virtual, and instances are obtained
               through the create*Filter_GPU factory functions that return Ptr<FilterEngine_GPU>.
 659           */
 660         class CV_EXPORTS FilterEngine_GPU
 661         {
 662         public:
 663             virtual ~FilterEngine_GPU() {}
 664
                 // Runs the engine on src and stores the result in dst.
                 // NOTE(review): the default roi Rect(0, 0, -1, -1) presumably selects the whole image -- confirm
                 // in the concrete engines.
 665             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
 666         };
 667
 668         //! returns the non-separable filter engine with the specified filter
             // NOTE(review): filter2D is taken as a const Ptr by value, whereas createSeparableFilter_GPU
             // takes its Ptr arguments by const reference; the definition must match before this can change.
 669         CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
 670
 671         //! returns the primitive row filter with the specified kernel
             // NOTE(review): anchor = -1 presumably means "kernel center" -- confirm in the implementation.
 672         CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
 673                 int anchor = -1, int bordertype = BORDER_DEFAULT);
 674
 675         //! returns the primitive column filter with the specified kernel
             // Unlike the row filter factory, this one also accepts a delta argument (default 0.0).
 676         CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
 677                 int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 678
 679         //! returns the separable linear filter engine
             // Takes the row and column kernels as host-side Mat objects.
 680         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
 681                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 682
 683         //! returns the separable filter engine with the specified filters
 684         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
 685                 const Ptr<BaseColumnFilter_GPU> &columnFilter);
 686
 687         //! returns the Gaussian filter engine
             // sigma2 defaults to 0. NOTE(review): presumably 0 means "use sigma1", as in cv::GaussianBlur -- confirm.
 688         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 689
 690         //! returns filter engine for the generalized Sobel operator
             // NOTE(review): dx and dy are presumably the derivative orders in x and y -- confirm.
 691         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
 692
 693         //! applies Laplacian operator to the image
 694         // supports only ksize = 1 and ksize = 3; supported data types: 8UC1, 8UC4, 32FC1, 32FC4
 695         CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
 696
 697         //! returns 2D box filter
 698         // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
 699         CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
 700                 const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 701
 702         //! returns box filter engine
             // Same parameter list as getBoxFilter_GPU, but returns a FilterEngine_GPU instead of a BaseFilter_GPU.
 703         CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
 704                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 705
 706         //! returns 2D filter with the specified kernel
 707         // supports CV_8UC1 and CV_8UC4 types
             // The kernel size is passed separately from the kernel matrix (ksize).
 708         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
 709                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 710
 711         //! returns the non-separable linear filter engine
 712         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
 713                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 714
 715         //! smooths the image using the normalized box filter
 716         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 717         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101, BORDER_WRAP
 718         CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
 719                                   Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 720
 721         //! returns 2D morphological filter
 722         //! only MORPH_ERODE and MORPH_DILATE are supported
 723         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 724         // kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
 725         CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
 726                 Point anchor = Point(-1, -1));
 727
 728         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
             // iterations defaults to 1; no separate ksize argument is taken here.
 729         CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
 730                 const Point &anchor = Point(-1, -1), int iterations = 1);
 731
 732         //! a synonym for normalized box filter
 733         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 734         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // Forwards to boxFilter with ddepth = -1. Note that the default border here is BORDER_CONSTANT,
             // whereas boxFilter itself defaults to BORDER_DEFAULT.
 735         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 736                                 int borderType = BORDER_CONSTANT)
 737         {
 738             boxFilter(src, dst, -1, ksize, anchor, borderType);
 739         }
 740
 741         //! applies non-separable 2D linear filter to the image
 742         //  Note, at the moment this function only works when anchor point is in the kernel center
 743         //  and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result
 744         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
 745                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 746
 747         //! applies separable 2D linear filter to the image
             // kernelX and kernelY are host-side Mat objects; delta defaults to 0.0.
 748         CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
 749                                     Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 750
 751         //! applies generalized Sobel operator to the image
 752         // dst.type must equal src.type
 753         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 754         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 755         CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 756
 757         //! applies the vertical or horizontal Scharr operator to the image
 758         // dst.type must equal src.type
 759         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 760         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 761         CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 762
 763         //! smooths the image using Gaussian filter.
 764         // dst.type must equal src.type
 765         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 766         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
 767         CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 768
 769         //! erodes the image (applies the local minimum operator)
 770         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
             // Defaults: anchor Point(-1, -1), one iteration, BORDER_CONSTANT with morphologyDefaultBorderValue().
 771         CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 772
 773                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 774
 775
 776         //! dilates the image (applies the local maximum operator)
 777         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
             // Takes the same parameters and defaults as erode.
 778         CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 779
 780                                 int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 781
 782
 783         //! applies an advanced morphological operation to the image
             // Same parameters as erode/dilate plus op, which selects the operation.
             // NOTE(review): op is presumably one of the MORPH_* codes -- confirm which ones the implementation accepts.
 784         CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 785
 786                                       int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 787
 788
 789         ////////////////////////////// Image processing //////////////////////////////
 790         //! Does mean shift filtering on GPU.
             // The default termination criteria are MAX_ITER + EPS with 5 iterations and epsilon 1.
             // NOTE(review): sp and sr are presumably the spatial and color window radii -- confirm.
 791         CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
 792                                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 793
 794         //! Does mean shift procedure on GPU.
             // Takes two output matrices, dstr and dstsp, instead of a single dst.
 795         CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
 796                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 797
 798         //! Does mean shift segmentation with elimination of small regions.
             // Unlike the two functions above, dst is a host-side Mat rather than an oclMat.
 799         CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
 800                                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 801
 802         //! applies fixed threshold to the image.
 803         // supports CV_8UC1 and CV_32FC1 data type
 804         // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
             // The threshold type defaults to THRESH_TRUNC.
 805         CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
 806
 807         //! resizes the image
 808         // Supports INTER_NEAREST, INTER_LINEAR
 809         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 810         CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 811
 812         //! Applies a generic geometrical transformation to an image.
 813
 814         // Supports INTER_NEAREST, INTER_LINEAR.
 815
 816         // Map1 supports CV_16SC2, CV_32FC2  types.
 817
 818         // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
 819
             // interpolation and bordertype have no defaults; value defaults to Scalar().
 820         CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
 821
 822         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 823         // supports CV_8UC1, CV_8UC4, CV_32SC1 types
             // NOTE(review): the parameter spelled boardtype is presumably the border type -- confirm.
 824         CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
 825
 826         //! Smooths image using median filter
 827         // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
 828         CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 829
 830         //! warps the image using affine transformation
 831         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 832         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // The transformation matrix M is a host-side Mat.
 833         CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 834
 835         //! warps the image using perspective transformation
 836         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 837         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 838         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 839
 840         //! computes the integral image and integral for the squared image
 841         // sum will have CV_32S type, sqsum - CV_32F type
 842         // supports only CV_8UC1 source type
 843         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
 844         CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
             // Corner detectors. The *_dxdy variants take two extra matrices, Dx and Dy, by non-const reference.
             // NOTE(review): Dx/Dy presumably receive the image derivatives computed internally -- confirm.
 845         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 846         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 847             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 848         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 849         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 850             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 851
 852         /////////////////////////////////// ML ///////////////////////////////////////////
 853
 854         //! Computes the closest center for each line in source and labels it with that center's index
 855         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
             // The two outputs (dists, labels) come first in the parameter list, before the inputs.
 856         CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers);
 857
 858         //! Does k-means procedure on GPU
 859         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
             // NOTE(review): attemps (sic) is presumably the number of attempts, as in cv::kmeans -- confirm.
 860         CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
 861                                      TermCriteria criteria, int attemps, int flags, oclMat &centers);
 862
 863
 864         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 865         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 866         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 867
 868         class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier
 869         {
             // Extends cv::CascadeClassifier with a Haar detection entry point that takes an oclMat.
             // Note this class is declared with CV_EXPORTS_W, unlike the other classes here, which use CV_EXPORTS.
 870         public:
 871             OclCascadeClassifier() {};
 872             ~OclCascadeClassifier() {};
 873
                 // Returns a CvSeq*, with storage supplied by the caller through the CvMemStorage argument.
                 // minSize and maxSize both default to cvSize(0, 0).
 874             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
 875                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
 876         };
 877
 878         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
 879         {
 880         public:
 881             OclCascadeClassifierBuf() :
 882                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 883
 884             ~OclCascadeClassifierBuf() { release(); }
 885
 886             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 887                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 888                                   Size minSize = Size(), Size maxSize = Size());
 889             void release();
 890
 891         private:
 892             void Init(const int rows, const int cols, double scaleFactor, int flags,
 893                       const int outputsz, const size_t localThreads[],
 894                       CvSize minSize, CvSize maxSize);
 895             void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
 896             void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
 897                                          const double scaleFactor, const size_t localThreads[],
 898                                          CvSize minSize, CvSize maxSize);
 899             void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
 900
 901             int m_rows;
 902             int m_cols;
 903             int m_flags;
 904             int m_loopcount;
 905             int m_nodenum;
 906             bool findBiggestObject;
 907             bool initialized;
 908             double m_scaleFactor;
 909             Size m_minSize;
 910             Size m_maxSize;
 911             vector<CvSize> sizev;
 912             vector<float> scalev;
 913             oclMat gimg1, gsum, gsqsum;
 914             void * buffers;
 915         };
 916
 917
 918         /////////////////////////////// Pyramid /////////////////////////////////////
             // NOTE(review): presumably the counterpart of pyrUp (smooth, then downsample), as in cv::pyrDown -- confirm.
 919         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
 920
 921         //! upsamples the source image and then smoothes it
 922         CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
 923
 924         //! performs linear blending of two images
 925         //! to avoid accuracy errors sum of weights shouldn't be very close to zero
 926         // supports only CV_8UC1 source type
 927         CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
 928
 929         //! computes vertical sum, supports only CV_32FC1 images
 930         CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 931
 932         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
 933         struct CV_EXPORTS MatchTemplateBuf
 934         {
             // Buffer bundle accepted by the matchTemplate overload that takes a MatchTemplateBuf argument.
             // NOTE(review): the members are presumably scratch storage reused across calls -- confirm how
             // matchTemplate fills them.
 935             Size user_block_size;
 936             oclMat imagef, templf;
 937             std::vector<oclMat> images;
 938             std::vector<oclMat> image_sums;
 939             std::vector<oclMat> image_sqsums;
 940         };
 941
 942         //! computes the proximity map for the raster template and the image where the template is searched for
 943         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 944         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 945         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
 946
 947         //! computes the proximity map for the raster template and the image where the template is searched for
 948         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 949         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
             // Same as the overload above, but takes a caller-provided MatchTemplateBuf by non-const reference.
 950         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 951
 952         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
 953         struct CV_EXPORTS CannyBuf;
 954         //! compute edges of the input image using Canny operator
 955         // Support CV_8UC1 only
             // Four overloads: the input is either an image (with apperture_size, default 3) or a dx/dy pair,
             // and each form exists with and without a caller-provided CannyBuf.
             // NOTE(review): dx and dy are presumably precomputed image derivatives -- confirm.
 956         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 957         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 958         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 959         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 960
 961         struct CV_EXPORTS CannyBuf
 962         {
             // Working buffers for the Canny overloads that take a CannyBuf argument.
             // NOTE(review): counter is a raw pointer and no copy constructor/assignment is declared here; if
             // release() frees it, copying a CannyBuf would be unsafe -- confirm in the implementation.
                 // Default construction only sets counter to NULL; nothing is allocated inline.
 963             CannyBuf() : counter(NULL) {}
                 // The destructor delegates cleanup to release().
 964             ~CannyBuf()
 965             {
 966                 release();
 967             }
                 // Sets counter to NULL, then calls create() with the given image size and aperture size.
 968             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
 969             {
 970                 create(image_size, apperture_size);
 971             }
                 // Defined out of line. NOTE(review): presumably adopts existing derivative images -- confirm.
 972             CannyBuf(const oclMat &dx_, const oclMat &dy_);
 973
 974             void create(const Size &image_size, int apperture_size = 3);
 975             void release();
 976             oclMat dx, dy;
 977             oclMat dx_buf, dy_buf;
 978             oclMat edgeBuf;
 979             oclMat trackBuf1, trackBuf2;
 980             void *counter;
 981             Ptr<FilterEngine_GPU> filterDX, filterDY;
 982         };
 983
 984         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
 985         //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
 986         //! Param dft_size is the size of DFT transform.
 987         //!
 988         //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
 989         // supports src types CV_32FC1, CV_32FC2
 990         // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
 991         // dft_size is the size of original input, which is used for transformation from complex to real.
 992         // dft_size must be powers of 2, 3 and 5
 993         // real to complex dft requires at least v1.8 clAmdFft
 994         // real to complex dft output is not the same as the cpu version
 995         // real to complex and complex to real do not support DFT_ROWS
 996         CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
 997
 998         //! implements generalized matrix product algorithm GEMM from BLAS
 999         // The functionality requires clAmdBlas library
1000         // only supports type CV_32FC1
1001         // flag GEMM_3_T is not supported
1002         CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
1003                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
1004
1005         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
1006         struct CV_EXPORTS HOGDescriptor
1007         {
             // HOG descriptor and detector operating on oclMat images. The configuration is held in public
             // members; the working buffers are protected.
                 // DEFAULT_WIN_SIGMA (-1) is the default for win_sigma and DEFAULT_NLEVELS (64) for nlevels.
1008             enum { DEFAULT_WIN_SIGMA = -1 };
1009             enum { DEFAULT_NLEVELS = 64 };
1010             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
                 // Defaults: 64x128 window, 16x16 blocks, 8x8 block stride, 8x8 cells, 9 bins,
                 // L2-Hys threshold 0.2, gamma correction enabled.
1011             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
1012                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
1013                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
1014                           double threshold_L2hys = 0.2, bool gamma_correction = true,
1015                           int nlevels = DEFAULT_NLEVELS);
1016
1017             size_t getDescriptorSize() const;
1018             size_t getBlockHistogramSize() const;
1019             void setSVMDetector(const vector<float> &detector);
1020             static vector<float> getDefaultPeopleDetector();
1021             static vector<float> getPeopleDetector48x96();
1022             static vector<float> getPeopleDetector64x128();
                 // Detection on a single image; results are written to found_locations as points.
1023             void detect(const oclMat &img, vector<Point> &found_locations,
1024                         double hit_threshold = 0, Size win_stride = Size(),
1025                         Size padding = Size());
                 // Multi-scale variant; results are rectangles. scale0 defaults to 1.05, group_threshold to 2.
1026             void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
1027                                   double hit_threshold = 0, Size win_stride = Size(),
1028                                   Size padding = Size(), double scale0 = 1.05,
1029                                   int group_threshold = 2);
                 // Writes descriptors to an oclMat; the layout defaults to DESCR_FORMAT_COL_BY_COL.
1030             void getDescriptors(const oclMat &img, Size win_stride,
1031                                 oclMat &descriptors,
1032                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
1033             Size win_size;
1034             Size block_size;
1035             Size block_stride;
1036             Size cell_size;
1037
1038             int nbins;
1039             double win_sigma;
1040             double threshold_L2hys;
1041             bool gamma_correction;
1042             int nlevels;
1043
1044         protected:
1045             // initialize buffers; only need to do once in case of multiscale detection
1046             void init_buffer(const oclMat &img, Size win_stride);
1047             void computeBlockHistograms(const oclMat &img);
1048             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1049             double getWinSigma() const;
1050             bool checkDetectorSize() const;
1051
1052             static int numPartsWithin(int size, int part_size, int stride);
1053             static Size numPartsWithin(Size size, Size part_size, Size stride);
1054
1055             // Coefficients of the separating plane
1056             float free_coef;
1057             oclMat detector;
1058             // Results of the last classification step
1059             oclMat labels;
1060             Mat labels_host;
1061             // Results of the last histogram evaluation step
1062             oclMat block_hists;
1063             // Gradients computation results
1064             oclMat grad, qangle;
1065             // scaled image
1066             oclMat image_scale;
1067             // effective size of the input image (might be different from original size after scaling)
1068             Size effect_size;
1069         };
1070
1071
1072         ////////////////////////feature2d_ocl/////////////////
1073         /****************************************************************************************\
1074         *                                      Distance                                          *
1075         \****************************************************************************************/
             // Accumulator<T>::Type is the result type used by the L1 and L2 distance functors.
             // The primary template maps T to itself; the specializations below map
             // unsigned char, unsigned short, char and short to float.
1076         template<typename T>
1077         struct CV_EXPORTS Accumulator
1078         {
1079             typedef T Type;
1080         };
1081         template<> struct Accumulator<unsigned char>
1082         {
1083             typedef float Type;
1084         };
1085         template<> struct Accumulator<unsigned short>
1086         {
1087             typedef float Type;
1088         };
1089         template<> struct Accumulator<char>
1090         {
1091             typedef float Type;
1092         };
1093         template<> struct Accumulator<short>
1094         {
1095             typedef float Type;
1096         };
1097
1098         /*
1099          * Manhattan distance (city block distance) functor
              *
              * ValueType is T and ResultType is Accumulator<T>::Type; normType is NORM_L1.
1100          */
1101         template<class T>
1102         struct CV_EXPORTS L1
1103         {
1104             enum { normType = NORM_L1 };
1105             typedef T ValueType;
1106             typedef typename Accumulator<T>::Type ResultType;
1107
                 // Forwards to normL1 over the two arrays a and b of the given size.
1108             ResultType operator()( const T *a, const T *b, int size ) const
1109             {
1110                 return normL1<ValueType, ResultType>(a, b, size);
1111             }
1112         };
1113
1114         /*
1115          * Euclidean distance functor
              *
              * ValueType is T and ResultType is Accumulator<T>::Type; normType is NORM_L2.
1116          */
1117         template<class T>
1118         struct CV_EXPORTS L2
1119         {
1120             enum { normType = NORM_L2 };
1121             typedef T ValueType;
1122             typedef typename Accumulator<T>::Type ResultType;
1123
                 // Takes the square root (computed in double) of normL2Sqr and casts it back to ResultType.
1124             ResultType operator()( const T *a, const T *b, int size ) const
1125             {
1126                 return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1127             }
1128         };
1129
1130         /*
1131          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1132          * bit count of A XOR'ed with B
              *
              * Unlike L1 and L2 this is not a template: ValueType is unsigned char and ResultType is int.
1133          */
1134         struct CV_EXPORTS Hamming
1135         {
1136             enum { normType = NORM_HAMMING };
1137             typedef unsigned char ValueType;
1138             typedef int ResultType;
1139
1140             /** this will count the bits in a ^ b
                  *  by forwarding to normHamming(a, b, size)
1141              */
1142             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1143             {
1144                 return normHamming(a, b, size);
1145             }
1146         };
1147
1148         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1149
1150         class CV_EXPORTS BruteForceMatcher_OCL_base
1151         {
1152         public:
1153             enum DistType {L1Dist = 0, L2Dist, HammingDist};
1154             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1155             // Add descriptors to train descriptor collection
1156             void add(const std::vector<oclMat> &descCollection);
1157             // Get train descriptors collection
1158             const std::vector<oclMat> &getTrainDescriptors() const;
1159             // Clear train descriptors collection
1160             void clear();
1161             // Return true if there are not train descriptors in collection
1162             bool empty() const;
1163
1164             // Return true if the matcher supports mask in match methods
1165             bool isMaskSupported() const;
1166
1167             // Find one best match for each query descriptor
1168             void matchSingle(const oclMat &query, const oclMat &train,
1169                              oclMat &trainIdx, oclMat &distance,
1170                              const oclMat &mask = oclMat());
1171
1172             // Download trainIdx and distance and convert it to CPU vector with DMatch
1173             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1174             // Convert trainIdx and distance to vector with DMatch
1175             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1176
1177             // Find one best match for each query descriptor
1178             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1179
1180             // Make gpu collection of trains and masks in suitable format for matchCollection function
1181             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1182
1183
1184             // Find one best match from train collection for each query descriptor
1185             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1186                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1187                                  const oclMat &masks = oclMat());
1188
1189             // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
1190             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1191             // Convert trainIdx, imgIdx and distance to vector with DMatch
1192             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1193
1194             // Find one best match from the train collection for each query descriptor; the result is returned in matches.
1195             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1196
1197             // Find k best matches for each query descriptor (in increasing order of distances).
                 // Results are returned in the trainIdx and distance oclMat arguments.
                 // NOTE(review): the role of allDist is not documented here; presumably a work buffer - confirm.
1198             void knnMatchSingle(const oclMat &query, const oclMat &train,
1199                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1200                                 const oclMat &mask = oclMat());
1201
1202             // Download trainIdx and distance and convert them to a vector of DMatch.
1203             // compactResult is used when mask is not empty. If compactResult is false, the matches
1204             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1205             // matches vector will not contain matches for fully masked-out query descriptors.
1206             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1207                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1208
1209             // Convert trainIdx and distance (passed as Mat objects) to a vector of DMatch
1210             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1211                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1212
1213             // Find k best matches for each query descriptor (in increasing order of distances).
1214             // compactResult is used when mask is not empty. If compactResult is false, the matches
1215             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1216             // matches vector will not contain matches for fully masked-out query descriptors.
1217             void knnMatch(const oclMat &query, const oclMat &train,
1218                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1219                           bool compactResult = false);
1220
1221             // Find k best matches from the train collection for each query descriptor (in increasing order of distances).
                 // NOTE(review): there is no k parameter here; the name suggests k is fixed at 2 - confirm.
1222             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1223                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1224                                      const oclMat &maskCollection = oclMat());
1225
1226             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch.
1227             // compactResult is used when mask is not empty. If compactResult is false, the matches
1228             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1229             // matches vector will not contain matches for fully masked-out query descriptors.
1230             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1231                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1232
1233             // Convert trainIdx, imgIdx and distance (passed as Mat objects) to a vector of DMatch
1234             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1235                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1236
1237             // Find k best matches for each query descriptor (in increasing order of distances).
1238             // compactResult is used when mask is not empty. If compactResult is false, the matches
1239             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1240             // matches vector will not contain matches for fully masked-out query descriptors.
1241             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1242                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1243
1244             // Find best matches for each query descriptor which have distance less than maxDistance.
1245             // nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
1246             // Be careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
1247             // because it didn't have enough memory.
1248             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1249             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1250             // Matches are not sorted.
1251             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1252                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1253                                    const oclMat &mask = oclMat());
1254
1255             // Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
1256             // matches will be sorted in increasing order of distances.
1257             // compactResult is used when mask is not empty. If compactResult is false, the matches
1258             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1259             // matches vector will not contain matches for fully masked-out query descriptors.
1260             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1261                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1262             // Convert trainIdx, nMatches and distance (passed as Mat objects) to a vector of DMatch.
1263             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1264                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1265             // Find best matches for each query descriptor which have distance less than maxDistance
1266             // (in increasing order of distances).
1267             void radiusMatch(const oclMat &query, const oclMat &train,
1268                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1269                              const oclMat &mask = oclMat(), bool compactResult = false);
1270             // Find best matches for each query descriptor which have distance less than maxDistance.
1271             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
1272             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1273             // Matches are not sorted.
                 // NOTE(review): radiusMatchSingle documents the default size as max((nTrain / 100), 10) - confirm which is right.
1274             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1275                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1276             // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
1277             // matches will be sorted in increasing order of distances.
1278             // compactResult is used when mask is not empty. If compactResult is false, the matches
1279             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1280             // matches vector will not contain matches for fully masked-out query descriptors.
1281             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1282                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1283             // Convert trainIdx, imgIdx, nMatches and distance (passed as Mat objects) to a vector of DMatch.
1284             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1285                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1286             // Find best matches from the train collection for each query descriptor which have distance less than
1287             // maxDistance (in increasing order of distances). The result is returned in matches.
1288             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1289                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1290             DistType distType; // distance type of this matcher; presumably set from the constructor argument - confirm
1291         private:
1292             std::vector<oclMat> trainDescCollection; // NOTE(review): looks like the train descriptors used by the collection-based overloads - confirm
1293         };
1294
1295         template <class Distance>
1296         class CV_EXPORTS BruteForceMatcher_OCL; // primary template is declared only; the specializations below pick the distance type
1297
             // Specialization for L1<T>: both constructors build the base matcher with L1Dist.
             // The L1<T> argument of the second constructor is unused.
1298         template <typename T>
1299         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1300         {
1301         public:
1302             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
1303             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1304         };
1305
             // Specialization for L2<T>: both constructors build the base matcher with L2Dist.
             // The L2<T> argument of the second constructor is unused.
1306         template <typename T>
1307         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1308         {
1309         public:
1310             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
1311             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1312         };
1313
             // Full specialization for Hamming: both constructors build the base matcher with HammingDist.
             // The Hamming argument of the second constructor is unused.
1314         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1315         {
1316         public:
1317             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
1318             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1319         };
1320
1321         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1322         {
1323         public:
1324             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1325         };
1326
1327         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1328         {
                 // Corner detector functor. The detection parameters are public data members;
                 // the constructor (defined inline after the class) copies its arguments into them.
1329         public:
1330             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1331                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1332
1333             //! runs the detector on image; corners is returned as a single-row matrix of type CV_32FC2
1334             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1335             //! download points of type Point2f to a vector. the vector's content will be erased
1336             void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
1337
                 // Parameters, each initialised from the constructor argument of the same name.
1338             int maxCorners;
1339             double qualityLevel;
1340             double minDistance;
1341
1342             int blockSize;
1343             bool useHarrisDetector;
1344             double harrisK;
                 //! releases the five internal oclMat buffers declared in the private section
1345             void releaseMemory()
1346             {
1347                 Dx_.release();
1348                 Dy_.release();
1349                 eig_.release();
1350                 minMaxbuf_.release();
1351                 tmpCorners_.release();
1352             }
1353         private:
                 // Internal buffers; all of them are freed by releaseMemory().
1354             oclMat Dx_;
1355             oclMat Dy_;
1356             oclMat eig_;
1357             oclMat minMaxbuf_;
1358             oclMat tmpCorners_;
1359         };
1360
1361         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1362             int blockSize_, bool useHarrisDetector_, double harrisK_)
1363         {
1364             maxCorners = maxCorners_;
1365             qualityLevel = qualityLevel_;
1366             minDistance = minDistance_;
1367             blockSize = blockSize_;
1368             useHarrisDetector = useHarrisDetector_;
1369             harrisK = harrisK_;
1370         }
1371
1372         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1373         class CV_EXPORTS PyrLKOpticalFlow
1374         {
1375         public:
1376             PyrLKOpticalFlow()
1377             {
1378                 winSize = Size(21, 21);
1379                 maxLevel = 3;
1380                 iters = 30;
1381                 derivLambda = 0.5;
1382                 useInitialFlow = false;
1383                 minEigThreshold = 1e-4f;
1384                 getMinEigenVals = false;
1385                 isDeviceArch11_ = false;
1386             }
1387
1388             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1389                         oclMat &status, oclMat *err = 0);
1390             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1391             Size winSize;
1392             int maxLevel;
1393             int iters;
1394             double derivLambda;
1395             bool useInitialFlow;
1396             float minEigThreshold;
1397             bool getMinEigenVals;
1398             void releaseMemory()
1399             {
1400                 dx_calcBuf_.release();
1401                 dy_calcBuf_.release();
1402
1403                 prevPyr_.clear();
1404                 nextPyr_.clear();
1405
1406                 dx_buf_.release();
1407                 dy_buf_.release();
1408             }
1409         private:
1410             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1411             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
1412
1413             oclMat dx_calcBuf_;
1414             oclMat dy_calcBuf_;
1415
1416             vector<oclMat> prevPyr_;
1417             vector<oclMat> nextPyr_;
1418
1419             oclMat dx_buf_;
1420             oclMat dy_buf_;
1421             oclMat uPyr_[2];
1422             oclMat vPyr_[2];
1423             bool isDeviceArch11_;
1424         };
1425
1426         class CV_EXPORTS FarnebackOpticalFlow
1427         {
                 // Optical flow functor: operator() takes two frames and returns the flow in flowx / flowy.
                 // The parameters are public data members; the constructor is only declared here.
1428         public:
1429             FarnebackOpticalFlow();
1430
1431             int numLevels;
1432             double pyrScale;
1433             bool fastPyramids;
1434             int winSize;
1435             int numIters;
1436             int polyN;
1437             double polySigma;
1438             int flags;
1439
                 //! computes the flow between frame0 and frame1; outputs are flowx and flowy
1440             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
1441
                 //! NOTE(review): presumably frees the internal oclMat buffers declared below - confirm in the implementation
1442             void releaseMemory();
1443
1444         private:
                 // Implementation helpers (defined outside this header section).
1445             void prepareGaussian(
1446                 int n, double sigma, float *g, float *xg, float *xxg,
1447                 double &ig11, double &ig03, double &ig33, double &ig55);
1448
1449             void setPolynomialExpansionConsts(int n, double sigma);
1450
1451             void updateFlow_boxFilter(
1452                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1453                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1454
1455             void updateFlow_gaussianBlur(
1456                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1457                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1458
                 // Internal work buffers.
1459             oclMat frames_[2];
1460             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1461             std::vector<oclMat> pyramid0_, pyramid1_;
1462         };
1463
1464         //////////////// build warping maps ////////////////////
1465         //! builds plane warping maps; the maps are returned in map_x and map_y
1466         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1467         //! builds cylindrical warping maps; the maps are returned in map_x and map_y
1468         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1469         //! builds spherical warping maps; the maps are returned in map_x and map_y
1470         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1471         //! builds affine warping maps from the matrix M; the maps are returned in xmap and ymap
1472         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1473
1474         //! builds perspective warping maps from the matrix M; the maps are returned in xmap and ymap
1475         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1476
1477         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1478         //! Interpolate frames (images) using the provided optical flow (displacement field).
1479         //! frame0   - frame 0 (32-bit floating point image, single channel)
1480         //! frame1   - frame 1 (the same type and size)
1481         //! fu       - forward horizontal displacement
1482         //! fv       - forward vertical displacement
1483         //! bu       - backward horizontal displacement
1484         //! bv       - backward vertical displacement
1485         //! pos      - new frame position
1486         //! newFrame - new frame (output)
1487         //! buf      - temporary buffer, will have width x 6*height size and CV_32FC1 type, and will contain 6 oclMat:
1488         //!            occlusion masks            0, occlusion masks            1,
1489         //!            interpolated forward flow  0, interpolated forward flow  1,
1490         //!            interpolated backward flow 0, interpolated backward flow 1
1491         //!
1492         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1493                                           const oclMat &fu, const oclMat &fv,
1494                                           const oclMat &bu, const oclMat &bv,
1495                                           float pos, oclMat &newFrame, oclMat &buf);
1496
1497         //! computes moments of the rasterized shape or a vector of points and returns them by value
             //! NOTE(review): binaryImage presumably has the same meaning as in cv::moments - confirm
1498         CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
1499
1500         class CV_EXPORTS StereoBM_OCL
1501         {
1502         public:
                 // Values for the preset member / constructor argument.
1503             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1504
                 // Defaults used by the full constructor for ndisparities and winSize.
1505             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1506
1507             //! the default constructor
1508             StereoBM_OCL();
1509             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
1510             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1511
1512             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
1513             //! Output disparity has CV_8U type.
1514             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1515
1516             //! Heuristic that tries to estimate
1517             // whether the current GPU will be faster than the CPU for this algorithm.
1518             // It queries the currently active device.
1519             static bool checkIfGpuCallReasonable();
1520
1521             int preset;
1522             int ndisp;
1523             int winSize;
1524
1525             // If avergeTexThreshold == 0 => post-processing is disabled.
1526             // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where, for the left image,
1527             // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
1528             // i.e. the input left image is low textured there.
1529             float avergeTexThreshold;
1530         private:
                 // Internal work buffers.
1531             oclMat minSSD, leBuf, riBuf;
1532         };
1533
1534         class CV_EXPORTS StereoBeliefPropagation
1535         {
1536         public:
                 // Default values for the first constructor's ndisp, iters and levels arguments.
1537             enum { DEFAULT_NDISP  = 64 };
1538             enum { DEFAULT_ITERS  = 5  };
1539             enum { DEFAULT_LEVELS = 5  };
                 //! writes recommended ndisp, iters and levels for the given image width and height into the reference arguments
1540             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
                 //! constructor taking only the basic parameters; note that msg_type defaults to CV_16S here
1541             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1542                                              int iters  = DEFAULT_ITERS,
1543                                              int levels = DEFAULT_LEVELS,
1544                                              int msg_type = CV_16S);
                 //! constructor taking all parameters; note that msg_type defaults to CV_32F here
1545             StereoBeliefPropagation(int ndisp, int iters, int levels,
1546                                     float max_data_term, float data_weight,
1547                                     float max_disc_term, float disc_single_jump,
1548                                     int msg_type = CV_32F);
                 //! computes disparity from a left / right image pair
1549             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 //! computes disparity from data; NOTE(review): presumably a precomputed data cost - confirm
1550             void operator()(const oclMat &data, oclMat &disparity);
                 // Public parameters, named like the constructor arguments.
1551             int ndisp;
1552             int iters;
1553             int levels;
1554             float max_data_term;
1555             float data_weight;
1556             float max_disc_term;
1557             float disc_single_jump;
1558             int msg_type;
1559         private:
                 // Internal work buffers.
1560             oclMat u, d, l, r, u2, d2, l2, r2;
1561             std::vector<oclMat> datas;
1562             oclMat out;
1563         };
1564
1565         class CV_EXPORTS StereoConstantSpaceBP
1566         {
1567         public:
                 // Default values for the first constructor's ndisp, iters, levels and nr_plane arguments.
1568             enum { DEFAULT_NDISP    = 128 };
1569             enum { DEFAULT_ITERS    = 8   };
1570             enum { DEFAULT_LEVELS   = 4   };
1571             enum { DEFAULT_NR_PLANE = 4   };
                 //! writes recommended ndisp, iters, levels and nr_plane for the given image width and height into the reference arguments
1572             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
                 //! constructor taking only the basic parameters; msg_type defaults to CV_32F
1573             explicit StereoConstantSpaceBP(
1574                 int ndisp    = DEFAULT_NDISP,
1575                 int iters    = DEFAULT_ITERS,
1576                 int levels   = DEFAULT_LEVELS,
1577                 int nr_plane = DEFAULT_NR_PLANE,
1578                 int msg_type = CV_32F);
                 //! constructor taking all parameters; min_disp_th defaults to 0 and msg_type to CV_32F
1579             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1580                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1581                 int min_disp_th = 0,
1582                 int msg_type = CV_32F);
                 //! computes disparity from a left / right image pair
1583             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 // Public parameters, named like the constructor arguments.
1584             int ndisp;
1585             int iters;
1586             int levels;
1587             int nr_plane;
1588             float max_data_term;
1589             float data_weight;
1590             float max_disc_term;
1591             float disc_single_jump;
1592             int min_disp_th;
1593             int msg_type;
                 // NOTE(review): not a constructor argument; its initial value is set outside this header section - confirm
1594             bool use_local_init_data_cost;
1595         private:
                 // Internal work buffers.
1596             oclMat u[2], d[2], l[2], r[2];
1597             oclMat disp_selected_pyr[2];
1598             oclMat data_cost;
1599             oclMat data_cost_selected;
1600             oclMat temp;
1601             oclMat out;
1602         };
1603
1604         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1605         //
1606         // see reference:
1607         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1608         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
1609         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1610         {
1611         public:
1612             OpticalFlowDual_TVL1_OCL();
1613
                 //! computes the flow between I0 and I1; outputs are flowx and flowy
1614             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1615
                 //! NOTE(review): presumably releases the internal buffers declared in the private section - confirm
1616             void collectGarbage();
1617
1618             /**
1619             * Time step of the numerical scheme.
1620             */
1621             double tau;
1622
1623             /**
1624             * Weight parameter for the data term, attachment parameter.
1625             * This is the most relevant parameter, which determines the smoothness of the output.
1626             * The smaller this parameter is, the smoother the solutions we obtain.
1627             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1628             */
1629             double lambda;
1630
1631             /**
1632             * Weight parameter for (u - v)^2, tightness parameter.
1633             * It serves as a link between the attachment and the regularization terms.
1634             * In theory, it should have a small value in order to maintain both parts in correspondence.
1635             * The method is stable for a large range of values of this parameter.
1636             */
1637             double theta;
1638
1639             /**
1640             * Number of scales used to create the pyramid of images.
1641             */
1642             int nscales;
1643
1644             /**
1645             * Number of warpings per scale.
1646             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1647             * This is a parameter that assures the stability of the method.
1648             * It also affects the running time, so it is a compromise between speed and accuracy.
1649             */
1650             int warps;
1651
1652             /**
1653             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1654             * A small value will yield more accurate solutions at the expense of a slower convergence.
1655             */
1656             double epsilon;
1657
1658             /**
1659             * Stopping criterion iterations number used in the numerical scheme.
1660             */
1661             int iterations;
1662
                 // NOTE(review): presumably makes operator() start from the incoming flowx / flowy contents - confirm
1663             bool useInitialFlow;
1664
1665         private:
                 // Helper operating on a single image pair (I0, I1) and a flow pair (u1, u2).
1666             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1667
                 // Per-scale storage (one entry per pyramid scale, judging by the names - confirm).
1668             std::vector<oclMat> I0s;
1669             std::vector<oclMat> I1s;
1670             std::vector<oclMat> u1s;
1671             std::vector<oclMat> u2s;
1672
                 // Internal work buffers.
1673             oclMat I1x_buf;
1674             oclMat I1y_buf;
1675
1676             oclMat I1w_buf;
1677             oclMat I1wx_buf;
1678             oclMat I1wy_buf;
1679
1680             oclMat grad_buf;
1681             oclMat rho_c_buf;
1682
1683             oclMat p11_buf;
1684             oclMat p12_buf;
1685             oclMat p21_buf;
1686             oclMat p22_buf;
1687
1688             oclMat diff_buf;
1689             oclMat norm_buf;
1690         };
1691         // currently supported sorting methods (values for the method argument of sortByKey)
1692         enum
1693         {
1694             SORT_BITONIC,   // only supports power-of-2 buffer sizes
1695             SORT_SELECTION, // cannot sort duplicate keys
1696             SORT_MERGE,
1697             SORT_RADIX      // only supports signed int/float keys (CV_32S/CV_32F)
1698         };
1699         //! Sorts all the elements in values according to their corresponding keys.
1700         //
1701         //  The element unit in the values to be sorted is determined from the data type,
1702         //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
1703         //  matrix dimension.
1704         //  Both keys and values will be sorted in place.
1705         //  keys needs to be a single-channel oclMat.
1706         //
1707         //  Example:
1708         //  input -
1709         //    keys   = {2,    3,   1}   (CV_8UC1)
1710         //    values = {10,5, 4,3, 6,2} (CV_8UC2)
1711         //  sortByKey(keys, values, SORT_SELECTION, false);
1712         //  output -
1713         //    keys   = {1,    2,   3}   (CV_8UC1)
1714         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
1715         void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
1716         /*!Base class for MOG and MOG2!*/
1717         class CV_EXPORTS BackgroundSubtractor
1718         {
1719         public:
1720             //! the virtual destructor
1721             virtual ~BackgroundSubtractor();
1722             //! the update operator that takes the next video frame and returns the current foreground mask as an 8-bit binary image.
                 //! learningRate has no default value in this base-class declaration.
1723             virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
1724
1725             //! computes a background image (pure virtual: derived classes must implement it)
1726             virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
1727         };
1728                 /*!
1729         Gaussian Mixture-based Background/Foreground Segmentation Algorithm
1730
1731         The class implements the following algorithm:
1732         "An improved adaptive background mixture model for real-time tracking with shadow detection"
1733         P. KadewTraKuPong and R. Bowden,
1734         Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
1735         http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
1736         */
1737         class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
1738         {
1739         public:
1740             //! the default constructor (nmixtures defaults to -1)
1741             MOG(int nmixtures = -1);
1742
1743             //! re-initialization method
1744             void initialize(Size frameSize, int frameType);
1745
1746             //! the update operator (learningRate defaults to 0.f)
1747             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
1748
1749             //! computes a background image which is the mean of all background gaussians
1750             void getBackgroundImage(oclMat& backgroundImage) const;
1751
1752             //! releases all inner buffers
1753             void release();
1754
                 // Public parameters.
1755             int history;
1756             float varThreshold;
1757             float backgroundRatio;
1758             float noiseSigma;
1759
1760         private:
1761             int nmixtures_;
1762
1763             Size frameSize_;
1764             int frameType_;
1765             int nframes_;
1766
                 // Inner oclMat buffers.
1767             oclMat weight_;
1768             oclMat sortKey_;
1769             oclMat mean_;
1770             oclMat var_;
1771         };
1772
1773         /*!
1774         The class implements the following algorithm:
1775         "Improved adaptive Gaussian mixture model for background subtraction"
1776         Z.Zivkovic
1777         International Conference Pattern Recognition, UK, August, 2004.
1778         http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
1779         */
1780         class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
1781         {
1782         public:
1783             //! the default constructor
                 //! NOTE(review): nmixtures is presumably the maximum number of mixture components per pixel
                 //! and -1 presumably selects a built-in default - confirm against the implementation
1784             MOG2(int nmixtures = -1);
1785
1786             //! re-initialization method for frames of the given size and type
1787             void initialize(Size frameSize, int frameType);
1788
1789             //! the update operator: takes the next frame and fills fgmask with the foreground mask
                 //! NOTE(review): the negative default learningRate presumably means "choose automatically" - confirm
1790             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
1791
1792             //! computes a background image which is the mean of all background gaussians
1793             void getBackgroundImage(oclMat& backgroundImage) const;
1794
1795             //! releases all inner buffers
1796             void release();
1797
1798             // parameters
1799             // you should call initialize after changing any of the parameters
                 // NOTE: for most fields below the explanatory comment FOLLOWS the declaration it describes
1800
                 // NOTE(review): not described in this header; presumably the number of frames that
                 // affect the background model - confirm
1801             int history;
1802
1803             // NOTE(review): the maximum allowed number of mixture components is not the field below; it is
1804             // presumably the constructor's nmixtures - confirm. Actual number is determined dynamically per pixel
1805             float varThreshold;
1806             // threshold on the squared Mahalanobis distance to decide if a sample is well described
1807             // by the background model or not. Related to Cthr from the paper.
1808             // This does not influence the update of the background. A typical value could be 4 sigma
1809             // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
1810
1811             /////////////////////////
1812             // less important parameters - things you might change but be careful
1813             ////////////////////////
1814
1815             float backgroundRatio;
1816             // corresponds to fTB=1-cf from the paper
1817             // TB - threshold when the component becomes significant enough to be included into
1818             // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9
1819             // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
1820             // it is considered foreground
                 // NOTE(review): "foreground" above looks inconsistent with "included into the background
                 // model" two lines earlier - confirm which one is meant
1821             // float noiseSigma;
1822             float varThresholdGen;
1823
1824             //corresponds to Tg - threshold on the squared Mahalanobis distance to decide
1825             //when a sample is close to the existing components. If it is not close
1826             //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
1827             //Smaller Tg leads to more generated components and higher Tg might
1828             //lead to a small number of components but they can grow too large
1829             float fVarInit;
1830             float fVarMin;
1831             float fVarMax;
1832
1833             //fVarInit is the initial variance for the newly generated components.
1834             //It will influence the speed of adaptation. A good guess should be made.
1835             //A simple way is to estimate the typical standard deviation from the images.
1836             //I used here 10 as a reasonable value
1837             // min and max (fVarMin, fVarMax) can be used to further control the variance
1838             float fCT; //CT - complexity reduction prior
1839             //this is related to the number of samples needed to accept that a component
1840             //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
1841             //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
1842
1843             //shadow detection parameters
1844             bool bShadowDetection; //default 1 - do shadow detection
1845             unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
1846             float fTau;
1847             // Tau - shadow threshold. The shadow is detected if the pixel is a darker
1848             //version of the background. Tau is a threshold on how much darker the shadow can be.
1849             //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
1850             //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
1851
1852         private:
                 // internal model state; only declared here, no code in this header reads or writes it
1853             int nmixtures_;
1854
1855             Size frameSize_;
1856             int frameType_;
1857             int nframes_;
1858
1859             oclMat weight_;
1860             oclMat variance_;
1861             oclMat mean_;
1862
1863             oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
1864         };
1865
1866         /*!***************Kalman Filter*************!*/
1867         class CV_EXPORTS KalmanFilter
1868         {
                 // Kalman filter whose state, model and covariance matrices are all stored as oclMat.
                 // The matrices below are public so that callers can fill in the model (A, B, H, Q, R).
1869         public:
                 //! the default constructor
1870             KalmanFilter();
1871             //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
1872             KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1873             //! re-initializes Kalman filter. The previous content is destroyed.
1874             void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1875
                 //! prediction step; the control vector is optional (defaults to an empty oclMat)
                 //! NOTE(review): presumably updates and returns statePre - confirm against the implementation
1876             const oclMat& predict(const oclMat& control=oclMat());
                 //! correction step using the given measurement
                 //! NOTE(review): presumably updates and returns statePost - confirm against the implementation
1877             const oclMat& correct(const oclMat& measurement);
1878
1879             oclMat statePre;           //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
1880             oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
1881             oclMat transitionMatrix;   //!< state transition matrix (A)
1882             oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
1883             oclMat measurementMatrix;  //!< measurement matrix (H)
1884             oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
1885             oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
1886             oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
1887             oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
1888             oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
1889         private:
                 // temporary matrices
                 // NOTE(review): presumably scratch storage reused by predict()/correct() - confirm
1890             oclMat temp1;
1891             oclMat temp2;
1892             oclMat temp3;
1893             oclMat temp4;
1894             oclMat temp5;
1895         };
1896
1897         /*!***************K Nearest Neighbour*************!*/
1898         class CV_EXPORTS KNearestNeighbour: public CvKNearest
1899         {
                 // K-nearest-neighbour classifier derived from CvKNearest; find_nearest works on oclMat data.
1900         public:
1901             KNearestNeighbour();
1902             ~KNearestNeighbour();
1903
                 //! trains the model from host-side (Mat) data.
                 //! NOTE(review): the default sampleIdx is built from a temporary Mat, which only lives until
                 //! the end of the call expression - confirm the implementation never keeps the reference.
                 //! NOTE(review): isRegression, max_k and updateBase presumably have the same meaning as in
                 //! CvKNearest::train - confirm
1904             bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
1905                 bool isRegression = false, int max_k = 32, bool updateBase = false);
1906
                 //! NOTE(review): presumably releases the trained model including samples_ocl - confirm
1907             void clear();
1908
                 //! looks up the k nearest neighbours for the given samples.
                 //! NOTE(review): the output parameter is spelled "lables" (sic); it presumably receives the
                 //! predicted labels, one per sample - confirm
1909             void find_nearest(const oclMat& samples, int k, oclMat& lables);
1910
1911         private:
                 // NOTE(review): presumably the device-side copy of the training samples - confirm
1912             oclMat samples_ocl;
1913         };
1914         /*!***************  SVM  *************!*/
1915         class CV_EXPORTS CvSVM_OCL : public CvSVM
1916         {
                 // SVM class derived from CvSVM; it declares its own constructors, predict overloads and
                 // kernel/solver creation helpers.
1917         public:
                 //! the default constructor
1918             CvSVM_OCL();
1919
                 //! constructor taking training data and responses; varIdx, sampleIdx and params are optional
                 //! NOTE(review): presumably trains the model immediately, like the matching CvSVM
                 //! constructor - confirm
1920             CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses,
1921                       const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
1922                       CvSVMParams params=CvSVMParams());
                 //! predict overloads.
                 //! NOTE(review): row_index presumably selects the row of src to classify, and returnDFVal
                 //! presumably requests the decision-function value instead of the class label - confirm
1923             CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const;
1924             CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const;
1925             CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
1926             float predict( const CvMat* samples, CV_OUT CvMat* results ) const;
1927
1928         protected:
                 //! row-based predict that additionally takes the row length
                 //! NOTE(review): presumably the worker behind the public row_index overload - confirm
1929             float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const;
                 //! NOTE(review): presumably replace the kernel and solver objects created by CvSVM with
                 //! OpenCL-backed ones - confirm
1930             void create_kernel();
1931             void create_solver();
1932         };
1933         /*!***************  END  *************!*/
1934     }
1935 }
1936 #if defined _MSC_VER && _MSC_VER >= 1200
1937 #  pragma warning( push)
1938 #  pragma warning( disable: 4267)
1939 #endif
1940 #include "opencv2/ocl/matrix_operations.hpp"
1941 #if defined _MSC_VER && _MSC_VER >= 1200
1942 #  pragma warning( pop)
1943 #endif
1944
1945 #endif /* __OPENCV_OCL_HPP__ */