modules/ocl/include/opencv2/ocl/ocl.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  16 // Third party copyrights are property of their respective owners.
  17 //
  18 // Redistribution and use in source and binary forms, with or without modification,
  19 // are permitted provided that the following conditions are met:
  20 //
  21 //   * Redistribution's of source code must retain the above copyright notice,
  22 //     this list of conditions and the following disclaimer.
  23 //
  24 //   * Redistribution's in binary form must reproduce the above copyright notice,
  25 //     this list of conditions and the following disclaimer in the documentation
  26 //     and/or other materials provided with the distribution.
  27 //
  28 //   * The name of the copyright holders may not be used to endorse or promote products
  29 //     derived from this software without specific prior written permission.
  30 //
  31 // This software is provided by the copyright holders and contributors "as is" and
  32 // any express or implied warranties, including, but not limited to, the implied
  33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  34 // In no event shall the Intel Corporation or contributors be liable for any direct,
  35 // indirect, incidental, special, exemplary, or consequential damages
  36 // (including, but not limited to, procurement of substitute goods or services;
  37 // loss of use, data, or profits; or business interruption) however caused
  38 // and on any theory of liability, whether in contract, strict liability,
  39 // or tort (including negligence or otherwise) arising in any way out of
  40 // the use of this software, even if advised of the possibility of such damage.
  41 //
  42 //M*/
  43
  44 #ifndef __OPENCV_OCL_HPP__
  45 #define __OPENCV_OCL_HPP__
  46
  47 #include <memory>
  48 #include <vector>
  49
  50 #include "opencv2/core/core.hpp"
  51 #include "opencv2/imgproc/imgproc.hpp"
  52 #include "opencv2/objdetect/objdetect.hpp"
  53 #include "opencv2/features2d/features2d.hpp"
  54 #include "opencv2/ml/ml.hpp"
  55
  56 namespace cv
  57 {
  58     namespace ocl
  59     {
  60         enum DeviceType // bit flags naming OpenCL device categories; values are passed as the int deviceType argument of getOpenCLDevices()
  61         {
  62             CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
  63             CVCL_DEVICE_TYPE_CPU         = (1 << 1),
  64             CVCL_DEVICE_TYPE_GPU         = (1 << 2), // default deviceType of getOpenCLDevices()
  65             CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
  66             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)   (disabled; bit 4 is currently not assigned)
  67             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF // all bits set, so it includes every flag above
  68         };
  69         // Access qualifier for device memory; passed as rw_type to oclMat::createEx().
  70         enum DevMemRW
  71         {
  72             DEVICE_MEM_R_W = 0, // presumably read-write access (the zero value) -- confirm in the implementation
  73             DEVICE_MEM_R_ONLY,  // presumably read-only access -- confirm
  74             DEVICE_MEM_W_ONLY   // presumably write-only access -- confirm
  75         };
  76         // Allocation strategy for device memory; passed as mem_type to oclMat::createEx().
  77         enum DevMemType
  78         {
  79             DEVICE_MEM_DEFAULT = 0,
  80             DEVICE_MEM_AHP,         // allocate host pointer (NOTE(review): presumably CL_MEM_ALLOC_HOST_PTR -- confirm)
  81             DEVICE_MEM_UHP,         // use host pointer (NOTE(review): presumably CL_MEM_USE_HOST_PTR -- confirm)
  82             DEVICE_MEM_CHP,         // copy host pointer (NOTE(review): presumably CL_MEM_COPY_HOST_PTR -- confirm)
  83             DEVICE_MEM_PM           // persistent memory
  84         };
  85
  86         // These structures contain OpenCL runtime information (platforms and their devices).
  87         // PlatformInfo is forward-declared here because DeviceInfo stores a pointer to it.
  88         struct PlatformInfo;
  89         // Describes one OpenCL device. NOTE(review): the fields look like cached clGetDeviceInfo() results -- confirm in the implementation.
  90         struct DeviceInfo
  91         {
  92             int _id; // reserved, don't use it
  93
  94             DeviceType deviceType;
  95             std::string deviceProfile;
  96             std::string deviceVersion;
  97             std::string deviceName;
  98             std::string deviceVendor;
  99             int deviceVendorId;
 100             std::string deviceDriverVersion;
 101             std::string deviceExtensions;
 102             // work-group and memory limits (NOTE(review): the memory sizes are presumably in bytes -- confirm)
 103             size_t maxWorkGroupSize;
 104             std::vector<size_t> maxWorkItemSizes;
 105             int maxComputeUnits;
 106             size_t localMemorySize;
 107             size_t maxMemAllocSize;
 108             // numeric device version (NOTE(review): presumably parsed from deviceVersion -- confirm)
 109             int deviceVersionMajor;
 110             int deviceVersionMinor;
 111             // capability flags
 112             bool haveDoubleSupport;
 113             bool isUnifiedMemory; // true means integrated GPU, otherwise this value is false
 114             bool isIntelDevice;
 115
 116             std::string compilationExtraOptions; // NOTE(review): presumably extra build options used when compiling OpenCL programs for this device -- confirm
 117
 118             const PlatformInfo* platform; // NOTE(review): presumably the platform this device belongs to (cf. PlatformInfo::devices) -- confirm
 119
 120             DeviceInfo();
 121             ~DeviceInfo();
 122         };
 123         // Describes one OpenCL platform and holds pointers to DeviceInfo entries in 'devices'.
 124         struct PlatformInfo
 125         {
 126             int _id; // reserved, don't use it
 127
 128             std::string platformProfile;
 129             std::string platformVersion;
 130             std::string platformName;
 131             std::string platformVendor;
 132             std::string platformExtensons; // sic: the misspelling ("Extensons") is part of the public field name
 133             // numeric platform version (NOTE(review): presumably parsed from platformVersion -- confirm)
 134             int platformVersionMajor;
 135             int platformVersionMinor;
 136
 137             std::vector<const DeviceInfo*> devices; // NOTE(review): presumably the devices exposed by this platform; ownership is not visible in this header
 138
 139             PlatformInfo();
 140             ~PlatformInfo();
 141         };
 142
 143         //////////////////////////////// Initialization & Info ////////////////////////
 144         typedef std::vector<const PlatformInfo*> PlatformsInfo;
 145         // NOTE(review): presumably fills 'platforms' with the available platforms and returns their count -- confirm in the implementation.
 146         CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
 147
 148         typedef std::vector<const DeviceInfo*> DevicesInfo;
 149         // deviceType takes DeviceType flags (CVCL_DEVICE_TYPE_GPU by default); platform may be NULL (the default).
 150         CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
 151                 const PlatformInfo* platform = NULL); // NOTE(review): presumably NULL means "search every platform" and the result is the device count -- confirm
 152
 153         // set the device you want to use
 154         CV_EXPORTS void setDevice(const DeviceInfo* info);
 155
 156         // Initialize from OpenCL handles directly.
 157         // The arguments are pointers to the handles: cl_platform_id*, cl_context*, cl_device_id*
 158         CV_EXPORTS void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice);
 159
 160         //////////////////////////////// Error handling ////////////////////////
 161         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); // takes a message plus the source location (file, line, function) it refers to
 162         // Capabilities that can be queried with supportsFeature() / Context::supportsFeature().
 163         enum FEATURE_TYPE
 164         {
 165             FEATURE_CL_DOUBLE = 1,    // NOTE(review): presumably corresponds to DeviceInfo::haveDoubleSupport -- confirm
 166             FEATURE_CL_UNIFIED_MEM,   // NOTE(review): presumably corresponds to DeviceInfo::isUnifiedMemory -- confirm
 167             FEATURE_CL_VER_1_2,       // NOTE(review): presumably means OpenCL version 1.2 or newer -- confirm
 168             FEATURE_CL_INTEL_DEVICE   // NOTE(review): presumably corresponds to DeviceInfo::isIntelDevice -- confirm
 169         };
 170
 171         // Represents the OpenCL context (interface class); instances are obtained through getContext().
 172         class CV_EXPORTS Context
 173         {
 174         protected:
 175             Context() { }  // protected: client code cannot construct a Context directly
 176             ~Context() { } // protected and non-virtual: client code cannot delete a Context
 177         public:
 178             static Context* getContext(); // accessor for the Context to use; NOTE(review): lifetime/ownership of the returned pointer is not visible here
 179
 180             bool supportsFeature(FEATURE_TYPE featureType) const;
 181             const DeviceInfo& getDeviceInfo() const;
 182             // The getters below return untyped pointers (NOTE(review): presumably to the cl_context, cl_command_queue and cl_device_id handles -- confirm).
 183             const void* getOpenCLContextPtr() const;
 184             const void* getOpenCLCommandQueuePtr() const;
 185             const void* getOpenCLDeviceIDPtr() const;
 186         };
 187         // Shortcut for Context::getContext()->getOpenCLContextPtr().
 188         inline const void *getClContextPtr()
 189         {
 190             return Context::getContext()->getOpenCLContextPtr();
 191         }
 192         // Shortcut for Context::getContext()->getOpenCLCommandQueuePtr().
 193         inline const void *getClCommandQueuePtr()
 194         {
 195             return Context::getContext()->getOpenCLCommandQueuePtr();
 196         }
 197         // Free-function feature query; NOTE(review): presumably forwards to Context::supportsFeature() -- confirm in the implementation.
 198         CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType);
 199         // NOTE(review): presumably blocks until the queued OpenCL work has completed (cf. clFinish) -- confirm in the implementation.
 200         CV_EXPORTS void finish();
 201         // Bit flags selecting when built OpenCL binaries are cached; passed as 'mode' to setBinaryDiskCache().
 202         enum BINARY_CACHE_MODE
 203         {
 204             CACHE_NONE    = 0,        // do not cache OpenCL binary
 205             CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode
 206             CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode
 207             CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // cache OpenCL binary in both debug and release builds; NOTE(review): the trailing comma is only valid since C++11
 208         };
 209         //! Enables or disables caching of built OpenCL program binaries on the local disk.
 210         // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
 211         // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
 212         // binary file, which will be reused when the OpenCV executable is started again.
 213         // 'mode' takes BINARY_CACHE_MODE flags; 'path' is the directory of the cache files.
 214         // This feature is enabled by default (mode = CACHE_RELEASE, path = "./").
 215         CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
 216
 217         //! sets the path where the binary cache is saved
 218         CV_EXPORTS void setBinaryPath(const char *path);
 219         // Describes an OpenCL program by name, source text and optional hash. Only the pointers are stored (no copies are made).
 220         struct ProgramSource
 221         {
 222             const char* name;
 223             const char* programStr;
 224             const char* programHash; // NULL when constructed without a hash
 225
 226             // Cache in memory by name (should be unique). Caching on disk disabled.
 227             inline ProgramSource(const char* _name, const char* _programStr)
 228                 : name(_name), programStr(_programStr), programHash(NULL)
 229             {
 230             }
 231
 232             // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
 233             inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
 234                 : name(_name), programStr(_programStr), programHash(_programHash)
 235             {
 236             }
 237         };
 238         // NOTE(review): the comment below mentions a 'cleanUp' argument, but this declaration has no such parameter -- the sentence looks stale.
 239         //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
 240         //! Deprecated, will be replaced
 241         CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
 242                 const cv::ocl::ProgramSource& source, string kernelName,
 243                 size_t globalThreads[3], size_t localThreads[3],
 244                 std::vector< std::pair<size_t, const void *> > &args, // NOTE(review): each pair is presumably (argument size in bytes, pointer to the argument value) -- confirm
 245                 int channels, int depth, const char *build_options);
 246         // Forward declaration: oclMat::operator= accepts an oclMatExpr and the oclMat operators +, -, *, / return one.
 247         class CV_EXPORTS oclMatExpr;
 248         //////////////////////////////// oclMat ////////////////////////////////
 249         class CV_EXPORTS oclMat // matrix whose 'data' member refers to an OCL memory object, shared through 'refcount' (see the member comments below)
 250         {
 251         public:
 252             //! default constructor
 253             oclMat();
 254             //! constructs oclMatrix of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
 255             oclMat(int rows, int cols, int type);
 256             oclMat(Size size, int type);
 257             //! constructs oclMatrix and fills it with the specified value s.
 258             oclMat(int rows, int cols, int type, const Scalar &s);
 259             oclMat(Size size, int type, const Scalar &s);
 260             //! copy constructor
 261             oclMat(const oclMat &m);
 262
 263             //! constructor for oclMatrix headers pointing to user-allocated data
 264             oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
 265             oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
 266
 267             //! creates a matrix header for a part of the bigger matrix
 268             oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
 269             oclMat(const oclMat &m, const Rect &roi);
 270
 271             //! builds oclMat from Mat. Performs a blocking upload to the device.
 272             explicit oclMat (const Mat &m);
 273
 274             //! destructor - calls release()
 275             ~oclMat();
 276
 277             //! assignment operators
 278             oclMat &operator = (const oclMat &m);
 279             //! assignment operator. Performs a blocking upload to the device.
 280             oclMat &operator = (const Mat &m);
 281             oclMat &operator = (const oclMatExpr& expr);
 282
 283             //! performs a blocking upload of data to the oclMat.
 284             void upload(const cv::Mat &m);
 285
 286
 287             //! downloads data from device to host memory. Blocking calls.
 288             operator Mat() const;
 289             void download(cv::Mat &m) const;
 290
 291             //! convert to _InputArray
 292             operator _InputArray();
 293
 294             //! convert to _OutputArray
 295             operator _OutputArray();
 296
 297             //! returns a new oclMatrix header for the specified row
 298             oclMat row(int y) const;
 299             //! returns a new oclMatrix header for the specified column
 300             oclMat col(int x) const;
 301             //! ... for the specified row span
 302             oclMat rowRange(int startrow, int endrow) const;
 303             oclMat rowRange(const Range &r) const;
 304             //! ... for the specified column span
 305             oclMat colRange(int startcol, int endcol) const;
 306             oclMat colRange(const Range &r) const;
 307
 308             //! returns deep copy of the oclMatrix, i.e. the data is copied
 309             oclMat clone() const;
 310
 311             //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
 312             // It calls m.create(this->size(), this->type()).
 313             // It supports any data type
 314             void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
 315
 316             //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
 317             void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
 318
 319             void assignTo( oclMat &m, int type = -1 ) const;
 320
 321             //! sets every oclMatrix element to s
 322             oclMat& operator = (const Scalar &s);
 323             //! sets some of the oclMatrix elements to s, according to the mask
 324             oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
 325             //! creates alternative oclMatrix header for the same data, with different
 326             // number of channels and/or different number of rows. see cvReshape.
 327             oclMat reshape(int cn, int rows = 0) const;
 328
 329             //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
 330             // previous data is unreferenced if needed.
 331             void create(int rows, int cols, int type);
 332             void create(Size size, int type);
 333
 334             //! allocates new oclMatrix with the specified device memory access (DevMemRW) and allocation (DevMemType) types.
 335             void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
 336             void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
 337
 338             //! decreases reference counter;
 339             // deallocate the data when reference counter reaches 0.
 340             void release();
 341
 342             //! swaps with other smart pointer
 343             void swap(oclMat &mat);
 344
 345             //! locates oclMatrix header within a parent oclMatrix. See below
 346             void locateROI( Size &wholeSize, Point &ofs ) const;
 347             //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
 348             oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
 349             //! extracts a rectangular sub-oclMatrix
 350             // (this is a generalized form of row, rowRange etc.)
 351             oclMat operator()( Range rowRange, Range colRange ) const;
 352             oclMat operator()( const Rect &roi ) const;
 353             // compound assignment with another oclMat (NOTE(review): presumably element-wise, like the free operators +, -, *, / -- confirm)
 354             oclMat& operator+=( const oclMat& m );
 355             oclMat& operator-=( const oclMat& m );
 356             oclMat& operator*=( const oclMat& m );
 357             oclMat& operator/=( const oclMat& m );
 358
 359             //! returns true if the oclMatrix data is continuous
 360             // (i.e. when there are no gaps between successive rows).
 361             // similar to CV_IS_MAT_CONT(cvmat->type)
 362             bool isContinuous() const;
 363             //! returns element size in bytes,
 364             // similar to CV_ELEM_SIZE(cvMat->type)
 365             size_t elemSize() const;
 366             //! returns the size of element channel in bytes.
 367             size_t elemSize1() const;
 368             //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
 369             int type() const;
 370             //! returns the element type used on the device, i.e. 8UC3 returns 8UC4 because in ocl
 371             //! a 3-channel element actually uses 4-channel space
 372             int ocltype() const;
 373             //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
 374             int depth() const;
 375             //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
 376             int channels() const;
 377             //! returns the number of channels used on the device: 4 for a 3-channel element,
 378             //! because a 3-channel element actually uses 4-channel space
 379             int oclchannels() const;
 380             //! returns step/elemSize1()
 381             size_t step1() const;
 382             //! returns oclMatrix size:
 383             // width == number of columns, height == number of rows
 384             Size size() const;
 385             //! returns true if oclMatrix data is NULL
 386             bool empty() const;
 387
 388             //! matrix transposition
 389             oclMat t() const;
 390
 391             /*! includes several bit-fields:
 392               - the magic signature
 393               - continuity flag
 394               - depth
 395               - number of channels
 396               */
 397             int flags;
 398             //! the number of rows and columns
 399             int rows, cols;
 400             //! a distance between successive rows in bytes; includes the gap if any
 401             size_t step;
 402             //! pointer to the data (OCL memory object)
 403             uchar *data;
 404
 405             //! pointer to the reference counter;
 406             // when oclMatrix points to user-allocated data, the pointer is NULL
 407             int *refcount;
 408
 409             //! helper fields used in locateROI and adjustROI
 410             // datastart and dataend are not used in the current version
 411             uchar *datastart;
 412             uchar *dataend;
 413
 414             //! OpenCL context associated with the oclMat object.
 415             Context *clCxt; // TODO clCtx
 416             //! offset used to handle the ROI, calculated in bytes
 417             int offset;
 418             //! wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
 419             int wholerows;
 420             int wholecols;
 421         };
 422         // NOTE(review): presumably only meaningful for arrays that wrap an oclMat (cf. oclMat::operator _InputArray / _OutputArray) -- confirm.
 423         // convert InputArray/OutputArray to oclMat references
 424         CV_EXPORTS oclMat& getOclMatRef(InputArray src);
 425         CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
 426
 427         ///////////////////// mat split and merge /////////////////////////////////
 428         //! Composes a multi-channel array from several single-channel arrays
 429         // Supports all types
 430         CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst); // NOTE(review): src presumably points to n matrices -- confirm
 431         CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
 432
 433         //! Divides a multi-channel array into several single-channel arrays
 434         // Supports all types
 435         CV_EXPORTS void split(const oclMat &src, oclMat *dst); // NOTE(review): dst presumably must point to at least src.channels() matrices -- confirm
 436         CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
 437
 438         ////////////////////////////// Arithmetics ///////////////////////////////////
 439
 440         //! computes the weighted sum of two matrices (dst = src1 * alpha + src2 * beta + gama)
 441         // supports all data types
 442         CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
 443
 444         //! adds one matrix to another (dst = src1 + src2)
 445         // supports all data types
 446         CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 447         //! adds scalar to a matrix (dst = src1 + s)
 448         // supports all data types
 449         CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 450
 451         //! subtracts one matrix from another (dst = src1 - src2)
 452         // supports all data types
 453         CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 454         //! subtracts scalar from a matrix (dst = src1 - s)
 455         // supports all data types
 456         CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 457
 458         //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
 459         // supports all data types
 460         CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 461         //! multiplies a matrix by a number (dst = scalar * src)
 462         // supports all data types
 463         CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
 464
 465         //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
 466         // supports all data types
 467         CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
 468         //! computes element-wise quotient of a number and an array (dst = scale / src1)
 469         // supports all data types
 470         CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
 471
 472         //! computes element-wise minimum of the two arrays (dst = min(src1, src2))
 473         // supports all data types
 474         CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst);
 475
 476         //! computes element-wise maximum of the two arrays (dst = max(src1, src2))
 477         // supports all data types
 478         CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst);
 479
 480         //! compares elements of two arrays (dst = src1 <cmpop> src2)
 481         // supports all data types
 482         CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop); // NOTE(review): cmpop is presumably one of the cv::CMP_* codes -- confirm
 483
 484         //! transposes the matrix
 485         // supports all data types
 486         CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
 487
 488         //! computes element-wise absolute values of an array (dst = abs(src))
 489         // supports all data types
 490         CV_EXPORTS void abs(const oclMat &src, oclMat &dst);
 491
 492         //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
 493         // supports all data types
 494         CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
 495         //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
 496         // supports all data types
 497         CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
 498
 499         //! computes mean value and standard deviation of all array elements (this function takes no mask)
 500         // supports all data types
 501         CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
 502
 503         //! computes norm of array
 504         // supports NORM_INF, NORM_L1, NORM_L2
 505         // supports all data types
 506         CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
 507
 508         //! computes norm of the difference between two arrays
 509         // supports NORM_INF, NORM_L1, NORM_L2
 510         // supports all data types
 511         CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
 512
 513         //! reverses the order of the rows, columns or both in a matrix
 514         // supports all types
 515         CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
 516
 517         //! computes sum of array elements
 518         // supports all types
 519         CV_EXPORTS Scalar sum(const oclMat &m);
 520         CV_EXPORTS Scalar absSum(const oclMat &m); // NOTE(review): presumably the sum of absolute values -- confirm
 521         CV_EXPORTS Scalar sqrSum(const oclMat &m); // NOTE(review): presumably the sum of squares -- confirm
 522
 523         //! finds global minimum and maximum array elements and returns their values
 524         // supports all single-channel (C1) types
 525         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
 526
 527         //! finds global minimum and maximum array elements and returns their values with locations
 528         // supports all single-channel (C1) types
 529         CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
 530                                   const oclMat &mask = oclMat());
 531
 532         //! counts non-zero array elements
 533         // supports all types
 534         CV_EXPORTS int countNonZero(const oclMat &src);
 535
 536         //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
 537         // destination array will have the same depth as lut and the same number of channels as the source
 538         // It supports 8UC1 and 8UC4 only
 539         CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
 540
 541         //! only 8UC1 and 256 bins are supported now
 542         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
 543         //! only 8UC1 and 256 bins are supported now
 544         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
 545
 546         //! only 8UC1 is supported now
 547         CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 548
 549         //! bilateralFilter
 550         // supports 8UC1 8UC4
 551         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
 552
 553         //! Applies an adaptive bilateral filter to the input image
 554         //  Unlike the usual bilateral filter that uses a fixed value for sigmaColor,
 555         //  the adaptive version calculates the local variance in the ksize neighborhood
 556         //  and uses this as sigmaColor for the value filtering. However, the local standard deviation is
 557         //  clamped to the maxSigmaColor.
 558         //  supports 8UC1, 8UC3
 559         CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor=20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
 560
 561         //! computes exponent of each matrix element (dst = e**src)
 562         // supports only CV_32FC1, CV_64FC1 type
 563         CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
 564
 565         //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
 566         // supports only CV_32FC1, CV_64FC1 type
 567         CV_EXPORTS void log(const oclMat &src, oclMat &dst);
 568
 569         //! computes magnitude of each (x(i), y(i)) vector
 570         // supports only CV_32F, CV_64F type
 571         CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
 572
 573         //! computes angle (angle(i)) of each (x(i), y(i)) vector
 574         // supports only CV_32F, CV_64F type
 575         CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
 576
 577         //! raises every element of the input array x to the power p; the result is written to y
 578         // supports only CV_32F, CV_64F type
 579         CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
 580
 581         //! converts Cartesian coordinates to polar
 582         // supports only CV_32F CV_64F type
 583         CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
 584
 585         //! converts polar coordinates to Cartesian
 586         // supports only CV_32F CV_64F type
 587         CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
 588
 589         //! performs per-element bit-wise inversion
 590         // supports all types
 591         CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
 592
 593         //! calculates per-element bit-wise disjunction of two arrays, or of an array and a scalar
 594         // supports all types
 595         CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 596         CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 597
 598         //! calculates per-element bit-wise conjunction of two arrays, or of an array and a scalar
 599         // supports all types
 600         CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 601         CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 602
 603         //! calculates per-element bit-wise "exclusive or" of two arrays, or of an array and a scalar
 604         // supports all types
 605         CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
 606         CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
 607
 608         //! Logical operators (NOTE(review): presumably implemented with bitwise_not / bitwise_or / bitwise_and / bitwise_xor -- confirm)
 609         CV_EXPORTS oclMat operator ~ (const oclMat &);
 610         CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
 611         CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
 612         CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
 613
 614
 615         //! Mathematics operators; they return an oclMatExpr, which can be assigned to an oclMat (see oclMat::operator=)
 616         CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
 617         CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
 618         CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
 619         CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
 620
 621         //! computes convolution of two images
 622         // supports only CV_32FC1 type
 623         CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
 624         // NOTE(review): presumably the OpenCL counterpart of cv::cvtColor ('code' selects the conversion, dcn = 0 is the default) -- confirm
 625         CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0);
 626
 627         //! initializes a scaled identity matrix (val defaults to Scalar(1))
 628         CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
 629
 630         //! fills the output array with repeated copies of the input array
 631         CV_EXPORTS void repeat(const oclMat & src, int ny, int nx, oclMat & dst); // NOTE(review): ny / nx are presumably the repeat counts along the vertical / horizontal axis -- confirm
 632
 633         //////////////////////////////// Filter Engine ////////////////////////////////
 634
 635         /*!
 636           The Base Class for 1D or Row-wise Filters
 637
 638           This is the base class for linear or non-linear filters that process 1D data.
 639           In particular, such filters are used for the "horizontal" filtering parts in separable filters.
 640           */
 641         class CV_EXPORTS BaseRowFilter_GPU
 642         {
 643         public:
 644             BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} // only stores the three parameters
 645             virtual ~BaseRowFilter_GPU() {} // virtual so that concrete filters can be destroyed through a base pointer
 646             virtual void operator()(const oclMat &src, oclMat &dst) = 0; // pure virtual: concrete filters implement the actual filtering of src into dst
 647             int ksize, anchor, bordertype; // kernel size, anchor position and border mode as passed to the constructor
 648         };
 649
 650         /*!
 651           The Base Class for Column-wise Filters
 652
 653           This is the base class for linear or non-linear filters that process columns of 2D arrays.
 654           Such filters are used for the "vertical" filtering parts in separable filters.
 655           */
 656         class CV_EXPORTS BaseColumnFilter_GPU
 657         {
 658         public:
                 // Stores the three arguments verbatim in the public members below; no validation is done here.
 659             BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
                 // Virtual so that concrete filters can be destroyed through a base-class pointer.
 660             virtual ~BaseColumnFilter_GPU() {}
                 // Applies the filter: reads src, writes dst. Pure virtual - implemented by concrete filters.
 661             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
                 // Filter parameters exactly as passed to the constructor.
 662             int ksize, anchor, bordertype;
 663         };
 664
 665         /*!
 666           The Base Class for Non-Separable 2D Filters.
 667
 668           This is the base class for linear or non-linear 2D filters.
 669           */
 670         class CV_EXPORTS BaseFilter_GPU
 671         {
 672         public:
                 // Copies the kernel size, anchor and border type into the public members below.
                 // Note the member is spelled borderType here, while the row/column base classes use bordertype.
 673             BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
 674                 : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
                 // Virtual so that concrete filters can be destroyed through a base-class pointer.
 675             virtual ~BaseFilter_GPU() {}
                 // Applies the filter: reads src, writes dst. Pure virtual - implemented by concrete filters.
 676             virtual void operator()(const oclMat &src, oclMat &dst) = 0;
                 // 2D kernel size as passed to the constructor.
 677             Size ksize;
                 // Kernel anchor as passed to the constructor.
 678             Point anchor;
                 // Border handling mode as passed to the constructor.
 679             int borderType;
 680         };
 681
 682         /*!
 683           The Base Class for Filter Engine.
 684
 685           The class can be used to apply an arbitrary filtering operation to an image.
 686           It contains all the necessary intermediate buffers.
 687           */
 688         class CV_EXPORTS FilterEngine_GPU
 689         {
 690         public:
                 // Virtual so that engines returned through Ptr<FilterEngine_GPU> are destroyed correctly.
 691             virtual ~FilterEngine_GPU() {}
 692
                 // Runs the filter on src and writes the result to dst. Pure virtual.
                 // NOTE(review): the default roi Rect(0, 0, -1, -1) presumably means "the whole image" - confirm.
 693             virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
 694         };
 695
 696         //! returns the non-separable filter engine with the specified filter
             // filter2D is passed as a const Ptr by value (not by reference, unlike createSeparableFilter_GPU below)
 697         CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
 698
 699         //! returns the primitive row filter with the specified kernel
             // defaults: anchor = -1, bordertype = BORDER_DEFAULT
             // NOTE(review): anchor = -1 presumably selects the kernel center - confirm.
 700         CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
 701                 int anchor = -1, int bordertype = BORDER_DEFAULT);
 702
 703         //! returns the primitive column filter with the specified kernel
             // defaults: anchor = -1, bordertype = BORDER_DEFAULT, delta = 0.0
 704         CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
 705                 int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
 706
 707         //! returns the separable linear filter engine
             // defaults: anchor = Point(-1, -1), delta = 0.0, bordertype = BORDER_DEFAULT, imgSize = Size(-1,-1)
             // NOTE(review): the meaning of imgSize and of its Size(-1,-1) default is not documented here - confirm.
 708         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
 709                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
 710
 711         //! returns the separable filter engine with the specified filters
 712         CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
 713                 const Ptr<BaseColumnFilter_GPU> &columnFilter);
 714
 715         //! returns the Gaussian filter engine
             // defaults: sigma2 = 0, bordertype = BORDER_DEFAULT, imgSize = Size(-1,-1)
 716         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
 717
 718         //! returns filter engine for the generalized Sobel operator
             // defaults: borderType = BORDER_DEFAULT, imgSize = Size(-1,-1)
 719         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
 720
 721         //! applies Laplacian operator to the image
 722         // supports only ksize = 1 and ksize = 3
             // defaults: ksize = 1, scale = 1, delta = 0, borderType = BORDER_DEFAULT
 723         CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1,
 724                 double delta=0, int borderType=BORDER_DEFAULT);
 725
 726         //! returns 2D box filter
 727         // dst type must be the same as source type
             // defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT
 728         CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
 729                 const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 730
 731         //! returns box filter engine
             // defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT
 732         CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
 733                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 734
 735         //! returns 2D filter with the specified kernel
 736         // dst type must be the same as source type
             // the kernel size is passed separately as ksize, in addition to the kernel matrix itself
 737         CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
 738                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 739
 740         //! returns the non-separable linear filter engine
 741         // dst type must be the same as source type
 742         CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
 743                 const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 744
 745         //! smooths the image using the normalized box filter
             // defaults: anchor = Point(-1, -1), borderType = BORDER_DEFAULT
             // blur() in this header forwards to this function with ddepth = -1
 746         CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
 747                                   Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 748
 749         //! returns 2D morphological filter
 750         //! only MORPH_ERODE and MORPH_DILATE are supported
 751         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
 752         // kernel must have CV_8UC1 type, one row and cols == ksize.width * ksize.height
 753         CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
 754                 Point anchor = Point(-1, -1));
 755
 756         //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
             // defaults: anchor = Point(-1, -1), iterations = 1
 757         CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
 758                 const Point &anchor = Point(-1, -1), int iterations = 1);
 759
 760         //! a synonym for normalized box filter
             // Inline wrapper: forwards every argument to boxFilter with ddepth = -1.
             // NOTE(review): borderType defaults to BORDER_CONSTANT here, whereas boxFilter itself is declared
             // with BORDER_DEFAULT, so blur(src, dst, ksize) and boxFilter(src, dst, -1, ksize) use different
             // borders - confirm this difference is intended.
 761         static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
 762                                 int borderType = BORDER_CONSTANT)
 763         {
 764             boxFilter(src, dst, -1, ksize, anchor, borderType);
 765         }
 766
 767         //! applies non-separable 2D linear filter to the image
             // defaults: anchor = Point(-1, -1), delta = 0.0, borderType = BORDER_DEFAULT
 768         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
 769                                  Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT);
 770
 771         //! applies separable 2D linear filter to the image
             // kernelX / kernelY are host-side Mat kernels; defaults: anchor = Point(-1, -1), delta = 0.0, bordertype = BORDER_DEFAULT
 772         CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
 773                                     Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
 774
 775         //! applies generalized Sobel operator to the image
 776         // dst.type must equal src.type
 777         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 778         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // defaults: ksize = 3, scale = 1, delta = 0.0, bordertype = BORDER_DEFAULT
 779         CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 780
 781         //! applies the vertical or horizontal Scharr operator to the image
 782         // dst.type must equal src.type
 783         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 784         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // defaults: scale = 1, delta = 0.0, bordertype = BORDER_DEFAULT (no ksize parameter, unlike Sobel)
 785         CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
 786
 787         //! smooths the image using Gaussian filter.
 788         // dst.type must equal src.type
 789         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
 790         // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
             // defaults: sigma2 = 0, bordertype = BORDER_DEFAULT
 791         CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
 792
 793         //! erodes the image (applies the local minimum operator)
 794         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
             // defaults: anchor = Point(-1, -1), iterations = 1, borderType = BORDER_CONSTANT,
             // borderValue = morphologyDefaultBorderValue()
 795         CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 796
 797                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 798
 799
 800         //! dilates the image (applies the local maximum operator)
 801         // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
             // same defaults as erode
 802         CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 803
 804                                 int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 805
 806
 807         //! applies an advanced morphological operation to the image
             // op selects the operation; the remaining parameters and defaults mirror erode / dilate
 808         CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
 809
 810                                       int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
 811
 812
 813         ////////////////////////////// Image processing //////////////////////////////
 814         //! Does mean shift filtering on GPU.
             // default criteria: TermCriteria(MAX_ITER + EPS, 5, 1)
             // NOTE(review): sp / sr are presumably the spatial and color window radii - confirm.
 815         CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
 816                                            TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 817
 818         //! Does mean shift procedure on GPU.
             // writes two outputs, dstr and dstsp; same default criteria as meanShiftFiltering
 819         CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
 820                                       TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 821
 822         //! Does mean shift segmentation with elimination of small regions.
             // note: dst is a host-side Mat here, not an oclMat; minsize is the size threshold for the eliminated regions
 823         CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
 824                                               TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 825
 826         //! applies fixed threshold to the image.
 827         // supports CV_8UC1 and CV_32FC1 data type
 828         // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
             // note: type defaults to THRESH_TRUNC; returns a double
 829         CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
 830
 831         //! resizes the image
 832         // Supports INTER_NEAREST, INTER_LINEAR
 833         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // defaults: fx = 0, fy = 0, interpolation = INTER_LINEAR
 834         CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
 835
 836         //! Applies a generic geometrical transformation to an image.
 837
 838         // Supports INTER_NEAREST, INTER_LINEAR.
 839         // Map1 supports CV_16SC2, CV_32FC2  types.
 840         // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
             // map1 / map2 are taken by non-const reference; interpolation and bordertype have no defaults, value defaults to Scalar()
 841         CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
 842
 843         //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
 844         // supports CV_8UC1, CV_8UC4, CV_32SC1 types
             // the border-type parameter is spelled "boardtype" in this declaration; value defaults to Scalar()
 845         CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
 846
 847         //! Smoothes image using median filter
 848         // The source must be a 1- or 4-channel image, m should be 3 or 5, and the image depth should be CV_8U or CV_32F.
 849         CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
 850
 851         //! warps the image using affine transformation
 852         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 853         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // M is a host-side Mat holding the transformation; flags defaults to INTER_LINEAR
 854         CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 855
 856         //! warps the image using perspective transformation
 857         // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
 858         // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
             // M is a host-side Mat holding the transformation; flags defaults to INTER_LINEAR
 859         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 860
 861         //! computes the integral image and integral for the squared image
 862         // sum supports CV_32S and CV_32F; sqsum supports CV_32F and CV_64F
 863         // supports only CV_8UC1 source type
             // sdepth defaults to -1 in both overloads; the second overload computes sum only
 864         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 );
 865         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 );
             // Corner detectors. bordertype defaults to cv::BORDER_DEFAULT in all four declarations.
             // The *_dxdy variants additionally take Dx and Dy matrices by non-const reference.
             // NOTE(review): Dx / Dy are presumably outputs receiving the image derivatives - confirm.
 866         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 867         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 868             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
 869         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 870         CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
 871             int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 872         /////////////////////////////////// ML ///////////////////////////////////////////
 873
 874         //! Computes the closest center for each row in source and labels it with that center's index
 875         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
 876         // supports NORM_L1 and NORM_L2 distType
 877         // if indices is provided, only the indexed rows will be calculated and their results are in the same
 878         // order of indices
             // NOTE(review): this signature has no "indices" parameter, so the remark above looks stale.
             // NOTE(review): distType defaults to NORM_L2SQR, which is not in the supported list above - confirm.
             // dists and labels are host-side Mat outputs.
 879         CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR);
 880
 881         //!Does k-means procedure on GPU
 882         // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
             // bestLabels and centers are taken by non-const reference; returns a double
             // the attempt-count parameter is spelled "attemps" in this declaration
 883         CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
 884                                      TermCriteria criteria, int attemps, int flags, oclMat &centers);
 885
 886
 887         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 888         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
 889         ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 890
 891         class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier
 892         {
                 // Cascade classifier that derives from cv::CascadeClassifier and adds detection
                 // entry points taking an oclMat image.
 893         public:
                 // Empty constructor and destructor; the class declares no data members of its own.
 894             OclCascadeClassifier() {};
 895             ~OclCascadeClassifier() {};
 896
                 // C-style entry point: returns a CvSeq* and takes a caller-supplied CvMemStorage.
                 // minSize / maxSize default to cvSize(0, 0).
 897             CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
 898                                         int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
                 // C++-style entry point: detections are written to faces.
                 // defaults: scaleFactor = 1.1, minNeighbors = 3, flags = 0, minSize = maxSize = Size()
 899             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 900                 double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 901                 Size minSize = Size(), Size maxSize = Size());
 902         };
 903
 904         class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
 905         {
 906         public:
 907             OclCascadeClassifierBuf() :
 908                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 909
 910             ~OclCascadeClassifierBuf() { release(); }
 911
 912             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
 913                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
 914                                   Size minSize = Size(), Size maxSize = Size());
 915             void release();
 916
 917         private:
 918             void Init(const int rows, const int cols, double scaleFactor, int flags,
 919                       const int outputsz, const size_t localThreads[],
 920                       CvSize minSize, CvSize maxSize);
 921             void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
 922             void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
 923                                          const double scaleFactor, const size_t localThreads[],
 924                                          CvSize minSize, CvSize maxSize);
 925             void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
 926
 927             int m_rows;
 928             int m_cols;
 929             int m_flags;
 930             int m_loopcount;
 931             int m_nodenum;
 932             bool findBiggestObject;
 933             bool initialized;
 934             double m_scaleFactor;
 935             Size m_minSize;
 936             Size m_maxSize;
 937             vector<CvSize> sizev;
 938             vector<float> scalev;
 939             oclMat gimg1, gsum, gsqsum, gsqsum_t;
 940             void * buffers;
 941         };
 942
 943
 944         /////////////////////////////// Pyramid /////////////////////////////////////
 945         CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); // NOTE(review): undocumented here; presumably smooths and downsamples src (counterpart of pyrUp) - confirm
 946
 947         //! upsamples the source image and then smoothes it
 948         CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
 949
 950         //! performs linear blending of two images
 951         //! to avoid accuracy errors sum of weights shouldn't be very close to zero
 952         // supports only CV_8UC1 source type
             // img1 / img2 are blended using weights1 / weights2; the output is written to result
 953         CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
 954
 955         //! computes vertical sum, supports only CV_32FC1 images
 956         CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
 957
 958         ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
 959         struct CV_EXPORTS MatchTemplateBuf
 960         {
                 // Plain aggregate of buffers passed to the matchTemplate overload that takes a
                 // MatchTemplateBuf&. It declares no constructors or member functions.
                 // NOTE(review): presumably lets repeated calls reuse intermediate storage - confirm.
 961             Size user_block_size;
 962             oclMat imagef, templf;
 963             std::vector<oclMat> images;
 964             std::vector<oclMat> image_sums;
 965             std::vector<oclMat> image_sqsums;
 966         };
 967
 968         //! computes the proximity map for the raster template and the image where the template is searched for
 969         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 970         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
 971         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
 972
 973         //! computes the proximity map for the raster template and the image where the template is searched for
 974         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
 975         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
             // same as the overload above, but takes a caller-supplied MatchTemplateBuf by non-const reference
 976         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 977
 978         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
 979         struct CV_EXPORTS CannyBuf;
             // (forward declaration: two of the overloads below take a CannyBuf&, which is defined after them)
 980         //! compute edges of the input image using Canny operator
 981         // Support CV_8UC1 only
             // Four overloads: input is either an image (with apperture_size, default 3) or a
             // precomputed dx / dy pair, each with or without a caller-supplied CannyBuf.
             // L2gradient defaults to false in all of them.
 982         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 983         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
 984         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 985         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 986
 987         struct CV_EXPORTS CannyBuf
 988         {
                 // Buffer set used by the Canny overloads that take a CannyBuf&.
                 // Default constructor: only counter is sized (1x1, CV_32S); the other members are default-constructed.
 989             CannyBuf() : counter(1, 1, CV_32S) { }
                 // The destructor calls release().
 990             ~CannyBuf()
 991             {
 992                 release();
 993             }
                 // Sizes counter as above, then calls create(image_size, apperture_size).
                 // explicit, so a Size does not convert implicitly to a CannyBuf.
 994             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S)
 995             {
 996                 create(image_size, apperture_size);
 997             }
                 // Constructs from existing derivative matrices; defined out of line.
 998             CannyBuf(const oclMat &dx_, const oclMat &dy_);
 999
                 // create / release are defined in the implementation file; apperture_size defaults to 3.
1000             void create(const Size &image_size, int apperture_size = 3);
1001             void release();
1002             oclMat dx, dy;
1003             oclMat dx_buf, dy_buf;
1004             oclMat edgeBuf;
1005             oclMat trackBuf1, trackBuf2;
                 // constructed as a 1x1 CV_32S matrix by both inline constructors
1006             oclMat counter;
1007             Ptr<FilterEngine_GPU> filterDX, filterDY;
1008         };
1009
1010         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
1011         //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
1012         //! Param dft_size is the size of DFT transform.
1013         //!
1014         //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
1015         // supports src types CV_32FC1 and CV_32FC2
1016         // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
1017         // dft_size is the size of the original input, which is used for transformation from complex to real.
1018         // dft_size must be a product of powers of 2, 3 and 5
1019         // real to complex dft requires at least v1.8 clAmdFft
1020         // real to complex dft output is not the same as the CPU version
1021         // real to complex and complex to real do not support DFT_ROWS
             // defaults: dft_size = Size(), flags = 0
1022         CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
1023
1024         //! implements generalized matrix product algorithm GEMM from BLAS
1025         // The functionality requires clAmdBlas library
1026         // supports only type CV_32FC1
1027         // flag GEMM_3_T is not supported
             // flags defaults to 0
1028         CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
1029                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
1030
1031         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
1032         struct CV_EXPORTS HOGDescriptor
1033         {
                 // Named constants used as constructor defaults and as the descr_format argument of getDescriptors.
1034             enum { DEFAULT_WIN_SIGMA = -1 };
1035             enum { DEFAULT_NLEVELS = 64 };
1036             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
                 // defaults: 64x128 window, 16x16 block, 8x8 block stride, 8x8 cell, 9 bins,
                 // win_sigma = DEFAULT_WIN_SIGMA, threshold_L2hys = 0.2, gamma_correction = true,
                 // nlevels = DEFAULT_NLEVELS
1037             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
1038                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
1039                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
1040                           double threshold_L2hys = 0.2, bool gamma_correction = true,
1041                           int nlevels = DEFAULT_NLEVELS);
1042
1043             size_t getDescriptorSize() const;
1044             size_t getBlockHistogramSize() const;
                 // Installs the SVM coefficients used for detection.
                 // NOTE(review): presumably stored in the protected detector / free_coef members - confirm.
1045             void setSVMDetector(const vector<float> &detector);
                 // Static accessors returning built-in people-detector coefficient vectors.
1046             static vector<float> getDefaultPeopleDetector();
1047             static vector<float> getPeopleDetector48x96();
1048             static vector<float> getPeopleDetector64x128();
                 // Single-scale detection; results are Points written to found_locations.
                 // defaults: hit_threshold = 0, win_stride = Size(), padding = Size()
1049             void detect(const oclMat &img, vector<Point> &found_locations,
1050                         double hit_threshold = 0, Size win_stride = Size(),
1051                         Size padding = Size());
                 // Multi-scale detection; results are Rects written to found_locations.
                 // additional defaults: scale0 = 1.05, group_threshold = 2
1052             void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
1053                                   double hit_threshold = 0, Size win_stride = Size(),
1054                                   Size padding = Size(), double scale0 = 1.05,
1055                                   int group_threshold = 2);
                 // Writes descriptors for img into descriptors; descr_format defaults to DESCR_FORMAT_COL_BY_COL.
1056             void getDescriptors(const oclMat &img, Size win_stride,
1057                                 oclMat &descriptors,
1058                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
                 // Public configuration; the names match the constructor parameters.
1059             Size win_size;
1060             Size block_size;
1061             Size block_stride;
1062             Size cell_size;
1063
1064             int nbins;
1065             double win_sigma;
1066             double threshold_L2hys;
1067             bool gamma_correction;
1068             int nlevels;
1069
1070         protected:
1071             // initialize buffers; only need to do once in case of multiscale detection
1072             void init_buffer(const oclMat &img, Size win_stride);
1073             void computeBlockHistograms(const oclMat &img);
1074             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
1075             double getWinSigma() const;
1076             bool checkDetectorSize() const;
1077
                 // Two overloads: scalar (int) and 2D (Size) versions of the same helper.
1078             static int numPartsWithin(int size, int part_size, int stride);
1079             static Size numPartsWithin(Size size, Size part_size, Size stride);
1080
1081             // Coefficients of the separating plane
1082             float free_coef;
1083             oclMat detector;
1084             // Results of the last classification step
1085             oclMat labels;
1086             Mat labels_host;
1087             // Results of the last histogram evaluation step
1088             oclMat block_hists;
1089             // Gradients computation results
1090             oclMat grad, qangle;
1091             // scaled image
1092             oclMat image_scale;
1093             // effective size of the input image (might be different from the original size after scaling)
1094             Size effect_size;
1095         };
1096
1097
1098         ////////////////////////feature2d_ocl/////////////////
1099         /****************************************************************************************\
1100         *                                      Distance                                          *
1101         \****************************************************************************************/
1102         template<typename T>
1103         struct CV_EXPORTS Accumulator
1104         {
                 // Type trait mapping an element type T to the type exposed as ResultType by the
                 // L1 and L2 distance functors in this header. Primary template: T maps to itself.
1105             typedef T Type;
1106         };
             // Specializations: unsigned char, unsigned short, char and short all map to float.
1107         template<> struct Accumulator<unsigned char>
1108         {
1109             typedef float Type;
1110         };
1111         template<> struct Accumulator<unsigned short>
1112         {
1113             typedef float Type;
1114         };
1115         template<> struct Accumulator<char>
1116         {
1117             typedef float Type;
1118         };
1119         template<> struct Accumulator<short>
1120         {
1121             typedef float Type;
1122         };
1123
1124         /*
1125          * Manhattan distance (city block distance) functor
1126          */
1127         template<class T>
1128         struct CV_EXPORTS L1
1129         {
                 // normType tags this functor with NORM_L1.
1130             enum { normType = NORM_L1 };
1131             typedef T ValueType;
                 // The result type comes from the Accumulator trait (float for the small integer types, T otherwise).
1132             typedef typename Accumulator<T>::Type ResultType;
1133
                 // Returns normL1 over the two arrays a and b, each holding size elements of type T.
1134             ResultType operator()( const T *a, const T *b, int size ) const
1135             {
1136                 return normL1<ValueType, ResultType>(a, b, size);
1137             }
1138         };
1139
1140         /*
1141          * Euclidean distance functor
1142          */
1143         template<class T>
1144         struct CV_EXPORTS L2
1145         {
                 // normType tags this functor with NORM_L2.
1146             enum { normType = NORM_L2 };
1147             typedef T ValueType;
                 // The result type comes from the Accumulator trait (float for the small integer types, T otherwise).
1148             typedef typename Accumulator<T>::Type ResultType;
1149
                 // Computes normL2Sqr over a and b (size elements each), takes the square root in
                 // double precision, then converts the result back to ResultType.
1150             ResultType operator()( const T *a, const T *b, int size ) const
1151             {
1152                 return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
1153             }
1154         };
1155
1156         /*
1157          * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
1158          * bit count of A exclusive XOR'ed with B
1159          */
1160         struct CV_EXPORTS Hamming
1161         {
                 // Not a template: operates on unsigned char data and yields an int.
                 // normType tags this functor with NORM_HAMMING.
1162             enum { normType = NORM_HAMMING };
1163             typedef unsigned char ValueType;
1164             typedef int ResultType;
1165
1166             /** this will count the bits in a ^ b
1167              */
                 // Forwards to normHamming over the two byte arrays a and b; size is passed through unchanged.
1168             ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
1169             {
1170                 return normHamming(a, b, size);
1171             }
1172         };
1173
1174         ////////////////////////////////// BruteForceMatcher //////////////////////////////////
1175
             // Base class of the OpenCL brute-force descriptor matchers.
             // It declares three families of queries - match (one best match), knnMatch (k best
             // matches) and radiusMatch (all matches closer than maxDistance) - each in a
             // "single train matrix" flavour and a "train collection" flavour. For every family
             // there is a low-level call that leaves its results in oclMat objects, static
             // *Download / *Convert helpers that turn those results into DMatch vectors, and a
             // high-level call that produces the DMatch vectors directly.
1176         class CV_EXPORTS BruteForceMatcher_OCL_base
1177         {
1178         public:
                 // Distance measures a matcher can be constructed with
1179             enum DistType {L1Dist = 0, L2Dist, HammingDist};
                 // Construct a matcher for the given distance measure (L2Dist when omitted)
1180             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
1181             // Add descriptors to the train descriptor collection
1182             void add(const std::vector<oclMat> &descCollection);
1183             // Get the train descriptor collection
1184             const std::vector<oclMat> &getTrainDescriptors() const;
1185             // Clear the train descriptor collection
1186             void clear();
1187             // Return true if there are no train descriptors in the collection
1188             bool empty() const;
1189
1190             // Return true if the matcher supports mask in match methods
1191             bool isMaskSupported() const;
1192
1193             // Find one best match for each query descriptor.
                 // Results are written to trainIdx and distance; mask is optional (empty by default).
1194             void matchSingle(const oclMat &query, const oclMat &train,
1195                              oclMat &trainIdx, oclMat &distance,
1196                              const oclMat &mask = oclMat());
1197
1198             // Download trainIdx and distance and convert them to a CPU vector of DMatch
1199             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
1200             // Convert trainIdx and distance (already in host Mat objects) to a vector of DMatch
1201             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
1202
1203             // Find one best match for each query descriptor and return the result as DMatch objects
1204             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
1205
1206             // Make gpu collection of trains and masks in suitable format for matchCollection function
1207             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
1208
1209
1210             // Find one best match from train collection for each query descriptor.
                 // NOTE(review): trainCollection / masks are presumably the matrices produced by
                 // makeGpuCollection - confirm against the implementation.
1211             void matchCollection(const oclMat &query, const oclMat &trainCollection,
1212                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1213                                  const oclMat &masks = oclMat());
1214
1215             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
1216             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
1217             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1218             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
1219
1220             // Find one best match from train collection for each query descriptor.
1221             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
1222
1223             // Find k best matches for each query descriptor (in increasing order of distances)
1224             void knnMatchSingle(const oclMat &query, const oclMat &train,
1225                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
1226                                 const oclMat &mask = oclMat());
1227
1228             // Download trainIdx and distance and convert them to a vector of DMatch.
1229             // compactResult is used when mask is not empty. If compactResult is false, the matches
1230             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1231             // matches vector will not contain matches for fully masked out query descriptors.
1232             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
1233                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1234
1235             // Convert trainIdx and distance to a vector of DMatch
1236             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
1237                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1238
1239             // Find k best matches for each query descriptor (in increasing order of distances).
1240             // compactResult is used when mask is not empty. If compactResult is false, the matches
1241             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1242             // matches vector will not contain matches for fully masked out query descriptors.
1243             void knnMatch(const oclMat &query, const oclMat &train,
1244                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
1245                           bool compactResult = false);
1246
1247             // Find k best matches from train collection for each query descriptor (in increasing order of distances).
                 // NOTE(review): unlike knnMatchSingle this takes no k argument; the "2" in the name
                 // suggests k is fixed at 2 - confirm against the implementation.
1248             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
1249                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
1250                                      const oclMat &maskCollection = oclMat());
1251
1252             // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch.
1253             // compactResult is used when mask is not empty. If compactResult is false, the matches
1254             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1255             // matches vector will not contain matches for fully masked out query descriptors.
1256             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
1257                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1258
1259             // Convert trainIdx, imgIdx and distance to a vector of DMatch
1260             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
1261                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1262
1263             // Find k best matches for each query descriptor (in increasing order of distances).
1264             // compactResult is used when mask is not empty. If compactResult is false, the matches
1265             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1266             // matches vector will not contain matches for fully masked out query descriptors.
1267             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
1268                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
1269
1270             // Find best matches for each query descriptor which have distance less than maxDistance.
1271             // nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
1272             // Careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find
1273             // all matches, because it didn't have enough memory.
1274             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
1275             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1276             // Matches are not sorted.
1277             void radiusMatchSingle(const oclMat &query, const oclMat &train,
1278                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1279                                    const oclMat &mask = oclMat());
1280
1281             // Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
1282             // matches will be sorted in increasing order of distances.
1283             // compactResult is used when mask is not empty. If compactResult is false, the matches
1284             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1285             // matches vector will not contain matches for fully masked out query descriptors.
1286             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
1287                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1288             // Convert trainIdx, nMatches and distance to a vector of DMatch.
1289             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
1290                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1291             // Find best matches for each query descriptor which have distance less than maxDistance
1292             // (in increasing order of distances).
1293             void radiusMatch(const oclMat &query, const oclMat &train,
1294                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
1295                              const oclMat &mask = oclMat(), bool compactResult = false);
1296             // Find best matches for each query descriptor which have distance less than maxDistance.
1297             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
1298             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
1299             // Matches are not sorted.
1300             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
1301                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
1302             // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
1303             // matches will be sorted in increasing order of distances.
1304             // compactResult is used when mask is not empty. If compactResult is false, the matches
1305             // vector will have the same size as queryDescriptors rows. If compactResult is true, the
1306             // matches vector will not contain matches for fully masked out query descriptors.
1307             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
1308                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1309             // Convert trainIdx, imgIdx, nMatches and distance to a vector of DMatch.
1310             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
1311                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
1312             // Find best matches from train collection for each query descriptor which have distance less than
1313             // maxDistance (in increasing order of distances).
1314             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
1315                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
                 // Distance measure of this matcher (public, so it can also be changed after
                 // construction). Presumably initialised from the constructor argument - the
                 // constructor body is not part of this header.
1316             DistType distType;
1317         private:
                 // Train descriptors; see add(), getTrainDescriptors(), clear() and empty()
1318             std::vector<oclMat> trainDescCollection;
1319         };
1320
             // Distance-typed front end for BruteForceMatcher_OCL_base.
             // Only the specializations below (L1<T>, L2<T>, Hamming) are defined; the primary
             // template is declared but never defined, so any other Distance type is a
             // compile-time error when instantiated.
1321         template <class Distance>
1322         class CV_EXPORTS BruteForceMatcher_OCL;
1323
             // L1<T> specialization: constructs the base class with L1Dist.
1324         template <typename T>
1325         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
1326         {
1327         public:
1328             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
                 // The distance object is only used for overload selection; its value is ignored.
1329             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
1330         };
1331
             // L2<T> specialization: constructs the base class with L2Dist.
1332         template <typename T>
1333         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
1334         {
1335         public:
1336             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
                 // The distance object is only used for overload selection; its value is ignored.
1337             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
1338         };
1339
             // Hamming specialization: constructs the base class with HammingDist.
1340         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
1341         {
1342         public:
1343             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
                 // The distance object is only used for overload selection; its value is ignored.
1344             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
1345         };
1346
             // Brute-force matcher selected by a norm constant instead of a distance type.
             // NORM_L1 maps to L1Dist and NORM_L2 maps to L2Dist; every other value of norm
             // (not only the Hamming norms) falls through to HammingDist.
1347         class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
1348         {
1349         public:
1350             explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
1351         };
1352
1353         class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
1354         {
1355         public:
1356             explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
1357                 int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
1358
1359             //! return 1 rows matrix with CV_32FC2 type
1360             void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
1361             //! download points of type Point2f to a vector. the vector's content will be erased
1362             void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
1363
1364             int maxCorners;
1365             double qualityLevel;
1366             double minDistance;
1367
1368             int blockSize;
1369             bool useHarrisDetector;
1370             double harrisK;
1371             void releaseMemory()
1372             {
1373                 Dx_.release();
1374                 Dy_.release();
1375                 eig_.release();
1376                 minMaxbuf_.release();
1377                 tmpCorners_.release();
1378             }
1379         private:
1380             oclMat Dx_;
1381             oclMat Dy_;
1382             oclMat eig_;
1383             oclMat eig_minmax_;
1384             oclMat minMaxbuf_;
1385             oclMat tmpCorners_;
1386             oclMat counter_;
1387         };
1388
1389         inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
1390             int blockSize_, bool useHarrisDetector_, double harrisK_)
1391         {
1392             maxCorners = maxCorners_;
1393             qualityLevel = qualityLevel_;
1394             minDistance = minDistance_;
1395             blockSize = blockSize_;
1396             useHarrisDetector = useHarrisDetector_;
1397             harrisK = harrisK_;
1398         }
1399
1400         /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
1401         class CV_EXPORTS PyrLKOpticalFlow
1402         {
1403         public:
1404             PyrLKOpticalFlow()
1405             {
1406                 winSize = Size(21, 21);
1407                 maxLevel = 3;
1408                 iters = 30;
1409                 derivLambda = 0.5;
1410                 useInitialFlow = false;
1411                 minEigThreshold = 1e-4f;
1412                 getMinEigenVals = false;
1413                 isDeviceArch11_ = false;
1414             }
1415
1416             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
1417                         oclMat &status, oclMat *err = 0);
1418             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
1419             Size winSize;
1420             int maxLevel;
1421             int iters;
1422             double derivLambda;
1423             bool useInitialFlow;
1424             float minEigThreshold;
1425             bool getMinEigenVals;
1426             void releaseMemory()
1427             {
1428                 dx_calcBuf_.release();
1429                 dy_calcBuf_.release();
1430
1431                 prevPyr_.clear();
1432                 nextPyr_.clear();
1433
1434                 dx_buf_.release();
1435                 dy_buf_.release();
1436             }
1437         private:
1438             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
1439             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
1440
1441             oclMat dx_calcBuf_;
1442             oclMat dy_calcBuf_;
1443
1444             vector<oclMat> prevPyr_;
1445             vector<oclMat> nextPyr_;
1446
1447             oclMat dx_buf_;
1448             oclMat dy_buf_;
1449             oclMat uPyr_[2];
1450             oclMat vPyr_[2];
1451             bool isDeviceArch11_;
1452         };
1453
             // Dense optical flow functor (Farneback's method, going by the class name).
             // The public fields are the tuning parameters; their defaults are set by the
             // out-of-line constructor and are therefore not visible in this header.
1454         class CV_EXPORTS FarnebackOpticalFlow
1455         {
1456         public:
1457             FarnebackOpticalFlow();
1458
                 // Tuning parameters.
                 // NOTE(review): presumably these mirror the arguments of
                 // cv::calcOpticalFlowFarneback (levels, pyr_scale, winsize, iterations,
                 // poly_n, poly_sigma, flags) - confirm against the implementation.
1459             int numLevels;
1460             double pyrScale;
1461             bool fastPyramids;
1462             int winSize;
1463             int numIters;
1464             int polyN;
1465             double polySigma;
1466             int flags;
1467
                 // Computes the flow between frame0 and frame1; the two flow components are
                 // written to flowx and flowy.
1468             void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
1469
                 // Releases internal buffers (defined out of line).
1470             void releaseMemory();
1471
1472         private:
                 // Internal helpers; their exact contracts live in the implementation file.
1473             void prepareGaussian(
1474                 int n, double sigma, float *g, float *xg, float *xxg,
1475                 double &ig11, double &ig03, double &ig33, double &ig55);
1476
1477             void setPolynomialExpansionConsts(int n, double sigma);
1478
1479             void updateFlow_boxFilter(
1480                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
1481                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1482
1483             void updateFlow_gaussianBlur(
1484                 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
1485                 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
1486
                 // Scratch buffers kept between calls.
1487             oclMat frames_[2];
1488             oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
1489             std::vector<oclMat> pyramid0_, pyramid1_;
1490         };
1491
1492         //////////////// build warping maps ////////////////////
             // Each function below writes its result into the two oclMat output parameters
             // (map_x / map_y or xmap / ymap).
             // NOTE(review): the maps are presumably meant to be passed to remap - confirm.
1493         //! builds plane warping maps
1494         CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
1495         //! builds cylindrical warping maps
1496         CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1497         //! builds spherical warping maps
1498         CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
1499         //! builds affine warping maps for the transform M (inverse selects the inverse mapping) and output size dsize
1500         CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1501
1502         //! builds perspective warping maps for the transform M (inverse selects the inverse mapping) and output size dsize
1503         CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
1504
1505         ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
1506         //! Interpolate frames (images) using the provided optical flow (displacement field).
1507         //! frame0   - frame 0 (32-bit floating point image, single channel)
1508         //! frame1   - frame 1 (the same type and size)
1509         //! fu       - forward horizontal displacement
1510         //! fv       - forward vertical displacement
1511         //! bu       - backward horizontal displacement
1512         //! bv       - backward vertical displacement
1513         //! pos      - new frame position
1514         //! newFrame - new frame (output)
1515         //! buf      - temporary buffer, will have width x 6*height size and CV_32FC1 type, and will contain 6 oclMat:
1516         //!            occlusion masks            0, occlusion masks            1,
1517         //!            interpolated forward flow  0, interpolated forward flow  1,
1518         //!            interpolated backward flow 0, interpolated backward flow 1
1519         //!
1520         CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
1521                                           const oclMat &fu, const oclMat &fv,
1522                                           const oclMat &bu, const oclMat &bv,
1523                                           float pos, oclMat &newFrame, oclMat &buf);
1524
1525         //! computes moments of the rasterized shape or a vector of points
1526         //! contour should be a vector of points standing for the contour
1527         CV_EXPORTS Moments ocl_moments(InputArray contour);
1528         //! src should be a general image uploaded to the GPU.
1529         //! the supported oclMat types are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1
1530         //! to use CV_64FC1, the GPU must support CV_64FC1
             //! NOTE(review): binary presumably makes every non-zero pixel count as 1, as in cv::moments - confirm
1531         CV_EXPORTS Moments ocl_moments(oclMat& src, bool binary);
1532
             // Block-matching stereo correspondence functor for OpenCL devices.
             // The public fields are the tuning parameters; minSSD / leBuf / riBuf are internal
             // scratch buffers.
1533         class CV_EXPORTS StereoBM_OCL
1534         {
1535         public:
                 // Values accepted for the preset field / constructor argument
1536             enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
1537
                 // Defaults used by the full constructor for ndisparities and winSize
1538             enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
1539
1540             //! the default constructor
1541             StereoBM_OCL();
1542             //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
1543             StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
1544
1545             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
1546             //! Output disparity has CV_8U type.
1547             void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
1548
1549             //! A heuristic that tries to estimate
1550             // whether the current GPU will be faster than the CPU for this algorithm.
1551             // It queries the currently active device.
1552             static bool checkIfGpuCallReasonable();
1553
1554             int preset;
1555             int ndisp;
1556             int winSize;
1557
1558             // If avergeTexThreshold == 0 => post-processing is disabled
1559             // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where for the left image
1560             // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
1561             // i.e. the input left image is low textured.
1562             float avergeTexThreshold;
1563         private:
1564             oclMat minSSD, leBuf, riBuf;
1565         };
1566
             // Belief-propagation stereo correspondence functor for OpenCL devices.
             // The public fields are the tuning parameters and correspond one-to-one to the
             // arguments of the second constructor; the private members are scratch buffers.
1567         class CV_EXPORTS StereoBeliefPropagation
1568         {
1569         public:
                 // Default values of the first constructor's arguments
1570             enum { DEFAULT_NDISP  = 64 };
1571             enum { DEFAULT_ITERS  = 5  };
1572             enum { DEFAULT_LEVELS = 5  };
                 // Writes suggested ndisp / iters / levels for an image of the given size into
                 // the three reference arguments.
1573             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
                 // Short constructor; msg_type defaults to CV_16S here. The cost-term fields are
                 // set by the out-of-line definition (values not visible in this header).
1574             explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
1575                                              int iters  = DEFAULT_ITERS,
1576                                              int levels = DEFAULT_LEVELS,
1577                                              int msg_type = CV_16S);
                 // Full constructor; note that msg_type defaults to CV_32F here, unlike above.
1578             StereoBeliefPropagation(int ndisp, int iters, int levels,
1579                                     float max_data_term, float data_weight,
1580                                     float max_disc_term, float disc_single_jump,
1581                                     int msg_type = CV_32F);
                 // Computes disparity for a left/right image pair
1582             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
                 // Computes disparity from a single data matrix.
                 // NOTE(review): presumably a user-supplied data cost - confirm the expected layout.
1583             void operator()(const oclMat &data, oclMat &disparity);
1584             int ndisp;
1585             int iters;
1586             int levels;
1587             float max_data_term;
1588             float data_weight;
1589             float max_disc_term;
1590             float disc_single_jump;
1591             int msg_type;
1592         private:
                 // Scratch buffers kept between calls
1593             oclMat u, d, l, r, u2, d2, l2, r2;
1594             std::vector<oclMat> datas;
1595             oclMat out;
1596         };
1597
             // Constant-space belief-propagation stereo correspondence functor for OpenCL devices.
             // The public fields are the tuning parameters; the private members are scratch
             // buffers.
1598         class CV_EXPORTS StereoConstantSpaceBP
1599         {
1600         public:
                 // Default values of the first constructor's arguments
1601             enum { DEFAULT_NDISP    = 128 };
1602             enum { DEFAULT_ITERS    = 8   };
1603             enum { DEFAULT_LEVELS   = 4   };
1604             enum { DEFAULT_NR_PLANE = 4   };
                 // Writes suggested ndisp / iters / levels / nr_plane for an image of the given
                 // size into the four reference arguments.
1605             static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
                 // Short constructor; the cost-term fields are set by the out-of-line definition
                 // (values not visible in this header).
1606             explicit StereoConstantSpaceBP(
1607                 int ndisp    = DEFAULT_NDISP,
1608                 int iters    = DEFAULT_ITERS,
1609                 int levels   = DEFAULT_LEVELS,
1610                 int nr_plane = DEFAULT_NR_PLANE,
1611                 int msg_type = CV_32F);
                 // Full constructor taking every tuning parameter explicitly
1612             StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
1613                 float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
1614                 int min_disp_th = 0,
1615                 int msg_type = CV_32F);
                 // Computes disparity for a left/right image pair
1616             void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
1617             int ndisp;
1618             int iters;
1619             int levels;
1620             int nr_plane;
1621             float max_data_term;
1622             float data_weight;
1623             float max_disc_term;
1624             float disc_single_jump;
1625             int min_disp_th;
1626             int msg_type;
                 // Not settable through either constructor's argument list; its initial value
                 // comes from the out-of-line constructor definitions.
1627             bool use_local_init_data_cost;
1628         private:
                 // Scratch buffers kept between calls
1629             oclMat u[2], d[2], l[2], r[2];
1630             oclMat disp_selected_pyr[2];
1631             oclMat data_cost;
1632             oclMat data_cost_selected;
1633             oclMat temp;
1634             oclMat out;
1635         };
1636
1637         // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
1638         //
1639         // see reference:
1640         //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
1641         //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
             //
             // The public fields are the tuning parameters (their defaults are set by the
             // out-of-line constructor); the private members are per-scale image/flow pyramids
             // and scratch buffers.
1642         class CV_EXPORTS OpticalFlowDual_TVL1_OCL
1643         {
1644         public:
1645             OpticalFlowDual_TVL1_OCL();
1646
                 // Computes the flow between I0 and I1; the two flow components are written to
                 // flowx and flowy.
1647             void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
1648
                 // NOTE(review): presumably releases the internal buffers below - confirm against
                 // the implementation.
1649             void collectGarbage();
1650
1651             /**
1652             * Time step of the numerical scheme.
1653             */
1654             double tau;
1655
1656             /**
1657             * Weight parameter for the data term, attachment parameter.
1658             * This is the most relevant parameter, which determines the smoothness of the output.
1659             * The smaller this parameter is, the smoother the solutions we obtain.
1660             * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
1661             */
1662             double lambda;
1663
1664             /**
1665             * Weight parameter for (u - v)^2, tightness parameter.
1666             * It serves as a link between the attachment and the regularization terms.
1667             * In theory, it should have a small value in order to maintain both parts in correspondence.
1668             * The method is stable for a large range of values of this parameter.
1669             */
1670             double theta;
1671
1672             /**
1673             * Number of scales used to create the pyramid of images.
1674             */
1675             int nscales;
1676
1677             /**
1678             * Number of warpings per scale.
1679             * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
1680             * This is a parameter that assures the stability of the method.
1681             * It also affects the running time, so it is a compromise between speed and accuracy.
1682             */
1683             int warps;
1684
1685             /**
1686             * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
1687             * A small value will yield more accurate solutions at the expense of a slower convergence.
1688             */
1689             double epsilon;
1690
1691             /**
1692             * Stopping criterion iterations number used in the numerical scheme.
1693             */
1694             int iterations;
1695
                 // NOTE(review): presumably, when true, the incoming flowx / flowy are used as
                 // the starting estimate instead of zero - confirm against the implementation.
1696             bool useInitialFlow;
1697
1698         private:
                 // Runs the solver on one pyramid level; u1 / u2 are updated in place.
1699             void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
1700
                 // Per-scale images and flow components
1701             std::vector<oclMat> I0s;
1702             std::vector<oclMat> I1s;
1703             std::vector<oclMat> u1s;
1704             std::vector<oclMat> u2s;
1705
                 // Scratch buffers kept between calls
1706             oclMat I1x_buf;
1707             oclMat I1y_buf;
1708
1709             oclMat I1w_buf;
1710             oclMat I1wx_buf;
1711             oclMat I1wy_buf;
1712
1713             oclMat grad_buf;
1714             oclMat rho_c_buf;
1715
1716             oclMat p11_buf;
1717             oclMat p12_buf;
1718             oclMat p21_buf;
1719             oclMat p22_buf;
1720
1721             oclMat diff_buf;
1722             oclMat norm_buf;
1723         };
1724         // currently supported sorting methods (values for the method argument of sortByKey)
1725         enum
1726         {
1727             SORT_BITONIC,   // only supports power-of-2 buffer sizes
1728             SORT_SELECTION, // cannot sort duplicate keys
1729             SORT_MERGE,
1730             SORT_RADIX      // only supports signed int/float keys (CV_32S/CV_32F)
1731         };
1732         //! Sorts all the elements in values according to their corresponding keys.
1733         //
1734         //  The element unit in the values to be sorted is determined from the data type,
1735         //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
1736         //  matrix dimension.
1737         //  Both keys and values will be sorted in place.
1738         //  keys needs to be a single-channel oclMat.
             //  method is one of the SORT_* constants; each has its own restrictions (see the enum).
             //  isGreaterThan defaults to false; the example below passes false and ends up
             //  with keys in ascending order.
1739         //
1740         //  Example:
1741         //  input -
1742         //    keys   = {2,    3,   1}   (CV_8UC1)
1743         //    values = {10,5, 4,3, 6,2} (CV_8UC2)
1744         //  sortByKey(keys, values, SORT_SELECTION, false);
1745         //  output -
1746         //    keys   = {1,    2,   3}   (CV_8UC1)
1747         //    values = {6,2, 10,5, 4,3} (CV_8UC2)
1748         CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
1749         /*!Base class for MOG and MOG2!*/
             // Abstract interface: getBackgroundImage() is pure virtual, so only derived classes
             // can be instantiated.
1750         class CV_EXPORTS BackgroundSubtractor
1751         {
1752         public:
1753             //! the virtual destructor
1754             virtual ~BackgroundSubtractor();
1755             //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
                 //! Unlike the overrides in MOG / MOG2, learningRate has no default value here.
1756             virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
1757
1758             //! computes a background image
1759             virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
1760         };
1761                 /*!
1762         Gaussian Mixture-based Background/Foreground Segmentation Algorithm
1763
1764         The class implements the following algorithm:
1765         "An improved adaptive background mixture model for real-time tracking with shadow detection"
1766         P. KadewTraKuPong and R. Bowden,
1767         Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001."
1768         http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
1769         */
1770         class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
1771         {
1772         public:
1773             //! the default constructor; nmixtures defaults to -1
                 //! (NOTE(review): presumably "use the built-in default number of mixtures" - confirm)
1774             MOG(int nmixtures = -1);
1775
1776             //! re-initialization method
1777             void initialize(Size frameSize, int frameType);
1778
1779             //! the update operator; learningRate defaults to 0.f
1780             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
1781
1782             //! computes a background image which is the mean of all background gaussians
1783             void getBackgroundImage(oclMat& backgroundImage) const;
1784
1785             //! releases all inner buffers
1786             void release();
1787
                 // Tuning parameters; their defaults are set by the out-of-line constructor.
1788             int history;
1789             float varThreshold;
1790             float backgroundRatio;
1791             float noiseSigma;
1792
1793         private:
1794             int nmixtures_;
1795
                 // Frame geometry/type and frame counter of the current model
1796             Size frameSize_;
1797             int frameType_;
1798             int nframes_;
1799
                 // Per-pixel mixture model buffers
1800             oclMat weight_;
1801             oclMat sortKey_;
1802             oclMat mean_;
1803             oclMat var_;
1804         };
1805
1806         /*!
1807         The class implements the following algorithm:
1808         "Improved adaptive Gaussian mixture model for background subtraction"
1809         Z.Zivkovic
1810         International Conference Pattern Recognition, UK, August, 2004.
1811         http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
1812         */
1813         class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
1814         {
1815         public:
1816             //! the default constructor; nmixtures defaults to -1
1817             MOG2(int nmixtures = -1);
1818
1819             //! re-initialization method; takes the frame size and the frame type
1820             void initialize(Size frameSize, int frameType);
1821
1822             //! the update operator: takes the next frame and writes the foreground mask to fgmask (learningRate defaults to -1.0f)
1823             void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
1824
1825             //! computes a background image which is the mean of all background gaussians
1826             void getBackgroundImage(oclMat& backgroundImage) const;
1827
1828             //! releases all inner buffers
1829             void release();
1830
1831             // parameters (in this section each description FOLLOWS the member it documents)
1832             // you should call initialize after changing any parameter
1833
1834             int history;
1835
1836             //! NOTE: this remark is about the number of mixture components (presumably the constructor's nmixtures), not about varThreshold:
1837             //! that number is the maximum allowed; the actual number is determined dynamically per pixel
1838             float varThreshold;
1839             // threshold on the squared Mahalanobis distance to decide if it is well described
1840             // by the background model or not. Related to Cthr from the paper.
1841             // This does not influence the update of the background. A typical value could be 4 sigma
1842             // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
1843
1844             /////////////////////////
1845             // less important parameters - things you might change but be careful
1846             ////////////////////////
1847
1848             float backgroundRatio;
1849             // corresponds to fTB=1-cf from the paper
1850             // TB - threshold when the component becomes significant enough to be included into
1851             // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9
1852             // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
1853             // it is considered foreground
1854             // float noiseSigma;
1855             float varThresholdGen;
1856
1857             //corresponds to Tg - threshold on the squared Mahalan. dist. to decide
1858             //when a sample is close to the existing components. If it is not close
1859             //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
1860             //Smaller Tg leads to more generated components and higher Tg might
1861             //lead to a small number of components but they can grow too large
1862             float fVarInit;
1863             float fVarMin;
1864             float fVarMax;
1865
1866             //initial variance for the newly generated components.
1867             //It will influence the speed of adaptation. A good guess should be made.
1868             //A simple way is to estimate the typical standard deviation from the images.
1869             //I used here 10 as a reasonable value
1870             // min and max can be used to further control the variance
1871             float fCT; //CT - complexity reduction prior
1872             //this is related to the number of samples needed to accept that a component
1873             //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
1874             //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
1875
1876             //shadow detection parameters
1877             bool bShadowDetection; //default 1 - do shadow detection
1878             unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
1879             float fTau;
1880             // Tau - shadow threshold. The shadow is detected if the pixel is a darker
1881             //version of the background. Tau is a threshold on how much darker the shadow can be.
1882             //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
1883             //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
1884
1885         private:
1886             int nmixtures_; // NOTE(review): presumably the mixture count taken from the constructor argument - confirm
1887
1888             Size frameSize_; // NOTE(review): frame size and type, presumably as last passed to initialize() - confirm
1889             int frameType_;
1890             int nframes_; // NOTE(review): presumably the number of frames processed so far - confirm
1891
1892             oclMat weight_; // model state held in oclMat buffers (NOTE(review): presumably what release() frees - confirm)
1893             oclMat variance_;
1894             oclMat mean_;
1895
1896             oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
1897         };
1898
1899         /*!***************Kalman Filter*************!*/
1900         class CV_EXPORTS KalmanFilter
1901         {
1902         public:
1903             KalmanFilter(); //!< the default constructor
1904             //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
1905             KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1906             //! re-initializes Kalman filter. The previous content is destroyed.
1907             void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
1908             //! prediction and correction steps; each returns a const reference to an oclMat (NOTE(review): presumably statePre and statePost respectively - confirm)
1909             const oclMat& predict(const oclMat& control=oclMat());
1910             const oclMat& correct(const oclMat& measurement);
1911
1912             oclMat statePre;           //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
1913             oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
1914             oclMat transitionMatrix;   //!< state transition matrix (A)
1915             oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
1916             oclMat measurementMatrix;  //!< measurement matrix (H)
1917             oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
1918             oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
1919             oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
1920             oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
1921             oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
1922         private:
1923             oclMat temp1; // temporary matrices (NOTE(review): presumably scratch space for predict()/correct() - confirm)
1924             oclMat temp2;
1925             oclMat temp3;
1926             oclMat temp4;
1927             oclMat temp5;
1928         };
1929
1930         /*!***************K Nearest Neighbour*************!*/
1931         class CV_EXPORTS KNearestNeighbour: public CvKNearest
1932         {
1933         public:
1934             KNearestNeighbour();
1935             ~KNearestNeighbour();
1936             //! training entry point taking Mat (not oclMat) inputs. NOTE(review): the sampleIdx default binds a non-const reference to the result of setTo() on a temporary Mat - confirm this is intended
1937             bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
1938                 bool isRegression = false, int max_k = 32, bool updateBase = false);
1939
1940             void clear();
1941             //! nearest-neighbour query on oclMat samples for k neighbours; NOTE(review): results are presumably written to `lables` (sic, i.e. labels) - confirm
1942             void find_nearest(const oclMat& samples, int k, oclMat& lables);
1943
1944         private:
1945             oclMat samples_ocl; // NOTE(review): presumably an oclMat copy of the training samples - confirm
1946         };
1947
1948         /*!***************  SVM  *************!*/
1949         class CV_EXPORTS CvSVM_OCL : public CvSVM
1950         {
1951         public:
1952             CvSVM_OCL(); //!< the default constructor
1953             //! constructor taking training data, responses, optional varIdx/sampleIdx and SVM parameters (NOTE(review): presumably trains the model on construction - confirm)
1954             CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses,
1955                       const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
1956                       CvSVMParams params=CvSVMParams());
1957             CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const; //!< overload addressed by row_index within src (NOTE(review): presumably one sample row - confirm)
1958             CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const; //!< array overload: takes samples and an output array for results
1959             CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const; //!< single-sample overload
1960             float predict( const CvMat* samples, CV_OUT CvMat* results ) const; //!< CvMat pointer overload; results is marked CV_OUT
1961
1962         protected:
1963             float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const; //!< protected overload that additionally takes row_len
1964             void create_kernel(); // NOTE(review): create_kernel/create_solver presumably replace the CvSVM counterparts with OCL versions - confirm
1965             void create_solver();
1966         };
1967
1968         /*!***************  END  *************!*/
1969     }
1970 }
1971 #if defined _MSC_VER && _MSC_VER >= 1200
1972 #  pragma warning( push)
1973 #  pragma warning( disable: 4267)
1974 #endif
1975 #include "opencv2/ocl/matrix_operations.hpp"
1976 #if defined _MSC_VER && _MSC_VER >= 1200
1977 #  pragma warning( pop)
1978 #endif
1979
1980 #endif /* __OPENCV_OCL_HPP__ */