From: Roman Donchenko Date: Tue, 19 Nov 2013 12:21:09 +0000 (+0400) Subject: Merge remote-tracking branch 'origin/2.4' into merge-2.4 X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~853^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=aacf188e837324b5e35dac9d2accaa332a83d346;p=profile%2Fivi%2Fopencv.git Merge remote-tracking branch 'origin/2.4' into merge-2.4 Conflicts: modules/ocl/include/opencv2/ocl/ocl.hpp modules/ocl/src/arithm.cpp modules/ocl/src/build_warps.cpp modules/ocl/src/color.cpp modules/ocl/src/haar.cpp modules/ocl/src/imgproc.cpp modules/ocl/src/split_merge.cpp modules/ocl/test/test_color.cpp samples/cpp/3calibration.cpp samples/cpp/OpenEXRimages_HDR_Retina_toneMapping.cpp samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp samples/cpp/Qt_sample/main.cpp samples/cpp/camshiftdemo.cpp samples/cpp/descriptor_extractor_matcher.cpp samples/cpp/distrans.cpp samples/cpp/generic_descriptor_match.cpp samples/cpp/grabcut.cpp samples/cpp/morphology2.cpp samples/cpp/segment_objects.cpp samples/cpp/stereo_calib.cpp samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp samples/cpp/tutorial_code/introduction/display_image/display_image.cpp samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp samples/cpp/tutorial_code/objectDetection/objectDetection.cpp samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp samples/cpp/video_dmtx.cpp --- aacf188e837324b5e35dac9d2accaa332a83d346 diff --cc modules/ocl/include/opencv2/ocl.hpp index 925d612,0000000..19af00b mode 100644,000000..100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@@ -1,2077 -1,0 +1,2077 @@@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_OCL_HPP__ +#define __OPENCV_OCL_HPP__ + +#include +#include + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/objdetect.hpp" +#include "opencv2/ml.hpp" + +namespace cv +{ + namespace ocl + { + enum DeviceType + { + CVCL_DEVICE_TYPE_DEFAULT = (1 << 0), + CVCL_DEVICE_TYPE_CPU = (1 << 1), + CVCL_DEVICE_TYPE_GPU = (1 << 2), + CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3), + //CVCL_DEVICE_TYPE_CUSTOM = (1 << 4) + CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF + }; + + enum DevMemRW + { + DEVICE_MEM_R_W = 0, + DEVICE_MEM_R_ONLY, + DEVICE_MEM_W_ONLY + }; + + enum DevMemType + { + DEVICE_MEM_DEFAULT = 0, + DEVICE_MEM_AHP, //alloc host pointer + DEVICE_MEM_UHP, //use host pointer + DEVICE_MEM_CHP, //copy host pointer + DEVICE_MEM_PM //persistent memory + }; + + // these classes contain OpenCL runtime information + + struct PlatformInfo; + + struct DeviceInfo + { + public: + int _id; // reserved, don't use it + + DeviceType deviceType; + std::string deviceProfile; + std::string deviceVersion; + std::string deviceName; + std::string deviceVendor; + int deviceVendorId; + std::string deviceDriverVersion; + std::string deviceExtensions; + + size_t maxWorkGroupSize; + std::vector maxWorkItemSizes; + int maxComputeUnits; + size_t localMemorySize; + size_t maxMemAllocSize; + + int deviceVersionMajor; + int deviceVersionMinor; + + bool haveDoubleSupport; + bool isUnifiedMemory; // 1 means integrated GPU, 
otherwise this value is 0 + bool isIntelDevice; + + std::string compilationExtraOptions; + + const PlatformInfo* platform; + + DeviceInfo(); + }; + + struct PlatformInfo + { + int _id; // reserved, don't use it + + std::string platformProfile; + std::string platformVersion; + std::string platformName; + std::string platformVendor; + std::string platformExtensons; + + int platformVersionMajor; + int platformVersionMinor; + + std::vector devices; + + PlatformInfo(); + }; + + //////////////////////////////// Initialization & Info //////////////////////// + typedef std::vector PlatformsInfo; + + CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms); + + typedef std::vector DevicesInfo; + + CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, + const PlatformInfo* platform = NULL); + + // set device you want to use + CV_EXPORTS void setDevice(const DeviceInfo* info); + + enum FEATURE_TYPE + { + FEATURE_CL_DOUBLE = 1, + FEATURE_CL_UNIFIED_MEM, + FEATURE_CL_VER_1_2, + FEATURE_CL_INTEL_DEVICE + }; + + // Represents OpenCL context, interface + class CV_EXPORTS Context + { + protected: + Context() { } + ~Context() { } + public: + static Context *getContext(); + + bool supportsFeature(FEATURE_TYPE featureType) const; + const DeviceInfo& getDeviceInfo() const; + + const void* getOpenCLContextPtr() const; + const void* getOpenCLCommandQueuePtr() const; + const void* getOpenCLDeviceIDPtr() const; + }; + + inline const void *getClContextPtr() + { + return Context::getContext()->getOpenCLContextPtr(); + } + + inline const void *getClCommandQueuePtr() + { + return Context::getContext()->getOpenCLCommandQueuePtr(); + } + + CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType); + + CV_EXPORTS void finish(); + + enum BINARY_CACHE_MODE + { + CACHE_NONE = 0, // do not cache OpenCL binary + CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode + CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in 
release mode + CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // cache opencl binary + }; + //! Enable or disable OpenCL program binary caching onto local disk + // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the + // compiled OpenCL program to be cached to the path automatically as "path/*.clb" + // binary file, which will be reused when the OpenCV executable is started again. + // + // This feature is enabled by default. + CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); + + //! set where binary cache to be saved to + CV_EXPORTS void setBinaryPath(const char *path); + + struct ProgramSource + { + const char* name; + const char* programStr; + const char* programHash; + + // Cache in memory by name (should be unique). Caching on disk disabled. + inline ProgramSource(const char* _name, const char* _programStr) + : name(_name), programStr(_programStr), programHash(NULL) + { + } + + // Cache in memory by name (should be unique). Caching on disk uses programHash mark. + inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash) + : name(_name), programStr(_programStr), programHash(_programHash) + { + } + }; + + //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. + //! Deprecated, will be replaced + CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramSource& source, String kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options); + + class CV_EXPORTS oclMatExpr; + //////////////////////////////// oclMat //////////////////////////////// + class CV_EXPORTS oclMat + { + public: + //! default constructor + oclMat(); + //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) + oclMat(int rows, int cols, int type); + oclMat(Size size, int type); + //! 
constructs oclMatrix and fills it with the specified value _s. + oclMat(int rows, int cols, int type, const Scalar &s); + oclMat(Size size, int type, const Scalar &s); + //! copy constructor + oclMat(const oclMat &m); + + //! constructor for oclMatrix headers pointing to user-allocated data + oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP); + oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP); + + //! creates a matrix header for a part of the bigger matrix + oclMat(const oclMat &m, const Range &rowRange, const Range &colRange); + oclMat(const oclMat &m, const Rect &roi); + + //! builds oclMat from Mat. Performs blocking upload to device. + explicit oclMat (const Mat &m); + + //! destructor - calls release() + ~oclMat(); + + //! assignment operators + oclMat &operator = (const oclMat &m); + //! assignment operator. Performs blocking upload to device. + oclMat &operator = (const Mat &m); + oclMat &operator = (const oclMatExpr& expr); + + //! performs blocking upload of data to oclMat. + void upload(const cv::Mat &m); + + + //! downloads data from device to host memory. Blocking calls. + operator Mat() const; + void download(cv::Mat &m) const; + + //! convert to _InputArray + operator _InputArray(); + + //! convert to _OutputArray + operator _OutputArray(); + + //! returns a new oclMatrix header for the specified row + oclMat row(int y) const; + //! returns a new oclMatrix header for the specified column + oclMat col(int x) const; + //! ... for the specified row span + oclMat rowRange(int startrow, int endrow) const; + oclMat rowRange(const Range &r) const; + //! ... for the specified column span + oclMat colRange(int startcol, int endcol) const; + oclMat colRange(const Range &r) const; + + //! returns deep copy of the oclMatrix, i.e. the data is copied + oclMat clone() const; + + //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements. + // It calls m.create(this->size(), this->type()). 
+ // It supports any data type + void copyTo( oclMat &m, const oclMat &mask = oclMat()) const; + + //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale. + void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const; + + void assignTo( oclMat &m, int type = -1 ) const; + + //! sets every oclMatrix element to s + oclMat& operator = (const Scalar &s); + //! sets some of the oclMatrix elements to s, according to the mask + oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat()); + //! creates alternative oclMatrix header for the same data, with different + // number of channels and/or different number of rows. see cvReshape. + oclMat reshape(int cn, int rows = 0) const; + + //! allocates new oclMatrix data unless the oclMatrix already has specified size and type. + // previous data is unreferenced if needed. + void create(int rows, int cols, int type); + void create(Size size, int type); + + //! allocates new oclMatrix with specified device memory type. + void createEx(int rows, int cols, int type, + DevMemRW rw_type, DevMemType mem_type); + void createEx(Size size, int type, DevMemRW rw_type, + DevMemType mem_type); + + //! decreases reference counter; + // deallocate the data when reference counter reaches 0. + void release(); + + //! swaps with other smart pointer + void swap(oclMat &mat); + + //! locates oclMatrix header within a parent oclMatrix. See below + void locateROI( Size &wholeSize, Point &ofs ) const; + //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix. + oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + //! extracts a rectangular sub-oclMatrix + // (this is a generalized form of row, rowRange etc.) 
+ oclMat operator()( Range rowRange, Range colRange ) const; + oclMat operator()( const Rect &roi ) const; + + oclMat& operator+=( const oclMat& m ); + oclMat& operator-=( const oclMat& m ); + oclMat& operator*=( const oclMat& m ); + oclMat& operator/=( const oclMat& m ); + + //! returns true if the oclMatrix data is continuous + // (i.e. when there are no gaps between successive rows). + // similar to CV_IS_oclMat_CONT(cvoclMat->type) + bool isContinuous() const; + //! returns element size in bytes, + // similar to CV_ELEM_SIZE(cvMat->type) + size_t elemSize() const; + //! returns the size of element channel in bytes. + size_t elemSize1() const; + //! returns element type, similar to CV_MAT_TYPE(cvMat->type) + int type() const; + //! returns element type, e.g. 8UC3 returns 8UC4 because in ocl + //! a 3-channel element actually uses 4-channel space + int ocltype() const; + //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type) + int depth() const; + //! returns number of channels, similar to CV_MAT_CN(cvMat->type) + int channels() const; + //! returns number of channels, returns 4 for a 3-channel element, + //! because a 3-channel element actually uses 4-channel space + int oclchannels() const; + //! returns step/elemSize1() + size_t step1() const; + //! returns oclMatrix size: + // width == number of columns, height == number of rows + Size size() const; + //! returns true if oclMatrix data is NULL + bool empty() const; + + //! returns pointer to y-th row + uchar* ptr(int y = 0); + const uchar *ptr(int y = 0) const; + + //! template version of the above method + template _Tp *ptr(int y = 0); + template const _Tp *ptr(int y = 0) const; + + //! matrix transposition + oclMat t() const; + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + //! the number of rows and columns + int rows, cols; + //! a distance between successive rows in bytes; includes the gap if any + size_t step; + //! 
pointer to the data(OCL memory object) + uchar *data; + + //! pointer to the reference counter; + // when oclMatrix points to user-allocated data, the pointer is NULL + int *refcount; + + //! helper fields used in locateROI and adjustROI + //datastart and dataend are not used in current version + uchar *datastart; + uchar *dataend; + + //! OpenCL context associated with the oclMat object. + Context *clCxt; // TODO clCtx + //add offset for handle ROI, calculated in byte + int offset; + //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used + int wholerows; + int wholecols; + }; + + // convert InputArray/OutputArray to oclMat references + CV_EXPORTS oclMat& getOclMatRef(InputArray src); + CV_EXPORTS oclMat& getOclMatRef(OutputArray src); + + ///////////////////// mat split and merge ///////////////////////////////// + //! Compose a multi-channel array from several single-channel arrays + // Support all types + CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst); + CV_EXPORTS void merge(const std::vector &src, oclMat &dst); + + //! Divides multi-channel array into several single-channel arrays + // Support all types + CV_EXPORTS void split(const oclMat &src, oclMat *dst); + CV_EXPORTS void split(const oclMat &src, std::vector &dst); + + ////////////////////////////// Arithmetics /////////////////////////////////// + + //! adds one matrix to another with scale (dst = src1 * alpha + src2 * beta + gama) + // supports all data types + CV_EXPORTS void addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst); + + //! adds one matrix to another (dst = src1 + src2) + // supports all data types + CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); + //! 
adds scalar to a matrix (dst = src1 + s) + // supports all data types + CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); + + //! subtracts one matrix from another (dst = src1 - src2) + // supports all data types + CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); + //! subtracts scalar from a matrix (dst = src1 - s) + // supports all data types + CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); + + //! computes element-wise product of the two arrays (dst = src1 * scale * src2) + // supports all data types + CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1); + //! multiplies matrix by a number (dst = scalar * src) + // supports all data types + CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst); + + //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2) + // supports all data types + CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1); + //! computes element-wise quotient of a scalar and an array (dst = scale / src1) + // supports all data types + CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst); + + //! computes element-wise minimum of the two arrays (dst = min(src1, src2)) + // supports all data types + CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst); + + //! computes element-wise maximum of the two arrays (dst = max(src1, src2)) + // supports all data types + CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst); + + //! compares elements of two arrays (dst = src1 cmpop src2) + // supports all data types + CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop); + + //! 
transposes the matrix + // supports all data types + CV_EXPORTS void transpose(const oclMat &src, oclMat &dst); + + //! computes element-wise absolute values of an array (dst = abs(src)) + // supports all data types + CV_EXPORTS void abs(const oclMat &src, oclMat &dst); + + //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2)) + // supports all data types + CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst); + //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s)) + // supports all data types + CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst); + + //! computes mean value and standard deviation of all or selected array elements + // supports all data types + CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev); + + //! computes norm of array + // supports NORM_INF, NORM_L1, NORM_L2 + // supports all data types + CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2); + + //! computes norm of the difference between two arrays + // supports NORM_INF, NORM_L1, NORM_L2 + // supports all data types + CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2); + + //! reverses the order of the rows, columns or both in a matrix + // supports all types + CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode); + + //! computes sum of array elements + // support all types + CV_EXPORTS Scalar sum(const oclMat &m); + CV_EXPORTS Scalar absSum(const oclMat &m); + CV_EXPORTS Scalar sqrSum(const oclMat &m); + + //! finds global minimum and maximum array elements and returns their values + // support all C1 types + CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); + + //! 
finds global minimum and maximum array elements and returns their values with locations + // support all C1 types + CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0, + const oclMat &mask = oclMat()); + + //! counts non-zero array elements + // support all types + CV_EXPORTS int countNonZero(const oclMat &src); + + //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) + // destination array will have the same depth as lut and the same number of channels as source + // It supports 8UC1 8UC4 only + CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst); + + //! only 8UC1 and 256 bins are supported now + CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist); + //! only 8UC1 and 256 bins are supported now + CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst); + + //! only 8UC1 is supported now + CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + + //! bilateralFilter + // supports 8UC1 8UC4 + CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT); + + //! Applies an adaptive bilateral filter to the input image + // Unlike the usual bilateral filter that uses fixed value for sigmaColor, + // the adaptive version calculates the local variance in the ksize neighborhood + // and uses this as sigmaColor, for the value filtering. However, the local standard deviation is + // clamped to the maxSigmaColor. + // supports 8UC1, 8UC3 + CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor=20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT); + + //! computes exponent of each matrix element (dst = e**src) + // supports only CV_32FC1, CV_64FC1 type + CV_EXPORTS void exp(const oclMat &src, oclMat &dst); + + //! 
computes natural logarithm of absolute value of each matrix element: dst = log(abs(src)) + // supports only CV_32FC1, CV_64FC1 type + CV_EXPORTS void log(const oclMat &src, oclMat &dst); + + //! computes magnitude of each (x(i), y(i)) vector + // supports only CV_32F, CV_64F type + CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude); + + //! computes angle (angle(i)) of each (x(i), y(i)) vector + // supports only CV_32F, CV_64F type + CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false); + + //! the function raises every element of the input array to the power p + // supports only CV_32F, CV_64F type + CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y); + + //! converts Cartesian coordinates to polar + // supports only CV_32F CV_64F type + CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false); + + //! converts polar coordinates to Cartesian + // supports only CV_32F CV_64F type + CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false); + + //! performs per-element bit-wise inversion + // supports all types + CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst); + + //! calculates per-element bit-wise disjunction of two arrays + // supports all types + CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); + CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); + + //! calculates per-element bit-wise conjunction of two arrays + // supports all types + CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); + CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); + + //! 
calculates per-element bit-wise "exclusive or" operation + // supports all types + CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); + CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); + + //! Logical operators + CV_EXPORTS oclMat operator ~ (const oclMat &); + CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &); + CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &); + CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &); + + + //! Mathematics operators + CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2); + CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2); + CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2); + CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2); + + struct CV_EXPORTS ConvolveBuf + { + Size result_size; + Size block_size; + Size user_block_size; + Size dft_size; + + oclMat image_spect, templ_spect, result_spect; + oclMat image_block, templ_block, result_data; + + void create(Size image_size, Size templ_size); + static Size estimateBlockSize(Size result_size, Size templ_size); + }; + + //! computes convolution of two images, may use discrete Fourier transform + // support only CV_32FC1 type + CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false); + CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf); + + //! Performs a per-element multiplication of two Fourier spectrums. + //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + //! 
support only CV_32FC2 type + CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false); + + CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0); + + //! initializes a scaled identity matrix + CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1)); + + //! fills the output array with repeated copies of the input array + CV_EXPORTS void repeat(const oclMat & src, int ny, int nx, oclMat & dst); + + //////////////////////////////// Filter Engine //////////////////////////////// + + /*! + The Base Class for 1D or Row-wise Filters + + This is the base class for linear or non-linear filters that process 1D data. + In particular, such filters are used for the "horizontal" filtering parts in separable filters. + */ + class CV_EXPORTS BaseRowFilter_GPU + { + public: + BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} + virtual ~BaseRowFilter_GPU() {} + virtual void operator()(const oclMat &src, oclMat &dst) = 0; + int ksize, anchor, bordertype; + }; + + /*! + The Base Class for Column-wise Filters + + This is the base class for linear or non-linear filters that process columns of 2D arrays. + Such filters are used for the "vertical" filtering parts in separable filters. + */ + class CV_EXPORTS BaseColumnFilter_GPU + { + public: + BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} + virtual ~BaseColumnFilter_GPU() {} + virtual void operator()(const oclMat &src, oclMat &dst) = 0; + int ksize, anchor, bordertype; + }; + + /*! + The Base Class for Non-Separable 2D Filters. + + This is the base class for linear or non-linear 2D filters. 
+ */ + class CV_EXPORTS BaseFilter_GPU + { + public: + BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_) + : ksize(ksize_), anchor(anchor_), borderType(borderType_) {} + virtual ~BaseFilter_GPU() {} + virtual void operator()(const oclMat &src, oclMat &dst) = 0; + Size ksize; + Point anchor; + int borderType; + }; + + /*! + The Base Class for Filter Engine. + + The class can be used to apply an arbitrary filtering operation to an image. + It contains all the necessary intermediate buffers. + */ + class CV_EXPORTS FilterEngine_GPU + { + public: + virtual ~FilterEngine_GPU() {} + + virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0; + }; + + //! returns the non-separable filter engine with the specified filter + CV_EXPORTS Ptr createFilter2D_GPU(const Ptr filter2D); + + //! returns the primitive row filter with the specified kernel + CV_EXPORTS Ptr getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel, + int anchor = -1, int bordertype = BORDER_DEFAULT); + + //! returns the primitive column filter with the specified kernel + CV_EXPORTS Ptr getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel, + int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0); + + //! returns the separable linear filter engine + CV_EXPORTS Ptr createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, + const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); + + //! returns the separable filter engine with the specified filters + CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr &rowFilter, + const Ptr &columnFilter); + + //! returns the Gaussian filter engine + CV_EXPORTS Ptr createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT); + + //! 
returns filter engine for the generalized Sobel operator + CV_EXPORTS Ptr createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT ); + + //! applies Laplacian operator to the image + // supports only ksize = 1 and ksize = 3 + CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1, + double delta=0, int borderType=BORDER_DEFAULT); + + //! returns 2D box filter + // dst type must be the same as source type + CV_EXPORTS Ptr getBoxFilter_GPU(int srcType, int dstType, + const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + + //! returns box filter engine + CV_EXPORTS Ptr createBoxFilter_GPU(int srcType, int dstType, const Size &ksize, + const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + + //! returns 2D filter with the specified kernel + // supports: dst type must be the same as source type + CV_EXPORTS Ptr getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, + const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + + //! returns the non-separable linear filter engine + // supports: dst type must be the same as source type + CV_EXPORTS Ptr createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, + const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + + //! smooths the image using the normalized box filter + CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize, + Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + + //! returns 2D morphological filter + //! only MORPH_ERODE and MORPH_DILATE are supported + // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types + // kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height + CV_EXPORTS Ptr getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize, + Point anchor = Point(-1, -1)); + + //! 
returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported. + CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat &kernel, + const Point &anchor = Point(-1, -1), int iterations = 1); + + //! a synonym for normalized box filter + static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1), + int borderType = BORDER_CONSTANT) + { + boxFilter(src, dst, -1, ksize, anchor, borderType); + } + + //! applies non-separable 2D linear filter to the image + CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, + Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT); + + //! applies separable 2D linear filter to the image + CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, + Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); + + //! applies generalized Sobel operator to the image + // dst.type must equalize src.type + // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 + // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 + CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT); + + //! applies the vertical or horizontal Scharr operator to the image + // dst.type must equalize src.type + // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 + // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 + CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT); + + //! smooths the image using Gaussian filter. 
+ // dst.type must equalize src.type + // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 + // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 + CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT); + + //! erodes the image (applies the local minimum operator) + // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 + CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, + + int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); + + + //! dilates the image (applies the local maximum operator) + // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 + CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, + + int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); + + + //! applies an advanced morphological operation to the image + CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, + + int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); + + + ////////////////////////////// Image processing ////////////////////////////// + //! Does mean shift filtering on GPU. + CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr, + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + + //! Does mean shift procedure on GPU. + CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + + //! Does mean shift segmentation with elimiation of small regions. 
+ CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + + //! applies fixed threshold to the image. + // supports CV_8UC1 and CV_32FC1 data type + // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV + CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC); + + //! resizes the image + // Supports INTER_NEAREST, INTER_LINEAR + // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types + CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR); + + //! Applies a generic geometrical transformation to an image. + + // Supports INTER_NEAREST, INTER_LINEAR. + // Map1 supports CV_16SC2, CV_32FC2 types. + // Src supports CV_8UC1, CV_8UC2, CV_8UC4. + CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar()); + + //! copies 2D array to a larger destination array and pads borders with user-specifiable constant + // supports CV_8UC1, CV_8UC4, CV_32SC1 types + CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar()); + + //! Smoothes image using median filter + // The source 1- or 4-channel image. m should be 3 or 5, the image depth should be CV_8U or CV_32F. + CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m); + + //! warps the image using affine transformation + // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC + // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types + CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR); + + //! 
warps the image using perspective transformation + // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC + // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types + CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR); + + //! computes the integral image and integral for the squared image - // sum will have CV_32S type, sqsum - CV32F type ++ // sum will support CV_32S, CV_32F, sqsum - support CV32F, CV_64F + // supports only CV_8UC1 source type - CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum); - CV_EXPORTS void integral(const oclMat &src, oclMat &sum); ++ CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 ); ++ CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 ); + CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + + + /////////////////////////////////// ML /////////////////////////////////////////// + + //! 
Compute closest centers for each lines in source and lable it after center's index + // supports CV_32FC1/CV_32FC2/CV_32FC4 data type + // supports NORM_L1 and NORM_L2 distType + // if indices is provided, only the indexed rows will be calculated and their results are in the same + // order of indices + CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat ¢ers, Mat &dists, Mat &labels, int distType = NORM_L2SQR); + + //!Does k-means procedure on GPU + // supports CV_32FC1/CV_32FC2/CV_32FC4 data type + CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels, + TermCriteria criteria, int attemps, int flags, oclMat ¢ers); + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////CascadeClassifier////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class CV_EXPORTS OclCascadeClassifier : public cv::CascadeClassifier + { + public: + void detectMultiScale(oclMat &image, CV_OUT std::vector& faces, + double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, + Size minSize = Size(), Size maxSize = Size()); + }; + + /////////////////////////////// Pyramid ///////////////////////////////////// + CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); + + //! upsamples the source image and then smoothes it + CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst); + + //! performs linear blending of two images + //! to avoid accuracy errors sum of weigths shouldn't be very close to zero + // supports only CV_8UC1 source type + CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result); + + //! 
computes vertical sum, supports only CV_32FC1 images + CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum); + + ///////////////////////////////////////// match_template ///////////////////////////////////////////////////////////// + struct CV_EXPORTS MatchTemplateBuf + { + Size user_block_size; + oclMat imagef, templf; + std::vector images; + std::vector image_sums; + std::vector image_sqsums; + }; + + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method); + + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf); + + + + ///////////////////////////////////////////// Canny ///////////////////////////////////////////// + struct CV_EXPORTS CannyBuf; + + //! 
compute edges of the input image using Canny operator + // Support CV_8UC1 only + CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); + CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); + CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); + CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); + + struct CV_EXPORTS CannyBuf + { + CannyBuf() : counter(1, 1, CV_32S) { } + ~CannyBuf() + { + release(); + } + explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S) + { + create(image_size, apperture_size); + } + CannyBuf(const oclMat &dx_, const oclMat &dy_); + void create(const Size &image_size, int apperture_size = 3); + void release(); + + oclMat dx, dy; + oclMat dx_buf, dy_buf; + oclMat magBuf, mapBuf; + oclMat trackBuf1, trackBuf2; + oclMat counter; + Ptr filterDX, filterDY; + }; + + ///////////////////////////////////////// Hough Transform ///////////////////////////////////////// + //! 
HoughCircles + struct HoughCirclesBuf + { + oclMat edges; + oclMat accum; + oclMat srcPoints; + oclMat centers; + CannyBuf cannyBuf; + }; + + CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); + CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); + CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles); + + + ///////////////////////////////////////// clAmdFft related ///////////////////////////////////////// + //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. + //! Param dft_size is the size of DFT transform. + //! + //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format. + // support src type of CV32FC1, CV32FC2 + // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS + // dft_size is the size of original input, which is used for transformation from complex to real. + // dft_size must be powers of 2, 3 and 5 + // real to complex dft requires at least v1.8 clAmdFft + // real to complex dft output is not the same with cpu version + // real to complex and complex to real does not support DFT_ROWS + CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0); + + //! 
implements generalized matrix product algorithm GEMM from BLAS + // The functionality requires clAmdBlas library + // only support type CV_32FC1 + // flag GEMM_3_T is not supported + CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha, + const oclMat &src3, double beta, oclMat &dst, int flags = 0); + + //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// + + struct CV_EXPORTS HOGDescriptor + + { + + enum { DEFAULT_WIN_SIGMA = -1 }; + + enum { DEFAULT_NLEVELS = 64 }; + + enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; + + + + HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16), + + Size block_stride = Size(8, 8), Size cell_size = Size(8, 8), + + int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA, + + double threshold_L2hys = 0.2, bool gamma_correction = true, + + int nlevels = DEFAULT_NLEVELS); + + + + size_t getDescriptorSize() const; + + size_t getBlockHistogramSize() const; + + + + void setSVMDetector(const std::vector &detector); + + + + static std::vector getDefaultPeopleDetector(); + + static std::vector getPeopleDetector48x96(); + + static std::vector getPeopleDetector64x128(); + + + + void detect(const oclMat &img, std::vector &found_locations, + + double hit_threshold = 0, Size win_stride = Size(), + + Size padding = Size()); + + + + void detectMultiScale(const oclMat &img, std::vector &found_locations, + + double hit_threshold = 0, Size win_stride = Size(), + + Size padding = Size(), double scale0 = 1.05, + + int group_threshold = 2); + + + + void getDescriptors(const oclMat &img, Size win_stride, + + oclMat &descriptors, + + int descr_format = DESCR_FORMAT_COL_BY_COL); + + + + Size win_size; + + Size block_size; + + Size block_stride; + + Size cell_size; + + int nbins; + + double win_sigma; + + double threshold_L2hys; + + bool gamma_correction; + + int nlevels; + + + + protected: + + // initialize buffers; only need to do once in case of multiscale 
detection + + void init_buffer(const oclMat &img, Size win_stride); + + + + void computeBlockHistograms(const oclMat &img); + + void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle); + + + + double getWinSigma() const; + + bool checkDetectorSize() const; + + + + static int numPartsWithin(int size, int part_size, int stride); + + static Size numPartsWithin(Size size, Size part_size, Size stride); + + + + // Coefficients of the separating plane + + float free_coef; + + oclMat detector; + + + + // Results of the last classification step + + oclMat labels; + + Mat labels_host; + + + + // Results of the last histogram evaluation step + + oclMat block_hists; + + + + // Gradients conputation results + + oclMat grad, qangle; + + + + // scaled image + + oclMat image_scale; + + + + // effect size of input image (might be different from original size after scaling) + + Size effect_size; + + }; + + + ////////////////////////feature2d_ocl///////////////// + /****************************************************************************************\ + * Distance * + \****************************************************************************************/ + template + struct CV_EXPORTS Accumulator + { + typedef T Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + + /* + * Manhattan distance (city block distance) functor + */ + template + struct CV_EXPORTS L1 + { + enum { normType = NORM_L1 }; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T *a, const T *b, int size ) const + { + return normL1(a, b, size); + } + }; + + /* + * Euclidean distance functor + */ + template + struct CV_EXPORTS L2 + { + enum { normType = NORM_L2 }; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + 
ResultType operator()( const T *a, const T *b, int size ) const + { + return (ResultType)std::sqrt((double)normL2Sqr(a, b, size)); + } + }; + + /* + * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor + * bit count of A exclusive XOR'ed with B + */ + struct CV_EXPORTS Hamming + { + enum { normType = NORM_HAMMING }; + typedef unsigned char ValueType; + typedef int ResultType; + + /** this will count the bits in a ^ b + */ + ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const + { + return normHamming(a, b, size); + } + }; + + ////////////////////////////////// BruteForceMatcher ////////////////////////////////// + + class CV_EXPORTS BruteForceMatcher_OCL_base + { + public: + enum DistType {L1Dist = 0, L2Dist, HammingDist}; + explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist); + + // Add descriptors to train descriptor collection + void add(const std::vector &descCollection); + + // Get train descriptors collection + const std::vector &getTrainDescriptors() const; + + // Clear train descriptors collection + void clear(); + + // Return true if there are not train descriptors in collection + bool empty() const; + + // Return true if the matcher supports mask in match methods + bool isMaskSupported() const; + + // Find one best match for each query descriptor + void matchSingle(const oclMat &query, const oclMat &train, + oclMat &trainIdx, oclMat &distance, + const oclMat &mask = oclMat()); + + // Download trainIdx and distance and convert it to CPU vector with DMatch + static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector &matches); + // Convert trainIdx and distance to vector with DMatch + static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector &matches); + + // Find one best match for each query descriptor + void match(const oclMat &query, const oclMat &train, std::vector &matches, const oclMat &mask = 
oclMat()); + + // Make gpu collection of trains and masks in suitable format for matchCollection function + void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector &masks = std::vector()); + + // Find one best match from train collection for each query descriptor + void matchCollection(const oclMat &query, const oclMat &trainCollection, + oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, + const oclMat &masks = oclMat()); + + // Download trainIdx, imgIdx and distance and convert it to vector with DMatch + static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector &matches); + // Convert trainIdx, imgIdx and distance to vector with DMatch + static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector &matches); + + // Find one best match from train collection for each query descriptor. + void match(const oclMat &query, std::vector &matches, const std::vector &masks = std::vector()); + + // Find k best matches for each query descriptor (in increasing order of distances) + void knnMatchSingle(const oclMat &query, const oclMat &train, + oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, + const oclMat &mask = oclMat()); + + // Download trainIdx and distance and convert it to vector with DMatch + // compactResult is used when mask is not empty. If compactResult is false matches + // vector will have the same size as queryDescriptors rows. If compactResult is true + // matches vector will not contain matches for fully masked out query descriptors. 
+ static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, + std::vector< std::vector > &matches, bool compactResult = false); + // Convert trainIdx and distance to vector with DMatch + static void knnMatchConvert(const Mat &trainIdx, const Mat &distance, + std::vector< std::vector > &matches, bool compactResult = false); + + // Find k best matches for each query descriptor (in increasing order of distances). + // compactResult is used when mask is not empty. If compactResult is false matches + // vector will have the same size as queryDescriptors rows. If compactResult is true + // matches vector will not contain matches for fully masked out query descriptors. + void knnMatch(const oclMat &query, const oclMat &train, + std::vector< std::vector > &matches, int k, const oclMat &mask = oclMat(), + bool compactResult = false); + + // Find k best matches from train collection for each query descriptor (in increasing order of distances) + void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection, + oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, + const oclMat &maskCollection = oclMat()); + + // Download trainIdx and distance and convert it to vector with DMatch + // compactResult is used when mask is not empty. If compactResult is false matches + // vector will have the same size as queryDescriptors rows. If compactResult is true + // matches vector will not contain matches for fully masked out query descriptors. + static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, + std::vector< std::vector > &matches, bool compactResult = false); + // Convert trainIdx and distance to vector with DMatch + static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, + std::vector< std::vector > &matches, bool compactResult = false); + + // Find k best matches for each query descriptor (in increasing order of distances). 
+ // compactResult is used when mask is not empty. If compactResult is false matches + // vector will have the same size as queryDescriptors rows. If compactResult is true + // matches vector will not contain matches for fully masked out query descriptors. + void knnMatch(const oclMat &query, std::vector< std::vector > &matches, int k, + const std::vector &masks = std::vector(), bool compactResult = false); + + // Find best matches for each query descriptor which have distance less than maxDistance. + // nMatches.at(0, queryIdx) will contain matches count for queryIdx. + // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, + // because it didn't have enough memory. + // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), + // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches + // Matches doesn't sorted. + void radiusMatchSingle(const oclMat &query, const oclMat &train, + oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, + const oclMat &mask = oclMat()); + + // Download trainIdx, nMatches and distance and convert it to vector with DMatch. + // matches will be sorted in increasing order of distances. + // compactResult is used when mask is not empty. If compactResult is false matches + // vector will have the same size as queryDescriptors rows. If compactResult is true + // matches vector will not contain matches for fully masked out query descriptors. + static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, + std::vector< std::vector > &matches, bool compactResult = false); + // Convert trainIdx, nMatches and distance to vector with DMatch. 
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches, + std::vector< std::vector > &matches, bool compactResult = false); + + // Find best matches for each query descriptor which have distance less than maxDistance + // in increasing order of distances). + void radiusMatch(const oclMat &query, const oclMat &train, + std::vector< std::vector > &matches, float maxDistance, + const oclMat &mask = oclMat(), bool compactResult = false); + + // Find best matches for each query descriptor which have distance less than maxDistance. + // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), + // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches + // Matches doesn't sorted. + void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance, + const std::vector &masks = std::vector()); + + // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. + // matches will be sorted in increasing order of distances. + // compactResult is used when mask is not empty. If compactResult is false matches + // vector will have the same size as queryDescriptors rows. If compactResult is true + // matches vector will not contain matches for fully masked out query descriptors. + static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches, + std::vector< std::vector > &matches, bool compactResult = false); + // Convert trainIdx, nMatches and distance to vector with DMatch. 
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches, + std::vector< std::vector > &matches, bool compactResult = false); + + // Find best matches from train collection for each query descriptor which have distance less than + // maxDistance (in increasing order of distances). + void radiusMatch(const oclMat &query, std::vector< std::vector > &matches, float maxDistance, + const std::vector &masks = std::vector(), bool compactResult = false); + + DistType distType; + + private: + std::vector trainDescCollection; + }; + + template + class CV_EXPORTS BruteForceMatcher_OCL; + + template + class CV_EXPORTS BruteForceMatcher_OCL< L1 > : public BruteForceMatcher_OCL_base + { + public: + explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {} + explicit BruteForceMatcher_OCL(L1 /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} + }; + template + class CV_EXPORTS BruteForceMatcher_OCL< L2 > : public BruteForceMatcher_OCL_base + { + public: + explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {} + explicit BruteForceMatcher_OCL(L2 /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} + }; + template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base + { + public: + explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {} + explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} + }; + + class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base + { + public: + explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} + }; + + class CV_EXPORTS GoodFeaturesToTrackDetector_OCL + { + public: + explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, + int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04); + + //! 
return 1 rows matrix with CV_32FC2 type + void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat()); + //! download points of type Point2f to a vector. the vector's content will be erased + void downloadPoints(const oclMat &points, std::vector &points_v); + + int maxCorners; + double qualityLevel; + double minDistance; + + int blockSize; + bool useHarrisDetector; + double harrisK; + void releaseMemory() + { + Dx_.release(); + Dy_.release(); + eig_.release(); + minMaxbuf_.release(); + tmpCorners_.release(); + } + private: + oclMat Dx_; + oclMat Dy_; + oclMat eig_; + oclMat minMaxbuf_; + oclMat tmpCorners_; + }; + + inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_, + int blockSize_, bool useHarrisDetector_, double harrisK_) + { + maxCorners = maxCorners_; + qualityLevel = qualityLevel_; + minDistance = minDistance_; + blockSize = blockSize_; + useHarrisDetector = useHarrisDetector_; + harrisK = harrisK_; + } + + /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// + + class CV_EXPORTS PyrLKOpticalFlow + { + public: + PyrLKOpticalFlow() + { + winSize = Size(21, 21); + maxLevel = 3; + iters = 30; + derivLambda = 0.5; + useInitialFlow = false; + minEigThreshold = 1e-4f; + getMinEigenVals = false; + isDeviceArch11_ = false; + } + + void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, + oclMat &status, oclMat *err = 0); + + void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0); + + Size winSize; + int maxLevel; + int iters; + double derivLambda; + bool useInitialFlow; + float minEigThreshold; + bool getMinEigenVals; + + void releaseMemory() + { + dx_calcBuf_.release(); + dy_calcBuf_.release(); + + prevPyr_.clear(); + nextPyr_.clear(); + + dx_buf_.release(); + dy_buf_.release(); + } + + private: + void calcSharrDeriv(const oclMat &src, oclMat 
&dx, oclMat &dy); + + void buildImagePyramid(const oclMat &img0, std::vector &pyr, bool withBorder); + + oclMat dx_calcBuf_; + oclMat dy_calcBuf_; + + std::vector prevPyr_; + std::vector nextPyr_; + + oclMat dx_buf_; + oclMat dy_buf_; + + oclMat uPyr_[2]; + oclMat vPyr_[2]; + + bool isDeviceArch11_; + }; + + class CV_EXPORTS FarnebackOpticalFlow + { + public: + FarnebackOpticalFlow(); + + int numLevels; + double pyrScale; + bool fastPyramids; + int winSize; + int numIters; + int polyN; + double polySigma; + int flags; + + void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy); + + void releaseMemory(); + + private: + void prepareGaussian( + int n, double sigma, float *g, float *xg, float *xxg, + double &ig11, double &ig03, double &ig33, double &ig55); + + void setPolynomialExpansionConsts(int n, double sigma); + + void updateFlow_boxFilter( + const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy, + oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices); + + void updateFlow_gaussianBlur( + const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, + oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices); + + oclMat frames_[2]; + oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; + std::vector pyramid0_, pyramid1_; + }; + + //////////////// build warping maps //////////////////// + //! builds plane warping maps + CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y); + //! builds cylindrical warping maps + CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y); + //! builds spherical warping maps + CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y); + //! 
builds Affine warping maps + CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap); + + //! builds Perspective warping maps + CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap); + + ///////////////////////////////////// interpolate frames ////////////////////////////////////////////// + //! Interpolate frames (images) using provided optical flow (displacement field). + //! frame0 - frame 0 (32-bit floating point images, single channel) + //! frame1 - frame 1 (the same type and size) + //! fu - forward horizontal displacement + //! fv - forward vertical displacement + //! bu - backward horizontal displacement + //! bv - backward vertical displacement + //! pos - new frame position + //! newFrame - new frame + //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat; + //! occlusion masks 0, occlusion masks 1, + //! interpolated forward flow 0, interpolated forward flow 1, + //! interpolated backward flow 0, interpolated backward flow 1 + //! + CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1, + const oclMat &fu, const oclMat &fv, + const oclMat &bu, const oclMat &bv, + float pos, oclMat &newFrame, oclMat &buf); + + //! computes moments of the rasterized shape or a vector of points + //! _array should be a vector a points standing for the contour + CV_EXPORTS Moments ocl_moments(InputArray contour); + //! src should be a general image uploaded to the GPU. + //! the supported oclMat type are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1 + //! to use type of CV_64FC1, the GPU should support CV_64FC1 + CV_EXPORTS Moments ocl_moments(oclMat& src, bool binary); + + class CV_EXPORTS StereoBM_OCL + { + public: + enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 }; + + enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 }; + + //! the default constructor + StereoBM_OCL(); + //! 
the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8. + StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ); + + //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair + //! Output disparity has CV_8U type. + void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity); + + //! Some heuristics that tries to estmate + // if current GPU will be faster then CPU in this algorithm. + // It queries current active device. + static bool checkIfGpuCallReasonable(); + + int preset; + int ndisp; + int winSize; + + // If avergeTexThreshold == 0 => post procesing is disabled + // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image + // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold + // i.e. input left image is low textured. + float avergeTexThreshold; + private: + oclMat minSSD, leBuf, riBuf; + }; + + class CV_EXPORTS StereoBeliefPropagation + { + public: + enum { DEFAULT_NDISP = 64 }; + enum { DEFAULT_ITERS = 5 }; + enum { DEFAULT_LEVELS = 5 }; + static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); + explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int msg_type = CV_16S); + StereoBeliefPropagation(int ndisp, int iters, int levels, + float max_data_term, float data_weight, + float max_disc_term, float disc_single_jump, + int msg_type = CV_32F); + void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); + void operator()(const oclMat &data, oclMat &disparity); + int ndisp; + int iters; + int levels; + float max_data_term; + float data_weight; + float max_disc_term; + float disc_single_jump; + int msg_type; + private: + oclMat u, d, l, r, u2, d2, l2, r2; + std::vector datas; + 
oclMat out; + }; + + class CV_EXPORTS StereoConstantSpaceBP + { + public: + enum { DEFAULT_NDISP = 128 }; + enum { DEFAULT_ITERS = 8 }; + enum { DEFAULT_LEVELS = 4 }; + enum { DEFAULT_NR_PLANE = 4 }; + static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); + explicit StereoConstantSpaceBP( + int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int nr_plane = DEFAULT_NR_PLANE, + int msg_type = CV_32F); + StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, + float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, + int min_disp_th = 0, + int msg_type = CV_32F); + void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); + int ndisp; + int iters; + int levels; + int nr_plane; + float max_data_term; + float data_weight; + float max_disc_term; + float disc_single_jump; + int min_disp_th; + int msg_type; + bool use_local_init_data_cost; + private: + oclMat u[2], d[2], l[2], r[2]; + oclMat disp_selected_pyr[2]; + oclMat data_cost; + oclMat data_cost_selected; + oclMat temp; + oclMat out; + }; + + // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method + // + // see reference: + // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". + // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". + class CV_EXPORTS OpticalFlowDual_TVL1_OCL + { + public: + OpticalFlowDual_TVL1_OCL(); + + void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); + + void collectGarbage(); + + /** + * Time step of the numerical scheme. + */ + double tau; + + /** + * Weight parameter for the data term, attachment parameter. + * This is the most relevant parameter, which determines the smoothness of the output. + * The smaller this parameter is, the smoother the solutions we obtain. 
+ * It depends on the range of motions of the images, so its value should be adapted to each image sequence. + */ + double lambda; + + /** + * Weight parameter for (u - v)^2, tightness parameter. + * It serves as a link between the attachment and the regularization terms. + * In theory, it should have a small value in order to maintain both parts in correspondence. + * The method is stable for a large range of values of this parameter. + */ + double theta; + + /** + * Number of scales used to create the pyramid of images. + */ + int nscales; + + /** + * Number of warpings per scale. + * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. + * This is a parameter that assures the stability of the method. + * It also affects the running time, so it is a compromise between speed and accuracy. + */ + int warps; + + /** + * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. + * A small value will yield more accurate solutions at the expense of a slower convergence. + */ + double epsilon; + + /** + * Stopping criterion iterations number used in the numerical scheme. + */ + int iterations; + + bool useInitialFlow; + + private: + void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2); + + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + oclMat I1x_buf; + oclMat I1y_buf; + + oclMat I1w_buf; + oclMat I1wx_buf; + oclMat I1wy_buf; + + oclMat grad_buf; + oclMat rho_c_buf; + + oclMat p11_buf; + oclMat p12_buf; + oclMat p21_buf; + oclMat p22_buf; + + oclMat diff_buf; + oclMat norm_buf; + }; + // current supported sorting methods + enum + { + SORT_BITONIC, // only support power-of-2 buffer size + SORT_SELECTION, // cannot sort duplicate keys + SORT_MERGE, + SORT_RADIX // only support signed int/float keys(CV_32S/CV_32F) + }; + //! Returns the sorted result of all the elements in input based on equivalent keys. 
+ // + // The element unit in the values to be sorted is determined from the data type, + // i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless its + // matrix dimension. + // both keys and values will be sorted inplace + // Key needs to be single channel oclMat. + // + // Example: + // input - + // keys = {2, 3, 1} (CV_8UC1) + // values = {10,5, 4,3, 6,2} (CV_8UC2) + // sortByKey(keys, values, SORT_SELECTION, false); + // output - + // keys = {1, 2, 3} (CV_8UC1) + // values = {6,2, 10,5, 4,3} (CV_8UC2) + CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false); + /*!Base class for MOG and MOG2!*/ + class CV_EXPORTS BackgroundSubtractor + { + public: + //! the virtual destructor + virtual ~BackgroundSubtractor(); + //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image. + virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate); + + //! computes a background image + virtual void getBackgroundImage(oclMat& backgroundImage) const = 0; + }; + /*! + Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm + + The class implements the following algorithm: + "An improved adaptive background mixture model for real-time tracking with shadow detection" + P. KadewTraKuPong and R. Bowden, + Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001." + http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf + */ + class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor + { + public: + //! the default constructor + MOG(int nmixtures = -1); + + //! re-initiaization method + void initialize(Size frameSize, int frameType); + + //! the update operator + void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f); + + //! 
computes a background image which are the mean of all background gaussians + void getBackgroundImage(oclMat& backgroundImage) const; + + //! releases all inner buffers + void release(); + + int history; + float varThreshold; + float backgroundRatio; + float noiseSigma; + + private: + int nmixtures_; + + Size frameSize_; + int frameType_; + int nframes_; + + oclMat weight_; + oclMat sortKey_; + oclMat mean_; + oclMat var_; + }; + + /*! + The class implements the following algorithm: + "Improved adaptive Gausian mixture model for background subtraction" + Z.Zivkovic + International Conference Pattern Recognition, UK, August, 2004. + http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf + */ + class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor + { + public: + //! the default constructor + MOG2(int nmixtures = -1); + + //! re-initiaization method + void initialize(Size frameSize, int frameType); + + //! the update operator + void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f); + + //! computes a background image which are the mean of all background gaussians + void getBackgroundImage(oclMat& backgroundImage) const; + + //! releases all inner buffers + void release(); + + // parameters + // you should call initialize after parameters changes + + int history; + + //! here it is the maximum allowed number of mixture components. + //! Actual number is determined dynamically per pixel + float varThreshold; + // threshold on the squared Mahalanobis distance to decide if it is well described + // by the background model or not. Related to Cthr from the paper. + // This does not influence the update of the background. A typical value could be 4 sigma + // and that is varThreshold=4*4=16; Corresponds to Tb in the paper. 
+ + ///////////////////////// + // less important parameters - things you might change but be carefull + //////////////////////// + + float backgroundRatio; + // corresponds to fTB=1-cf from the paper + // TB - threshold when the component becomes significant enough to be included into + // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0. + // For alpha=0.001 it means that the mode should exist for approximately 105 frames before + // it is considered foreground + // float noiseSigma; + float varThresholdGen; + + //correspondts to Tg - threshold on the squared Mahalan. dist. to decide + //when a sample is close to the existing components. If it is not close + //to any a new component will be generated. I use 3 sigma => Tg=3*3=9. + //Smaller Tg leads to more generated components and higher Tg might make + //lead to small number of components but they can grow too large + float fVarInit; + float fVarMin; + float fVarMax; + + //initial variance for the newly generated components. + //It will will influence the speed of adaptation. A good guess should be made. + //A simple way is to estimate the typical standard deviation from the images. + //I used here 10 as a reasonable value + // min and max can be used to further control the variance + float fCT; //CT - complexity reduction prior + //this is related to the number of samples needed to accept that a component + //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get + //the standard Stauffer&Grimson algorithm (maybe not exact but very similar) + + //shadow detection parameters + bool bShadowDetection; //default 1 - do shadow detection + unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value + float fTau; + // Tau - shadow threshold. The shadow is detected if the pixel is darker + //version of the background. Tau is a threshold on how much darker the shadow can be. 
+ //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow + //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003. + + private: + int nmixtures_; + + Size frameSize_; + int frameType_; + int nframes_; + + oclMat weight_; + oclMat variance_; + oclMat mean_; + + oclMat bgmodelUsedModes_; //keep track of number of modes per pixel + }; + + /*!***************Kalman Filter*************!*/ + class CV_EXPORTS KalmanFilter + { + public: + KalmanFilter(); + //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector + KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F); + //! re-initializes Kalman filter. The previous content is destroyed. + void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F); + + const oclMat& predict(const oclMat& control=oclMat()); + const oclMat& correct(const oclMat& measurement); + + oclMat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + oclMat statePost; //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + oclMat transitionMatrix; //!< state transition matrix (A) + oclMat controlMatrix; //!< control matrix (B) (not used if there is no control) + oclMat measurementMatrix; //!< measurement matrix (H) + oclMat processNoiseCov; //!< process noise covariance matrix (Q) + oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R) + oclMat errorCovPre; //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q)*/ + oclMat gain; //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R) + oclMat errorCovPost; //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k) + private: + oclMat temp1; + oclMat temp2; + oclMat temp3; + oclMat temp4; + oclMat temp5; + }; + + /*!***************K Nearest Neighbour*************!*/ + class CV_EXPORTS KNearestNeighbour: public CvKNearest + { + public: + KNearestNeighbour(); + 
~KNearestNeighbour(); + + bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)), + bool isRegression = false, int max_k = 32, bool updateBase = false); + + void clear(); + + void find_nearest(const oclMat& samples, int k, oclMat& lables); + + private: + oclMat samples_ocl; + }; + + /*!*************** SVM *************!*/ + class CV_EXPORTS CvSVM_OCL : public CvSVM + { + public: + CvSVM_OCL(); + + CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses, + const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(), + CvSVMParams params=CvSVMParams()); + CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const; + CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const; + CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const; + float predict( const CvMat* samples, CV_OUT CvMat* results ) const; + + protected: + float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const; + void create_kernel(); + void create_solver(); + }; + + /*!*************** END *************!*/ + } +} +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( push) +# pragma warning( disable: 4267) +#endif +#include "opencv2/ocl/matrix_operations.hpp" +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( pop) +#endif + +#endif /* __OPENCV_OCL_HPP__ */ diff --cc modules/ocl/perf/perf_match_template.cpp index 68192cf,3ee038a..9c9829c --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@@ -112,9 -112,9 +112,9 @@@ PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, } else if (RUN_PLAIN_IMPL) { - TEST_CYCLE() cv::matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); + TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR_NORMED); - SANITY_CHECK(dst, 2e-2); + SANITY_CHECK(dst, 3e-2); } else OCL_PERF_ELSE diff --cc modules/ocl/src/arithm.cpp index cd7aa0c,68c5269..0acb4c2 --- a/modules/ocl/src/arithm.cpp +++ 
b/modules/ocl/src/arithm.cpp @@@ -865,32 -865,34 +865,34 @@@ void cv::ocl::log(const oclMat &src, oc ////////////////////////////// magnitude phase /////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) +static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName) { - int channels = dst.oclchannels(); int depth = dst.depth(); - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); - size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, dst.rows, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize(); + int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); - vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem), 
(void *)&src1.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols )); - openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth); + const char * const channelMap[] = { "", "", "2", "4", "4" }; + std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[dst.channels()]); + + openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) @@@ -964,25 -966,29 +966,29 @@@ static void arithmetic_cartToPolar_run( size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { cols, src1.rows, 1 }; - int tmp = angleInDegrees ? 
1 : 0; + int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1(); + int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1(); + int dst_mag_step = dst_mag.step / dst_mag.elemSize1(), dst_mag_offset = dst_mag.offset / dst_mag.elemSize1(); + int dst_cart_step = dst_cart.step / dst_cart.elemSize1(), dst_cart_offset = dst_cart.offset / dst_cart.elemSize1(); + - vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_mag.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_cart.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart_offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( std::make_pair( 
sizeof(cl_int), (void *)&src2.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst_mag.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_mag_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst_cart.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_cart_offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&tmp )); - openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, + -1, depth, angleInDegrees ? 
"-D DEGREE" : "-D RADIAN"); } void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees) @@@ -1006,39 -1012,40 +1012,40 @@@ ////////////////////////////////////////////////////////////////////////////// static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, - string kernelName) + String kernelName) { - int channels = src2.oclchannels(); - int depth = src2.depth(); - - int cols = src2.cols * channels; - int rows = src2.rows; + int channels = src2.oclchannels(), depth = src2.depth(); + int cols = src2.cols * channels, rows = src2.rows; size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { cols, rows, 1 }; - int tmp = angleInDegrees ? 1 : 0; + int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1(); + int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1(); + int dst1_step = dst1.step / dst1.elemSize1(), dst1_offset = dst1.offset / dst1.elemSize1(); + int dst2_step = dst2.step / dst2.elemSize1(), dst2_offset = dst2.offset / dst2.elemSize1(); + - vector > args; + std::vector > args; if (src1.data) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset )); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); - 
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst1_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst1_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst2_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst2_offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst1.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst1_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst2.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst2_offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&tmp )); - openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, 
&arithm_polarToCart, kernelName, globalThreads, localThreads, + args, -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN"); } void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees) @@@ -1623,38 -1630,38 +1630,38 @@@ void cv::ocl::addWeighted(const oclMat /////////////////////////////////// Pow ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// - static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source) -static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) ++static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source) { int channels = dst.oclchannels(); int depth = dst.depth(); - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); - int rows = dst.rows; - size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, rows, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + const char * const typeStr = depth == CV_32F ? 
"float" : "double"; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr); + + int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); - int dst_step1 = dst.cols * dst.elemSize(); - vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); + std::vector > args; - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset )); ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step )); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 
)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols )); float pf = static_cast(p); - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + if(src.depth() == CV_32F) - args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&pf )); else - args.push_back( make_pair( sizeof(cl_double), (void *)&p )); + args.push_back( std::make_pair( sizeof(cl_double), (void *)&p )); - openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::pow(const oclMat &x, double p, oclMat &y) diff --cc modules/ocl/src/build_warps.cpp index bc24f5e,40c082b..f0a3203 --- a/modules/ocl/src/build_warps.cpp +++ b/modules/ocl/src/build_warps.cpp @@@ -74,24 -74,27 +74,27 @@@ void cv::ocl::buildWarpPlaneMaps(Size / int tl_u = dst_roi.tl().x; int tl_v = dst_roi.tl().y; - Context *clCxt = Context::getContext(); - String kernelName = "buildWarpPlaneMaps"; - std::vector< std::pair > args; + int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); + int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_x.data)); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_y.data)); - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_mat.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u)); - args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( make_pair( sizeof(cl_int), (void 
*)&xmap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); ++ std::vector< std::pair > args; ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KRT_mat.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.cols)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.rows)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.step)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_y.step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); - size_t globalThreads[3] = {map_x.cols, map_x.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); + size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; + size_t localThreads[3] = { 32, 8, 1 }; + + openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1); } ////////////////////////////////////////////////////////////////////////////// @@@ -114,24 -117,27 
+117,27 @@@ void cv::ocl::buildWarpCylindricalMaps( int tl_u = dst_roi.tl().x; int tl_v = dst_roi.tl().y; - Context *clCxt = Context::getContext(); - String kernelName = "buildWarpCylindricalMaps"; - std::vector< std::pair > args; + int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); + int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_x.data)); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_y.data)); - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u)); - args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); ++ std::vector< std::pair > args; ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.cols)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.rows)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.step)); - 
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_y.step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); - size_t globalThreads[3] = {map_x.cols, map_x.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); + size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; + size_t localThreads[3] = { 32, 8, 1 }; + + openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1); } ////////////////////////////////////////////////////////////////////////////// @@@ -153,26 -160,30 +160,30 @@@ void cv::ocl::buildWarpSphericalMaps(Si int tl_u = dst_roi.tl().x; int tl_v = dst_roi.tl().y; - Context *clCxt = Context::getContext(); - String kernelName = "buildWarpSphericalMaps"; - std::vector< std::pair > args; + int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); + int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_x.data)); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map_y.data)); - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u)); - args.push_back( make_pair( 
sizeof(cl_int), (void *)&tl_v)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); ++ std::vector< std::pair > args; ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); ++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.cols)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.rows)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_x.step)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&map_y.step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); - size_t globalThreads[3] = {map_x.cols, map_x.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); + size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; + size_t localThreads[3] = { 32, 8, 1 }; + 
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1); } + ////////////////////////////////////////////////////////////////////////////// + // buildWarpAffineMaps void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap) { @@@ -194,29 -205,34 +205,34 @@@ iM.convertTo(coeffsMat, coeffsMat.type()); } + int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); + int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); + oclMat coeffsOclMat(coeffsMat.reshape(1, 1)); - Context *clCxt = Context::getContext(); - String kernelName = "buildWarpAffineMaps"; - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset)); + std::vector< std::pair > args; - + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.step)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap.step)); - - size_t globalThreads[3] = {xmap.cols, xmap.rows, 1}; - size_t 
localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); + + size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; + size_t localThreads[3] = { 32, 8, 1 }; + openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1); } + ////////////////////////////////////////////////////////////////////////////// + // buildWarpPerspectiveMaps + void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap) { - CV_Assert(M.rows == 3 && M.cols == 3); + CV_Assert(dsize.area() > 0); xmap.create(dsize, CV_32FC1); ymap.create(dsize, CV_32FC1); @@@ -235,19 -251,21 +251,21 @@@ oclMat coeffsOclMat(coeffsMat.reshape(1, 1)); - Context *clCxt = Context::getContext(); - String kernelName = "buildWarpPerspectiveMaps"; - std::vector< std::pair > args; + int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); + int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset)); - args.push_back( make_pair( 
sizeof(cl_int), (void *)&ymap_offset)); ++ std::vector< std::pair > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.step)); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap.step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); ++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); + + size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; - size_t globalThreads[3] = {xmap.cols, xmap.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1); } diff --cc modules/ocl/src/color.cpp index eec103a,6e2f403..c93f4bf --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@@ -66,238 -57,423 +57,423 @@@ static void fromRGB_caller(const oclMa int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); - std::string build_options = format("-D DEPTH_%d", src.depth()); + String build_options = format("-D DEPTH_%d", src.depth()); + if (!additionalOptions.empty()) - build_options += additionalOptions; - - vector > args; - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int) 
, (void *)&src_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); ++ build_options = build_options + additionalOptions; + + std::vector > args; - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options.c_str()); - } + if (!data1.empty()) - args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data )); ++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data )); + if (!data2.empty()) - args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data )); ++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data )); - void Gray2RGB_caller(const oclMat &src, oclMat &dst) - { - String build_options = format("-D DEPTH_%d", src.depth()); 
- int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); - int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); - - std::vector > args; - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); - - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options.c_str()); + size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 }; + openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); } - void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx) + static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName, + const std::string & additionalOptions = std::string(), const oclMat & data = oclMat()) { - int channels = src.oclchannels(); - String build_options = format("-D DEPTH_%d", src.depth()); - std::string build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels()); ++ String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels()); + if (!additionalOptions.empty()) - build_options += additionalOptions; ++ build_options = build_options + additionalOptions; + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); - vector > args; - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); - 
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); + std::vector > args; - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options.c_str()); + if (!data.empty()) - args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data )); ++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data )); + + size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 }; + openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); } - void YUV2RGB_caller(const oclMat &src, oclMat &dst, int 
bidx) + static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse) { - int channels = src.oclchannels(); - std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(), - dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER"); ++ String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(), ++ dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER"); int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); - String buildOptions = format("-D DEPTH_%d", src.depth()); - - vector > args; - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); + std::vector > args; - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset 
)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, buildOptions.c_str()); + size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 }; + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str()); } - void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx) + static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName) { - String build_options = format("-D DEPTH_%d", src.depth()); - int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); - int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); - std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d", - src.depth(), greenbits, dst.channels()); ++ String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d", ++ src.depth(), greenbits, dst.channels()); + int src_offset = src.offset >> 1, src_step = src.step >> 1; + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1(); - vector > args; - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); + std::vector > args; - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); ++ 
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); - size_t gt[3] = {dst.cols / 2, dst.rows / 2, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options.c_str()); + size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 }; + openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); } - void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx) + static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName) { - int channels = src.oclchannels(); - String build_options = format("-D DEPTH_%d", src.depth()); - int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); - int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); - std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d", - src.depth(), greenbits, src.channels()); ++ String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d", ++ src.depth(), greenbits, src.channels()); + int src_offset = (int)src.offset, src_step = (int)src.step; + int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1; - vector > args; - args.push_back( make_pair( 
sizeof(cl_int) , (void *)&dst.cols)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset )); + std::vector > args; - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); - size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options.c_str()); + size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 }; + openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); } - void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn) + static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn) { Size sz = src.size(); - int 
scn = src.oclchannels(), depth = src.depth(), bidx; + int scn = src.channels(), depth = src.depth(), bidx; CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F); switch (code) { - /* - case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR: - case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA: - case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555: - case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555: - case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB: - case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA: - */ - case COLOR_BGR2GRAY: - case COLOR_BGRA2GRAY: - case COLOR_RGB2GRAY: - case COLOR_RGBA2GRAY: - case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: - case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: ++ case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR: ++ case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA: + { + CV_Assert(scn == 3 || scn == 4); - dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3; - bool reverse = !(code == CV_BGR2BGRA || code == CV_BGRA2BGR); ++ dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 
4 : 3; ++ bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR); + dst.create(sz, CV_MAKE_TYPE(depth, dcn)); + RGB_caller(src, dst, reverse); + break; + } - case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: - case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555: ++ case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555: ++ case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555: + { + CV_Assert((scn == 3 || scn == 4) && depth == CV_8U ); - bidx = code == CV_BGR2BGR565 || code == CV_BGR2BGR555 || - code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2; - int greenbits = code == CV_BGR2BGR565 || code == CV_RGB2BGR565 || - code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5; ++ bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 || ++ code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2; ++ int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 || ++ code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5; + dst.create(sz, CV_8UC2); + toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5"); + break; + } - case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: - case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: ++ case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB: ++ case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA: + { - dcn = code == CV_BGR5652BGRA || code == CV_BGR5552BGRA || code == CV_BGR5652RGBA || code == CV_BGR5552RGBA ? 4 : 3; ++ dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 
4 : 3; + CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U); - bidx = code == CV_BGR5652BGR || code == CV_BGR5552BGR || - code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2; - int greenbits = code == CV_BGR5652BGR || code == CV_BGR5652RGB || - code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5; ++ bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR || ++ code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2; ++ int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB || ++ code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5; + dst.create(sz, CV_MAKETYPE(depth, dcn)); + fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB"); + break; + } - case CV_BGR5652GRAY: case CV_BGR5552GRAY: ++ case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY: + { + CV_Assert(scn == 2 && depth == CV_8U); + dst.create(sz, CV_8UC1); - int greenbits = code == CV_BGR5652GRAY ? 6 : 5; ++ int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5; + fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray"); + break; + } - case CV_GRAY2BGR565: case CV_GRAY2BGR555: ++ case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555: + { + CV_Assert(scn == 1 && depth == CV_8U); + dst.create(sz, CV_8UC2); - int greenbits = code == CV_GRAY2BGR565 ? 6 : 5; ++ int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5; + toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5"); + break; + } - case CV_RGB2GRAY: case CV_BGR2GRAY: case CV_RGBA2GRAY: case CV_BGRA2GRAY: ++ case COLOR_RGB2GRAY: case COLOR_BGR2GRAY: case COLOR_RGBA2GRAY: case COLOR_BGRA2GRAY: { CV_Assert(scn == 3 || scn == 4); - bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; + bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 
0 : 2; dst.create(sz, CV_MAKETYPE(depth, 1)); - RGB2Gray_caller(src, dst, bidx); + fromRGB_caller(src, dst, bidx, "RGB2Gray"); break; } - case COLOR_GRAY2BGR: - case COLOR_GRAY2BGRA: - case CV_GRAY2BGR: case CV_GRAY2BGRA: ++ case COLOR_GRAY2BGR: case COLOR_GRAY2BGRA: { CV_Assert(scn == 1); - dcn = code == CV_GRAY2BGRA ? 4 : 3; + dcn = code == COLOR_GRAY2BGRA ? 4 : 3; dst.create(sz, CV_MAKETYPE(depth, dcn)); - Gray2RGB_caller(src, dst); + toRGB_caller(src, dst, 0, "Gray2RGB"); break; } - case COLOR_BGR2YUV: - case COLOR_RGB2YUV: - case CV_BGR2YUV: case CV_RGB2YUV: ++ case COLOR_BGR2YUV: case COLOR_RGB2YUV: { CV_Assert(scn == 3 || scn == 4); - bidx = code == COLOR_RGB2YUV ? 0 : 2; - bidx = code == CV_BGR2YUV ? 0 : 2; ++ bidx = code == COLOR_BGR2YUV ? 0 : 2; dst.create(sz, CV_MAKETYPE(depth, 3)); - RGB2YUV_caller(src, dst, bidx); + fromRGB_caller(src, dst, bidx, "RGB2YUV"); break; } - case COLOR_YUV2BGR: - case COLOR_YUV2RGB: - case CV_YUV2BGR: case CV_YUV2RGB: ++ case COLOR_YUV2BGR: case COLOR_YUV2RGB: { - CV_Assert(scn == 3 || scn == 4); - bidx = code == COLOR_YUV2RGB ? 0 : 2; - dst.create(sz, CV_MAKETYPE(depth, 3)); - YUV2RGB_caller(src, dst, bidx); + if( dcn <= 0 ) + dcn = 3; + CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); - bidx = code == CV_YUV2BGR ? 0 : 2; ++ bidx = code == COLOR_YUV2BGR ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, dcn)); + toRGB_caller(src, dst, bidx, "YUV2RGB"); break; } - case COLOR_YUV2RGB_NV12: - case COLOR_YUV2BGR_NV12: - case COLOR_YUV2RGBA_NV12: - case COLOR_YUV2BGRA_NV12: - case CV_YUV2RGB_NV12: case CV_YUV2BGR_NV12: - case CV_YUV2RGBA_NV12: case CV_YUV2BGRA_NV12: ++ case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: ++ case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: { CV_Assert(scn == 1); CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); - dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3; - dcn = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 
4 : 3; - bidx = code == CV_YUV2BGRA_NV12 || code == CV_YUV2BGR_NV12 ? 0 : 2; ++ dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3; + bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2; Size dstSz(sz.width, sz.height * 2 / 3); dst.create(dstSz, CV_MAKETYPE(depth, dcn)); - YUV2RGB_NV12_caller(src, dst, bidx); + toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12"); break; } - case COLOR_BGR2YCrCb: - case COLOR_RGB2YCrCb: - case CV_BGR2YCrCb: case CV_RGB2YCrCb: ++ case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: { CV_Assert(scn == 3 || scn == 4); - bidx = code == CV_BGR2YCrCb ? 0 : 2; + bidx = code == COLOR_BGR2YCrCb ? 0 : 2; dst.create(sz, CV_MAKETYPE(depth, 3)); - RGB2YCrCb_caller(src, dst, bidx); + fromRGB_caller(src, dst, bidx, "RGB2YCrCb"); break; } - case COLOR_YCrCb2BGR: - case COLOR_YCrCb2RGB: - case CV_YCrCb2BGR: case CV_YCrCb2RGB: ++ case COLOR_YCrCb2BGR: case COLOR_YCrCb2RGB: { + if( dcn <= 0 ) + dcn = 3; + CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); - bidx = code == CV_YCrCb2BGR ? 0 : 2; ++ bidx = code == COLOR_YCrCb2BGR ? 0 : 2; + dst.create(sz, CV_MAKETYPE(depth, dcn)); + toRGB_caller(src, dst, bidx, "YCrCb2RGB"); break; } - /* - case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY: - case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555: - case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: - case CV_BGR2XYZ: case CV_RGB2XYZ: + case COLOR_BGR2XYZ: case COLOR_RGB2XYZ: + { + CV_Assert(scn == 3 || scn == 4); - bidx = code == CV_BGR2XYZ ? 0 : 2; ++ bidx = code == COLOR_BGR2XYZ ? 
0 : 2; + dst.create(sz, CV_MAKE_TYPE(depth, 3)); + + Mat c; + if (depth == CV_32F) + { + float coeffs[] = + { + 0.412453f, 0.357580f, 0.180423f, + 0.212671f, 0.715160f, 0.072169f, + 0.019334f, 0.119193f, 0.950227f + }; + if (bidx == 0) + { + std::swap(coeffs[0], coeffs[2]); + std::swap(coeffs[3], coeffs[5]); + std::swap(coeffs[6], coeffs[8]); + } + Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c); + } + else + { + int coeffs[] = + { + 1689, 1465, 739, + 871, 2929, 296, + 79, 488, 3892 + }; + if (bidx == 0) + { + std::swap(coeffs[0], coeffs[2]); + std::swap(coeffs[3], coeffs[5]); + std::swap(coeffs[6], coeffs[8]); + } + Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c); + } + oclMat oclCoeffs(c); + + fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs); + break; + } - case CV_XYZ2BGR: case CV_XYZ2RGB: + case COLOR_XYZ2BGR: case COLOR_XYZ2RGB: + { + if (dcn <= 0) + dcn = 3; + CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); - bidx = code == CV_XYZ2BGR ? 0 : 2; ++ bidx = code == COLOR_XYZ2BGR ? 0 : 2; + dst.create(sz, CV_MAKE_TYPE(depth, dcn)); + + Mat c; + if (depth == CV_32F) + { + float coeffs[] = + { + 3.240479f, -1.53715f, -0.498535f, + -0.969256f, 1.875991f, 0.041556f, + 0.055648f, -0.204043f, 1.057311f + }; + if (bidx == 0) + { + std::swap(coeffs[0], coeffs[6]); + std::swap(coeffs[1], coeffs[7]); + std::swap(coeffs[2], coeffs[8]); + } + Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c); + } + else + { + int coeffs[] = + { + 13273, -6296, -2042, + -3970, 7684, 170, + 228, -836, 4331 + }; + if (bidx == 0) + { + std::swap(coeffs[0], coeffs[6]); + std::swap(coeffs[1], coeffs[7]); + std::swap(coeffs[2], coeffs[8]); + } + Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c); + } + oclMat oclCoeffs(c); + + toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs); + break; + } - case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: - case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: + case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: 
case COLOR_RGB2HSV_FULL: + case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL: + { + CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F)); - bidx = code == CV_BGR2HSV || code == CV_BGR2HLS || - code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2; - int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV || - code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256; - bool is_hsv = code == CV_BGR2HSV || code == CV_RGB2HSV || code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL; ++ bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS || ++ code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2; ++ int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || ++ code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256; ++ bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL; + dst.create(sz, CV_MAKETYPE(depth, 3)); + std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS"); + + if (is_hsv && depth == CV_8U) + { + static oclMat sdiv_data; + static oclMat hdiv_data180; + static oclMat hdiv_data256; + static int sdiv_table[256]; + static int hdiv_table180[256]; + static int hdiv_table256[256]; + static volatile bool initialized180 = false, initialized256 = false; + volatile bool & initialized = hrange == 180 ? initialized180 : initialized256; + + if (!initialized) + { + int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12; + oclMat & hdiv_data = hrange == 180 ? 
hdiv_data180 : hdiv_data256; + + sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; + + int v = 255 << hsv_shift; + if (!initialized180 && !initialized256) + { + for(int i = 1; i < 256; i++ ) + sdiv_table[i] = saturate_cast(v/(1.*i)); + sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table)); + } + + v = hrange << hsv_shift; + for (int i = 1; i < 256; i++ ) + hdiv_table[i] = saturate_cast(v/(6.*i)); + + hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table)); + initialized = true; + } + + fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180); + return; + } + + fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f))); + break; + } - case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: - case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: + case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL: + case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL: - */ + { + if (dcn <= 0) + dcn = 3; + CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F)); - bidx = code == CV_HSV2BGR || code == CV_HLS2BGR || - code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2; - int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB || - code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255; - bool is_hsv = code == CV_HSV2BGR || code == CV_HSV2RGB || - code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL; ++ bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR || ++ code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2; ++ int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB || ++ code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 
180 : 255; ++ bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB || ++ code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL; + + dst.create(sz, CV_MAKETYPE(depth, dcn)); + + std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB"; + toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange)); + break; + } - case CV_RGBA2mRGBA: case CV_mRGBA2RGBA: ++ case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA: + { + CV_Assert(scn == 4 && depth == CV_8U); + dst.create(sz, CV_MAKETYPE(depth, 4)); - std::string kernelName = code == CV_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA"; ++ std::string kernelName = code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA"; + + fromRGB_caller(src, dst, 0, kernelName); + break; + } default: - CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); + CV_Error(Error::StsBadFlag, "Unknown/unsupported color conversion code" ); } } - } void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn) { diff --cc modules/ocl/src/haar.cpp index fd67daf,deff867..c10d089 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@@ -996,10 -1011,15 +1011,15 @@@ void OclCascadeClassifier::detectMultiS int n_factors = 0; oclMat gsum; oclMat gsqsum; - cv::ocl::integral(gimg, gsum, gsqsum); + oclMat gsqsum_t; + cv::ocl::integral(gimg, gsum, gsqsum_t); + if(gsqsum_t.depth() == CV_64F) + gsqsum_t.convertTo(gsqsum, CV_32FC1); + else + gsqsum = gsqsum_t; CvSize sz; - vector sizev; - vector scalev; + std::vector sizev; + std::vector scalev; gpuSetHaarClassifierCascade(cascade); gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade; stage = (GpuHidHaarStageClassifier *)(gcascade + 1); diff --cc modules/ocl/src/imgproc.cpp index 1861c98,141325b..549e9c7 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@@ -217,27 -214,27 +215,27 @@@ namespace c const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", 
"BORDER_REFLECT_101", "BORDER_TRANSPARENT" }; - string kernelName = "remap"; + String kernelName = "remap"; if (map1.type() == CV_32FC2 && map2.empty()) - kernelName += "_32FC2"; + kernelName = kernelName + "_32FC2"; else if (map1.type() == CV_16SC2) { - kernelName += "_16SC2"; + kernelName = kernelName + "_16SC2"; if (!map2.empty()) - kernelName += "_16UC1"; + kernelName = kernelName + "_16UC1"; } else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) - kernelName += "_2_32FC1"; + kernelName = kernelName + "_2_32FC1"; else - CV_Error(CV_StsBadArg, "Unsupported map types"); + CV_Error(Error::StsBadArg, "Unsupported map types"); int ocn = dst.oclchannels(); - size_t localThreads[3] = { 16, 16, 1}; - size_t globalThreads[3] = { dst.cols, dst.rows, 1}; + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue); - std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation], - borderMap[borderType], typeMap[src.depth()], channelMap[ocn]); + String buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation], + borderMap[borderType], typeMap[src.depth()], channelMap[ocn]); if (interpolation != INTER_NEAREST) { @@@ -312,57 -304,30 +306,30 @@@ glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; } else - glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; + glbSizeX = dst.cols; - size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? 
dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; - size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; - size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; + size_t globalThreads[3] = { glbSizeX, dst.rows, 1 }; + size_t localThreads[3] = { blkSizeX, blkSizeY, 1 }; std::vector< std::pair > args; - if (interpolation == INTER_NEAREST) - { - args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows)); - if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) - { - args.push_back( std::make_pair(sizeof(cl_double), (void *)&ifx_d)); - args.push_back( std::make_pair(sizeof(cl_double), (void *)&ify_d)); - } - else - { - args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify)); - } - } - else - { - args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( 
std::make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify)); - } - args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); ++ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data)); ++ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_offset)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_offset)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_step)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_step)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols)); ++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows)); ++ args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx)); ++ args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify)); - openCLExecuteKernel(clCxt, 
&imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); + openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, + ocn, depth, buildOption.c_str()); } - void resize(const oclMat &src, oclMat &dst, Size dsize, - double fx, double fy, int interpolation) + void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation) { CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4); @@@ -807,42 -781,41 +783,41 @@@ t_sum.create(src.cols, src.rows, type); sum.create(h, w, type); - t_sqsum.create(src.cols, src.rows, CV_32FC1); - sqsum.create(h, w, CV_32FC1); - - int sum_offset = sum.offset / vlen; - int sqsum_offset = sqsum.offset / vlen; + int sum_offset = sum.offset / sum.elemSize(); + int sqsum_offset = sqsum.offset / sqsum.elemSize(); - vector > args; - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sqsum.step)); + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset )); + args.push_back( 
std::make_pair( sizeof(cl_int) , (void *)&pre_invalid )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step)); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sqsum.step)); size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); + openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, sdepth, build_option); args.clear(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sqsum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols 
)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step )); ++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sqsum.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); + openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, sdepth, build_option); } - void integral(const oclMat &src, oclMat &sum) + void integral(const oclMat &src, oclMat &sum, int sdepth) { CV_Assert(src.type() == CV_8UC1); int vlen = 4; @@@ -859,28 -835,28 +837,28 @@@ sum.create(h, w, type); int sum_offset = sum.offset / vlen; - vector > args; - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols 
)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step)); size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); + openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, sdepth); args.clear(); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset)); size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); + openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, sdepth); } /////////////////////// corner ////////////////////////////// diff --cc modules/ocl/src/split_merge.cpp index 073a7a7,400a055..0bd5eb7 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@@ -249,8 -250,9 +249,9 @@@ namespace c && devInfo.deviceType == 
CVCL_DEVICE_TYPE_CPU && devInfo.platform->platformVendor.find("Intel") != std::string::npos && (devInfo.deviceVersion.find("Build 56860") != std::string::npos - || devInfo.deviceVersion.find("Build 76921") != std::string::npos)) + || devInfo.deviceVersion.find("Build 76921") != std::string::npos + || devInfo.deviceVersion.find("Build 78712") != std::string::npos)) - build_options += " -D BYPASS_VSTORE=true"; + build_options = build_options + " -D BYPASS_VSTORE=true"; size_t globalThreads[3] = { divUp(src.cols, VEC_SIZE), src.rows, 1 }; openCLExecuteKernel(clCtx, &split_mat, kernelName, globalThreads, NULL, args, -1, -1, build_options.c_str()); diff --cc modules/ocl/test/test_color.cpp index cc7843d,7c4fe39..c4641d4 --- a/modules/ocl/test/test_color.cpp +++ b/modules/ocl/test/test_color.cpp @@@ -44,14 -44,10 +44,13 @@@ //M*/ #include "test_precomp.hpp" + +using namespace cv; + #ifdef HAVE_OPENCL - namespace - { using namespace testing; + using namespace cv; /////////////////////////////////////////////////////////////////////////////////////////////////////// // cvtColor @@@ -203,26 -288,17 +291,17 @@@ struct CvtColor_YUV420 } }; - OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) - { - doTest(1, 4, COLOR_YUV2RGBA_NV12); - }; - - OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) - { - doTest(1, 4, COLOR_YUV2BGRA_NV12); - }; -OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { doTest(1, 4, CV_YUV2RGBA_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { doTest(1, 4, CV_YUV2BGRA_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { doTest(1, 3, CV_YUV2RGB_NV12); } -OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, CV_YUV2BGR_NV12); } ++OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { doTest(1, 4, COLOR_YUV2RGBA_NV12); } ++OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { doTest(1, 4, COLOR_YUV2BGRA_NV12); } ++OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { doTest(1, 3, COLOR_YUV2RGB_NV12); } ++OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, COLOR_YUV2BGR_NV12); } - 
OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) - { - doTest(1, 3, COLOR_YUV2RGB_NV12); - }; - OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) - { - doTest(1, 3, COLOR_YUV2BGR_NV12); - }; + INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u, + testing::Combine(testing::Values(MatDepth(CV_8U)), Bool())); + INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u32f, + testing::Combine(testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), Bool())); INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor, testing::Combine( diff --cc samples/cpp/Qt_sample/main.cpp index 16345d5,8794539..92bc2b5 --- a/samples/cpp/Qt_sample/main.cpp +++ b/samples/cpp/Qt_sample/main.cpp @@@ -4,15 -4,7 +4,11 @@@ #include #include - -#include +#include - #include - #include +#include - +#include +#include - #include ++#include #if defined WIN32 || defined _WIN32 || defined WINCE #include @@@ -116,19 -110,19 +112,16 @@@ static void initPOSIT(std::vectorpush_back(cvPoint3D32f(0.0f, CUBE_SIZE, 0.0f)); } - static void foundCorners(vector *srcImagePoints,IplImage* source, IplImage* grayImage) + static void foundCorners(vector *srcImagePoints, const Mat& source, Mat& grayImage) { - cvCvtColor(source,grayImage,CV_RGB2GRAY); - cvSmooth( grayImage, grayImage,CV_GAUSSIAN,11); - cvNormalize(grayImage, grayImage, 0, 255, CV_MINMAX); - cvThreshold( grayImage, grayImage, 26, 255, CV_THRESH_BINARY_INV);//25 - - Mat MgrayImage = cv::cvarrToMat(grayImage); - //For debug - //MgrayImage = MgrayImage.clone();//deep copy + cvtColor(source, grayImage, COLOR_RGB2GRAY); + GaussianBlur(grayImage, grayImage, Size(11,11), 0, 0); + normalize(grayImage, grayImage, 0, 255, NORM_MINMAX); + threshold(grayImage, grayImage, 26, 255, THRESH_BINARY_INV); //25 + - Mat MgrayImage = grayImage; - //For debug - //MgrayImage = MgrayImage.clone();//deep copy vector > contours; vector hierarchy; - findContours(MgrayImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE); - findContours(MgrayImage, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE); ++ 
findContours(grayImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE); Point p; vector srcImagePoints_temp(4,cvPoint2D32f(0,0)); @@@ -199,7 -193,7 +192,7 @@@ ss.str(""); //new coordinate system in the middle of the frame and reversed (camera coordinate system) -- srcImagePoints->at(i) = cvPoint2D32f(srcImagePoints_temp.at(i).x-source->width/2,source->height/2-srcImagePoints_temp.at(i).y); ++ srcImagePoints->at(i) = cvPoint2D32f(srcImagePoints_temp.at(i).x-source.cols/2,source.rows/2-srcImagePoints_temp.at(i).y); } } @@@ -232,15 -226,14 +225,14 @@@ int main(void VideoCapture video("cube4.avi"); CV_Assert(video.isOpened()); - Mat frame; video >> frame; + Mat source, grayImage; - IplImage* grayImage = cvCreateImage(frame.size(),8,1); + video >> source; - namedWindow("original", WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO); - namedWindow("POSIT", WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO); + namedWindow("original", WINDOW_AUTOSIZE | WINDOW_FREERATIO); + namedWindow("POSIT", WINDOW_AUTOSIZE | WINDOW_FREERATIO); displayOverlay("POSIT", "We lost the 4 corners' detection quite often (the red circles disappear). 
This demo is only to illustrate how to use OpenGL callback.\n -- Press ESC to exit.", 10000); - //For debug - //cvNamedWindow("tempGray",CV_WINDOW_AUTOSIZE); + float OpenGLMatrix[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; setOpenGlDrawCallback("POSIT",on_opengl,OpenGLMatrix); @@@ -259,20 -252,17 +251,17 @@@ while(waitKey(33) != 27) { - video >> frame; - imshow("original", frame); + video >> source; + imshow("original",source); - IplImage source = frame; - foundCorners(&srcImagePoints, &source, grayImage); - foundCorners(&srcImagePoints,source,grayImage); ++ foundCorners(&srcImagePoints, source, grayImage); cvPOSIT( positObject, &srcImagePoints[0], FOCAL_LENGTH, criteria, rotation_matrix, translation_vector ); createOpenGLMatrixFrom(OpenGLMatrix,rotation_matrix,translation_vector); - imshow("POSIT", frame); - //For debug - //cvShowImage("tempGray",grayImage); + imshow("POSIT",source); - if (VideoCapture::get(video,CV_CAP_PROP_POS_AVI_RATIO)>0.99) - VideoCapture::get(video,CV_CAP_PROP_POS_AVI_RATIO,0); + if (video.get(CAP_PROP_POS_AVI_RATIO) > 0.99) + video.set(CAP_PROP_POS_AVI_RATIO, 0); } destroyAllWindows(); diff --cc samples/cpp/segment_objects.cpp index d44b035,6438b89..852fa15 --- a/samples/cpp/segment_objects.cpp +++ b/samples/cpp/segment_objects.cpp @@@ -95,9 -94,7 +95,7 @@@ int main(int argc, char** argv cap >> tmp_frame; if( !tmp_frame.data ) break; - bgsubtractor(tmp_frame, bgmask, update_bg_model ? -1 : 0); + bgsubtractor->apply(tmp_frame, bgmask, update_bg_model ? 
-1 : 0); - //CvMat _bgmask = bgmask; - //cvSegmentFGMask(&_bgmask); refineSegments(tmp_frame, bgmask, out_frame); imshow("video", tmp_frame); imshow("segmented", out_frame); diff --cc samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp index 395b4b3,e38003e..499eb45 --- a/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp @@@ -59,10 -59,10 +59,10 @@@ void thresh_callback(int, void* threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); /// Find contours - findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); + findContours( threshold_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) ); /// Find the convex hull object for each contour - vector >hull( contours.size() ); + vector >hull( contours.size() ); for( size_t i = 0; i < contours.size(); i++ ) { convexHull( Mat(contours[i]), hull[i], false ); } diff --cc samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp index b5470ef,240a2e1..a2597a7 --- a/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp +++ b/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp @@@ -76,11 -76,11 +76,11 @@@ int main(int argc, char *argv[] // Windows namedWindow(WIN_RF, WINDOW_AUTOSIZE ); namedWindow(WIN_UT, WINDOW_AUTOSIZE ); - moveWindow(WIN_RF, 400 , 0); //750, 2 (bernat =0) - moveWindow(WIN_UT, refS.width, 0); //1500, 2 + moveWindow(WIN_RF, 400 , 0); //750, 2 (bernat =0) + moveWindow(WIN_UT, refS.width, 0); //1500, 2 cout << "Frame resolution: Width=" << refS.width << " Height=" << refS.height - << " of nr#: " << captRefrnc.get(CV_CAP_PROP_FRAME_COUNT) << endl; + << " of nr#: " << captRefrnc.get(CAP_PROP_FRAME_COUNT) << endl; cout << "PSNR trigger value " << setiosflags(ios::fixed) << setprecision(3) << psnrTriggerValue << endl; diff --cc samples/cpp/video_dmtx.cpp index 88dc28a,01eadd4..e410f7c --- 
a/samples/cpp/video_dmtx.cpp +++ b/samples/cpp/video_dmtx.cpp @@@ -52,8 -52,8 +52,8 @@@ namespac if (frame.empty()) break; cv::Mat gray; - cv::cvtColor(frame,gray, COLOR_RGB2GRAY); + cv::cvtColor(frame,gray,COLOR_RGB2GRAY); - vector codes; + vector codes; Mat corners; findDataMatrix(gray, codes, corners); drawDataMatrixCodes(frame, codes, corners);